diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,120419 +1,5619 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 0.6949017643232586, + "epoch": 0.016160506147052525, "eval_steps": 500, - "global_step": 172000, + "global_step": 8000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { - "epoch": 4.040126536763131e-05, - "grad_norm": 37380620.0, - "learning_rate": 2e-08, - "loss": 1265640.2, + "epoch": 2.0200632683815657e-05, + "grad_norm": 316.38043212890625, + "learning_rate": 2e-09, + "loss": 26.2272, "step": 10 }, { - "epoch": 8.080253073526263e-05, - "grad_norm": 9634068.0, - "learning_rate": 4e-08, - "loss": 1485744.3, + "epoch": 4.040126536763131e-05, + "grad_norm": 576.4369506835938, + "learning_rate": 4e-09, + "loss": 32.9983, "step": 20 }, { - "epoch": 0.00012120379610289395, - "grad_norm": 14280229.0, - "learning_rate": 6e-08, - "loss": 1357241.2, + "epoch": 6.060189805144697e-05, + "grad_norm": 472.15478515625, + "learning_rate": 6e-09, + "loss": 23.2158, "step": 30 }, { - "epoch": 0.00016160506147052525, - "grad_norm": 47433768.0, - "learning_rate": 8e-08, - "loss": 1486355.4, + "epoch": 8.080253073526263e-05, + "grad_norm": 20.11992835998535, + "learning_rate": 8e-09, + "loss": 19.9305, "step": 40 }, { - "epoch": 0.00020200632683815657, - "grad_norm": 36763048.0, - "learning_rate": 1.0000000000000001e-07, - "loss": 1302607.4, + "epoch": 0.00010100316341907829, + "grad_norm": 766.17529296875, + "learning_rate": 1e-08, + "loss": 35.6484, "step": 50 }, { - "epoch": 0.0002424075922057879, - "grad_norm": 58413324.0, - "learning_rate": 1.2e-07, - "loss": 1164601.1, + "epoch": 0.00012120379610289395, + "grad_norm": 649.330810546875, + "learning_rate": 1.2e-08, + "loss": 23.5938, "step": 60 }, { - "epoch": 0.0002828088575734192, - "grad_norm": 20137730.0, - "learning_rate": 1.4e-07, - "loss": 1635228.2, + "epoch": 0.0001414044287867096, + "grad_norm": 638.9619750976562, + "learning_rate": 1.4000000000000001e-08, + "loss": 20.7426, "step": 70 }, { - "epoch": 0.0003232101229410505, - "grad_norm": 110655184.0, - "learning_rate": 1.6e-07, - "loss": 1525543.8, + "epoch": 0.00016160506147052525, + "grad_norm": 703.6967163085938, + "learning_rate": 1.6e-08, + "loss": 33.9524, "step": 80 }, { - "epoch": 0.0003636113883086818, - "grad_norm": 14134144.0, - "learning_rate": 1.8e-07, - "loss": 1310257.0, + "epoch": 0.0001818056941543409, + "grad_norm": 785.22998046875, + "learning_rate": 1.8000000000000002e-08, + "loss": 29.1029, "step": 90 }, { - "epoch": 0.00040401265367631315, - "grad_norm": 19299640.0, - "learning_rate": 2.0000000000000002e-07, - "loss": 851063.5, + "epoch": 0.00020200632683815657, + "grad_norm": 676.3937377929688, + "learning_rate": 2e-08, + "loss": 26.1458, "step": 100 }, { - "epoch": 0.00044441391904394446, - "grad_norm": 17030668.0, - "learning_rate": 2.2e-07, - "loss": 1405910.8, + "epoch": 0.00022220695952197223, + "grad_norm": 246.762451171875, + "learning_rate": 2.2000000000000002e-08, + "loss": 25.4453, "step": 110 }, { - "epoch": 0.0004848151844115758, - "grad_norm": 5774159.0, - "learning_rate": 2.4e-07, - "loss": 1021530.2, + "epoch": 0.0002424075922057879, + "grad_norm": 1033.1109619140625, + "learning_rate": 2.4e-08, + "loss": 38.0849, "step": 120 }, { - "epoch": 0.000525216449779207, - "grad_norm": 39249404.0, - "learning_rate": 2.6e-07, - "loss": 1118213.8, + "epoch": 0.0002626082248896035, + "grad_norm": 724.1307983398438, + "learning_rate": 2.6e-08, + "loss": 42.023, "step": 130 }, { - "epoch": 0.0005656177151468384, - "grad_norm": 3952896.25, - "learning_rate": 2.8e-07, - "loss": 802054.8, + "epoch": 0.0002828088575734192, + "grad_norm": 493.44427490234375, + "learning_rate": 2.8000000000000003e-08, + "loss": 27.2123, "step": 140 }, { - "epoch": 0.0006060189805144697, - "grad_norm": 17492202.0, - "learning_rate": 3.0000000000000004e-07, - "loss": 782337.9, + "epoch": 0.00030300949025723485, + "grad_norm": 665.4328002929688, + "learning_rate": 3.0000000000000004e-08, + "loss": 34.0098, "step": 150 }, { - "epoch": 0.000646420245882101, - "grad_norm": 14565722.0, - "learning_rate": 3.2e-07, - "loss": 762118.95, + "epoch": 0.0003232101229410505, + "grad_norm": 844.9791259765625, + "learning_rate": 3.2e-08, + "loss": 34.3665, "step": 160 }, { - "epoch": 0.0006868215112497323, - "grad_norm": 12661834.0, - "learning_rate": 3.4e-07, - "loss": 617497.6, + "epoch": 0.00034341075562486617, + "grad_norm": 32.71566390991211, + "learning_rate": 3.4e-08, + "loss": 33.4008, "step": 170 }, { - "epoch": 0.0007272227766173637, - "grad_norm": 4945727.5, - "learning_rate": 3.6e-07, - "loss": 360572.95, + "epoch": 0.0003636113883086818, + "grad_norm": 1014.241455078125, + "learning_rate": 3.6000000000000005e-08, + "loss": 37.7545, "step": 180 }, { - "epoch": 0.000767624041984995, - "grad_norm": 3134309.5, - "learning_rate": 3.8e-07, - "loss": 247792.15, + "epoch": 0.0003838120209924975, + "grad_norm": 883.66259765625, + "learning_rate": 3.8e-08, + "loss": 27.2009, "step": 190 }, { - "epoch": 0.0008080253073526263, - "grad_norm": 23188752.0, - "learning_rate": 4.0000000000000003e-07, - "loss": 302759.75, + "epoch": 0.00040401265367631315, + "grad_norm": 984.9612426757812, + "learning_rate": 4e-08, + "loss": 16.6661, "step": 200 }, { - "epoch": 0.0008484265727202576, - "grad_norm": 2640093.75, - "learning_rate": 4.2e-07, - "loss": 177911.875, + "epoch": 0.0004242132863601288, + "grad_norm": 553.194091796875, + "learning_rate": 4.2e-08, + "loss": 36.3195, "step": 210 }, { - "epoch": 0.0008888278380878889, - "grad_norm": 3475973.0, - "learning_rate": 4.4e-07, - "loss": 124657.5875, + "epoch": 0.00044441391904394446, + "grad_norm": 313.7496032714844, + "learning_rate": 4.4000000000000004e-08, + "loss": 16.8871, "step": 220 }, { - "epoch": 0.0009292291034555202, - "grad_norm": 7612417.5, - "learning_rate": 4.6e-07, - "loss": 133348.3, + "epoch": 0.0004646145517277601, + "grad_norm": 525.6804809570312, + "learning_rate": 4.6e-08, + "loss": 17.3601, "step": 230 }, { - "epoch": 0.0009696303688231516, - "grad_norm": 3544448.5, - "learning_rate": 4.8e-07, - "loss": 108178.025, + "epoch": 0.0004848151844115758, + "grad_norm": 206.4374237060547, + "learning_rate": 4.8e-08, + "loss": 34.4888, "step": 240 }, { - "epoch": 0.0010100316341907828, - "grad_norm": 1792657.375, - "learning_rate": 5.000000000000001e-07, - "loss": 147530.7625, + "epoch": 0.0005050158170953914, + "grad_norm": 665.89453125, + "learning_rate": 5.0000000000000004e-08, + "loss": 40.5891, "step": 250 }, { - "epoch": 0.001050432899558414, - "grad_norm": 807344.25, - "learning_rate": 5.2e-07, - "loss": 46811.725, + "epoch": 0.000525216449779207, + "grad_norm": 1490.542724609375, + "learning_rate": 5.2e-08, + "loss": 26.4835, "step": 260 }, { - "epoch": 0.0010908341649260454, - "grad_norm": 9978717.0, - "learning_rate": 5.4e-07, - "loss": 46902.9469, + "epoch": 0.0005454170824630227, + "grad_norm": 157.42401123046875, + "learning_rate": 5.400000000000001e-08, + "loss": 10.1466, "step": 270 }, { - "epoch": 0.0011312354302936767, - "grad_norm": 1081468.125, - "learning_rate": 5.6e-07, - "loss": 61798.675, + "epoch": 0.0005656177151468384, + "grad_norm": 896.1552124023438, + "learning_rate": 5.6000000000000005e-08, + "loss": 46.9781, "step": 280 }, { - "epoch": 0.001171636695661308, - "grad_norm": 836054.125, - "learning_rate": 5.8e-07, - "loss": 18199.025, + "epoch": 0.000585818347830654, + "grad_norm": 1364.9993896484375, + "learning_rate": 5.8e-08, + "loss": 40.9419, "step": 290 }, { - "epoch": 0.0012120379610289394, - "grad_norm": 713206.125, - "learning_rate": 6.000000000000001e-07, - "loss": 16179.9375, + "epoch": 0.0006060189805144697, + "grad_norm": 1216.1474609375, + "learning_rate": 6.000000000000001e-08, + "loss": 30.2287, "step": 300 }, { - "epoch": 0.0012524392263965707, - "grad_norm": 736075.8125, - "learning_rate": 6.2e-07, - "loss": 18567.8047, + "epoch": 0.0006262196131982854, + "grad_norm": 328.39678955078125, + "learning_rate": 6.2e-08, + "loss": 38.7195, "step": 310 }, { - "epoch": 0.001292840491764202, - "grad_norm": 524556.5, - "learning_rate": 6.4e-07, - "loss": 18508.9297, + "epoch": 0.000646420245882101, + "grad_norm": 433.4118957519531, + "learning_rate": 6.4e-08, + "loss": 22.1306, "step": 320 }, { - "epoch": 0.0013332417571318333, - "grad_norm": 74135.0703125, - "learning_rate": 6.6e-07, - "loss": 7819.6453, + "epoch": 0.0006666208785659167, + "grad_norm": 420.328857421875, + "learning_rate": 6.600000000000001e-08, + "loss": 27.7195, "step": 330 }, { - "epoch": 0.0013736430224994647, - "grad_norm": 94151.4921875, - "learning_rate": 6.8e-07, - "loss": 6291.5781, + "epoch": 0.0006868215112497323, + "grad_norm": 185.5102081298828, + "learning_rate": 6.8e-08, + "loss": 37.9056, "step": 340 }, { - "epoch": 0.001414044287867096, - "grad_norm": 17262.64453125, - "learning_rate": 7.000000000000001e-07, - "loss": 2254.5939, + "epoch": 0.000707022143933548, + "grad_norm": 337.02044677734375, + "learning_rate": 7e-08, + "loss": 22.7152, "step": 350 }, { - "epoch": 0.0014544455532347273, - "grad_norm": 6977.82470703125, - "learning_rate": 7.2e-07, - "loss": 806.7898, + "epoch": 0.0007272227766173637, + "grad_norm": 670.1768188476562, + "learning_rate": 7.200000000000001e-08, + "loss": 28.3785, "step": 360 }, { - "epoch": 0.0014948468186023586, - "grad_norm": 720.22705078125, - "learning_rate": 7.400000000000001e-07, - "loss": 1413.3216, + "epoch": 0.0007474234093011793, + "grad_norm": 475.4079895019531, + "learning_rate": 7.400000000000001e-08, + "loss": 22.1121, "step": 370 }, { - "epoch": 0.00153524808396999, - "grad_norm": 12234.6259765625, - "learning_rate": 7.6e-07, - "loss": 569.8974, + "epoch": 0.000767624041984995, + "grad_norm": 607.6908569335938, + "learning_rate": 7.6e-08, + "loss": 31.8589, "step": 380 }, { - "epoch": 0.0015756493493376213, - "grad_norm": 12985.419921875, - "learning_rate": 7.8e-07, - "loss": 645.2221, + "epoch": 0.0007878246746688106, + "grad_norm": 800.4915161132812, + "learning_rate": 7.8e-08, + "loss": 21.862, "step": 390 }, { - "epoch": 0.0016160506147052526, - "grad_norm": 17890.517578125, - "learning_rate": 8.000000000000001e-07, - "loss": 556.4382, + "epoch": 0.0008080253073526263, + "grad_norm": 1185.45849609375, + "learning_rate": 8e-08, + "loss": 35.8806, "step": 400 }, { - "epoch": 0.001656451880072884, - "grad_norm": 1073.23046875, - "learning_rate": 8.200000000000001e-07, - "loss": 579.9454, + "epoch": 0.000828225940036442, + "grad_norm": 452.5521240234375, + "learning_rate": 8.200000000000002e-08, + "loss": 17.0217, "step": 410 }, { - "epoch": 0.0016968531454405152, - "grad_norm": 946.015380859375, - "learning_rate": 8.4e-07, - "loss": 339.6989, + "epoch": 0.0008484265727202576, + "grad_norm": 125.98954010009766, + "learning_rate": 8.4e-08, + "loss": 29.4705, "step": 420 }, { - "epoch": 0.0017372544108081465, - "grad_norm": 919.689697265625, - "learning_rate": 8.6e-07, - "loss": 544.2018, + "epoch": 0.0008686272054040733, + "grad_norm": 301.3433532714844, + "learning_rate": 8.6e-08, + "loss": 22.8971, "step": 430 }, { - "epoch": 0.0017776556761757779, - "grad_norm": 1076.61962890625, - "learning_rate": 8.8e-07, - "loss": 477.4546, + "epoch": 0.0008888278380878889, + "grad_norm": 578.7197875976562, + "learning_rate": 8.800000000000001e-08, + "loss": 20.5219, "step": 440 }, { - "epoch": 0.0018180569415434092, - "grad_norm": 6896.8623046875, - "learning_rate": 9e-07, - "loss": 551.458, + "epoch": 0.0009090284707717046, + "grad_norm": 1337.2423095703125, + "learning_rate": 9e-08, + "loss": 38.1059, "step": 450 }, { - "epoch": 0.0018584582069110405, - "grad_norm": 1263.240478515625, - "learning_rate": 9.2e-07, - "loss": 478.671, + "epoch": 0.0009292291034555202, + "grad_norm": 971.8609619140625, + "learning_rate": 9.2e-08, + "loss": 28.7212, "step": 460 }, { - "epoch": 0.0018988594722786718, - "grad_norm": 6494.14501953125, - "learning_rate": 9.400000000000001e-07, - "loss": 551.3243, + "epoch": 0.0009494297361393359, + "grad_norm": 635.0576171875, + "learning_rate": 9.400000000000001e-08, + "loss": 28.1718, "step": 470 }, { - "epoch": 0.0019392607376463031, - "grad_norm": 1367.335205078125, - "learning_rate": 9.6e-07, - "loss": 566.3516, + "epoch": 0.0009696303688231516, + "grad_norm": 347.902587890625, + "learning_rate": 9.6e-08, + "loss": 35.6753, "step": 480 }, { - "epoch": 0.0019796620030139342, - "grad_norm": 2059.615478515625, - "learning_rate": 9.8e-07, - "loss": 440.9915, + "epoch": 0.0009898310015069671, + "grad_norm": 804.568115234375, + "learning_rate": 9.8e-08, + "loss": 52.5154, "step": 490 }, { - "epoch": 0.0020200632683815656, - "grad_norm": 1089.5028076171875, - "learning_rate": 1.0000000000000002e-06, - "loss": 450.5991, + "epoch": 0.0010100316341907828, + "grad_norm": 357.3641662597656, + "learning_rate": 1.0000000000000001e-07, + "loss": 23.8221, "step": 500 }, { - "epoch": 0.002060464533749197, - "grad_norm": 1123.5267333984375, - "learning_rate": 1.0200000000000002e-06, - "loss": 450.4043, + "epoch": 0.0010302322668745984, + "grad_norm": 695.1265869140625, + "learning_rate": 1.0200000000000001e-07, + "loss": 29.7152, "step": 510 }, { - "epoch": 0.002100865799116828, - "grad_norm": 1164.44677734375, - "learning_rate": 1.04e-06, - "loss": 401.3136, + "epoch": 0.001050432899558414, + "grad_norm": 466.5853271484375, + "learning_rate": 1.04e-07, + "loss": 23.3024, "step": 520 }, { - "epoch": 0.0021412670644844595, - "grad_norm": 1139.4189453125, - "learning_rate": 1.06e-06, - "loss": 510.3279, + "epoch": 0.0010706335322422298, + "grad_norm": 725.106201171875, + "learning_rate": 1.0600000000000001e-07, + "loss": 20.0807, "step": 530 }, { - "epoch": 0.002181668329852091, - "grad_norm": 1584.498779296875, - "learning_rate": 1.08e-06, - "loss": 276.7529, + "epoch": 0.0010908341649260454, + "grad_norm": 500.51708984375, + "learning_rate": 1.0800000000000001e-07, + "loss": 16.9731, "step": 540 }, { - "epoch": 0.002222069595219722, - "grad_norm": 13004.5224609375, - "learning_rate": 1.1e-06, - "loss": 517.8619, + "epoch": 0.001111034797609861, + "grad_norm": 643.19580078125, + "learning_rate": 1.1e-07, + "loss": 35.556, "step": 550 }, { - "epoch": 0.0022624708605873535, - "grad_norm": 6897.6220703125, - "learning_rate": 1.12e-06, - "loss": 371.9735, + "epoch": 0.0011312354302936767, + "grad_norm": 606.3302001953125, + "learning_rate": 1.1200000000000001e-07, + "loss": 27.9096, "step": 560 }, { - "epoch": 0.002302872125954985, - "grad_norm": 5952.30859375, - "learning_rate": 1.14e-06, - "loss": 321.4458, + "epoch": 0.0011514360629774924, + "grad_norm": 455.76116943359375, + "learning_rate": 1.1400000000000001e-07, + "loss": 27.992, "step": 570 }, { - "epoch": 0.002343273391322616, - "grad_norm": 1368.4573974609375, - "learning_rate": 1.16e-06, - "loss": 444.6263, + "epoch": 0.001171636695661308, + "grad_norm": 890.6718139648438, + "learning_rate": 1.16e-07, + "loss": 20.3737, "step": 580 }, { - "epoch": 0.0023836746566902474, - "grad_norm": 6530.33154296875, - "learning_rate": 1.18e-06, - "loss": 576.2317, + "epoch": 0.0011918373283451237, + "grad_norm": 521.8313598632812, + "learning_rate": 1.1800000000000001e-07, + "loss": 21.4314, "step": 590 }, { - "epoch": 0.0024240759220578788, - "grad_norm": 918.2885131835938, - "learning_rate": 1.2000000000000002e-06, - "loss": 319.5849, + "epoch": 0.0012120379610289394, + "grad_norm": 526.7328491210938, + "learning_rate": 1.2000000000000002e-07, + "loss": 35.9307, "step": 600 }, { - "epoch": 0.00246447718742551, - "grad_norm": 5709.6923828125, - "learning_rate": 1.2200000000000002e-06, - "loss": 544.9418, + "epoch": 0.001232238593712755, + "grad_norm": 491.2201843261719, + "learning_rate": 1.22e-07, + "loss": 15.4924, "step": 610 }, { - "epoch": 0.0025048784527931414, - "grad_norm": 767.517822265625, - "learning_rate": 1.24e-06, - "loss": 475.3013, + "epoch": 0.0012524392263965707, + "grad_norm": 1669.0023193359375, + "learning_rate": 1.24e-07, + "loss": 31.0749, "step": 620 }, { - "epoch": 0.0025452797181607727, - "grad_norm": 1049.8690185546875, - "learning_rate": 1.26e-06, - "loss": 505.3646, + "epoch": 0.0012726398590803864, + "grad_norm": 747.734619140625, + "learning_rate": 1.2600000000000002e-07, + "loss": 27.6358, "step": 630 }, { - "epoch": 0.002585680983528404, - "grad_norm": 6113.7568359375, - "learning_rate": 1.28e-06, - "loss": 371.3924, + "epoch": 0.001292840491764202, + "grad_norm": 415.5186767578125, + "learning_rate": 1.28e-07, + "loss": 34.9266, "step": 640 }, { - "epoch": 0.0026260822488960354, - "grad_norm": 961.7456665039062, - "learning_rate": 1.3e-06, - "loss": 434.1364, + "epoch": 0.0013130411244480177, + "grad_norm": 812.7051391601562, + "learning_rate": 1.3e-07, + "loss": 28.9554, "step": 650 }, { - "epoch": 0.0026664835142636667, - "grad_norm": 827.18310546875, - "learning_rate": 1.32e-06, - "loss": 325.7226, + "epoch": 0.0013332417571318333, + "grad_norm": 391.8579406738281, + "learning_rate": 1.3200000000000002e-07, + "loss": 16.3211, "step": 660 }, { - "epoch": 0.002706884779631298, - "grad_norm": 800.2735595703125, - "learning_rate": 1.34e-06, - "loss": 362.5384, + "epoch": 0.001353442389815649, + "grad_norm": 569.1922607421875, + "learning_rate": 1.34e-07, + "loss": 40.6097, "step": 670 }, { - "epoch": 0.0027472860449989293, - "grad_norm": 2117.720703125, - "learning_rate": 1.36e-06, - "loss": 459.5714, + "epoch": 0.0013736430224994647, + "grad_norm": 784.5740356445312, + "learning_rate": 1.36e-07, + "loss": 38.8948, "step": 680 }, { - "epoch": 0.0027876873103665606, - "grad_norm": 11787.2021484375, - "learning_rate": 1.3800000000000001e-06, - "loss": 452.4481, + "epoch": 0.0013938436551832803, + "grad_norm": 284.9936218261719, + "learning_rate": 1.3800000000000002e-07, + "loss": 28.3477, "step": 690 }, { - "epoch": 0.002828088575734192, - "grad_norm": 1027.6024169921875, - "learning_rate": 1.4000000000000001e-06, - "loss": 370.987, + "epoch": 0.001414044287867096, + "grad_norm": 518.3870239257812, + "learning_rate": 1.4e-07, + "loss": 25.8091, "step": 700 }, { - "epoch": 0.0028684898411018233, - "grad_norm": 2432.72802734375, - "learning_rate": 1.4200000000000002e-06, - "loss": 396.6002, + "epoch": 0.0014342449205509116, + "grad_norm": 448.25848388671875, + "learning_rate": 1.4200000000000003e-07, + "loss": 21.6013, "step": 710 }, { - "epoch": 0.0029088911064694546, - "grad_norm": 4263.763671875, - "learning_rate": 1.44e-06, - "loss": 514.6806, + "epoch": 0.0014544455532347273, + "grad_norm": 175.99990844726562, + "learning_rate": 1.4400000000000002e-07, + "loss": 30.1821, "step": 720 }, { - "epoch": 0.002949292371837086, - "grad_norm": 1061.5069580078125, - "learning_rate": 1.46e-06, - "loss": 367.6588, + "epoch": 0.001474646185918543, + "grad_norm": 863.377685546875, + "learning_rate": 1.46e-07, + "loss": 51.8515, "step": 730 }, { - "epoch": 0.0029896936372047172, - "grad_norm": 7566.0791015625, - "learning_rate": 1.4800000000000002e-06, - "loss": 387.3527, + "epoch": 0.0014948468186023586, + "grad_norm": 175.85592651367188, + "learning_rate": 1.4800000000000003e-07, + "loss": 25.4575, "step": 740 }, { - "epoch": 0.0030300949025723486, - "grad_norm": 1594.1578369140625, - "learning_rate": 1.5e-06, - "loss": 286.798, + "epoch": 0.0015150474512861743, + "grad_norm": 640.6859741210938, + "learning_rate": 1.5000000000000002e-07, + "loss": 26.2666, "step": 750 }, { - "epoch": 0.00307049616793998, - "grad_norm": 1798.3486328125, - "learning_rate": 1.52e-06, - "loss": 265.2009, + "epoch": 0.00153524808396999, + "grad_norm": 811.0674438476562, + "learning_rate": 1.52e-07, + "loss": 24.3395, "step": 760 }, { - "epoch": 0.003110897433307611, - "grad_norm": 1887.8646240234375, - "learning_rate": 1.54e-06, - "loss": 511.3708, + "epoch": 0.0015554487166538056, + "grad_norm": 628.3569946289062, + "learning_rate": 1.5400000000000003e-07, + "loss": 25.5079, "step": 770 }, { - "epoch": 0.0031512986986752425, - "grad_norm": 1046.9554443359375, - "learning_rate": 1.56e-06, - "loss": 330.1199, + "epoch": 0.0015756493493376213, + "grad_norm": 796.8837890625, + "learning_rate": 1.56e-07, + "loss": 22.1897, "step": 780 }, { - "epoch": 0.003191699964042874, - "grad_norm": 14393.11328125, - "learning_rate": 1.5800000000000003e-06, - "loss": 459.7904, + "epoch": 0.001595849982021437, + "grad_norm": 634.3350830078125, + "learning_rate": 1.5800000000000004e-07, + "loss": 13.3538, "step": 790 }, { - "epoch": 0.003232101229410505, - "grad_norm": 1148.2391357421875, - "learning_rate": 1.6000000000000001e-06, - "loss": 297.0352, + "epoch": 0.0016160506147052526, + "grad_norm": 0.0, + "learning_rate": 1.6e-07, + "loss": 40.9726, "step": 800 }, { - "epoch": 0.0032725024947781365, - "grad_norm": 1655.5443115234375, - "learning_rate": 1.62e-06, - "loss": 380.9321, + "epoch": 0.0016362512473890682, + "grad_norm": 891.0134887695312, + "learning_rate": 1.62e-07, + "loss": 33.7623, "step": 810 }, { - "epoch": 0.003312903760145768, - "grad_norm": 1723.20947265625, - "learning_rate": 1.6400000000000002e-06, - "loss": 356.5419, + "epoch": 0.001656451880072884, + "grad_norm": 124.5013198852539, + "learning_rate": 1.6400000000000004e-07, + "loss": 32.6563, "step": 820 }, { - "epoch": 0.003353305025513399, - "grad_norm": 962.674560546875, - "learning_rate": 1.6600000000000002e-06, - "loss": 244.5568, + "epoch": 0.0016766525127566996, + "grad_norm": 557.0490112304688, + "learning_rate": 1.66e-07, + "loss": 11.069, "step": 830 }, { - "epoch": 0.0033937062908810304, - "grad_norm": 1860.3314208984375, - "learning_rate": 1.68e-06, - "loss": 314.3494, + "epoch": 0.0016968531454405152, + "grad_norm": 370.4107971191406, + "learning_rate": 1.68e-07, + "loss": 18.7746, "step": 840 }, { - "epoch": 0.0034341075562486618, - "grad_norm": 677.5274047851562, - "learning_rate": 1.7000000000000002e-06, - "loss": 389.2205, + "epoch": 0.0017170537781243309, + "grad_norm": 470.0067443847656, + "learning_rate": 1.7000000000000001e-07, + "loss": 53.0731, "step": 850 }, { - "epoch": 0.003474508821616293, - "grad_norm": 847.7564697265625, - "learning_rate": 1.72e-06, - "loss": 421.029, + "epoch": 0.0017372544108081465, + "grad_norm": 250.24252319335938, + "learning_rate": 1.72e-07, + "loss": 19.3808, "step": 860 }, { - "epoch": 0.0035149100869839244, - "grad_norm": 1153.310791015625, - "learning_rate": 1.7399999999999999e-06, - "loss": 338.7876, + "epoch": 0.0017574550434919622, + "grad_norm": 1099.9942626953125, + "learning_rate": 1.74e-07, + "loss": 50.3866, "step": 870 }, { - "epoch": 0.0035553113523515557, - "grad_norm": 702.7095947265625, - "learning_rate": 1.76e-06, - "loss": 375.8682, + "epoch": 0.0017776556761757779, + "grad_norm": 599.7510375976562, + "learning_rate": 1.7600000000000001e-07, + "loss": 23.1743, "step": 880 }, { - "epoch": 0.003595712617719187, - "grad_norm": 898.8328857421875, - "learning_rate": 1.7800000000000001e-06, - "loss": 437.3662, + "epoch": 0.0017978563088595935, + "grad_norm": 401.0577087402344, + "learning_rate": 1.78e-07, + "loss": 18.497, "step": 890 }, { - "epoch": 0.0036361138830868184, - "grad_norm": 1392.5733642578125, - "learning_rate": 1.8e-06, - "loss": 327.5625, + "epoch": 0.0018180569415434092, + "grad_norm": 1774.6864013671875, + "learning_rate": 1.8e-07, + "loss": 39.7275, "step": 900 }, { - "epoch": 0.0036765151484544497, - "grad_norm": 1176.8975830078125, - "learning_rate": 1.8200000000000002e-06, - "loss": 331.6111, + "epoch": 0.0018382575742272248, + "grad_norm": 950.1702880859375, + "learning_rate": 1.8200000000000002e-07, + "loss": 34.1336, "step": 910 }, { - "epoch": 0.003716916413822081, - "grad_norm": 844.1708374023438, - "learning_rate": 1.84e-06, - "loss": 462.9411, + "epoch": 0.0018584582069110405, + "grad_norm": 681.1715087890625, + "learning_rate": 1.84e-07, + "loss": 22.6729, "step": 920 }, { - "epoch": 0.0037573176791897123, - "grad_norm": 1110.1351318359375, - "learning_rate": 1.86e-06, - "loss": 291.5723, + "epoch": 0.0018786588395948562, + "grad_norm": 548.5640869140625, + "learning_rate": 1.86e-07, + "loss": 17.3724, "step": 930 }, { - "epoch": 0.0037977189445573436, - "grad_norm": 1204.96826171875, - "learning_rate": 1.8800000000000002e-06, - "loss": 453.21, + "epoch": 0.0018988594722786718, + "grad_norm": 570.0945434570312, + "learning_rate": 1.8800000000000002e-07, + "loss": 40.4859, "step": 940 }, { - "epoch": 0.003838120209924975, - "grad_norm": 788.923095703125, - "learning_rate": 1.9e-06, - "loss": 338.4679, + "epoch": 0.0019190601049624875, + "grad_norm": 411.9521789550781, + "learning_rate": 1.9e-07, + "loss": 19.4746, "step": 950 }, { - "epoch": 0.0038785214752926063, - "grad_norm": 828.4852294921875, - "learning_rate": 1.92e-06, - "loss": 304.8583, + "epoch": 0.0019392607376463031, + "grad_norm": 781.8963623046875, + "learning_rate": 1.92e-07, + "loss": 54.7843, "step": 960 }, { - "epoch": 0.003918922740660237, - "grad_norm": 3126.61181640625, - "learning_rate": 1.94e-06, - "loss": 368.0714, + "epoch": 0.0019594613703301186, + "grad_norm": 296.8689270019531, + "learning_rate": 1.9400000000000002e-07, + "loss": 29.0034, "step": 970 }, { - "epoch": 0.0039593240060278685, - "grad_norm": 3534.13232421875, - "learning_rate": 1.96e-06, - "loss": 321.7589, + "epoch": 0.0019796620030139342, + "grad_norm": 842.5889282226562, + "learning_rate": 1.96e-07, + "loss": 37.4423, "step": 980 }, { - "epoch": 0.0039997252713955, - "grad_norm": 676.08251953125, - "learning_rate": 1.98e-06, - "loss": 336.8801, + "epoch": 0.00199986263569775, + "grad_norm": 948.009033203125, + "learning_rate": 1.9800000000000003e-07, + "loss": 30.0773, "step": 990 }, { - "epoch": 0.004040126536763131, - "grad_norm": 1422.3154296875, - "learning_rate": 2.0000000000000003e-06, - "loss": 310.1541, + "epoch": 0.0020200632683815656, + "grad_norm": 215.3368377685547, + "learning_rate": 2.0000000000000002e-07, + "loss": 26.6014, "step": 1000 }, { - "epoch": 0.0040805278021307624, - "grad_norm": 754.2792358398438, - "learning_rate": 2.02e-06, - "loss": 294.2774, + "epoch": 0.0020402639010653812, + "grad_norm": 494.67578125, + "learning_rate": 2.02e-07, + "loss": 21.6492, "step": 1010 }, { - "epoch": 0.004120929067498394, - "grad_norm": 1239.5504150390625, - "learning_rate": 2.0400000000000004e-06, - "loss": 379.9203, + "epoch": 0.002060464533749197, + "grad_norm": 234.77099609375, + "learning_rate": 2.0400000000000003e-07, + "loss": 24.3645, "step": 1020 }, { - "epoch": 0.004161330332866025, - "grad_norm": 1107.9769287109375, - "learning_rate": 2.06e-06, - "loss": 394.3968, + "epoch": 0.0020806651664330125, + "grad_norm": 327.6997985839844, + "learning_rate": 2.0600000000000002e-07, + "loss": 24.5405, "step": 1030 }, { - "epoch": 0.004201731598233656, - "grad_norm": 747.5755615234375, - "learning_rate": 2.08e-06, - "loss": 357.4806, + "epoch": 0.002100865799116828, + "grad_norm": 611.9917602539062, + "learning_rate": 2.08e-07, + "loss": 24.577, "step": 1040 }, { - "epoch": 0.004242132863601288, - "grad_norm": 897.203125, - "learning_rate": 2.1000000000000002e-06, - "loss": 376.0113, + "epoch": 0.002121066431800644, + "grad_norm": 133.8150177001953, + "learning_rate": 2.1000000000000003e-07, + "loss": 33.8902, "step": 1050 }, { - "epoch": 0.004282534128968919, - "grad_norm": 3561.832763671875, - "learning_rate": 2.12e-06, - "loss": 385.5077, + "epoch": 0.0021412670644844595, + "grad_norm": 163.73504638671875, + "learning_rate": 2.1200000000000002e-07, + "loss": 17.783, "step": 1060 }, { - "epoch": 0.00432293539433655, - "grad_norm": 2549.35205078125, - "learning_rate": 2.14e-06, - "loss": 448.231, + "epoch": 0.002161467697168275, + "grad_norm": 193.5026397705078, + "learning_rate": 2.14e-07, + "loss": 20.8054, "step": 1070 }, { - "epoch": 0.004363336659704182, - "grad_norm": 746.3321533203125, - "learning_rate": 2.16e-06, - "loss": 308.5054, + "epoch": 0.002181668329852091, + "grad_norm": 624.8909301757812, + "learning_rate": 2.1600000000000003e-07, + "loss": 10.6967, "step": 1080 }, { - "epoch": 0.004403737925071813, - "grad_norm": 4975.05615234375, - "learning_rate": 2.1800000000000003e-06, - "loss": 604.6913, + "epoch": 0.0022018689625359065, + "grad_norm": 430.49560546875, + "learning_rate": 2.1800000000000002e-07, + "loss": 45.0397, "step": 1090 }, { - "epoch": 0.004444139190439444, - "grad_norm": 865.9938354492188, - "learning_rate": 2.2e-06, - "loss": 239.4941, + "epoch": 0.002222069595219722, + "grad_norm": 1073.7850341796875, + "learning_rate": 2.2e-07, + "loss": 42.1204, "step": 1100 }, { - "epoch": 0.004484540455807076, - "grad_norm": 1072.074462890625, - "learning_rate": 2.2200000000000003e-06, - "loss": 401.7543, + "epoch": 0.002242270227903538, + "grad_norm": 486.751708984375, + "learning_rate": 2.2200000000000003e-07, + "loss": 23.1064, "step": 1110 }, { - "epoch": 0.004524941721174707, - "grad_norm": 1182.0032958984375, - "learning_rate": 2.24e-06, - "loss": 330.9987, + "epoch": 0.0022624708605873535, + "grad_norm": 1003.8533935546875, + "learning_rate": 2.2400000000000002e-07, + "loss": 43.3564, "step": 1120 }, { - "epoch": 0.004565342986542338, - "grad_norm": 814.4903564453125, - "learning_rate": 2.26e-06, - "loss": 379.418, + "epoch": 0.002282671493271169, + "grad_norm": 572.0302734375, + "learning_rate": 2.26e-07, + "loss": 23.8053, "step": 1130 }, { - "epoch": 0.00460574425190997, - "grad_norm": 2526.64501953125, - "learning_rate": 2.28e-06, - "loss": 389.4854, + "epoch": 0.002302872125954985, + "grad_norm": 447.3976745605469, + "learning_rate": 2.2800000000000003e-07, + "loss": 18.1645, "step": 1140 }, { - "epoch": 0.004646145517277601, - "grad_norm": 1358.009033203125, - "learning_rate": 2.3e-06, - "loss": 432.4065, + "epoch": 0.0023230727586388005, + "grad_norm": 931.8524780273438, + "learning_rate": 2.3000000000000002e-07, + "loss": 30.1086, "step": 1150 }, { - "epoch": 0.004686546782645232, - "grad_norm": 1503.1463623046875, - "learning_rate": 2.32e-06, - "loss": 417.3679, + "epoch": 0.002343273391322616, + "grad_norm": 875.0267333984375, + "learning_rate": 2.32e-07, + "loss": 44.5649, "step": 1160 }, { - "epoch": 0.0047269480480128636, - "grad_norm": 2745.59619140625, - "learning_rate": 2.34e-06, - "loss": 339.9085, + "epoch": 0.0023634740240064318, + "grad_norm": 683.9933471679688, + "learning_rate": 2.3400000000000003e-07, + "loss": 28.6991, "step": 1170 }, { - "epoch": 0.004767349313380495, - "grad_norm": 636.4331665039062, - "learning_rate": 2.36e-06, - "loss": 324.7576, + "epoch": 0.0023836746566902474, + "grad_norm": 1272.7320556640625, + "learning_rate": 2.3600000000000002e-07, + "loss": 53.9565, "step": 1180 }, { - "epoch": 0.004807750578748126, - "grad_norm": 1743.3262939453125, - "learning_rate": 2.38e-06, - "loss": 337.6445, + "epoch": 0.002403875289374063, + "grad_norm": 540.0521240234375, + "learning_rate": 2.3800000000000004e-07, + "loss": 23.6139, "step": 1190 }, { - "epoch": 0.0048481518441157575, - "grad_norm": 912.2630004882812, - "learning_rate": 2.4000000000000003e-06, - "loss": 342.137, + "epoch": 0.0024240759220578788, + "grad_norm": 578.9132690429688, + "learning_rate": 2.4000000000000003e-07, + "loss": 22.7089, "step": 1200 }, { - "epoch": 0.004888553109483389, - "grad_norm": 1142.1710205078125, - "learning_rate": 2.42e-06, - "loss": 280.1062, + "epoch": 0.0024442765547416944, + "grad_norm": 382.25244140625, + "learning_rate": 2.42e-07, + "loss": 33.4145, "step": 1210 }, { - "epoch": 0.00492895437485102, - "grad_norm": 981.51806640625, - "learning_rate": 2.4400000000000004e-06, - "loss": 291.3894, + "epoch": 0.00246447718742551, + "grad_norm": 1396.89697265625, + "learning_rate": 2.44e-07, + "loss": 35.8553, "step": 1220 }, { - "epoch": 0.0049693556402186515, - "grad_norm": 1795.322265625, - "learning_rate": 2.46e-06, - "loss": 349.2701, + "epoch": 0.0024846778201093257, + "grad_norm": 507.25848388671875, + "learning_rate": 2.46e-07, + "loss": 39.8554, "step": 1230 }, { - "epoch": 0.005009756905586283, - "grad_norm": 1389.8359375, - "learning_rate": 2.48e-06, - "loss": 363.471, + "epoch": 0.0025048784527931414, + "grad_norm": 236.29849243164062, + "learning_rate": 2.48e-07, + "loss": 20.3849, "step": 1240 }, { - "epoch": 0.005050158170953914, - "grad_norm": 1739.7503662109375, - "learning_rate": 2.5e-06, - "loss": 414.4333, + "epoch": 0.002525079085476957, + "grad_norm": 766.2069702148438, + "learning_rate": 2.5000000000000004e-07, + "loss": 35.435, "step": 1250 }, { - "epoch": 0.0050905594363215454, - "grad_norm": 1108.7142333984375, - "learning_rate": 2.52e-06, - "loss": 376.2317, + "epoch": 0.0025452797181607727, + "grad_norm": 92.45375061035156, + "learning_rate": 2.5200000000000003e-07, + "loss": 33.7958, "step": 1260 }, { - "epoch": 0.005130960701689177, - "grad_norm": 1234.8914794921875, - "learning_rate": 2.54e-06, - "loss": 432.4711, + "epoch": 0.0025654803508445884, + "grad_norm": 206.84521484375, + "learning_rate": 2.54e-07, + "loss": 19.2633, "step": 1270 }, { - "epoch": 0.005171361967056808, - "grad_norm": 1600.1077880859375, - "learning_rate": 2.56e-06, - "loss": 273.5047, + "epoch": 0.002585680983528404, + "grad_norm": 1618.84521484375, + "learning_rate": 2.56e-07, + "loss": 36.3744, "step": 1280 }, { - "epoch": 0.005211763232424439, - "grad_norm": 5998.88134765625, - "learning_rate": 2.5800000000000003e-06, - "loss": 417.7149, + "epoch": 0.0026058816162122197, + "grad_norm": 322.9664001464844, + "learning_rate": 2.58e-07, + "loss": 19.4322, "step": 1290 }, { - "epoch": 0.005252164497792071, - "grad_norm": 1022.2550659179688, - "learning_rate": 2.6e-06, - "loss": 337.5613, + "epoch": 0.0026260822488960354, + "grad_norm": 479.2985534667969, + "learning_rate": 2.6e-07, + "loss": 44.4554, "step": 1300 }, { - "epoch": 0.005292565763159702, - "grad_norm": 821.97998046875, - "learning_rate": 2.6200000000000003e-06, - "loss": 355.1617, + "epoch": 0.002646282881579851, + "grad_norm": 839.0703125, + "learning_rate": 2.6200000000000004e-07, + "loss": 25.7475, "step": 1310 }, { - "epoch": 0.005332967028527333, - "grad_norm": 0.0, - "learning_rate": 2.64e-06, - "loss": 441.1887, + "epoch": 0.0026664835142636667, + "grad_norm": 496.9524841308594, + "learning_rate": 2.6400000000000003e-07, + "loss": 14.5702, "step": 1320 }, { - "epoch": 0.005373368293894965, - "grad_norm": 1180.22607421875, - "learning_rate": 2.66e-06, - "loss": 349.4191, + "epoch": 0.0026866841469474823, + "grad_norm": 206.1656951904297, + "learning_rate": 2.66e-07, + "loss": 30.1201, "step": 1330 }, { - "epoch": 0.005413769559262596, - "grad_norm": 1288.874755859375, - "learning_rate": 2.68e-06, - "loss": 358.9353, + "epoch": 0.002706884779631298, + "grad_norm": 688.9887084960938, + "learning_rate": 2.68e-07, + "loss": 20.7835, "step": 1340 }, { - "epoch": 0.005454170824630227, - "grad_norm": 858.6513671875, - "learning_rate": 2.7e-06, - "loss": 298.1967, + "epoch": 0.0027270854123151137, + "grad_norm": 173.92941284179688, + "learning_rate": 2.7e-07, + "loss": 38.9963, "step": 1350 }, { - "epoch": 0.005494572089997859, - "grad_norm": 826.8380737304688, - "learning_rate": 2.72e-06, - "loss": 266.4493, + "epoch": 0.0027472860449989293, + "grad_norm": 416.7894287109375, + "learning_rate": 2.72e-07, + "loss": 40.8383, "step": 1360 }, { - "epoch": 0.00553497335536549, - "grad_norm": 1527.0496826171875, - "learning_rate": 2.74e-06, - "loss": 385.1624, + "epoch": 0.002767486677682745, + "grad_norm": 602.2549438476562, + "learning_rate": 2.7400000000000004e-07, + "loss": 22.1494, "step": 1370 }, { - "epoch": 0.005575374620733121, - "grad_norm": 1727.4271240234375, - "learning_rate": 2.7600000000000003e-06, - "loss": 409.4935, + "epoch": 0.0027876873103665606, + "grad_norm": 1489.0169677734375, + "learning_rate": 2.7600000000000004e-07, + "loss": 30.0528, "step": 1380 }, { - "epoch": 0.005615775886100753, - "grad_norm": 1242.2200927734375, - "learning_rate": 2.78e-06, - "loss": 423.3242, + "epoch": 0.0028078879430503763, + "grad_norm": 410.69769287109375, + "learning_rate": 2.7800000000000003e-07, + "loss": 36.4594, "step": 1390 }, { - "epoch": 0.005656177151468384, - "grad_norm": 784.5136108398438, - "learning_rate": 2.8000000000000003e-06, - "loss": 378.621, + "epoch": 0.002828088575734192, + "grad_norm": 99.08312225341797, + "learning_rate": 2.8e-07, + "loss": 19.1275, "step": 1400 }, { - "epoch": 0.005696578416836015, - "grad_norm": 686.701416015625, - "learning_rate": 2.82e-06, - "loss": 366.9333, + "epoch": 0.0028482892084180076, + "grad_norm": 579.4361572265625, + "learning_rate": 2.82e-07, + "loss": 23.6028, "step": 1410 }, { - "epoch": 0.0057369796822036466, - "grad_norm": 2546.25390625, - "learning_rate": 2.8400000000000003e-06, - "loss": 372.8342, + "epoch": 0.0028684898411018233, + "grad_norm": 932.9219970703125, + "learning_rate": 2.8400000000000005e-07, + "loss": 33.0755, "step": 1420 }, { - "epoch": 0.005777380947571278, - "grad_norm": 1681.410400390625, - "learning_rate": 2.86e-06, - "loss": 383.352, + "epoch": 0.002888690473785639, + "grad_norm": 736.8807373046875, + "learning_rate": 2.8600000000000005e-07, + "loss": 33.5045, "step": 1430 }, { - "epoch": 0.005817782212938909, - "grad_norm": 1604.5748291015625, - "learning_rate": 2.88e-06, - "loss": 390.5337, + "epoch": 0.0029088911064694546, + "grad_norm": 1126.778076171875, + "learning_rate": 2.8800000000000004e-07, + "loss": 33.3801, "step": 1440 }, { - "epoch": 0.0058581834783065405, - "grad_norm": 642.487060546875, - "learning_rate": 2.9e-06, - "loss": 304.9479, + "epoch": 0.0029290917391532703, + "grad_norm": 1085.3497314453125, + "learning_rate": 2.9000000000000003e-07, + "loss": 28.1494, "step": 1450 }, { - "epoch": 0.005898584743674172, - "grad_norm": 2208.190185546875, - "learning_rate": 2.92e-06, - "loss": 419.7739, + "epoch": 0.002949292371837086, + "grad_norm": 374.22149658203125, + "learning_rate": 2.92e-07, + "loss": 32.5694, "step": 1460 }, { - "epoch": 0.005938986009041803, - "grad_norm": 558.7657470703125, - "learning_rate": 2.9400000000000002e-06, - "loss": 243.1836, + "epoch": 0.0029694930045209016, + "grad_norm": 238.65066528320312, + "learning_rate": 2.94e-07, + "loss": 18.5137, "step": 1470 }, { - "epoch": 0.0059793872744094345, - "grad_norm": 866.2562866210938, - "learning_rate": 2.9600000000000005e-06, - "loss": 254.9255, + "epoch": 0.0029896936372047172, + "grad_norm": 2642.603759765625, + "learning_rate": 2.9600000000000006e-07, + "loss": 33.0043, "step": 1480 }, { - "epoch": 0.006019788539777066, - "grad_norm": 827.721923828125, - "learning_rate": 2.9800000000000003e-06, - "loss": 301.6956, + "epoch": 0.003009894269888533, + "grad_norm": 371.8793640136719, + "learning_rate": 2.9800000000000005e-07, + "loss": 22.4324, "step": 1490 }, { - "epoch": 0.006060189805144697, - "grad_norm": 1020.5379638671875, - "learning_rate": 3e-06, - "loss": 236.8173, + "epoch": 0.0030300949025723486, + "grad_norm": 463.6688232421875, + "learning_rate": 3.0000000000000004e-07, + "loss": 19.1007, "step": 1500 }, { - "epoch": 0.0061005910705123284, - "grad_norm": 12217.869140625, - "learning_rate": 3.0200000000000003e-06, - "loss": 315.658, + "epoch": 0.0030502955352561642, + "grad_norm": 710.45068359375, + "learning_rate": 3.0200000000000003e-07, + "loss": 17.671, "step": 1510 }, { - "epoch": 0.00614099233587996, - "grad_norm": 1178.291259765625, - "learning_rate": 3.04e-06, - "loss": 300.3993, + "epoch": 0.00307049616793998, + "grad_norm": 231.896240234375, + "learning_rate": 3.04e-07, + "loss": 25.546, "step": 1520 }, { - "epoch": 0.006181393601247591, - "grad_norm": 2459.315185546875, - "learning_rate": 3.06e-06, - "loss": 315.2213, + "epoch": 0.0030906968006237955, + "grad_norm": 263.27044677734375, + "learning_rate": 3.06e-07, + "loss": 27.5296, "step": 1530 }, { - "epoch": 0.006221794866615222, - "grad_norm": 0.0, - "learning_rate": 3.08e-06, - "loss": 271.6832, + "epoch": 0.003110897433307611, + "grad_norm": 447.924072265625, + "learning_rate": 3.0800000000000006e-07, + "loss": 65.7333, "step": 1540 }, { - "epoch": 0.006262196131982854, - "grad_norm": 1094.0899658203125, - "learning_rate": 3.1e-06, - "loss": 273.5612, + "epoch": 0.003131098065991427, + "grad_norm": 415.6585998535156, + "learning_rate": 3.1000000000000005e-07, + "loss": 17.1337, "step": 1550 }, { - "epoch": 0.006302597397350485, - "grad_norm": 1611.595703125, - "learning_rate": 3.12e-06, - "loss": 291.6243, + "epoch": 0.0031512986986752425, + "grad_norm": 743.8123779296875, + "learning_rate": 3.12e-07, + "loss": 31.4509, "step": 1560 }, { - "epoch": 0.006342998662718116, - "grad_norm": 1336.66259765625, - "learning_rate": 3.14e-06, - "loss": 322.2612, + "epoch": 0.003171499331359058, + "grad_norm": 379.4447326660156, + "learning_rate": 3.14e-07, + "loss": 33.4806, "step": 1570 }, { - "epoch": 0.006383399928085748, - "grad_norm": 2437.919189453125, - "learning_rate": 3.1600000000000007e-06, - "loss": 281.6811, + "epoch": 0.003191699964042874, + "grad_norm": 1378.80615234375, + "learning_rate": 3.160000000000001e-07, + "loss": 41.5661, "step": 1580 }, { - "epoch": 0.006423801193453379, - "grad_norm": 945.635498046875, - "learning_rate": 3.1800000000000005e-06, - "loss": 331.3091, + "epoch": 0.0032119005967266895, + "grad_norm": 721.19384765625, + "learning_rate": 3.1800000000000007e-07, + "loss": 24.3997, "step": 1590 }, { - "epoch": 0.00646420245882101, - "grad_norm": 1270.96826171875, - "learning_rate": 3.2000000000000003e-06, - "loss": 337.9903, + "epoch": 0.003232101229410505, + "grad_norm": 447.39154052734375, + "learning_rate": 3.2e-07, + "loss": 20.2326, "step": 1600 }, { - "epoch": 0.006504603724188642, - "grad_norm": 1054.9232177734375, - "learning_rate": 3.22e-06, - "loss": 371.3227, + "epoch": 0.003252301862094321, + "grad_norm": 616.3694458007812, + "learning_rate": 3.22e-07, + "loss": 28.001, "step": 1610 }, { - "epoch": 0.006545004989556273, - "grad_norm": 1814.677734375, - "learning_rate": 3.24e-06, - "loss": 390.2153, + "epoch": 0.0032725024947781365, + "grad_norm": 942.26904296875, + "learning_rate": 3.24e-07, + "loss": 34.0719, "step": 1620 }, { - "epoch": 0.006585406254923904, - "grad_norm": 648.9537963867188, - "learning_rate": 3.2599999999999997e-06, - "loss": 368.1877, + "epoch": 0.003292703127461952, + "grad_norm": 340.3528137207031, + "learning_rate": 3.26e-07, + "loss": 23.7475, "step": 1630 }, { - "epoch": 0.006625807520291536, - "grad_norm": 1473.499267578125, - "learning_rate": 3.2800000000000004e-06, - "loss": 386.7809, + "epoch": 0.003312903760145768, + "grad_norm": 616.3563842773438, + "learning_rate": 3.280000000000001e-07, + "loss": 25.5421, "step": 1640 }, { - "epoch": 0.006666208785659167, - "grad_norm": 2527.870849609375, - "learning_rate": 3.3e-06, - "loss": 414.1417, + "epoch": 0.0033331043928295835, + "grad_norm": 287.14404296875, + "learning_rate": 3.3e-07, + "loss": 21.6099, "step": 1650 }, { - "epoch": 0.006706610051026798, - "grad_norm": 1029.5302734375, - "learning_rate": 3.3200000000000004e-06, - "loss": 258.2289, + "epoch": 0.003353305025513399, + "grad_norm": 325.98907470703125, + "learning_rate": 3.32e-07, + "loss": 19.8994, "step": 1660 }, { - "epoch": 0.0067470113163944296, - "grad_norm": 748.2296142578125, - "learning_rate": 3.34e-06, - "loss": 348.1511, + "epoch": 0.0033735056581972148, + "grad_norm": 455.73468017578125, + "learning_rate": 3.34e-07, + "loss": 24.4489, "step": 1670 }, { - "epoch": 0.006787412581762061, - "grad_norm": 2257.800537109375, - "learning_rate": 3.36e-06, - "loss": 326.2408, + "epoch": 0.0033937062908810304, + "grad_norm": 970.495361328125, + "learning_rate": 3.36e-07, + "loss": 25.1155, "step": 1680 }, { - "epoch": 0.006827813847129692, - "grad_norm": 987.0545654296875, - "learning_rate": 3.38e-06, - "loss": 285.0146, + "epoch": 0.003413906923564846, + "grad_norm": 820.18359375, + "learning_rate": 3.38e-07, + "loss": 35.97, "step": 1690 }, { - "epoch": 0.0068682151124973235, - "grad_norm": 3093.51171875, - "learning_rate": 3.4000000000000005e-06, - "loss": 357.8721, + "epoch": 0.0034341075562486618, + "grad_norm": 135.357177734375, + "learning_rate": 3.4000000000000003e-07, + "loss": 22.5324, "step": 1700 }, { - "epoch": 0.006908616377864955, - "grad_norm": 769.6875610351562, - "learning_rate": 3.4200000000000003e-06, - "loss": 312.937, + "epoch": 0.0034543081889324774, + "grad_norm": 280.0291748046875, + "learning_rate": 3.42e-07, + "loss": 27.6606, "step": 1710 }, { - "epoch": 0.006949017643232586, - "grad_norm": 5268.0009765625, - "learning_rate": 3.44e-06, - "loss": 437.4973, + "epoch": 0.003474508821616293, + "grad_norm": 345.8559265136719, + "learning_rate": 3.44e-07, + "loss": 39.866, "step": 1720 }, { - "epoch": 0.0069894189086002175, - "grad_norm": 875.3474731445312, - "learning_rate": 3.46e-06, - "loss": 303.2312, + "epoch": 0.0034947094543001087, + "grad_norm": 705.6763305664062, + "learning_rate": 3.46e-07, + "loss": 18.8156, "step": 1730 }, { - "epoch": 0.007029820173967849, - "grad_norm": 988.2050170898438, - "learning_rate": 3.4799999999999997e-06, - "loss": 358.3378, + "epoch": 0.0035149100869839244, + "grad_norm": 416.8535461425781, + "learning_rate": 3.48e-07, + "loss": 33.5592, "step": 1740 }, { - "epoch": 0.00707022143933548, - "grad_norm": 1848.81884765625, - "learning_rate": 3.5000000000000004e-06, - "loss": 354.324, + "epoch": 0.00353511071966774, + "grad_norm": 552.7217407226562, + "learning_rate": 3.5000000000000004e-07, + "loss": 12.8888, "step": 1750 }, { - "epoch": 0.0071106227047031114, - "grad_norm": 985.3126220703125, - "learning_rate": 3.52e-06, - "loss": 387.196, + "epoch": 0.0035553113523515557, + "grad_norm": 208.27780151367188, + "learning_rate": 3.5200000000000003e-07, + "loss": 27.6918, "step": 1760 }, { - "epoch": 0.007151023970070743, - "grad_norm": 847.1213989257812, - "learning_rate": 3.5400000000000004e-06, - "loss": 298.6457, + "epoch": 0.0035755119850353714, + "grad_norm": 537.1244506835938, + "learning_rate": 3.54e-07, + "loss": 44.3155, "step": 1770 }, { - "epoch": 0.007191425235438374, - "grad_norm": 1782.748779296875, - "learning_rate": 3.5600000000000002e-06, - "loss": 410.3063, + "epoch": 0.003595712617719187, + "grad_norm": 284.45733642578125, + "learning_rate": 3.56e-07, + "loss": 28.734, "step": 1780 }, { - "epoch": 0.007231826500806005, - "grad_norm": 731.6522216796875, - "learning_rate": 3.58e-06, - "loss": 233.0396, + "epoch": 0.0036159132504030027, + "grad_norm": 387.14532470703125, + "learning_rate": 3.58e-07, + "loss": 18.956, "step": 1790 }, { - "epoch": 0.007272227766173637, - "grad_norm": 498.69366455078125, - "learning_rate": 3.6e-06, - "loss": 287.9018, + "epoch": 0.0036361138830868184, + "grad_norm": 370.3746337890625, + "learning_rate": 3.6e-07, + "loss": 27.5186, "step": 1800 }, { - "epoch": 0.007312629031541268, - "grad_norm": 906.2838134765625, - "learning_rate": 3.6200000000000005e-06, - "loss": 345.1769, + "epoch": 0.003656314515770634, + "grad_norm": 302.4902038574219, + "learning_rate": 3.6200000000000004e-07, + "loss": 30.0567, "step": 1810 }, { - "epoch": 0.007353030296908899, - "grad_norm": 713.8414916992188, - "learning_rate": 3.6400000000000003e-06, - "loss": 291.5412, + "epoch": 0.0036765151484544497, + "grad_norm": 381.73919677734375, + "learning_rate": 3.6400000000000003e-07, + "loss": 29.6831, "step": 1820 }, { - "epoch": 0.007393431562276531, - "grad_norm": 1315.886962890625, - "learning_rate": 3.66e-06, - "loss": 394.6411, + "epoch": 0.0036967157811382653, + "grad_norm": 463.9573974609375, + "learning_rate": 3.66e-07, + "loss": 51.3698, "step": 1830 }, { - "epoch": 0.007433832827644162, - "grad_norm": 1429.893798828125, - "learning_rate": 3.68e-06, - "loss": 247.4847, + "epoch": 0.003716916413822081, + "grad_norm": 156.73703002929688, + "learning_rate": 3.68e-07, + "loss": 27.487, "step": 1840 }, { - "epoch": 0.007474234093011793, - "grad_norm": 1034.032470703125, - "learning_rate": 3.7e-06, - "loss": 406.1072, + "epoch": 0.0037371170465058967, + "grad_norm": 124.4668197631836, + "learning_rate": 3.7e-07, + "loss": 14.7367, "step": 1850 }, { - "epoch": 0.007514635358379425, - "grad_norm": 955.8413696289062, - "learning_rate": 3.72e-06, - "loss": 392.0067, + "epoch": 0.0037573176791897123, + "grad_norm": 733.3515625, + "learning_rate": 3.72e-07, + "loss": 28.2449, "step": 1860 }, { - "epoch": 0.007555036623747056, - "grad_norm": 0.0, - "learning_rate": 3.7400000000000006e-06, - "loss": 235.0364, + "epoch": 0.003777518311873528, + "grad_norm": 337.3455810546875, + "learning_rate": 3.7400000000000004e-07, + "loss": 38.7027, "step": 1870 }, { - "epoch": 0.007595437889114687, - "grad_norm": 13884.802734375, - "learning_rate": 3.7600000000000004e-06, - "loss": 336.9673, + "epoch": 0.0037977189445573436, + "grad_norm": 987.9752197265625, + "learning_rate": 3.7600000000000003e-07, + "loss": 46.9669, "step": 1880 }, { - "epoch": 0.007635839154482319, - "grad_norm": 3753.5439453125, - "learning_rate": 3.7800000000000002e-06, - "loss": 362.8436, + "epoch": 0.0038179195772411593, + "grad_norm": 186.5377655029297, + "learning_rate": 3.78e-07, + "loss": 38.3815, "step": 1890 }, { - "epoch": 0.00767624041984995, - "grad_norm": 1046.0020751953125, - "learning_rate": 3.8e-06, - "loss": 370.7911, + "epoch": 0.003838120209924975, + "grad_norm": 49.196285247802734, + "learning_rate": 3.8e-07, + "loss": 18.3016, "step": 1900 }, { - "epoch": 0.007716641685217581, - "grad_norm": 2058.227783203125, - "learning_rate": 3.82e-06, - "loss": 356.5814, + "epoch": 0.0038583208426087906, + "grad_norm": 309.545166015625, + "learning_rate": 3.82e-07, + "loss": 17.9834, "step": 1910 }, { - "epoch": 0.0077570429505852126, - "grad_norm": 833.1560668945312, - "learning_rate": 3.84e-06, - "loss": 362.6392, + "epoch": 0.0038785214752926063, + "grad_norm": 540.118408203125, + "learning_rate": 3.84e-07, + "loss": 29.434, "step": 1920 }, { - "epoch": 0.007797444215952844, - "grad_norm": 1860.0916748046875, - "learning_rate": 3.86e-06, - "loss": 327.3118, + "epoch": 0.003898722107976422, + "grad_norm": 20.59901237487793, + "learning_rate": 3.8600000000000004e-07, + "loss": 34.1916, "step": 1930 }, { - "epoch": 0.007837845481320474, - "grad_norm": 2047.00146484375, - "learning_rate": 3.88e-06, - "loss": 431.6941, + "epoch": 0.003918922740660237, + "grad_norm": 742.4661254882812, + "learning_rate": 3.8800000000000003e-07, + "loss": 26.6047, "step": 1940 }, { - "epoch": 0.007878246746688106, - "grad_norm": 1433.5374755859375, - "learning_rate": 3.9e-06, - "loss": 283.749, + "epoch": 0.003939123373344053, + "grad_norm": 275.4309387207031, + "learning_rate": 3.9e-07, + "loss": 15.0218, "step": 1950 }, { - "epoch": 0.007918648012055737, - "grad_norm": 1340.44287109375, - "learning_rate": 3.92e-06, - "loss": 323.1868, + "epoch": 0.0039593240060278685, + "grad_norm": 547.0128173828125, + "learning_rate": 3.92e-07, + "loss": 30.1546, "step": 1960 }, { - "epoch": 0.007959049277423368, - "grad_norm": 5642.4560546875, - "learning_rate": 3.9399999999999995e-06, - "loss": 428.7537, + "epoch": 0.003979524638711684, + "grad_norm": 109.45767211914062, + "learning_rate": 3.94e-07, + "loss": 22.6386, "step": 1970 }, { - "epoch": 0.007999450542791, - "grad_norm": 519.6063232421875, - "learning_rate": 3.96e-06, - "loss": 304.1046, + "epoch": 0.0039997252713955, + "grad_norm": 498.2757263183594, + "learning_rate": 3.9600000000000005e-07, + "loss": 28.6018, "step": 1980 }, { - "epoch": 0.008039851808158631, - "grad_norm": 1227.509765625, - "learning_rate": 3.98e-06, - "loss": 295.7083, + "epoch": 0.0040199259040793155, + "grad_norm": 2670.8740234375, + "learning_rate": 3.9800000000000004e-07, + "loss": 45.6287, "step": 1990 }, { - "epoch": 0.008080253073526262, - "grad_norm": 788.1338500976562, - "learning_rate": 4.000000000000001e-06, - "loss": 383.1164, + "epoch": 0.004040126536763131, + "grad_norm": 733.6322021484375, + "learning_rate": 4.0000000000000003e-07, + "loss": 22.9162, "step": 2000 }, { - "epoch": 0.008120654338893894, - "grad_norm": 1502.9964599609375, - "learning_rate": 4.0200000000000005e-06, - "loss": 312.3206, + "epoch": 0.004060327169446947, + "grad_norm": 774.7802734375, + "learning_rate": 4.02e-07, + "loss": 19.3823, "step": 2010 }, { - "epoch": 0.008161055604261525, - "grad_norm": 6165.92529296875, - "learning_rate": 4.04e-06, - "loss": 307.8958, + "epoch": 0.0040805278021307624, + "grad_norm": 323.9580078125, + "learning_rate": 4.04e-07, + "loss": 23.0773, "step": 2020 }, { - "epoch": 0.008201456869629156, - "grad_norm": 915.6934814453125, - "learning_rate": 4.06e-06, - "loss": 374.9606, + "epoch": 0.004100728434814578, + "grad_norm": 1654.9927978515625, + "learning_rate": 4.06e-07, + "loss": 37.3542, "step": 2030 }, { - "epoch": 0.008241858134996788, - "grad_norm": 730.7386474609375, - "learning_rate": 4.080000000000001e-06, - "loss": 329.4409, + "epoch": 0.004120929067498394, + "grad_norm": 831.7189331054688, + "learning_rate": 4.0800000000000005e-07, + "loss": 19.7393, "step": 2040 }, { - "epoch": 0.008282259400364419, - "grad_norm": 1635.5831298828125, - "learning_rate": 4.1000000000000006e-06, - "loss": 209.305, + "epoch": 0.004141129700182209, + "grad_norm": 921.7901611328125, + "learning_rate": 4.1000000000000004e-07, + "loss": 44.1891, "step": 2050 }, { - "epoch": 0.00832266066573205, - "grad_norm": 4476.35888671875, - "learning_rate": 4.12e-06, - "loss": 415.3398, + "epoch": 0.004161330332866025, + "grad_norm": 432.77813720703125, + "learning_rate": 4.1200000000000004e-07, + "loss": 17.2742, "step": 2060 }, { - "epoch": 0.008363061931099681, - "grad_norm": 1422.126220703125, - "learning_rate": 4.14e-06, - "loss": 360.3677, + "epoch": 0.004181530965549841, + "grad_norm": 1103.80029296875, + "learning_rate": 4.1400000000000003e-07, + "loss": 34.2473, "step": 2070 }, { - "epoch": 0.008403463196467313, - "grad_norm": 620.626220703125, - "learning_rate": 4.16e-06, - "loss": 310.8916, + "epoch": 0.004201731598233656, + "grad_norm": 534.106689453125, + "learning_rate": 4.16e-07, + "loss": 45.6846, "step": 2080 }, { - "epoch": 0.008443864461834944, - "grad_norm": 1371.109130859375, - "learning_rate": 4.18e-06, - "loss": 339.1394, + "epoch": 0.004221932230917472, + "grad_norm": 867.72412109375, + "learning_rate": 4.18e-07, + "loss": 47.1584, "step": 2090 }, { - "epoch": 0.008484265727202575, - "grad_norm": 1084.7042236328125, - "learning_rate": 4.2000000000000004e-06, - "loss": 282.2104, + "epoch": 0.004242132863601288, + "grad_norm": 230.99696350097656, + "learning_rate": 4.2000000000000006e-07, + "loss": 18.7068, "step": 2100 }, { - "epoch": 0.008524666992570207, - "grad_norm": 678.981689453125, - "learning_rate": 4.22e-06, - "loss": 248.9129, + "epoch": 0.004262333496285103, + "grad_norm": 531.1705322265625, + "learning_rate": 4.2200000000000005e-07, + "loss": 33.7492, "step": 2110 }, { - "epoch": 0.008565068257937838, - "grad_norm": 1121.5936279296875, - "learning_rate": 4.24e-06, - "loss": 333.5619, + "epoch": 0.004282534128968919, + "grad_norm": 897.9804077148438, + "learning_rate": 4.2400000000000004e-07, + "loss": 29.6693, "step": 2120 }, { - "epoch": 0.00860546952330547, - "grad_norm": 795.3826293945312, - "learning_rate": 4.26e-06, - "loss": 293.3469, + "epoch": 0.004302734761652735, + "grad_norm": 1721.717529296875, + "learning_rate": 4.2600000000000003e-07, + "loss": 39.9116, "step": 2130 }, { - "epoch": 0.0086458707886731, - "grad_norm": 1109.0904541015625, - "learning_rate": 4.28e-06, - "loss": 365.5112, + "epoch": 0.00432293539433655, + "grad_norm": 799.24658203125, + "learning_rate": 4.28e-07, + "loss": 36.7716, "step": 2140 }, { - "epoch": 0.008686272054040732, - "grad_norm": 778.438232421875, - "learning_rate": 4.2999999999999995e-06, - "loss": 211.6606, + "epoch": 0.004343136027020366, + "grad_norm": 243.7528076171875, + "learning_rate": 4.3e-07, + "loss": 17.6017, "step": 2150 }, { - "epoch": 0.008726673319408363, - "grad_norm": 1132.38818359375, - "learning_rate": 4.32e-06, - "loss": 386.3405, + "epoch": 0.004363336659704182, + "grad_norm": 611.0807495117188, + "learning_rate": 4.3200000000000006e-07, + "loss": 28.1989, "step": 2160 }, { - "epoch": 0.008767074584775995, - "grad_norm": 986.1419677734375, - "learning_rate": 4.34e-06, - "loss": 326.4904, + "epoch": 0.004383537292387997, + "grad_norm": 146.26116943359375, + "learning_rate": 4.3400000000000005e-07, + "loss": 35.3578, "step": 2170 }, { - "epoch": 0.008807475850143626, - "grad_norm": 880.376220703125, - "learning_rate": 4.360000000000001e-06, - "loss": 308.4621, + "epoch": 0.004403737925071813, + "grad_norm": 1156.3270263671875, + "learning_rate": 4.3600000000000004e-07, + "loss": 41.4281, "step": 2180 }, { - "epoch": 0.008847877115511257, - "grad_norm": 2747.5908203125, - "learning_rate": 4.38e-06, - "loss": 240.2334, + "epoch": 0.004423938557755629, + "grad_norm": 309.5587463378906, + "learning_rate": 4.3800000000000003e-07, + "loss": 18.0717, "step": 2190 }, { - "epoch": 0.008888278380878889, - "grad_norm": 1022.6881103515625, - "learning_rate": 4.4e-06, - "loss": 348.5623, + "epoch": 0.004444139190439444, + "grad_norm": 573.9265747070312, + "learning_rate": 4.4e-07, + "loss": 21.9999, "step": 2200 }, { - "epoch": 0.00892867964624652, - "grad_norm": 701.6885986328125, - "learning_rate": 4.420000000000001e-06, - "loss": 239.2583, + "epoch": 0.00446433982312326, + "grad_norm": 86.95663452148438, + "learning_rate": 4.4200000000000007e-07, + "loss": 33.4672, "step": 2210 }, { - "epoch": 0.008969080911614151, - "grad_norm": 812.2908325195312, - "learning_rate": 4.440000000000001e-06, - "loss": 305.8652, + "epoch": 0.004484540455807076, + "grad_norm": 818.9146118164062, + "learning_rate": 4.4400000000000006e-07, + "loss": 30.7386, "step": 2220 }, { - "epoch": 0.009009482176981783, - "grad_norm": 936.9786376953125, - "learning_rate": 4.4600000000000005e-06, - "loss": 406.4304, + "epoch": 0.004504741088490891, + "grad_norm": 523.1422729492188, + "learning_rate": 4.4600000000000005e-07, + "loss": 29.29, "step": 2230 }, { - "epoch": 0.009049883442349414, - "grad_norm": 849.5304565429688, - "learning_rate": 4.48e-06, - "loss": 365.7475, + "epoch": 0.004524941721174707, + "grad_norm": 1081.639892578125, + "learning_rate": 4.4800000000000004e-07, + "loss": 24.7567, "step": 2240 }, { - "epoch": 0.009090284707717045, - "grad_norm": 1223.7852783203125, - "learning_rate": 4.5e-06, - "loss": 295.9342, + "epoch": 0.004545142353858523, + "grad_norm": 438.20458984375, + "learning_rate": 4.5000000000000003e-07, + "loss": 24.615, "step": 2250 }, { - "epoch": 0.009130685973084677, - "grad_norm": 2563.434814453125, - "learning_rate": 4.52e-06, - "loss": 282.5878, + "epoch": 0.004565342986542338, + "grad_norm": 248.22909545898438, + "learning_rate": 4.52e-07, + "loss": 25.8396, "step": 2260 }, { - "epoch": 0.009171087238452308, - "grad_norm": 3302.397705078125, - "learning_rate": 4.540000000000001e-06, - "loss": 292.655, + "epoch": 0.004585543619226154, + "grad_norm": 861.9166870117188, + "learning_rate": 4.5400000000000007e-07, + "loss": 29.9438, "step": 2270 }, { - "epoch": 0.00921148850381994, - "grad_norm": 726.690185546875, - "learning_rate": 4.56e-06, - "loss": 278.9063, + "epoch": 0.00460574425190997, + "grad_norm": 823.51318359375, + "learning_rate": 4.5600000000000006e-07, + "loss": 30.2976, "step": 2280 }, { - "epoch": 0.00925188976918757, - "grad_norm": 611.144287109375, - "learning_rate": 4.58e-06, - "loss": 297.2235, + "epoch": 0.004625944884593785, + "grad_norm": 1101.48779296875, + "learning_rate": 4.5800000000000005e-07, + "loss": 32.0947, "step": 2290 }, { - "epoch": 0.009292291034555202, - "grad_norm": 962.685302734375, - "learning_rate": 4.6e-06, - "loss": 224.3765, + "epoch": 0.004646145517277601, + "grad_norm": 512.0722045898438, + "learning_rate": 4.6000000000000004e-07, + "loss": 37.1648, "step": 2300 }, { - "epoch": 0.009332692299922833, - "grad_norm": 792.4270629882812, - "learning_rate": 4.62e-06, - "loss": 339.5073, + "epoch": 0.004666346149961417, + "grad_norm": 1179.549560546875, + "learning_rate": 4.6200000000000003e-07, + "loss": 41.9576, "step": 2310 }, { - "epoch": 0.009373093565290464, - "grad_norm": 993.0485229492188, - "learning_rate": 4.64e-06, - "loss": 268.8986, + "epoch": 0.004686546782645232, + "grad_norm": 472.5940246582031, + "learning_rate": 4.64e-07, + "loss": 22.1231, "step": 2320 }, { - "epoch": 0.009413494830658096, - "grad_norm": 10737.74609375, - "learning_rate": 4.66e-06, - "loss": 290.0178, + "epoch": 0.004706747415329048, + "grad_norm": 0.0, + "learning_rate": 4.6600000000000007e-07, + "loss": 22.0882, "step": 2330 }, { - "epoch": 0.009453896096025727, - "grad_norm": 1019.1024780273438, - "learning_rate": 4.68e-06, - "loss": 290.3607, + "epoch": 0.0047269480480128636, + "grad_norm": 783.9161376953125, + "learning_rate": 4.6800000000000006e-07, + "loss": 29.3172, "step": 2340 }, { - "epoch": 0.009494297361393358, - "grad_norm": 582.261962890625, - "learning_rate": 4.7e-06, - "loss": 263.2255, + "epoch": 0.004747148680696679, + "grad_norm": 1057.4423828125, + "learning_rate": 4.7000000000000005e-07, + "loss": 28.8642, "step": 2350 }, { - "epoch": 0.00953469862676099, - "grad_norm": 928.1985473632812, - "learning_rate": 4.72e-06, - "loss": 496.6325, + "epoch": 0.004767349313380495, + "grad_norm": 563.2919921875, + "learning_rate": 4.7200000000000004e-07, + "loss": 21.4855, "step": 2360 }, { - "epoch": 0.009575099892128621, - "grad_norm": 1354.3148193359375, - "learning_rate": 4.74e-06, - "loss": 317.678, + "epoch": 0.0047875499460643105, + "grad_norm": 594.2389526367188, + "learning_rate": 4.7400000000000004e-07, + "loss": 38.7323, "step": 2370 }, { - "epoch": 0.009615501157496252, - "grad_norm": 1359.7713623046875, - "learning_rate": 4.76e-06, - "loss": 263.0336, + "epoch": 0.004807750578748126, + "grad_norm": 207.3581085205078, + "learning_rate": 4.760000000000001e-07, + "loss": 28.6296, "step": 2380 }, { - "epoch": 0.009655902422863884, - "grad_norm": 759.7086791992188, - "learning_rate": 4.780000000000001e-06, - "loss": 328.0415, + "epoch": 0.004827951211431942, + "grad_norm": 874.9584350585938, + "learning_rate": 4.78e-07, + "loss": 22.6688, "step": 2390 }, { - "epoch": 0.009696303688231515, - "grad_norm": 593.6005249023438, - "learning_rate": 4.800000000000001e-06, - "loss": 368.25, + "epoch": 0.0048481518441157575, + "grad_norm": 76.04383087158203, + "learning_rate": 4.800000000000001e-07, + "loss": 21.4943, "step": 2400 }, { - "epoch": 0.009736704953599146, - "grad_norm": 732.736328125, - "learning_rate": 4.8200000000000004e-06, - "loss": 175.4389, + "epoch": 0.004868352476799573, + "grad_norm": 961.9263305664062, + "learning_rate": 4.82e-07, + "loss": 17.3612, "step": 2410 }, { - "epoch": 0.009777106218966778, - "grad_norm": 1101.7640380859375, - "learning_rate": 4.84e-06, - "loss": 293.6888, + "epoch": 0.004888553109483389, + "grad_norm": 789.2753295898438, + "learning_rate": 4.84e-07, + "loss": 17.5381, "step": 2420 }, { - "epoch": 0.009817507484334409, - "grad_norm": 4335.19873046875, - "learning_rate": 4.86e-06, - "loss": 348.8911, + "epoch": 0.0049087537421672045, + "grad_norm": 526.267578125, + "learning_rate": 4.86e-07, + "loss": 15.962, "step": 2430 }, { - "epoch": 0.00985790874970204, - "grad_norm": 1072.8892822265625, - "learning_rate": 4.880000000000001e-06, - "loss": 231.7467, + "epoch": 0.00492895437485102, + "grad_norm": 909.1647338867188, + "learning_rate": 4.88e-07, + "loss": 31.8372, "step": 2440 }, { - "epoch": 0.009898310015069672, - "grad_norm": 608.339111328125, - "learning_rate": 4.9000000000000005e-06, - "loss": 241.5925, + "epoch": 0.004949155007534836, + "grad_norm": 392.2228698730469, + "learning_rate": 4.900000000000001e-07, + "loss": 28.5269, "step": 2450 }, { - "epoch": 0.009938711280437303, - "grad_norm": 1088.7662353515625, - "learning_rate": 4.92e-06, - "loss": 185.2763, + "epoch": 0.0049693556402186515, + "grad_norm": 572.967529296875, + "learning_rate": 4.92e-07, + "loss": 21.5063, "step": 2460 }, { - "epoch": 0.009979112545804934, - "grad_norm": 901.8641967773438, - "learning_rate": 4.94e-06, - "loss": 355.3011, + "epoch": 0.004989556272902467, + "grad_norm": 1103.427001953125, + "learning_rate": 4.940000000000001e-07, + "loss": 32.4583, "step": 2470 }, { - "epoch": 0.010019513811172566, - "grad_norm": 1227.393798828125, - "learning_rate": 4.96e-06, - "loss": 310.6582, + "epoch": 0.005009756905586283, + "grad_norm": 816.0195922851562, + "learning_rate": 4.96e-07, + "loss": 27.6086, "step": 2480 }, { - "epoch": 0.010059915076540197, - "grad_norm": 820.9786376953125, - "learning_rate": 4.98e-06, - "loss": 388.506, + "epoch": 0.0050299575382700985, + "grad_norm": 440.2334289550781, + "learning_rate": 4.98e-07, + "loss": 29.0266, "step": 2490 }, { - "epoch": 0.010100316341907828, - "grad_norm": 631.5418701171875, - "learning_rate": 5e-06, - "loss": 186.973, + "epoch": 0.005050158170953914, + "grad_norm": 528.32421875, + "learning_rate": 5.000000000000001e-07, + "loss": 47.6612, "step": 2500 }, { - "epoch": 0.01014071760727546, - "grad_norm": 976.1464233398438, - "learning_rate": 5.02e-06, - "loss": 260.0689, + "epoch": 0.00507035880363773, + "grad_norm": 596.6814575195312, + "learning_rate": 5.02e-07, + "loss": 26.5699, "step": 2510 }, { - "epoch": 0.010181118872643091, - "grad_norm": 537.5474853515625, - "learning_rate": 5.04e-06, - "loss": 245.8573, + "epoch": 0.0050905594363215454, + "grad_norm": 503.26715087890625, + "learning_rate": 5.040000000000001e-07, + "loss": 35.7597, "step": 2520 }, { - "epoch": 0.010221520138010722, - "grad_norm": 1130.83154296875, - "learning_rate": 5.06e-06, - "loss": 213.0081, + "epoch": 0.005110760069005361, + "grad_norm": 402.7926330566406, + "learning_rate": 5.06e-07, + "loss": 49.0305, "step": 2530 }, { - "epoch": 0.010261921403378354, - "grad_norm": 1045.5367431640625, - "learning_rate": 5.08e-06, - "loss": 273.8134, + "epoch": 0.005130960701689177, + "grad_norm": 734.1798706054688, + "learning_rate": 5.08e-07, + "loss": 33.3107, "step": 2540 }, { - "epoch": 0.010302322668745985, - "grad_norm": 934.2299194335938, - "learning_rate": 5.1e-06, - "loss": 351.3407, + "epoch": 0.005151161334372992, + "grad_norm": 430.47332763671875, + "learning_rate": 5.1e-07, + "loss": 17.1561, "step": 2550 }, { - "epoch": 0.010342723934113616, - "grad_norm": 603.625, - "learning_rate": 5.12e-06, - "loss": 189.3821, + "epoch": 0.005171361967056808, + "grad_norm": 171.5887451171875, + "learning_rate": 5.12e-07, + "loss": 24.0431, "step": 2560 }, { - "epoch": 0.010383125199481247, - "grad_norm": 2282.93115234375, - "learning_rate": 5.140000000000001e-06, - "loss": 306.1943, + "epoch": 0.005191562599740624, + "grad_norm": 303.52740478515625, + "learning_rate": 5.140000000000001e-07, + "loss": 25.7299, "step": 2570 }, { - "epoch": 0.010423526464848879, - "grad_norm": 1158.8792724609375, - "learning_rate": 5.1600000000000006e-06, - "loss": 293.8449, + "epoch": 0.005211763232424439, + "grad_norm": 103.0549087524414, + "learning_rate": 5.16e-07, + "loss": 50.7616, "step": 2580 }, { - "epoch": 0.01046392773021651, - "grad_norm": 1000.369140625, - "learning_rate": 5.18e-06, - "loss": 252.3016, + "epoch": 0.005231963865108255, + "grad_norm": 713.0372314453125, + "learning_rate": 5.180000000000001e-07, + "loss": 30.2423, "step": 2590 }, { - "epoch": 0.010504328995584141, - "grad_norm": 1298.2783203125, - "learning_rate": 5.2e-06, - "loss": 304.2064, + "epoch": 0.005252164497792071, + "grad_norm": 860.4334716796875, + "learning_rate": 5.2e-07, + "loss": 22.214, "step": 2600 }, { - "epoch": 0.010544730260951773, - "grad_norm": 2631.5205078125, - "learning_rate": 5.220000000000001e-06, - "loss": 272.8798, + "epoch": 0.005272365130475886, + "grad_norm": 2196.531494140625, + "learning_rate": 5.22e-07, + "loss": 48.7065, "step": 2610 }, { - "epoch": 0.010585131526319404, - "grad_norm": 1061.5810546875, - "learning_rate": 5.240000000000001e-06, - "loss": 403.6962, + "epoch": 0.005292565763159702, + "grad_norm": 193.64439392089844, + "learning_rate": 5.240000000000001e-07, + "loss": 20.2737, "step": 2620 }, { - "epoch": 0.010625532791687035, - "grad_norm": 835.53369140625, - "learning_rate": 5.2600000000000005e-06, - "loss": 367.6394, + "epoch": 0.005312766395843518, + "grad_norm": 609.69140625, + "learning_rate": 5.26e-07, + "loss": 47.1675, "step": 2630 }, { - "epoch": 0.010665934057054667, - "grad_norm": 867.9429931640625, - "learning_rate": 5.28e-06, - "loss": 321.1476, + "epoch": 0.005332967028527333, + "grad_norm": 1125.103759765625, + "learning_rate": 5.280000000000001e-07, + "loss": 44.6597, "step": 2640 }, { - "epoch": 0.010706335322422298, - "grad_norm": 3146.14013671875, - "learning_rate": 5.3e-06, - "loss": 266.9892, + "epoch": 0.005353167661211149, + "grad_norm": 624.8217163085938, + "learning_rate": 5.3e-07, + "loss": 29.0659, "step": 2650 }, { - "epoch": 0.01074673658778993, - "grad_norm": 640.1483764648438, - "learning_rate": 5.32e-06, - "loss": 206.9268, + "epoch": 0.005373368293894965, + "grad_norm": 366.8795471191406, + "learning_rate": 5.32e-07, + "loss": 28.1106, "step": 2660 }, { - "epoch": 0.01078713785315756, - "grad_norm": 842.3226928710938, - "learning_rate": 5.3400000000000005e-06, - "loss": 325.724, + "epoch": 0.00539356892657878, + "grad_norm": 585.3496704101562, + "learning_rate": 5.340000000000001e-07, + "loss": 22.2636, "step": 2670 }, { - "epoch": 0.010827539118525192, - "grad_norm": 717.4343872070312, - "learning_rate": 5.36e-06, - "loss": 314.9767, + "epoch": 0.005413769559262596, + "grad_norm": 785.0697631835938, + "learning_rate": 5.36e-07, + "loss": 54.8097, "step": 2680 }, { - "epoch": 0.010867940383892823, - "grad_norm": 5089.775390625, - "learning_rate": 5.38e-06, - "loss": 329.5498, + "epoch": 0.005433970191946412, + "grad_norm": 433.1402893066406, + "learning_rate": 5.380000000000001e-07, + "loss": 14.2988, "step": 2690 }, { - "epoch": 0.010908341649260455, - "grad_norm": 1157.4693603515625, - "learning_rate": 5.4e-06, - "loss": 339.251, + "epoch": 0.005454170824630227, + "grad_norm": 614.6635131835938, + "learning_rate": 5.4e-07, + "loss": 36.9219, "step": 2700 }, { - "epoch": 0.010948742914628086, - "grad_norm": 1268.937744140625, - "learning_rate": 5.42e-06, - "loss": 361.0937, + "epoch": 0.005474371457314043, + "grad_norm": 1097.97314453125, + "learning_rate": 5.420000000000001e-07, + "loss": 32.1943, "step": 2710 }, { - "epoch": 0.010989144179995717, - "grad_norm": 855.2471313476562, - "learning_rate": 5.44e-06, - "loss": 285.6735, + "epoch": 0.005494572089997859, + "grad_norm": 221.27752685546875, + "learning_rate": 5.44e-07, + "loss": 17.7383, "step": 2720 }, { - "epoch": 0.011029545445363349, - "grad_norm": 777.139892578125, - "learning_rate": 5.46e-06, - "loss": 275.3022, + "epoch": 0.005514772722681674, + "grad_norm": 846.3823852539062, + "learning_rate": 5.46e-07, + "loss": 32.384, "step": 2730 }, { - "epoch": 0.01106994671073098, - "grad_norm": 2281.784423828125, - "learning_rate": 5.48e-06, - "loss": 264.5153, + "epoch": 0.00553497335536549, + "grad_norm": 1222.7154541015625, + "learning_rate": 5.480000000000001e-07, + "loss": 40.9341, "step": 2740 }, { - "epoch": 0.011110347976098611, - "grad_norm": 737.9111328125, - "learning_rate": 5.500000000000001e-06, - "loss": 200.1999, + "epoch": 0.005555173988049306, + "grad_norm": 1121.3111572265625, + "learning_rate": 5.5e-07, + "loss": 23.7586, "step": 2750 }, { - "epoch": 0.011150749241466243, - "grad_norm": 7024.08251953125, - "learning_rate": 5.5200000000000005e-06, - "loss": 316.7326, + "epoch": 0.005575374620733121, + "grad_norm": 1418.0941162109375, + "learning_rate": 5.520000000000001e-07, + "loss": 44.7959, "step": 2760 }, { - "epoch": 0.011191150506833874, - "grad_norm": 1147.371826171875, - "learning_rate": 5.54e-06, - "loss": 238.9098, + "epoch": 0.005595575253416937, + "grad_norm": 352.4377746582031, + "learning_rate": 5.54e-07, + "loss": 18.5532, "step": 2770 }, { - "epoch": 0.011231551772201505, - "grad_norm": 873.0858154296875, - "learning_rate": 5.56e-06, - "loss": 249.4478, + "epoch": 0.005615775886100753, + "grad_norm": 682.9746704101562, + "learning_rate": 5.560000000000001e-07, + "loss": 62.9547, "step": 2780 }, { - "epoch": 0.011271953037569137, - "grad_norm": 2017.5811767578125, - "learning_rate": 5.580000000000001e-06, - "loss": 280.6727, + "epoch": 0.005635976518784568, + "grad_norm": 69.55480194091797, + "learning_rate": 5.580000000000001e-07, + "loss": 44.0819, "step": 2790 }, { - "epoch": 0.011312354302936768, - "grad_norm": 689.9805908203125, - "learning_rate": 5.600000000000001e-06, - "loss": 238.7173, + "epoch": 0.005656177151468384, + "grad_norm": 178.6747589111328, + "learning_rate": 5.6e-07, + "loss": 28.2389, "step": 2800 }, { - "epoch": 0.0113527555683044, - "grad_norm": 1248.52490234375, - "learning_rate": 5.62e-06, - "loss": 243.3881, + "epoch": 0.0056763777841522, + "grad_norm": 662.1491088867188, + "learning_rate": 5.620000000000001e-07, + "loss": 19.7388, "step": 2810 }, { - "epoch": 0.01139315683367203, - "grad_norm": 756.8421020507812, - "learning_rate": 5.64e-06, - "loss": 192.3346, + "epoch": 0.005696578416836015, + "grad_norm": 310.38323974609375, + "learning_rate": 5.64e-07, + "loss": 39.9212, "step": 2820 }, { - "epoch": 0.011433558099039662, - "grad_norm": 1675.668701171875, - "learning_rate": 5.66e-06, - "loss": 323.5421, + "epoch": 0.005716779049519831, + "grad_norm": 256.0929870605469, + "learning_rate": 5.660000000000001e-07, + "loss": 22.1921, "step": 2830 }, { - "epoch": 0.011473959364407293, - "grad_norm": 730.6026000976562, - "learning_rate": 5.680000000000001e-06, - "loss": 235.7116, + "epoch": 0.0057369796822036466, + "grad_norm": 934.5228881835938, + "learning_rate": 5.680000000000001e-07, + "loss": 32.7585, "step": 2840 }, { - "epoch": 0.011514360629774924, - "grad_norm": 962.4594116210938, - "learning_rate": 5.7000000000000005e-06, - "loss": 284.3505, + "epoch": 0.005757180314887462, + "grad_norm": 703.0299072265625, + "learning_rate": 5.7e-07, + "loss": 37.5443, "step": 2850 }, { - "epoch": 0.011554761895142556, - "grad_norm": 737.1028442382812, - "learning_rate": 5.72e-06, - "loss": 279.0431, + "epoch": 0.005777380947571278, + "grad_norm": 687.6595458984375, + "learning_rate": 5.720000000000001e-07, + "loss": 24.1511, "step": 2860 }, { - "epoch": 0.011595163160510187, - "grad_norm": 1873.3394775390625, - "learning_rate": 5.74e-06, - "loss": 293.4966, + "epoch": 0.0057975815802550935, + "grad_norm": 400.9228820800781, + "learning_rate": 5.74e-07, + "loss": 23.191, "step": 2870 }, { - "epoch": 0.011635564425877818, - "grad_norm": 699.8680419921875, - "learning_rate": 5.76e-06, - "loss": 201.6691, + "epoch": 0.005817782212938909, + "grad_norm": 810.8984375, + "learning_rate": 5.760000000000001e-07, + "loss": 28.8146, "step": 2880 }, { - "epoch": 0.01167596569124545, - "grad_norm": 1195.6563720703125, - "learning_rate": 5.78e-06, - "loss": 339.2745, + "epoch": 0.005837982845622725, + "grad_norm": 528.00634765625, + "learning_rate": 5.78e-07, + "loss": 27.1717, "step": 2890 }, { - "epoch": 0.011716366956613081, - "grad_norm": 1244.9783935546875, - "learning_rate": 5.8e-06, - "loss": 295.2553, + "epoch": 0.0058581834783065405, + "grad_norm": 282.5981140136719, + "learning_rate": 5.800000000000001e-07, + "loss": 19.146, "step": 2900 }, { - "epoch": 0.011756768221980712, - "grad_norm": 1545.2462158203125, - "learning_rate": 5.82e-06, - "loss": 380.4514, + "epoch": 0.005878384110990356, + "grad_norm": 767.5343627929688, + "learning_rate": 5.820000000000001e-07, + "loss": 29.7168, "step": 2910 }, { - "epoch": 0.011797169487348344, - "grad_norm": 580.0228271484375, - "learning_rate": 5.84e-06, - "loss": 219.7412, + "epoch": 0.005898584743674172, + "grad_norm": 1321.3330078125, + "learning_rate": 5.84e-07, + "loss": 51.2614, "step": 2920 }, { - "epoch": 0.011837570752715975, - "grad_norm": 741.6790771484375, - "learning_rate": 5.86e-06, - "loss": 223.957, + "epoch": 0.0059187853763579875, + "grad_norm": 422.82183837890625, + "learning_rate": 5.860000000000001e-07, + "loss": 25.8771, "step": 2930 }, { - "epoch": 0.011877972018083606, - "grad_norm": 633.643798828125, - "learning_rate": 5.8800000000000005e-06, - "loss": 292.0679, + "epoch": 0.005938986009041803, + "grad_norm": 170.95977783203125, + "learning_rate": 5.88e-07, + "loss": 16.7884, "step": 2940 }, { - "epoch": 0.011918373283451238, - "grad_norm": 11992.681640625, - "learning_rate": 5.9e-06, - "loss": 365.1848, + "epoch": 0.005959186641725619, + "grad_norm": 752.912841796875, + "learning_rate": 5.900000000000001e-07, + "loss": 24.5547, "step": 2950 }, { - "epoch": 0.011958774548818869, - "grad_norm": 2407.924072265625, - "learning_rate": 5.920000000000001e-06, - "loss": 284.2829, + "epoch": 0.0059793872744094345, + "grad_norm": 47.68494415283203, + "learning_rate": 5.920000000000001e-07, + "loss": 22.6978, "step": 2960 }, { - "epoch": 0.0119991758141865, - "grad_norm": 9947.2861328125, - "learning_rate": 5.940000000000001e-06, - "loss": 336.0969, + "epoch": 0.00599958790709325, + "grad_norm": 537.8870849609375, + "learning_rate": 5.94e-07, + "loss": 49.1119, "step": 2970 }, { - "epoch": 0.012039577079554132, - "grad_norm": 1071.607177734375, - "learning_rate": 5.9600000000000005e-06, - "loss": 288.0666, + "epoch": 0.006019788539777066, + "grad_norm": 487.58251953125, + "learning_rate": 5.960000000000001e-07, + "loss": 33.8852, "step": 2980 }, { - "epoch": 0.012079978344921763, - "grad_norm": 1607.6715087890625, - "learning_rate": 5.98e-06, - "loss": 334.0282, + "epoch": 0.0060399891724608815, + "grad_norm": 578.7280883789062, + "learning_rate": 5.98e-07, + "loss": 19.5557, "step": 2990 }, { - "epoch": 0.012120379610289394, - "grad_norm": 616.33642578125, - "learning_rate": 6e-06, - "loss": 176.2524, + "epoch": 0.006060189805144697, + "grad_norm": 357.2806701660156, + "learning_rate": 6.000000000000001e-07, + "loss": 13.2956, "step": 3000 }, { - "epoch": 0.012160780875657026, - "grad_norm": 1787.1187744140625, - "learning_rate": 6.02e-06, - "loss": 262.5472, + "epoch": 0.006080390437828513, + "grad_norm": 49.976646423339844, + "learning_rate": 6.02e-07, + "loss": 23.7729, "step": 3010 }, { - "epoch": 0.012201182141024657, - "grad_norm": 680.6809692382812, - "learning_rate": 6.040000000000001e-06, - "loss": 195.7003, + "epoch": 0.0061005910705123284, + "grad_norm": 1392.4190673828125, + "learning_rate": 6.040000000000001e-07, + "loss": 27.7141, "step": 3020 }, { - "epoch": 0.012241583406392288, - "grad_norm": 1115.2088623046875, - "learning_rate": 6.0600000000000004e-06, - "loss": 291.6268, + "epoch": 0.006120791703196144, + "grad_norm": 547.042236328125, + "learning_rate": 6.060000000000001e-07, + "loss": 22.2183, "step": 3030 }, { - "epoch": 0.01228198467175992, - "grad_norm": 1072.4854736328125, - "learning_rate": 6.08e-06, - "loss": 399.983, + "epoch": 0.00614099233587996, + "grad_norm": 363.829833984375, + "learning_rate": 6.08e-07, + "loss": 25.6521, "step": 3040 }, { - "epoch": 0.01232238593712755, - "grad_norm": 1406.6270751953125, - "learning_rate": 6.1e-06, - "loss": 332.8581, + "epoch": 0.006161192968563775, + "grad_norm": 1138.9224853515625, + "learning_rate": 6.100000000000001e-07, + "loss": 28.1272, "step": 3050 }, { - "epoch": 0.012362787202495182, - "grad_norm": 915.8823852539062, - "learning_rate": 6.12e-06, - "loss": 242.4036, + "epoch": 0.006181393601247591, + "grad_norm": 388.612548828125, + "learning_rate": 6.12e-07, + "loss": 25.8512, "step": 3060 }, { - "epoch": 0.012403188467862813, - "grad_norm": 1265.71826171875, - "learning_rate": 6.1400000000000005e-06, - "loss": 240.312, + "epoch": 0.006201594233931407, + "grad_norm": 633.0219116210938, + "learning_rate": 6.140000000000001e-07, + "loss": 19.1016, "step": 3070 }, { - "epoch": 0.012443589733230445, - "grad_norm": 1575.8834228515625, - "learning_rate": 6.16e-06, - "loss": 207.945, + "epoch": 0.006221794866615222, + "grad_norm": 0.0, + "learning_rate": 6.160000000000001e-07, + "loss": 38.8512, "step": 3080 }, { - "epoch": 0.012483990998598076, - "grad_norm": 1585.046142578125, - "learning_rate": 6.18e-06, - "loss": 328.1433, + "epoch": 0.006241995499299038, + "grad_norm": 579.1844482421875, + "learning_rate": 6.180000000000001e-07, + "loss": 23.9865, "step": 3090 }, { - "epoch": 0.012524392263965707, - "grad_norm": 3717.249755859375, - "learning_rate": 6.2e-06, - "loss": 368.012, + "epoch": 0.006262196131982854, + "grad_norm": 860.4633178710938, + "learning_rate": 6.200000000000001e-07, + "loss": 20.4271, "step": 3100 }, { - "epoch": 0.012564793529333339, - "grad_norm": 784.789306640625, - "learning_rate": 6.22e-06, - "loss": 339.9229, + "epoch": 0.006282396764666669, + "grad_norm": 406.67791748046875, + "learning_rate": 6.22e-07, + "loss": 32.6324, "step": 3110 }, { - "epoch": 0.01260519479470097, - "grad_norm": 1485.326171875, - "learning_rate": 6.24e-06, - "loss": 345.8715, + "epoch": 0.006302597397350485, + "grad_norm": 110.9633560180664, + "learning_rate": 6.24e-07, + "loss": 14.6998, "step": 3120 }, { - "epoch": 0.012645596060068601, - "grad_norm": 883.3239135742188, - "learning_rate": 6.26e-06, - "loss": 215.2944, + "epoch": 0.006322798030034301, + "grad_norm": 445.50244140625, + "learning_rate": 6.260000000000001e-07, + "loss": 16.6768, "step": 3130 }, { - "epoch": 0.012685997325436233, - "grad_norm": 665.870849609375, - "learning_rate": 6.28e-06, - "loss": 247.7601, + "epoch": 0.006342998662718116, + "grad_norm": 1032.125244140625, + "learning_rate": 6.28e-07, + "loss": 37.2087, "step": 3140 }, { - "epoch": 0.012726398590803864, - "grad_norm": 4960.85546875, - "learning_rate": 6.300000000000001e-06, - "loss": 272.3599, + "epoch": 0.006363199295401932, + "grad_norm": 284.1348876953125, + "learning_rate": 6.3e-07, + "loss": 25.7215, "step": 3150 }, { - "epoch": 0.012766799856171495, - "grad_norm": 1986.215576171875, - "learning_rate": 6.320000000000001e-06, - "loss": 261.7445, + "epoch": 0.006383399928085748, + "grad_norm": 1127.0638427734375, + "learning_rate": 6.320000000000002e-07, + "loss": 43.177, "step": 3160 }, { - "epoch": 0.012807201121539127, - "grad_norm": 689.2445678710938, - "learning_rate": 6.34e-06, - "loss": 186.6741, + "epoch": 0.006403600560769563, + "grad_norm": 858.960205078125, + "learning_rate": 6.34e-07, + "loss": 29.6178, "step": 3170 }, { - "epoch": 0.012847602386906758, - "grad_norm": 1272.3314208984375, - "learning_rate": 6.360000000000001e-06, - "loss": 362.3708, + "epoch": 0.006423801193453379, + "grad_norm": 632.6044921875, + "learning_rate": 6.360000000000001e-07, + "loss": 24.2948, "step": 3180 }, { - "epoch": 0.01288800365227439, - "grad_norm": 1243.4554443359375, - "learning_rate": 6.38e-06, - "loss": 223.126, + "epoch": 0.006444001826137195, + "grad_norm": 385.60772705078125, + "learning_rate": 6.38e-07, + "loss": 29.6091, "step": 3190 }, { - "epoch": 0.01292840491764202, - "grad_norm": 0.0, - "learning_rate": 6.4000000000000006e-06, - "loss": 220.924, + "epoch": 0.00646420245882101, + "grad_norm": 735.461181640625, + "learning_rate": 6.4e-07, + "loss": 36.3189, "step": 3200 }, { - "epoch": 0.012968806183009652, - "grad_norm": 1140.0751953125, - "learning_rate": 6.4199999999999995e-06, - "loss": 304.4165, + "epoch": 0.006484403091504826, + "grad_norm": 806.14208984375, + "learning_rate": 6.42e-07, + "loss": 40.9133, "step": 3210 }, { - "epoch": 0.013009207448377283, - "grad_norm": 865.4572143554688, - "learning_rate": 6.44e-06, - "loss": 225.4173, + "epoch": 0.006504603724188642, + "grad_norm": 197.8910675048828, + "learning_rate": 6.44e-07, + "loss": 22.794, "step": 3220 }, { - "epoch": 0.013049608713744915, - "grad_norm": 1003.0341796875, - "learning_rate": 6.460000000000001e-06, - "loss": 318.8464, + "epoch": 0.006524804356872457, + "grad_norm": 464.4063720703125, + "learning_rate": 6.460000000000001e-07, + "loss": 25.8888, "step": 3230 }, { - "epoch": 0.013090009979112546, - "grad_norm": 3230.90966796875, - "learning_rate": 6.48e-06, - "loss": 266.8362, + "epoch": 0.006545004989556273, + "grad_norm": 897.4555053710938, + "learning_rate": 6.48e-07, + "loss": 45.3982, "step": 3240 }, { - "epoch": 0.013130411244480177, - "grad_norm": 5314.744140625, - "learning_rate": 6.5000000000000004e-06, - "loss": 356.5823, + "epoch": 0.006565205622240089, + "grad_norm": 1184.5333251953125, + "learning_rate": 6.5e-07, + "loss": 45.8197, "step": 3250 }, { - "epoch": 0.013170812509847809, - "grad_norm": 1545.9931640625, - "learning_rate": 6.519999999999999e-06, - "loss": 267.9298, + "epoch": 0.006585406254923904, + "grad_norm": 245.31578063964844, + "learning_rate": 6.52e-07, + "loss": 22.9282, "step": 3260 }, { - "epoch": 0.01321121377521544, - "grad_norm": 1124.9794921875, - "learning_rate": 6.54e-06, - "loss": 350.8778, + "epoch": 0.00660560688760772, + "grad_norm": 396.6833190917969, + "learning_rate": 6.54e-07, + "loss": 36.6589, "step": 3270 }, { - "epoch": 0.013251615040583071, - "grad_norm": 1584.3470458984375, - "learning_rate": 6.560000000000001e-06, - "loss": 268.6283, + "epoch": 0.006625807520291536, + "grad_norm": 827.7760620117188, + "learning_rate": 6.560000000000002e-07, + "loss": 27.5275, "step": 3280 }, { - "epoch": 0.013292016305950703, - "grad_norm": 1276.875244140625, - "learning_rate": 6.58e-06, - "loss": 401.9857, + "epoch": 0.006646008152975351, + "grad_norm": 698.2625122070312, + "learning_rate": 6.58e-07, + "loss": 43.7663, "step": 3290 }, { - "epoch": 0.013332417571318334, - "grad_norm": 1092.377197265625, - "learning_rate": 6.6e-06, - "loss": 331.368, + "epoch": 0.006666208785659167, + "grad_norm": 804.8634033203125, + "learning_rate": 6.6e-07, + "loss": 42.3979, "step": 3300 }, { - "epoch": 0.013372818836685965, - "grad_norm": 1179.2205810546875, - "learning_rate": 6.62e-06, - "loss": 177.4306, + "epoch": 0.006686409418342983, + "grad_norm": 381.66253662109375, + "learning_rate": 6.62e-07, + "loss": 28.6512, "step": 3310 }, { - "epoch": 0.013413220102053596, - "grad_norm": 1153.752685546875, - "learning_rate": 6.640000000000001e-06, - "loss": 229.9581, + "epoch": 0.006706610051026798, + "grad_norm": 274.6836242675781, + "learning_rate": 6.64e-07, + "loss": 13.3558, "step": 3320 }, { - "epoch": 0.013453621367421228, - "grad_norm": 998.432373046875, - "learning_rate": 6.660000000000001e-06, - "loss": 266.7427, + "epoch": 0.006726810683710614, + "grad_norm": 640.2205200195312, + "learning_rate": 6.660000000000002e-07, + "loss": 31.3027, "step": 3330 }, { - "epoch": 0.013494022632788859, - "grad_norm": 1151.6092529296875, - "learning_rate": 6.68e-06, - "loss": 331.803, + "epoch": 0.0067470113163944296, + "grad_norm": 545.2610473632812, + "learning_rate": 6.68e-07, + "loss": 19.2501, "step": 3340 }, { - "epoch": 0.01353442389815649, - "grad_norm": 2472.0546875, - "learning_rate": 6.700000000000001e-06, - "loss": 236.3656, + "epoch": 0.006767211949078245, + "grad_norm": 652.1705322265625, + "learning_rate": 6.7e-07, + "loss": 37.1378, "step": 3350 }, { - "epoch": 0.013574825163524122, - "grad_norm": 1243.1534423828125, - "learning_rate": 6.72e-06, - "loss": 180.7836, + "epoch": 0.006787412581762061, + "grad_norm": 1061.9658203125, + "learning_rate": 6.72e-07, + "loss": 21.7822, "step": 3360 }, { - "epoch": 0.013615226428891753, - "grad_norm": 1034.3489990234375, - "learning_rate": 6.740000000000001e-06, - "loss": 343.4018, + "epoch": 0.0068076132144458765, + "grad_norm": 556.8729248046875, + "learning_rate": 6.74e-07, + "loss": 24.4347, "step": 3370 }, { - "epoch": 0.013655627694259384, - "grad_norm": 905.5031127929688, - "learning_rate": 6.76e-06, - "loss": 140.6687, + "epoch": 0.006827813847129692, + "grad_norm": 522.956787109375, + "learning_rate": 6.76e-07, + "loss": 27.4032, "step": 3380 }, { - "epoch": 0.013696028959627016, - "grad_norm": 1340.9151611328125, - "learning_rate": 6.78e-06, - "loss": 289.7602, + "epoch": 0.006848014479813508, + "grad_norm": 531.7928466796875, + "learning_rate": 6.78e-07, + "loss": 19.0332, "step": 3390 }, { - "epoch": 0.013736430224994647, - "grad_norm": 960.6358642578125, - "learning_rate": 6.800000000000001e-06, - "loss": 226.2994, + "epoch": 0.0068682151124973235, + "grad_norm": 804.1568603515625, + "learning_rate": 6.800000000000001e-07, + "loss": 34.7435, "step": 3400 }, { - "epoch": 0.013776831490362278, - "grad_norm": 1423.4761962890625, - "learning_rate": 6.82e-06, - "loss": 430.8338, + "epoch": 0.006888415745181139, + "grad_norm": 1052.230712890625, + "learning_rate": 6.82e-07, + "loss": 25.6694, "step": 3410 }, { - "epoch": 0.01381723275572991, - "grad_norm": 1304.45263671875, - "learning_rate": 6.840000000000001e-06, - "loss": 328.4004, + "epoch": 0.006908616377864955, + "grad_norm": 526.1654663085938, + "learning_rate": 6.84e-07, + "loss": 22.8071, "step": 3420 }, { - "epoch": 0.013857634021097541, - "grad_norm": 9199.7373046875, - "learning_rate": 6.8599999999999995e-06, - "loss": 270.6068, + "epoch": 0.0069288170105487705, + "grad_norm": 1492.260986328125, + "learning_rate": 6.86e-07, + "loss": 20.8931, "step": 3430 }, { - "epoch": 0.013898035286465172, - "grad_norm": 776.5551147460938, - "learning_rate": 6.88e-06, - "loss": 176.4451, + "epoch": 0.006949017643232586, + "grad_norm": 2416.557373046875, + "learning_rate": 6.88e-07, + "loss": 45.9088, "step": 3440 }, { - "epoch": 0.013938436551832804, - "grad_norm": 774.5919799804688, - "learning_rate": 6.900000000000001e-06, - "loss": 258.9075, + "epoch": 0.006969218275916402, + "grad_norm": 138.57374572753906, + "learning_rate": 6.900000000000001e-07, + "loss": 20.0898, "step": 3450 }, { - "epoch": 0.013978837817200435, - "grad_norm": 1491.8228759765625, - "learning_rate": 6.92e-06, - "loss": 295.3933, + "epoch": 0.0069894189086002175, + "grad_norm": 517.4945678710938, + "learning_rate": 6.92e-07, + "loss": 26.9102, "step": 3460 }, { - "epoch": 0.014019239082568066, - "grad_norm": 1491.8526611328125, - "learning_rate": 6.9400000000000005e-06, - "loss": 239.9352, + "epoch": 0.007009619541284033, + "grad_norm": 426.5979919433594, + "learning_rate": 6.94e-07, + "loss": 39.5767, "step": 3470 }, { - "epoch": 0.014059640347935698, - "grad_norm": 1934.498779296875, - "learning_rate": 6.9599999999999994e-06, - "loss": 275.0346, + "epoch": 0.007029820173967849, + "grad_norm": 208.76681518554688, + "learning_rate": 6.96e-07, + "loss": 42.304, "step": 3480 }, { - "epoch": 0.014100041613303329, - "grad_norm": 1181.4918212890625, - "learning_rate": 6.98e-06, - "loss": 238.241, + "epoch": 0.0070500208066516645, + "grad_norm": 478.20050048828125, + "learning_rate": 6.98e-07, + "loss": 23.9574, "step": 3490 }, { - "epoch": 0.01414044287867096, - "grad_norm": 1785.65478515625, - "learning_rate": 7.000000000000001e-06, - "loss": 163.911, + "epoch": 0.00707022143933548, + "grad_norm": 1073.7861328125, + "learning_rate": 7.000000000000001e-07, + "loss": 40.8396, "step": 3500 }, { - "epoch": 0.014180844144038592, - "grad_norm": 601.7042846679688, - "learning_rate": 7.0200000000000006e-06, - "loss": 200.9242, + "epoch": 0.007090422072019296, + "grad_norm": 164.99581909179688, + "learning_rate": 7.02e-07, + "loss": 54.3387, "step": 3510 }, { - "epoch": 0.014221245409406223, - "grad_norm": 1390.438720703125, - "learning_rate": 7.04e-06, - "loss": 225.1192, + "epoch": 0.0071106227047031114, + "grad_norm": 559.994873046875, + "learning_rate": 7.040000000000001e-07, + "loss": 23.7497, "step": 3520 }, { - "epoch": 0.014261646674773854, - "grad_norm": 1239.8173828125, - "learning_rate": 7.06e-06, - "loss": 207.541, + "epoch": 0.007130823337386927, + "grad_norm": 248.2763671875, + "learning_rate": 7.06e-07, + "loss": 14.4129, "step": 3530 }, { - "epoch": 0.014302047940141486, - "grad_norm": 5047.65478515625, - "learning_rate": 7.080000000000001e-06, - "loss": 200.282, + "epoch": 0.007151023970070743, + "grad_norm": 115.21528625488281, + "learning_rate": 7.08e-07, + "loss": 20.9172, "step": 3540 }, { - "epoch": 0.014342449205509117, - "grad_norm": 684.9512939453125, - "learning_rate": 7.1e-06, - "loss": 200.1313, + "epoch": 0.007171224602754558, + "grad_norm": 529.1926879882812, + "learning_rate": 7.1e-07, + "loss": 29.7155, "step": 3550 }, { - "epoch": 0.014382850470876748, - "grad_norm": 538.3458862304688, - "learning_rate": 7.1200000000000004e-06, - "loss": 271.5458, + "epoch": 0.007191425235438374, + "grad_norm": 931.08056640625, + "learning_rate": 7.12e-07, + "loss": 48.6689, "step": 3560 }, { - "epoch": 0.01442325173624438, - "grad_norm": 1814.7567138671875, - "learning_rate": 7.140000000000001e-06, - "loss": 227.9855, + "epoch": 0.00721162586812219, + "grad_norm": 325.41485595703125, + "learning_rate": 7.140000000000001e-07, + "loss": 19.5409, "step": 3570 }, { - "epoch": 0.01446365300161201, - "grad_norm": 617.8508911132812, - "learning_rate": 7.16e-06, - "loss": 152.5538, + "epoch": 0.007231826500806005, + "grad_norm": 255.26974487304688, + "learning_rate": 7.16e-07, + "loss": 13.3323, "step": 3580 }, { - "epoch": 0.014504054266979642, - "grad_norm": 1195.5587158203125, - "learning_rate": 7.180000000000001e-06, - "loss": 220.7645, + "epoch": 0.007252027133489821, + "grad_norm": 174.78330993652344, + "learning_rate": 7.18e-07, + "loss": 27.6813, "step": 3590 }, { - "epoch": 0.014544455532347273, - "grad_norm": 963.2885131835938, - "learning_rate": 7.2e-06, - "loss": 237.8198, + "epoch": 0.007272227766173637, + "grad_norm": 259.9703369140625, + "learning_rate": 7.2e-07, + "loss": 23.6667, "step": 3600 }, { - "epoch": 0.014584856797714905, - "grad_norm": 1143.399658203125, - "learning_rate": 7.22e-06, - "loss": 246.5691, + "epoch": 0.007292428398857452, + "grad_norm": 510.34515380859375, + "learning_rate": 7.22e-07, + "loss": 43.1148, "step": 3610 }, { - "epoch": 0.014625258063082536, - "grad_norm": 936.9075927734375, - "learning_rate": 7.240000000000001e-06, - "loss": 225.2431, + "epoch": 0.007312629031541268, + "grad_norm": 369.45806884765625, + "learning_rate": 7.240000000000001e-07, + "loss": 18.297, "step": 3620 }, { - "epoch": 0.014665659328450167, - "grad_norm": 505.3612365722656, - "learning_rate": 7.26e-06, - "loss": 209.9184, + "epoch": 0.007332829664225084, + "grad_norm": 492.9736022949219, + "learning_rate": 7.26e-07, + "loss": 34.2994, "step": 3630 }, { - "epoch": 0.014706060593817799, - "grad_norm": 4472.5458984375, - "learning_rate": 7.280000000000001e-06, - "loss": 251.8955, + "epoch": 0.007353030296908899, + "grad_norm": 611.5191650390625, + "learning_rate": 7.280000000000001e-07, + "loss": 40.5437, "step": 3640 }, { - "epoch": 0.01474646185918543, - "grad_norm": 1152.7425537109375, - "learning_rate": 7.2999999999999996e-06, - "loss": 267.1611, + "epoch": 0.007373230929592715, + "grad_norm": 478.23126220703125, + "learning_rate": 7.3e-07, + "loss": 42.8498, "step": 3650 }, { - "epoch": 0.014786863124553061, - "grad_norm": 1535.3359375, - "learning_rate": 7.32e-06, - "loss": 258.2474, + "epoch": 0.007393431562276531, + "grad_norm": 822.2637329101562, + "learning_rate": 7.32e-07, + "loss": 42.8805, "step": 3660 }, { - "epoch": 0.014827264389920693, - "grad_norm": 788.0407104492188, - "learning_rate": 7.340000000000001e-06, - "loss": 308.7459, + "epoch": 0.007413632194960346, + "grad_norm": 572.3778686523438, + "learning_rate": 7.340000000000001e-07, + "loss": 21.5523, "step": 3670 }, { - "epoch": 0.014867665655288324, - "grad_norm": 3632.613037109375, - "learning_rate": 7.36e-06, - "loss": 270.6488, + "epoch": 0.007433832827644162, + "grad_norm": 610.9832153320312, + "learning_rate": 7.36e-07, + "loss": 24.501, "step": 3680 }, { - "epoch": 0.014908066920655955, - "grad_norm": 1338.3414306640625, - "learning_rate": 7.3800000000000005e-06, - "loss": 402.1751, + "epoch": 0.007454033460327978, + "grad_norm": 444.3849792480469, + "learning_rate": 7.380000000000001e-07, + "loss": 29.0815, "step": 3690 }, { - "epoch": 0.014948468186023587, - "grad_norm": 2017.5732421875, - "learning_rate": 7.4e-06, - "loss": 335.9066, + "epoch": 0.007474234093011793, + "grad_norm": 334.1302490234375, + "learning_rate": 7.4e-07, + "loss": 29.4803, "step": 3700 }, { - "epoch": 0.014988869451391218, - "grad_norm": 866.8572998046875, - "learning_rate": 7.420000000000001e-06, - "loss": 228.7708, + "epoch": 0.007494434725695609, + "grad_norm": 484.7538757324219, + "learning_rate": 7.420000000000001e-07, + "loss": 34.7338, "step": 3710 }, { - "epoch": 0.01502927071675885, - "grad_norm": 2627.818359375, - "learning_rate": 7.44e-06, - "loss": 237.4983, + "epoch": 0.007514635358379425, + "grad_norm": 557.7028198242188, + "learning_rate": 7.44e-07, + "loss": 45.3084, "step": 3720 }, { - "epoch": 0.01506967198212648, - "grad_norm": 1014.5845947265625, - "learning_rate": 7.4600000000000006e-06, - "loss": 289.3517, + "epoch": 0.00753483599106324, + "grad_norm": 419.6079406738281, + "learning_rate": 7.46e-07, + "loss": 38.1011, "step": 3730 }, { - "epoch": 0.015110073247494112, - "grad_norm": 1206.6943359375, - "learning_rate": 7.480000000000001e-06, - "loss": 253.7986, + "epoch": 0.007555036623747056, + "grad_norm": 0.0, + "learning_rate": 7.480000000000001e-07, + "loss": 34.1749, "step": 3740 }, { - "epoch": 0.015150474512861743, - "grad_norm": 1855.4755859375, - "learning_rate": 7.5e-06, - "loss": 384.5513, + "epoch": 0.007575237256430872, + "grad_norm": 608.0559692382812, + "learning_rate": 7.5e-07, + "loss": 12.6068, "step": 3750 }, { - "epoch": 0.015190875778229375, - "grad_norm": 745.6865844726562, - "learning_rate": 7.520000000000001e-06, - "loss": 246.6602, + "epoch": 0.007595437889114687, + "grad_norm": 1825.6767578125, + "learning_rate": 7.520000000000001e-07, + "loss": 36.8878, "step": 3760 }, { - "epoch": 0.015231277043597006, - "grad_norm": 1637.106201171875, - "learning_rate": 7.54e-06, - "loss": 291.9528, + "epoch": 0.007615638521798503, + "grad_norm": 347.3094787597656, + "learning_rate": 7.54e-07, + "loss": 26.2681, "step": 3770 }, { - "epoch": 0.015271678308964637, - "grad_norm": 2112.7109375, - "learning_rate": 7.5600000000000005e-06, - "loss": 228.7163, + "epoch": 0.007635839154482319, + "grad_norm": 1999.683837890625, + "learning_rate": 7.56e-07, + "loss": 40.3283, "step": 3780 }, { - "epoch": 0.015312079574332269, - "grad_norm": 1741.5140380859375, - "learning_rate": 7.580000000000001e-06, - "loss": 238.9058, + "epoch": 0.007656039787166134, + "grad_norm": 520.2977905273438, + "learning_rate": 7.580000000000001e-07, + "loss": 34.4978, "step": 3790 }, { - "epoch": 0.0153524808396999, - "grad_norm": 991.5599365234375, - "learning_rate": 7.6e-06, - "loss": 172.588, + "epoch": 0.00767624041984995, + "grad_norm": 457.4718322753906, + "learning_rate": 7.6e-07, + "loss": 37.1305, "step": 3800 }, { - "epoch": 0.015392882105067531, - "grad_norm": 1450.9693603515625, - "learning_rate": 7.620000000000001e-06, - "loss": 298.7596, + "epoch": 0.007696441052533766, + "grad_norm": 241.23533630371094, + "learning_rate": 7.620000000000001e-07, + "loss": 34.1395, "step": 3810 }, { - "epoch": 0.015433283370435162, - "grad_norm": 602.7521362304688, - "learning_rate": 7.64e-06, - "loss": 212.8531, + "epoch": 0.007716641685217581, + "grad_norm": 1090.16943359375, + "learning_rate": 7.64e-07, + "loss": 32.3401, "step": 3820 }, { - "epoch": 0.015473684635802794, - "grad_norm": 1867.60546875, - "learning_rate": 7.660000000000001e-06, - "loss": 314.0657, + "epoch": 0.007736842317901397, + "grad_norm": 799.810546875, + "learning_rate": 7.660000000000001e-07, + "loss": 18.844, "step": 3830 }, { - "epoch": 0.015514085901170425, - "grad_norm": 6575.3671875, - "learning_rate": 7.68e-06, - "loss": 181.4101, + "epoch": 0.0077570429505852126, + "grad_norm": 513.3098754882812, + "learning_rate": 7.68e-07, + "loss": 56.5756, "step": 3840 }, { - "epoch": 0.015554487166538056, - "grad_norm": 811.929443359375, - "learning_rate": 7.7e-06, - "loss": 203.4754, + "epoch": 0.007777243583269028, + "grad_norm": 412.8589172363281, + "learning_rate": 7.7e-07, + "loss": 28.4811, "step": 3850 }, { - "epoch": 0.015594888431905688, - "grad_norm": 1337.7786865234375, - "learning_rate": 7.72e-06, - "loss": 251.0148, + "epoch": 0.007797444215952844, + "grad_norm": 818.9476318359375, + "learning_rate": 7.720000000000001e-07, + "loss": 23.9097, "step": 3860 }, { - "epoch": 0.01563528969727332, - "grad_norm": 1872.110595703125, - "learning_rate": 7.74e-06, - "loss": 220.2635, + "epoch": 0.00781764484863666, + "grad_norm": 660.37158203125, + "learning_rate": 7.74e-07, + "loss": 33.1159, "step": 3870 }, { - "epoch": 0.01567569096264095, - "grad_norm": 421.63671875, - "learning_rate": 7.76e-06, - "loss": 219.4246, + "epoch": 0.007837845481320474, + "grad_norm": 554.0394287109375, + "learning_rate": 7.760000000000001e-07, + "loss": 41.6907, "step": 3880 }, { - "epoch": 0.01571609222800858, - "grad_norm": 901.9820556640625, - "learning_rate": 7.78e-06, - "loss": 253.3245, + "epoch": 0.00785804611400429, + "grad_norm": 609.9306640625, + "learning_rate": 7.78e-07, + "loss": 23.2773, "step": 3890 }, { - "epoch": 0.01575649349337621, - "grad_norm": 1378.5067138671875, - "learning_rate": 7.8e-06, - "loss": 272.5337, + "epoch": 0.007878246746688106, + "grad_norm": 509.7363586425781, + "learning_rate": 7.8e-07, + "loss": 27.8946, "step": 3900 }, { - "epoch": 0.015796894758743844, - "grad_norm": 899.2632446289062, - "learning_rate": 7.820000000000001e-06, - "loss": 221.5219, + "epoch": 0.007898447379371922, + "grad_norm": 617.0547485351562, + "learning_rate": 7.820000000000001e-07, + "loss": 40.5662, "step": 3910 }, { - "epoch": 0.015837296024111474, - "grad_norm": 2511.511474609375, - "learning_rate": 7.84e-06, - "loss": 183.7072, + "epoch": 0.007918648012055737, + "grad_norm": 45.31270217895508, + "learning_rate": 7.84e-07, + "loss": 19.5, "step": 3920 }, { - "epoch": 0.015877697289479107, - "grad_norm": 1480.0848388671875, - "learning_rate": 7.860000000000001e-06, - "loss": 226.5134, + "epoch": 0.007938848644739553, + "grad_norm": 936.0264282226562, + "learning_rate": 7.860000000000001e-07, + "loss": 31.398, "step": 3930 }, { - "epoch": 0.015918098554846737, - "grad_norm": 1251.0631103515625, - "learning_rate": 7.879999999999999e-06, - "loss": 220.8395, + "epoch": 0.007959049277423368, + "grad_norm": 4046.48974609375, + "learning_rate": 7.88e-07, + "loss": 61.3853, "step": 3940 }, { - "epoch": 0.01595849982021437, - "grad_norm": 1017.7593994140625, - "learning_rate": 7.9e-06, - "loss": 299.8441, + "epoch": 0.007979249910107185, + "grad_norm": 678.829345703125, + "learning_rate": 7.900000000000001e-07, + "loss": 52.0202, "step": 3950 }, { - "epoch": 0.015998901085582, - "grad_norm": 750.101318359375, - "learning_rate": 7.92e-06, - "loss": 256.8377, + "epoch": 0.007999450542791, + "grad_norm": 174.16665649414062, + "learning_rate": 7.920000000000001e-07, + "loss": 18.2646, "step": 3960 }, { - "epoch": 0.016039302350949632, - "grad_norm": 850.2648315429688, - "learning_rate": 7.94e-06, - "loss": 300.5525, + "epoch": 0.008019651175474816, + "grad_norm": 631.7503051757812, + "learning_rate": 7.94e-07, + "loss": 22.1172, "step": 3970 }, { - "epoch": 0.016079703616317262, - "grad_norm": 688.6962280273438, - "learning_rate": 7.96e-06, - "loss": 238.5739, + "epoch": 0.008039851808158631, + "grad_norm": 863.083984375, + "learning_rate": 7.960000000000001e-07, + "loss": 31.1579, "step": 3980 }, { - "epoch": 0.016120104881684895, - "grad_norm": 1160.3623046875, - "learning_rate": 7.98e-06, - "loss": 230.5895, + "epoch": 0.008060052440842447, + "grad_norm": 395.8502197265625, + "learning_rate": 7.98e-07, + "loss": 36.5626, "step": 3990 }, { - "epoch": 0.016160506147052525, - "grad_norm": 768.995849609375, - "learning_rate": 8.000000000000001e-06, - "loss": 273.8474, + "epoch": 0.008080253073526262, + "grad_norm": 551.8558349609375, + "learning_rate": 8.000000000000001e-07, + "loss": 27.3743, "step": 4000 }, { - "epoch": 0.016200907412420158, - "grad_norm": 733.1216430664062, - "learning_rate": 8.02e-06, - "loss": 213.2722, + "epoch": 0.008100453706210079, + "grad_norm": 350.1429138183594, + "learning_rate": 8.02e-07, + "loss": 27.0405, "step": 4010 }, { - "epoch": 0.016241308677787787, - "grad_norm": 0.0, - "learning_rate": 8.040000000000001e-06, - "loss": 222.9751, + "epoch": 0.008120654338893894, + "grad_norm": 1742.940185546875, + "learning_rate": 8.04e-07, + "loss": 44.1298, "step": 4020 }, { - "epoch": 0.01628170994315542, - "grad_norm": 800.3453979492188, - "learning_rate": 8.06e-06, - "loss": 268.3867, + "epoch": 0.00814085497157771, + "grad_norm": 773.5975341796875, + "learning_rate": 8.060000000000001e-07, + "loss": 22.7084, "step": 4030 }, { - "epoch": 0.01632211120852305, - "grad_norm": 1095.6239013671875, - "learning_rate": 8.08e-06, - "loss": 321.233, + "epoch": 0.008161055604261525, + "grad_norm": 996.5304565429688, + "learning_rate": 8.08e-07, + "loss": 40.6753, "step": 4040 }, { - "epoch": 0.016362512473890683, - "grad_norm": 2067.449462890625, - "learning_rate": 8.1e-06, - "loss": 218.0929, + "epoch": 0.008181256236945341, + "grad_norm": 671.2755126953125, + "learning_rate": 8.100000000000001e-07, + "loss": 39.0983, "step": 4050 }, { - "epoch": 0.016402913739258312, - "grad_norm": 1188.5040283203125, - "learning_rate": 8.12e-06, - "loss": 326.4176, + "epoch": 0.008201456869629156, + "grad_norm": 350.6563415527344, + "learning_rate": 8.12e-07, + "loss": 30.7569, "step": 4060 }, { - "epoch": 0.016443315004625945, - "grad_norm": 1143.6793212890625, - "learning_rate": 8.14e-06, - "loss": 244.4551, + "epoch": 0.008221657502312973, + "grad_norm": 1149.395263671875, + "learning_rate": 8.140000000000001e-07, + "loss": 28.9356, "step": 4070 }, { - "epoch": 0.016483716269993575, - "grad_norm": 878.0779418945312, - "learning_rate": 8.160000000000001e-06, - "loss": 235.9173, + "epoch": 0.008241858134996788, + "grad_norm": 990.6906127929688, + "learning_rate": 8.160000000000001e-07, + "loss": 39.5627, "step": 4080 }, { - "epoch": 0.016524117535361208, - "grad_norm": 737.4267578125, - "learning_rate": 8.18e-06, - "loss": 331.0821, + "epoch": 0.008262058767680604, + "grad_norm": 364.4676208496094, + "learning_rate": 8.18e-07, + "loss": 14.7625, "step": 4090 }, { - "epoch": 0.016564518800728838, - "grad_norm": 1495.6761474609375, - "learning_rate": 8.200000000000001e-06, - "loss": 248.3052, + "epoch": 0.008282259400364419, + "grad_norm": 811.5744018554688, + "learning_rate": 8.200000000000001e-07, + "loss": 22.5022, "step": 4100 }, { - "epoch": 0.01660492006609647, - "grad_norm": 814.0003051757812, - "learning_rate": 8.22e-06, - "loss": 199.0726, + "epoch": 0.008302460033048235, + "grad_norm": 582.2328491210938, + "learning_rate": 8.22e-07, + "loss": 39.7949, "step": 4110 }, { - "epoch": 0.0166453213314641, - "grad_norm": 634.8568725585938, - "learning_rate": 8.24e-06, - "loss": 313.8576, + "epoch": 0.00832266066573205, + "grad_norm": 1127.8995361328125, + "learning_rate": 8.240000000000001e-07, + "loss": 37.47, "step": 4120 }, { - "epoch": 0.016685722596831733, - "grad_norm": 714.36328125, - "learning_rate": 8.26e-06, - "loss": 215.2515, + "epoch": 0.008342861298415867, + "grad_norm": 502.9598693847656, + "learning_rate": 8.260000000000001e-07, + "loss": 50.2378, "step": 4130 }, { - "epoch": 0.016726123862199363, - "grad_norm": 1302.689697265625, - "learning_rate": 8.28e-06, - "loss": 262.0066, + "epoch": 0.008363061931099681, + "grad_norm": 630.9242553710938, + "learning_rate": 8.280000000000001e-07, + "loss": 25.1314, "step": 4140 }, { - "epoch": 0.016766525127566996, - "grad_norm": 1403.1646728515625, - "learning_rate": 8.3e-06, - "loss": 314.0872, + "epoch": 0.008383262563783498, + "grad_norm": 632.32958984375, + "learning_rate": 8.300000000000001e-07, + "loss": 42.2978, "step": 4150 }, { - "epoch": 0.016806926392934626, - "grad_norm": 944.2760009765625, - "learning_rate": 8.32e-06, - "loss": 276.365, + "epoch": 0.008403463196467313, + "grad_norm": 513.2865600585938, + "learning_rate": 8.32e-07, + "loss": 22.9661, "step": 4160 }, { - "epoch": 0.01684732765830226, - "grad_norm": 2125.658447265625, - "learning_rate": 8.34e-06, - "loss": 337.9024, + "epoch": 0.00842366382915113, + "grad_norm": 437.45941162109375, + "learning_rate": 8.340000000000001e-07, + "loss": 24.9955, "step": 4170 }, { - "epoch": 0.016887728923669888, - "grad_norm": 2500.66455078125, - "learning_rate": 8.36e-06, - "loss": 241.9205, + "epoch": 0.008443864461834944, + "grad_norm": 732.6044311523438, + "learning_rate": 8.36e-07, + "loss": 37.1115, "step": 4180 }, { - "epoch": 0.01692813018903752, - "grad_norm": 637.8106689453125, - "learning_rate": 8.380000000000001e-06, - "loss": 239.1331, + "epoch": 0.00846406509451876, + "grad_norm": 270.413330078125, + "learning_rate": 8.380000000000001e-07, + "loss": 20.6703, "step": 4190 }, { - "epoch": 0.01696853145440515, - "grad_norm": 1099.7269287109375, - "learning_rate": 8.400000000000001e-06, - "loss": 170.0579, + "epoch": 0.008484265727202575, + "grad_norm": 8.429944038391113, + "learning_rate": 8.400000000000001e-07, + "loss": 25.673, "step": 4200 }, { - "epoch": 0.017008932719772784, - "grad_norm": 777.8555297851562, - "learning_rate": 8.42e-06, - "loss": 269.5526, + "epoch": 0.008504466359886392, + "grad_norm": 555.270751953125, + "learning_rate": 8.42e-07, + "loss": 22.8865, "step": 4210 }, { - "epoch": 0.017049333985140414, - "grad_norm": 624.0740356445312, - "learning_rate": 8.44e-06, - "loss": 180.0498, + "epoch": 0.008524666992570207, + "grad_norm": 638.0531005859375, + "learning_rate": 8.440000000000001e-07, + "loss": 21.5478, "step": 4220 }, { - "epoch": 0.017089735250508047, - "grad_norm": 1288.8555908203125, - "learning_rate": 8.46e-06, - "loss": 287.3699, + "epoch": 0.008544867625254023, + "grad_norm": 1256.038818359375, + "learning_rate": 8.46e-07, + "loss": 41.5287, "step": 4230 }, { - "epoch": 0.017130136515875676, - "grad_norm": 689.3584594726562, - "learning_rate": 8.48e-06, - "loss": 308.6741, + "epoch": 0.008565068257937838, + "grad_norm": 414.123046875, + "learning_rate": 8.480000000000001e-07, + "loss": 19.9983, "step": 4240 }, { - "epoch": 0.01717053778124331, - "grad_norm": 1398.036376953125, - "learning_rate": 8.500000000000002e-06, - "loss": 266.2167, + "epoch": 0.008585268890621655, + "grad_norm": 594.0344848632812, + "learning_rate": 8.500000000000001e-07, + "loss": 33.3682, "step": 4250 }, { - "epoch": 0.01721093904661094, - "grad_norm": 789.7576293945312, - "learning_rate": 8.52e-06, - "loss": 255.249, + "epoch": 0.00860546952330547, + "grad_norm": 580.2907104492188, + "learning_rate": 8.520000000000001e-07, + "loss": 26.1064, "step": 4260 }, { - "epoch": 0.017251340311978572, - "grad_norm": 815.0007934570312, - "learning_rate": 8.540000000000001e-06, - "loss": 194.3588, + "epoch": 0.008625670155989286, + "grad_norm": 910.2868041992188, + "learning_rate": 8.540000000000001e-07, + "loss": 25.9703, "step": 4270 }, { - "epoch": 0.0172917415773462, - "grad_norm": 1327.666259765625, - "learning_rate": 8.56e-06, - "loss": 329.2384, + "epoch": 0.0086458707886731, + "grad_norm": 401.0901184082031, + "learning_rate": 8.56e-07, + "loss": 31.2655, "step": 4280 }, { - "epoch": 0.017332142842713835, - "grad_norm": 626.3024291992188, - "learning_rate": 8.580000000000001e-06, - "loss": 137.7933, + "epoch": 0.008666071421356917, + "grad_norm": 586.83447265625, + "learning_rate": 8.580000000000001e-07, + "loss": 22.6068, "step": 4290 }, { - "epoch": 0.017372544108081464, - "grad_norm": 1358.869384765625, - "learning_rate": 8.599999999999999e-06, - "loss": 234.6338, + "epoch": 0.008686272054040732, + "grad_norm": 384.7112731933594, + "learning_rate": 8.6e-07, + "loss": 20.9198, "step": 4300 }, { - "epoch": 0.017412945373449097, - "grad_norm": 935.4063110351562, - "learning_rate": 8.62e-06, - "loss": 326.7994, + "epoch": 0.008706472686724549, + "grad_norm": 861.9733276367188, + "learning_rate": 8.620000000000001e-07, + "loss": 30.633, "step": 4310 }, { - "epoch": 0.017453346638816727, - "grad_norm": 1433.051025390625, - "learning_rate": 8.64e-06, - "loss": 242.4297, + "epoch": 0.008726673319408363, + "grad_norm": 508.044189453125, + "learning_rate": 8.640000000000001e-07, + "loss": 37.4199, "step": 4320 }, { - "epoch": 0.01749374790418436, - "grad_norm": 1025.8055419921875, - "learning_rate": 8.66e-06, - "loss": 168.3089, + "epoch": 0.00874687395209218, + "grad_norm": 757.0943603515625, + "learning_rate": 8.66e-07, + "loss": 31.3223, "step": 4330 }, { - "epoch": 0.01753414916955199, - "grad_norm": 2163.64794921875, - "learning_rate": 8.68e-06, - "loss": 246.4337, + "epoch": 0.008767074584775995, + "grad_norm": 481.5451965332031, + "learning_rate": 8.680000000000001e-07, + "loss": 30.192, "step": 4340 }, { - "epoch": 0.017574550434919622, - "grad_norm": 1108.7593994140625, - "learning_rate": 8.7e-06, - "loss": 211.9252, + "epoch": 0.008787275217459811, + "grad_norm": 1465.626220703125, + "learning_rate": 8.7e-07, + "loss": 36.7876, "step": 4350 }, { - "epoch": 0.017614951700287252, - "grad_norm": 1170.3670654296875, - "learning_rate": 8.720000000000001e-06, - "loss": 196.4572, + "epoch": 0.008807475850143626, + "grad_norm": 537.8186645507812, + "learning_rate": 8.720000000000001e-07, + "loss": 18.696, "step": 4360 }, { - "epoch": 0.017655352965654885, - "grad_norm": 822.1085815429688, - "learning_rate": 8.740000000000001e-06, - "loss": 236.9191, + "epoch": 0.008827676482827443, + "grad_norm": 421.6712951660156, + "learning_rate": 8.740000000000001e-07, + "loss": 16.2779, "step": 4370 }, { - "epoch": 0.017695754231022515, - "grad_norm": 551.1714477539062, - "learning_rate": 8.76e-06, - "loss": 262.1803, + "epoch": 0.008847877115511257, + "grad_norm": 625.7432861328125, + "learning_rate": 8.760000000000001e-07, + "loss": 23.2634, "step": 4380 }, { - "epoch": 0.017736155496390148, - "grad_norm": 941.1051635742188, - "learning_rate": 8.78e-06, - "loss": 197.1894, + "epoch": 0.008868077748195074, + "grad_norm": 630.1290893554688, + "learning_rate": 8.780000000000001e-07, + "loss": 22.0466, "step": 4390 }, { - "epoch": 0.017776556761757777, - "grad_norm": 812.2485961914062, - "learning_rate": 8.8e-06, - "loss": 304.7698, + "epoch": 0.008888278380878889, + "grad_norm": 19.57767105102539, + "learning_rate": 8.8e-07, + "loss": 32.943, "step": 4400 }, { - "epoch": 0.01781695802712541, - "grad_norm": 740.1641845703125, - "learning_rate": 8.82e-06, - "loss": 207.9336, + "epoch": 0.008908479013562705, + "grad_norm": 563.815185546875, + "learning_rate": 8.820000000000001e-07, + "loss": 20.2435, "step": 4410 }, { - "epoch": 0.01785735929249304, - "grad_norm": 1118.2490234375, - "learning_rate": 8.840000000000002e-06, - "loss": 201.0788, + "epoch": 0.00892867964624652, + "grad_norm": 565.490234375, + "learning_rate": 8.840000000000001e-07, + "loss": 30.1036, "step": 4420 }, { - "epoch": 0.017897760557860673, - "grad_norm": 2099.23583984375, - "learning_rate": 8.86e-06, - "loss": 222.6882, + "epoch": 0.008948880278930336, + "grad_norm": 553.708984375, + "learning_rate": 8.860000000000001e-07, + "loss": 24.4606, "step": 4430 }, { - "epoch": 0.017938161823228303, - "grad_norm": 1029.5682373046875, - "learning_rate": 8.880000000000001e-06, - "loss": 194.8024, + "epoch": 0.008969080911614151, + "grad_norm": 433.3461608886719, + "learning_rate": 8.880000000000001e-07, + "loss": 24.9225, "step": 4440 }, { - "epoch": 0.017978563088595936, - "grad_norm": 1188.818603515625, - "learning_rate": 8.9e-06, - "loss": 206.5788, + "epoch": 0.008989281544297968, + "grad_norm": 939.7255859375, + "learning_rate": 8.900000000000001e-07, + "loss": 32.4033, "step": 4450 }, { - "epoch": 0.018018964353963565, - "grad_norm": 820.3424682617188, - "learning_rate": 8.920000000000001e-06, - "loss": 222.0988, + "epoch": 0.009009482176981783, + "grad_norm": 207.4413604736328, + "learning_rate": 8.920000000000001e-07, + "loss": 38.8472, "step": 4460 }, { - "epoch": 0.0180593656193312, - "grad_norm": 1431.5162353515625, - "learning_rate": 8.939999999999999e-06, - "loss": 210.3697, + "epoch": 0.0090296828096656, + "grad_norm": 1215.0419921875, + "learning_rate": 8.94e-07, + "loss": 41.1395, "step": 4470 }, { - "epoch": 0.018099766884698828, - "grad_norm": 1079.22314453125, - "learning_rate": 8.96e-06, - "loss": 236.7236, + "epoch": 0.009049883442349414, + "grad_norm": 761.4100341796875, + "learning_rate": 8.960000000000001e-07, + "loss": 31.6368, "step": 4480 }, { - "epoch": 0.01814016815006646, - "grad_norm": 1381.0302734375, - "learning_rate": 8.98e-06, - "loss": 200.5311, + "epoch": 0.00907008407503323, + "grad_norm": 815.1241455078125, + "learning_rate": 8.980000000000001e-07, + "loss": 25.794, "step": 4490 }, { - "epoch": 0.01818056941543409, - "grad_norm": 799.3221435546875, - "learning_rate": 9e-06, - "loss": 229.802, + "epoch": 0.009090284707717045, + "grad_norm": 664.5227661132812, + "learning_rate": 9.000000000000001e-07, + "loss": 26.1395, "step": 4500 }, { - "epoch": 0.018220970680801724, - "grad_norm": 1207.725830078125, - "learning_rate": 9.02e-06, - "loss": 214.4318, + "epoch": 0.009110485340400862, + "grad_norm": 621.558349609375, + "learning_rate": 9.020000000000001e-07, + "loss": 20.145, "step": 4510 }, { - "epoch": 0.018261371946169353, - "grad_norm": 725.3284301757812, - "learning_rate": 9.04e-06, - "loss": 193.9023, + "epoch": 0.009130685973084677, + "grad_norm": 1977.96240234375, + "learning_rate": 9.04e-07, + "loss": 31.9802, "step": 4520 }, { - "epoch": 0.018301773211536986, - "grad_norm": 1104.83642578125, - "learning_rate": 9.06e-06, - "loss": 220.9035, + "epoch": 0.009150886605768493, + "grad_norm": 638.37109375, + "learning_rate": 9.060000000000001e-07, + "loss": 20.8006, "step": 4530 }, { - "epoch": 0.018342174476904616, - "grad_norm": 1509.99462890625, - "learning_rate": 9.080000000000001e-06, - "loss": 188.4938, + "epoch": 0.009171087238452308, + "grad_norm": 1067.8172607421875, + "learning_rate": 9.080000000000001e-07, + "loss": 55.018, "step": 4540 }, { - "epoch": 0.01838257574227225, - "grad_norm": 1129.591064453125, - "learning_rate": 9.100000000000001e-06, - "loss": 308.8094, + "epoch": 0.009191287871136124, + "grad_norm": 1054.7120361328125, + "learning_rate": 9.100000000000001e-07, + "loss": 28.5558, "step": 4550 }, { - "epoch": 0.01842297700763988, - "grad_norm": 2700.010986328125, - "learning_rate": 9.12e-06, - "loss": 310.8748, + "epoch": 0.00921148850381994, + "grad_norm": 297.5315856933594, + "learning_rate": 9.120000000000001e-07, + "loss": 29.3597, "step": 4560 }, { - "epoch": 0.01846337827300751, - "grad_norm": 1198.5032958984375, - "learning_rate": 9.14e-06, - "loss": 219.3821, + "epoch": 0.009231689136503756, + "grad_norm": 683.9236450195312, + "learning_rate": 9.140000000000001e-07, + "loss": 20.7864, "step": 4570 }, { - "epoch": 0.01850377953837514, - "grad_norm": 907.4744873046875, - "learning_rate": 9.16e-06, - "loss": 272.8794, + "epoch": 0.00925188976918757, + "grad_norm": 822.9036865234375, + "learning_rate": 9.160000000000001e-07, + "loss": 26.9667, "step": 4580 }, { - "epoch": 0.018544180803742774, - "grad_norm": 480.4779968261719, - "learning_rate": 9.180000000000002e-06, - "loss": 303.4508, + "epoch": 0.009272090401871387, + "grad_norm": 630.2400512695312, + "learning_rate": 9.180000000000001e-07, + "loss": 12.3359, "step": 4590 }, { - "epoch": 0.018584582069110404, - "grad_norm": 5315.6416015625, - "learning_rate": 9.2e-06, - "loss": 298.2705, + "epoch": 0.009292291034555202, + "grad_norm": 608.8886108398438, + "learning_rate": 9.200000000000001e-07, + "loss": 23.5769, "step": 4600 }, { - "epoch": 0.018624983334478037, - "grad_norm": 872.8356323242188, - "learning_rate": 9.220000000000002e-06, - "loss": 203.8982, + "epoch": 0.009312491667239018, + "grad_norm": 827.4570922851562, + "learning_rate": 9.220000000000001e-07, + "loss": 18.0923, "step": 4610 }, { - "epoch": 0.018665384599845666, - "grad_norm": 593.9172973632812, - "learning_rate": 9.24e-06, - "loss": 175.9667, + "epoch": 0.009332692299922833, + "grad_norm": 412.2288818359375, + "learning_rate": 9.240000000000001e-07, + "loss": 38.5852, "step": 4620 }, { - "epoch": 0.0187057858652133, - "grad_norm": 869.2783203125, - "learning_rate": 9.260000000000001e-06, - "loss": 180.2353, + "epoch": 0.00935289293260665, + "grad_norm": 950.310302734375, + "learning_rate": 9.260000000000001e-07, + "loss": 28.0527, "step": 4630 }, { - "epoch": 0.01874618713058093, - "grad_norm": 1448.7855224609375, - "learning_rate": 9.28e-06, - "loss": 180.4345, + "epoch": 0.009373093565290464, + "grad_norm": 442.42523193359375, + "learning_rate": 9.28e-07, + "loss": 20.4062, "step": 4640 }, { - "epoch": 0.018786588395948562, - "grad_norm": 2569.0322265625, - "learning_rate": 9.3e-06, - "loss": 263.0333, + "epoch": 0.009393294197974281, + "grad_norm": 524.7567749023438, + "learning_rate": 9.300000000000001e-07, + "loss": 27.0559, "step": 4650 }, { - "epoch": 0.01882698966131619, - "grad_norm": 1611.3876953125, - "learning_rate": 9.32e-06, - "loss": 286.5421, + "epoch": 0.009413494830658096, + "grad_norm": 602.6281127929688, + "learning_rate": 9.320000000000001e-07, + "loss": 26.432, "step": 4660 }, { - "epoch": 0.018867390926683825, - "grad_norm": 762.2740478515625, - "learning_rate": 9.34e-06, - "loss": 272.6539, + "epoch": 0.009433695463341912, + "grad_norm": 1005.23974609375, + "learning_rate": 9.340000000000001e-07, + "loss": 30.3101, "step": 4670 }, { - "epoch": 0.018907792192051454, - "grad_norm": 1819.1978759765625, - "learning_rate": 9.36e-06, - "loss": 197.3641, + "epoch": 0.009453896096025727, + "grad_norm": 511.9738464355469, + "learning_rate": 9.360000000000001e-07, + "loss": 20.6791, "step": 4680 }, { - "epoch": 0.018948193457419087, - "grad_norm": 1281.1929931640625, - "learning_rate": 9.38e-06, - "loss": 199.3052, + "epoch": 0.009474096728709544, + "grad_norm": 503.60693359375, + "learning_rate": 9.380000000000001e-07, + "loss": 26.5743, "step": 4690 }, { - "epoch": 0.018988594722786717, - "grad_norm": 4967.193359375, - "learning_rate": 9.4e-06, - "loss": 230.312, + "epoch": 0.009494297361393358, + "grad_norm": 674.8211059570312, + "learning_rate": 9.400000000000001e-07, + "loss": 31.4228, "step": 4700 }, { - "epoch": 0.01902899598815435, - "grad_norm": 1749.0260009765625, - "learning_rate": 9.420000000000001e-06, - "loss": 245.5051, + "epoch": 0.009514497994077175, + "grad_norm": 473.37396240234375, + "learning_rate": 9.420000000000002e-07, + "loss": 44.0672, "step": 4710 }, { - "epoch": 0.01906939725352198, - "grad_norm": 0.0, - "learning_rate": 9.44e-06, - "loss": 190.8588, + "epoch": 0.00953469862676099, + "grad_norm": 831.3383178710938, + "learning_rate": 9.440000000000001e-07, + "loss": 47.9142, "step": 4720 }, { - "epoch": 0.019109798518889613, - "grad_norm": 1861.452880859375, - "learning_rate": 9.460000000000001e-06, - "loss": 190.6377, + "epoch": 0.009554899259444806, + "grad_norm": 2031.18798828125, + "learning_rate": 9.460000000000001e-07, + "loss": 43.8193, "step": 4730 }, { - "epoch": 0.019150199784257242, - "grad_norm": 2230.98876953125, - "learning_rate": 9.48e-06, - "loss": 252.1078, + "epoch": 0.009575099892128621, + "grad_norm": 1036.27978515625, + "learning_rate": 9.480000000000001e-07, + "loss": 19.8682, "step": 4740 }, { - "epoch": 0.019190601049624875, - "grad_norm": 803.5484008789062, - "learning_rate": 9.5e-06, - "loss": 254.6172, + "epoch": 0.009595300524812438, + "grad_norm": 367.123046875, + "learning_rate": 9.500000000000001e-07, + "loss": 19.759, "step": 4750 }, { - "epoch": 0.019231002314992505, - "grad_norm": 765.6219482421875, - "learning_rate": 9.52e-06, - "loss": 249.983, + "epoch": 0.009615501157496252, + "grad_norm": 1182.7408447265625, + "learning_rate": 9.520000000000002e-07, + "loss": 29.7404, "step": 4760 }, { - "epoch": 0.019271403580360138, - "grad_norm": 932.5681762695312, - "learning_rate": 9.54e-06, - "loss": 235.5316, + "epoch": 0.009635701790180069, + "grad_norm": 326.5006408691406, + "learning_rate": 9.54e-07, + "loss": 62.1775, "step": 4770 }, { - "epoch": 0.019311804845727767, - "grad_norm": 981.9686889648438, - "learning_rate": 9.560000000000002e-06, - "loss": 229.3628, + "epoch": 0.009655902422863884, + "grad_norm": 206.4904022216797, + "learning_rate": 9.56e-07, + "loss": 20.3732, "step": 4780 }, { - "epoch": 0.0193522061110954, - "grad_norm": 436.2134704589844, - "learning_rate": 9.58e-06, - "loss": 158.716, + "epoch": 0.0096761030555477, + "grad_norm": 148.8917999267578, + "learning_rate": 9.58e-07, + "loss": 32.3255, "step": 4790 }, { - "epoch": 0.01939260737646303, - "grad_norm": 792.7994995117188, - "learning_rate": 9.600000000000001e-06, - "loss": 170.3208, + "epoch": 0.009696303688231515, + "grad_norm": 392.4678649902344, + "learning_rate": 9.600000000000001e-07, + "loss": 36.1733, "step": 4800 }, { - "epoch": 0.019433008641830663, - "grad_norm": 844.0726318359375, - "learning_rate": 9.62e-06, - "loss": 224.4318, + "epoch": 0.009716504320915332, + "grad_norm": 1750.965087890625, + "learning_rate": 9.62e-07, + "loss": 28.3648, "step": 4810 }, { - "epoch": 0.019473409907198293, - "grad_norm": 2132.043701171875, - "learning_rate": 9.640000000000001e-06, - "loss": 252.1749, + "epoch": 0.009736704953599146, + "grad_norm": 149.6742706298828, + "learning_rate": 9.64e-07, + "loss": 16.249, "step": 4820 }, { - "epoch": 0.019513811172565926, - "grad_norm": 2546.78271484375, - "learning_rate": 9.66e-06, - "loss": 207.1322, + "epoch": 0.009756905586282963, + "grad_norm": 268.1315002441406, + "learning_rate": 9.660000000000002e-07, + "loss": 24.6534, "step": 4830 }, { - "epoch": 0.019554212437933555, - "grad_norm": 1447.4708251953125, - "learning_rate": 9.68e-06, - "loss": 163.8928, + "epoch": 0.009777106218966778, + "grad_norm": 312.3633728027344, + "learning_rate": 9.68e-07, + "loss": 19.7529, "step": 4840 }, { - "epoch": 0.01959461370330119, - "grad_norm": 1765.4603271484375, - "learning_rate": 9.7e-06, - "loss": 285.0885, + "epoch": 0.009797306851650594, + "grad_norm": 335.4103698730469, + "learning_rate": 9.7e-07, + "loss": 25.3802, "step": 4850 }, { - "epoch": 0.019635014968668818, - "grad_norm": 385.1683349609375, - "learning_rate": 9.72e-06, - "loss": 159.1181, + "epoch": 0.009817507484334409, + "grad_norm": 588.1651000976562, + "learning_rate": 9.72e-07, + "loss": 37.7548, "step": 4860 }, { - "epoch": 0.01967541623403645, - "grad_norm": 3586.742919921875, - "learning_rate": 9.74e-06, - "loss": 281.9318, + "epoch": 0.009837708117018226, + "grad_norm": 1084.7984619140625, + "learning_rate": 9.740000000000001e-07, + "loss": 28.1253, "step": 4870 }, { - "epoch": 0.01971581749940408, - "grad_norm": 1368.7694091796875, - "learning_rate": 9.760000000000001e-06, - "loss": 191.7896, + "epoch": 0.00985790874970204, + "grad_norm": 841.3825073242188, + "learning_rate": 9.76e-07, + "loss": 17.4928, "step": 4880 }, { - "epoch": 0.019756218764771714, - "grad_norm": 826.7830810546875, - "learning_rate": 9.78e-06, - "loss": 174.5084, + "epoch": 0.009878109382385857, + "grad_norm": 258.460205078125, + "learning_rate": 9.78e-07, + "loss": 26.3879, "step": 4890 }, { - "epoch": 0.019796620030139343, - "grad_norm": 890.3446044921875, - "learning_rate": 9.800000000000001e-06, - "loss": 290.2659, + "epoch": 0.009898310015069672, + "grad_norm": 299.3013916015625, + "learning_rate": 9.800000000000001e-07, + "loss": 13.678, "step": 4900 }, { - "epoch": 0.019837021295506976, - "grad_norm": 1318.1915283203125, - "learning_rate": 9.820000000000001e-06, - "loss": 272.5934, + "epoch": 0.009918510647753488, + "grad_norm": 191.6232452392578, + "learning_rate": 9.82e-07, + "loss": 21.8842, "step": 4910 }, { - "epoch": 0.019877422560874606, - "grad_norm": 772.6763916015625, - "learning_rate": 9.84e-06, - "loss": 151.6077, + "epoch": 0.009938711280437303, + "grad_norm": 565.6702270507812, + "learning_rate": 9.84e-07, + "loss": 11.0409, "step": 4920 }, { - "epoch": 0.01991782382624224, - "grad_norm": 1587.762451171875, - "learning_rate": 9.86e-06, - "loss": 248.4316, + "epoch": 0.00995891191312112, + "grad_norm": 291.95526123046875, + "learning_rate": 9.86e-07, + "loss": 36.9195, "step": 4930 }, { - "epoch": 0.01995822509160987, - "grad_norm": 1252.715576171875, - "learning_rate": 9.88e-06, - "loss": 258.2403, + "epoch": 0.009979112545804934, + "grad_norm": 428.879150390625, + "learning_rate": 9.880000000000001e-07, + "loss": 26.7375, "step": 4940 }, { - "epoch": 0.0199986263569775, - "grad_norm": 1054.4132080078125, - "learning_rate": 9.900000000000002e-06, - "loss": 212.0352, + "epoch": 0.00999931317848875, + "grad_norm": 177.6942138671875, + "learning_rate": 9.9e-07, + "loss": 26.29, "step": 4950 }, { - "epoch": 0.02003902762234513, - "grad_norm": 1351.8643798828125, - "learning_rate": 9.92e-06, - "loss": 135.1369, + "epoch": 0.010019513811172566, + "grad_norm": 662.8630981445312, + "learning_rate": 9.92e-07, + "loss": 25.1648, "step": 4960 }, { - "epoch": 0.020079428887712764, - "grad_norm": 0.0, - "learning_rate": 9.940000000000001e-06, - "loss": 259.982, + "epoch": 0.010039714443856382, + "grad_norm": 489.55889892578125, + "learning_rate": 9.940000000000001e-07, + "loss": 41.3737, "step": 4970 }, { - "epoch": 0.020119830153080394, - "grad_norm": 1335.3504638671875, - "learning_rate": 9.96e-06, - "loss": 232.5153, + "epoch": 0.010059915076540197, + "grad_norm": 350.4941101074219, + "learning_rate": 9.96e-07, + "loss": 42.1294, "step": 4980 }, { - "epoch": 0.020160231418448027, - "grad_norm": 677.4559326171875, - "learning_rate": 9.980000000000001e-06, - "loss": 240.9019, + "epoch": 0.010080115709224013, + "grad_norm": 320.653076171875, + "learning_rate": 9.98e-07, + "loss": 26.9559, "step": 4990 }, { - "epoch": 0.020200632683815656, - "grad_norm": 1134.4188232421875, - "learning_rate": 1e-05, - "loss": 172.6664, + "epoch": 0.010100316341907828, + "grad_norm": 231.248046875, + "learning_rate": 1.0000000000000002e-06, + "loss": 6.5142, "step": 5000 }, { - "epoch": 0.02024103394918329, - "grad_norm": 1606.3121337890625, - "learning_rate": 1.002e-05, - "loss": 218.7504, + "epoch": 0.010120516974591645, + "grad_norm": 521.7716674804688, + "learning_rate": 1.002e-06, + "loss": 31.8698, "step": 5010 }, { - "epoch": 0.02028143521455092, - "grad_norm": 1248.30615234375, - "learning_rate": 1.004e-05, - "loss": 143.733, + "epoch": 0.01014071760727546, + "grad_norm": 800.10888671875, + "learning_rate": 1.004e-06, + "loss": 28.1427, "step": 5020 }, { - "epoch": 0.020321836479918552, - "grad_norm": 1033.4849853515625, - "learning_rate": 1.006e-05, - "loss": 252.8728, + "epoch": 0.010160918239959276, + "grad_norm": 420.4501037597656, + "learning_rate": 1.006e-06, + "loss": 28.2438, "step": 5030 }, { - "epoch": 0.020362237745286182, - "grad_norm": 1231.0445556640625, - "learning_rate": 1.008e-05, - "loss": 243.1658, + "epoch": 0.010181118872643091, + "grad_norm": 480.6717224121094, + "learning_rate": 1.0080000000000001e-06, + "loss": 20.2761, "step": 5040 }, { - "epoch": 0.020402639010653815, - "grad_norm": 1193.17236328125, - "learning_rate": 1.0100000000000002e-05, - "loss": 181.7209, + "epoch": 0.010201319505326907, + "grad_norm": 1004.8648681640625, + "learning_rate": 1.01e-06, + "loss": 22.479, "step": 5050 }, { - "epoch": 0.020443040276021444, - "grad_norm": 0.0, - "learning_rate": 1.012e-05, - "loss": 276.8536, + "epoch": 0.010221520138010722, + "grad_norm": 591.0374755859375, + "learning_rate": 1.012e-06, + "loss": 12.5654, "step": 5060 }, { - "epoch": 0.020483441541389077, - "grad_norm": 1306.9161376953125, - "learning_rate": 1.0140000000000001e-05, - "loss": 205.8765, + "epoch": 0.010241720770694539, + "grad_norm": 921.0972290039062, + "learning_rate": 1.0140000000000002e-06, + "loss": 28.9, "step": 5070 }, { - "epoch": 0.020523842806756707, - "grad_norm": 3135.527099609375, - "learning_rate": 1.016e-05, - "loss": 227.9693, + "epoch": 0.010261921403378354, + "grad_norm": 537.0875244140625, + "learning_rate": 1.016e-06, + "loss": 14.163, "step": 5080 }, { - "epoch": 0.02056424407212434, - "grad_norm": 2155.43994140625, - "learning_rate": 1.018e-05, - "loss": 284.9991, + "epoch": 0.01028212203606217, + "grad_norm": 672.0023193359375, + "learning_rate": 1.018e-06, + "loss": 40.8729, "step": 5090 }, { - "epoch": 0.02060464533749197, - "grad_norm": 528.388427734375, - "learning_rate": 1.02e-05, - "loss": 214.0478, + "epoch": 0.010302322668745985, + "grad_norm": 761.9652709960938, + "learning_rate": 1.02e-06, + "loss": 29.6747, "step": 5100 }, { - "epoch": 0.020645046602859603, - "grad_norm": 924.0242309570312, - "learning_rate": 1.022e-05, - "loss": 253.1813, + "epoch": 0.010322523301429801, + "grad_norm": 934.7273559570312, + "learning_rate": 1.0220000000000001e-06, + "loss": 23.6142, "step": 5110 }, { - "epoch": 0.020685447868227232, - "grad_norm": 1019.5430297851562, - "learning_rate": 1.024e-05, - "loss": 193.0214, + "epoch": 0.010342723934113616, + "grad_norm": 364.7134704589844, + "learning_rate": 1.024e-06, + "loss": 29.4059, "step": 5120 }, { - "epoch": 0.020725849133594865, - "grad_norm": 3559.9951171875, - "learning_rate": 1.026e-05, - "loss": 180.7234, + "epoch": 0.010362924566797433, + "grad_norm": 662.9189453125, + "learning_rate": 1.026e-06, + "loss": 25.676, "step": 5130 }, { - "epoch": 0.020766250398962495, - "grad_norm": 686.9107055664062, - "learning_rate": 1.0280000000000002e-05, - "loss": 177.0602, + "epoch": 0.010383125199481247, + "grad_norm": 1470.47802734375, + "learning_rate": 1.0280000000000002e-06, + "loss": 58.6466, "step": 5140 }, { - "epoch": 0.020806651664330128, - "grad_norm": 616.1886596679688, - "learning_rate": 1.03e-05, - "loss": 150.3324, + "epoch": 0.010403325832165064, + "grad_norm": 1297.0006103515625, + "learning_rate": 1.03e-06, + "loss": 34.2537, "step": 5150 }, { - "epoch": 0.020847052929697758, - "grad_norm": 768.7440185546875, - "learning_rate": 1.0320000000000001e-05, - "loss": 218.8612, + "epoch": 0.010423526464848879, + "grad_norm": 546.3383178710938, + "learning_rate": 1.032e-06, + "loss": 18.1663, "step": 5160 }, { - "epoch": 0.02088745419506539, - "grad_norm": 927.5042724609375, - "learning_rate": 1.0340000000000001e-05, - "loss": 247.3814, + "epoch": 0.010443727097532695, + "grad_norm": 1221.919189453125, + "learning_rate": 1.0340000000000002e-06, + "loss": 27.4059, "step": 5170 }, { - "epoch": 0.02092785546043302, - "grad_norm": 1545.854736328125, - "learning_rate": 1.036e-05, - "loss": 202.6357, + "epoch": 0.01046392773021651, + "grad_norm": 460.9737243652344, + "learning_rate": 1.0360000000000001e-06, + "loss": 16.2116, "step": 5180 }, { - "epoch": 0.020968256725800653, - "grad_norm": 950.5018310546875, - "learning_rate": 1.038e-05, - "loss": 245.9979, + "epoch": 0.010484128362900327, + "grad_norm": 413.9505920410156, + "learning_rate": 1.038e-06, + "loss": 21.2082, "step": 5190 }, { - "epoch": 0.021008657991168283, - "grad_norm": 829.0062866210938, - "learning_rate": 1.04e-05, - "loss": 244.4165, + "epoch": 0.010504328995584141, + "grad_norm": 227.77557373046875, + "learning_rate": 1.04e-06, + "loss": 27.9558, "step": 5200 }, { - "epoch": 0.021049059256535916, - "grad_norm": 1268.8380126953125, - "learning_rate": 1.042e-05, - "loss": 216.124, + "epoch": 0.010524529628267958, + "grad_norm": 606.6741943359375, + "learning_rate": 1.0420000000000001e-06, + "loss": 20.9653, "step": 5210 }, { - "epoch": 0.021089460521903546, - "grad_norm": 1645.070068359375, - "learning_rate": 1.0440000000000002e-05, - "loss": 224.5982, + "epoch": 0.010544730260951773, + "grad_norm": 541.234619140625, + "learning_rate": 1.044e-06, + "loss": 36.7112, "step": 5220 }, { - "epoch": 0.02112986178727118, - "grad_norm": 1425.95947265625, - "learning_rate": 1.046e-05, - "loss": 241.4969, + "epoch": 0.01056493089363559, + "grad_norm": 145.1033477783203, + "learning_rate": 1.046e-06, + "loss": 30.0167, "step": 5230 }, { - "epoch": 0.021170263052638808, - "grad_norm": 1239.9669189453125, - "learning_rate": 1.0480000000000001e-05, - "loss": 296.2492, + "epoch": 0.010585131526319404, + "grad_norm": 373.5177917480469, + "learning_rate": 1.0480000000000002e-06, + "loss": 31.7274, "step": 5240 }, { - "epoch": 0.02121066431800644, - "grad_norm": 3276.483154296875, - "learning_rate": 1.05e-05, - "loss": 257.1036, + "epoch": 0.01060533215900322, + "grad_norm": 563.7691650390625, + "learning_rate": 1.0500000000000001e-06, + "loss": 30.9538, "step": 5250 }, { - "epoch": 0.02125106558337407, - "grad_norm": 1606.2689208984375, - "learning_rate": 1.0520000000000001e-05, - "loss": 273.2055, + "epoch": 0.010625532791687035, + "grad_norm": 170.43959045410156, + "learning_rate": 1.052e-06, + "loss": 44.6468, "step": 5260 }, { - "epoch": 0.021291466848741704, - "grad_norm": 2068.524658203125, - "learning_rate": 1.0539999999999999e-05, - "loss": 209.8239, + "epoch": 0.010645733424370852, + "grad_norm": 695.9778442382812, + "learning_rate": 1.054e-06, + "loss": 42.9755, "step": 5270 }, { - "epoch": 0.021331868114109333, - "grad_norm": 1133.3856201171875, - "learning_rate": 1.056e-05, - "loss": 248.1236, + "epoch": 0.010665934057054667, + "grad_norm": 390.760009765625, + "learning_rate": 1.0560000000000001e-06, + "loss": 28.6079, "step": 5280 }, { - "epoch": 0.021372269379476967, - "grad_norm": 1275.0018310546875, - "learning_rate": 1.058e-05, - "loss": 238.0178, + "epoch": 0.010686134689738483, + "grad_norm": 483.7234191894531, + "learning_rate": 1.058e-06, + "loss": 22.693, "step": 5290 }, { - "epoch": 0.021412670644844596, - "grad_norm": 759.3450927734375, - "learning_rate": 1.06e-05, - "loss": 269.6895, + "epoch": 0.010706335322422298, + "grad_norm": 694.895263671875, + "learning_rate": 1.06e-06, + "loss": 31.7093, "step": 5300 }, { - "epoch": 0.02145307191021223, - "grad_norm": 4057.62744140625, - "learning_rate": 1.062e-05, - "loss": 268.4212, + "epoch": 0.010726535955106115, + "grad_norm": 0.03282935544848442, + "learning_rate": 1.0620000000000002e-06, + "loss": 29.5697, "step": 5310 }, { - "epoch": 0.02149347317557986, - "grad_norm": 933.5989379882812, - "learning_rate": 1.064e-05, - "loss": 200.5304, + "epoch": 0.01074673658778993, + "grad_norm": 197.685546875, + "learning_rate": 1.064e-06, + "loss": 25.7651, "step": 5320 }, { - "epoch": 0.021533874440947492, - "grad_norm": 709.9401245117188, - "learning_rate": 1.0660000000000001e-05, - "loss": 227.5934, + "epoch": 0.010766937220473746, + "grad_norm": 692.8652954101562, + "learning_rate": 1.066e-06, + "loss": 28.2902, "step": 5330 }, { - "epoch": 0.02157427570631512, - "grad_norm": 1363.613525390625, - "learning_rate": 1.0680000000000001e-05, - "loss": 195.4376, + "epoch": 0.01078713785315756, + "grad_norm": 295.2193603515625, + "learning_rate": 1.0680000000000002e-06, + "loss": 35.8585, "step": 5340 }, { - "epoch": 0.021614676971682754, - "grad_norm": 860.6060180664062, - "learning_rate": 1.0700000000000001e-05, - "loss": 220.5302, + "epoch": 0.010807338485841377, + "grad_norm": 179.0888214111328, + "learning_rate": 1.0700000000000001e-06, + "loss": 13.3468, "step": 5350 }, { - "epoch": 0.021655078237050384, - "grad_norm": 809.3516845703125, - "learning_rate": 1.072e-05, - "loss": 289.8672, + "epoch": 0.010827539118525192, + "grad_norm": 369.4256896972656, + "learning_rate": 1.072e-06, + "loss": 42.9108, "step": 5360 }, { - "epoch": 0.021695479502418017, - "grad_norm": 688.4315185546875, - "learning_rate": 1.074e-05, - "loss": 231.9644, + "epoch": 0.010847739751209009, + "grad_norm": 380.3387756347656, + "learning_rate": 1.074e-06, + "loss": 35.2701, "step": 5370 }, { - "epoch": 0.021735880767785647, - "grad_norm": 2667.476318359375, - "learning_rate": 1.076e-05, - "loss": 270.5944, + "epoch": 0.010867940383892823, + "grad_norm": 735.1011962890625, + "learning_rate": 1.0760000000000002e-06, + "loss": 37.2414, "step": 5380 }, { - "epoch": 0.02177628203315328, - "grad_norm": 1891.450927734375, - "learning_rate": 1.0780000000000002e-05, - "loss": 249.0208, + "epoch": 0.01088814101657664, + "grad_norm": 459.38720703125, + "learning_rate": 1.078e-06, + "loss": 36.9988, "step": 5390 }, { - "epoch": 0.02181668329852091, - "grad_norm": 3481.829345703125, - "learning_rate": 1.08e-05, - "loss": 301.6664, + "epoch": 0.010908341649260455, + "grad_norm": 1027.1058349609375, + "learning_rate": 1.08e-06, + "loss": 22.0428, "step": 5400 }, { - "epoch": 0.021857084563888542, - "grad_norm": 956.6407470703125, - "learning_rate": 1.0820000000000001e-05, - "loss": 245.2214, + "epoch": 0.010928542281944271, + "grad_norm": 2424.2099609375, + "learning_rate": 1.0820000000000002e-06, + "loss": 31.5498, "step": 5410 }, { - "epoch": 0.021897485829256172, - "grad_norm": 4194.12841796875, - "learning_rate": 1.084e-05, - "loss": 260.2813, + "epoch": 0.010948742914628086, + "grad_norm": 194.22132873535156, + "learning_rate": 1.0840000000000001e-06, + "loss": 73.3618, "step": 5420 }, { - "epoch": 0.021937887094623805, - "grad_norm": 3680.693603515625, - "learning_rate": 1.0860000000000001e-05, - "loss": 249.7451, + "epoch": 0.010968943547311902, + "grad_norm": 756.6248168945312, + "learning_rate": 1.086e-06, + "loss": 74.6274, "step": 5430 }, { - "epoch": 0.021978288359991435, - "grad_norm": 1049.5306396484375, - "learning_rate": 1.088e-05, - "loss": 227.2931, + "epoch": 0.010989144179995717, + "grad_norm": 472.0841369628906, + "learning_rate": 1.088e-06, + "loss": 22.9419, "step": 5440 }, { - "epoch": 0.022018689625359068, - "grad_norm": 10489.931640625, - "learning_rate": 1.09e-05, - "loss": 334.8502, + "epoch": 0.011009344812679534, + "grad_norm": 600.0186157226562, + "learning_rate": 1.0900000000000002e-06, + "loss": 25.0131, "step": 5450 }, { - "epoch": 0.022059090890726697, - "grad_norm": 1121.204833984375, - "learning_rate": 1.092e-05, - "loss": 220.4895, + "epoch": 0.011029545445363349, + "grad_norm": 305.5976257324219, + "learning_rate": 1.092e-06, + "loss": 31.3711, "step": 5460 }, { - "epoch": 0.02209949215609433, - "grad_norm": 1375.3389892578125, - "learning_rate": 1.094e-05, - "loss": 212.8332, + "epoch": 0.011049746078047165, + "grad_norm": 108.77481842041016, + "learning_rate": 1.094e-06, + "loss": 17.3308, "step": 5470 }, { - "epoch": 0.02213989342146196, - "grad_norm": 2596.01318359375, - "learning_rate": 1.096e-05, - "loss": 242.3982, + "epoch": 0.01106994671073098, + "grad_norm": 1116.296142578125, + "learning_rate": 1.0960000000000002e-06, + "loss": 37.9978, "step": 5480 }, { - "epoch": 0.022180294686829593, - "grad_norm": 1790.5255126953125, - "learning_rate": 1.098e-05, - "loss": 228.799, + "epoch": 0.011090147343414796, + "grad_norm": 390.4914245605469, + "learning_rate": 1.0980000000000001e-06, + "loss": 14.3431, "step": 5490 }, { - "epoch": 0.022220695952197222, - "grad_norm": 1782.9239501953125, - "learning_rate": 1.1000000000000001e-05, - "loss": 247.0614, + "epoch": 0.011110347976098611, + "grad_norm": 598.1354370117188, + "learning_rate": 1.1e-06, + "loss": 30.984, "step": 5500 }, { - "epoch": 0.022261097217564856, - "grad_norm": 3071.753173828125, - "learning_rate": 1.1020000000000001e-05, - "loss": 190.9815, + "epoch": 0.011130548608782428, + "grad_norm": 711.5731201171875, + "learning_rate": 1.1020000000000002e-06, + "loss": 30.1973, "step": 5510 }, { - "epoch": 0.022301498482932485, - "grad_norm": 778.1385498046875, - "learning_rate": 1.1040000000000001e-05, - "loss": 208.1198, + "epoch": 0.011150749241466243, + "grad_norm": 371.409423828125, + "learning_rate": 1.1040000000000001e-06, + "loss": 24.4026, "step": 5520 }, { - "epoch": 0.022341899748300118, - "grad_norm": 4282.1689453125, - "learning_rate": 1.106e-05, - "loss": 187.0153, + "epoch": 0.011170949874150059, + "grad_norm": 665.7667846679688, + "learning_rate": 1.106e-06, + "loss": 19.5368, "step": 5530 }, { - "epoch": 0.022382301013667748, - "grad_norm": 3060.544921875, - "learning_rate": 1.108e-05, - "loss": 243.2019, + "epoch": 0.011191150506833874, + "grad_norm": 502.86566162109375, + "learning_rate": 1.108e-06, + "loss": 18.791, "step": 5540 }, { - "epoch": 0.02242270227903538, - "grad_norm": 1174.474853515625, - "learning_rate": 1.11e-05, - "loss": 240.466, + "epoch": 0.01121135113951769, + "grad_norm": 404.4401550292969, + "learning_rate": 1.1100000000000002e-06, + "loss": 20.3444, "step": 5550 }, { - "epoch": 0.02246310354440301, - "grad_norm": 835.2548828125, - "learning_rate": 1.112e-05, - "loss": 195.4574, + "epoch": 0.011231551772201505, + "grad_norm": 323.6523132324219, + "learning_rate": 1.1120000000000001e-06, + "loss": 36.3596, "step": 5560 }, { - "epoch": 0.022503504809770643, - "grad_norm": 679.6596069335938, - "learning_rate": 1.114e-05, - "loss": 116.5087, + "epoch": 0.011251752404885322, + "grad_norm": 743.5354614257812, + "learning_rate": 1.114e-06, + "loss": 29.4033, "step": 5570 }, { - "epoch": 0.022543906075138273, - "grad_norm": 4872.7197265625, - "learning_rate": 1.1160000000000002e-05, - "loss": 208.2604, + "epoch": 0.011271953037569137, + "grad_norm": 281.8879699707031, + "learning_rate": 1.1160000000000002e-06, + "loss": 23.6649, "step": 5580 }, { - "epoch": 0.022584307340505906, - "grad_norm": 507.1872253417969, - "learning_rate": 1.118e-05, - "loss": 240.8744, + "epoch": 0.011292153670252953, + "grad_norm": 812.1799926757812, + "learning_rate": 1.1180000000000001e-06, + "loss": 34.1818, "step": 5590 }, { - "epoch": 0.022624708605873536, - "grad_norm": 1005.294189453125, - "learning_rate": 1.1200000000000001e-05, - "loss": 184.084, + "epoch": 0.011312354302936768, + "grad_norm": 148.88833618164062, + "learning_rate": 1.12e-06, + "loss": 28.1046, "step": 5600 }, { - "epoch": 0.02266510987124117, - "grad_norm": 7254.05029296875, - "learning_rate": 1.122e-05, - "loss": 216.7616, + "epoch": 0.011332554935620584, + "grad_norm": 471.3416748046875, + "learning_rate": 1.122e-06, + "loss": 24.278, "step": 5610 }, { - "epoch": 0.0227055111366088, - "grad_norm": 1527.6485595703125, - "learning_rate": 1.124e-05, - "loss": 197.5231, + "epoch": 0.0113527555683044, + "grad_norm": 353.2952575683594, + "learning_rate": 1.1240000000000002e-06, + "loss": 30.7999, "step": 5620 }, { - "epoch": 0.02274591240197643, - "grad_norm": 1460.7916259765625, - "learning_rate": 1.126e-05, - "loss": 280.5447, + "epoch": 0.011372956200988216, + "grad_norm": 585.4158325195312, + "learning_rate": 1.126e-06, + "loss": 16.3788, "step": 5630 }, { - "epoch": 0.02278631366734406, - "grad_norm": 1024.780517578125, - "learning_rate": 1.128e-05, - "loss": 248.3504, + "epoch": 0.01139315683367203, + "grad_norm": 629.95703125, + "learning_rate": 1.128e-06, + "loss": 22.8701, "step": 5640 }, { - "epoch": 0.022826714932711694, - "grad_norm": 1925.320068359375, - "learning_rate": 1.13e-05, - "loss": 226.1158, + "epoch": 0.011413357466355847, + "grad_norm": 1085.2030029296875, + "learning_rate": 1.1300000000000002e-06, + "loss": 31.6779, "step": 5650 }, { - "epoch": 0.022867116198079324, - "grad_norm": 1352.9869384765625, - "learning_rate": 1.132e-05, - "loss": 243.9569, + "epoch": 0.011433558099039662, + "grad_norm": 1200.4349365234375, + "learning_rate": 1.1320000000000001e-06, + "loss": 26.5144, "step": 5660 }, { - "epoch": 0.022907517463446957, - "grad_norm": 1091.9189453125, - "learning_rate": 1.134e-05, - "loss": 186.5363, + "epoch": 0.011453758731723478, + "grad_norm": 869.242919921875, + "learning_rate": 1.134e-06, + "loss": 26.5842, "step": 5670 }, { - "epoch": 0.022947918728814586, - "grad_norm": 544.18994140625, - "learning_rate": 1.1360000000000001e-05, - "loss": 178.7458, + "epoch": 0.011473959364407293, + "grad_norm": 107.99213409423828, + "learning_rate": 1.1360000000000002e-06, + "loss": 27.156, "step": 5680 }, { - "epoch": 0.02298831999418222, - "grad_norm": 0.0, - "learning_rate": 1.1380000000000001e-05, - "loss": 237.9985, + "epoch": 0.01149415999709111, + "grad_norm": 908.802734375, + "learning_rate": 1.1380000000000002e-06, + "loss": 34.6566, "step": 5690 }, { - "epoch": 0.02302872125954985, - "grad_norm": 1084.4674072265625, - "learning_rate": 1.1400000000000001e-05, - "loss": 201.7748, + "epoch": 0.011514360629774924, + "grad_norm": 497.6568298339844, + "learning_rate": 1.14e-06, + "loss": 21.8019, "step": 5700 }, { - "epoch": 0.023069122524917482, - "grad_norm": 1035.4659423828125, - "learning_rate": 1.142e-05, - "loss": 207.2124, + "epoch": 0.011534561262458741, + "grad_norm": 293.9489440917969, + "learning_rate": 1.142e-06, + "loss": 22.4049, "step": 5710 }, { - "epoch": 0.02310952379028511, - "grad_norm": 733.1090087890625, - "learning_rate": 1.144e-05, - "loss": 178.1889, + "epoch": 0.011554761895142556, + "grad_norm": 295.17230224609375, + "learning_rate": 1.1440000000000002e-06, + "loss": 37.8035, "step": 5720 }, { - "epoch": 0.023149925055652745, - "grad_norm": 876.4588012695312, - "learning_rate": 1.146e-05, - "loss": 158.9155, + "epoch": 0.011574962527826372, + "grad_norm": 475.5271301269531, + "learning_rate": 1.1460000000000001e-06, + "loss": 24.764, "step": 5730 }, { - "epoch": 0.023190326321020374, - "grad_norm": 657.6229248046875, - "learning_rate": 1.148e-05, - "loss": 194.4797, + "epoch": 0.011595163160510187, + "grad_norm": 1417.2218017578125, + "learning_rate": 1.148e-06, + "loss": 31.2254, "step": 5740 }, { - "epoch": 0.023230727586388007, - "grad_norm": 1495.9139404296875, - "learning_rate": 1.1500000000000002e-05, - "loss": 222.9885, + "epoch": 0.011615363793194004, + "grad_norm": 271.8209533691406, + "learning_rate": 1.1500000000000002e-06, + "loss": 14.2182, "step": 5750 }, { - "epoch": 0.023271128851755637, - "grad_norm": 2601.6259765625, - "learning_rate": 1.152e-05, - "loss": 304.1291, + "epoch": 0.011635564425877818, + "grad_norm": 612.4166259765625, + "learning_rate": 1.1520000000000002e-06, + "loss": 18.7975, "step": 5760 }, { - "epoch": 0.02331153011712327, - "grad_norm": 1693.1273193359375, - "learning_rate": 1.1540000000000001e-05, - "loss": 199.6243, + "epoch": 0.011655765058561635, + "grad_norm": 314.46990966796875, + "learning_rate": 1.154e-06, + "loss": 19.6362, "step": 5770 }, { - "epoch": 0.0233519313824909, - "grad_norm": 945.2630615234375, - "learning_rate": 1.156e-05, - "loss": 126.1058, + "epoch": 0.01167596569124545, + "grad_norm": 627.936767578125, + "learning_rate": 1.156e-06, + "loss": 38.5957, "step": 5780 }, { - "epoch": 0.023392332647858533, - "grad_norm": 616.036865234375, - "learning_rate": 1.1580000000000001e-05, - "loss": 265.3511, + "epoch": 0.011696166323929266, + "grad_norm": 1000.9813842773438, + "learning_rate": 1.1580000000000002e-06, + "loss": 34.4684, "step": 5790 }, { - "epoch": 0.023432733913226162, - "grad_norm": 908.6356811523438, - "learning_rate": 1.16e-05, - "loss": 215.007, + "epoch": 0.011716366956613081, + "grad_norm": 637.470703125, + "learning_rate": 1.1600000000000001e-06, + "loss": 34.59, "step": 5800 }, { - "epoch": 0.023473135178593795, - "grad_norm": 1166.1875, - "learning_rate": 1.162e-05, - "loss": 171.8394, + "epoch": 0.011736567589296898, + "grad_norm": 750.3966674804688, + "learning_rate": 1.162e-06, + "loss": 44.1128, "step": 5810 }, { - "epoch": 0.023513536443961425, - "grad_norm": 936.7083740234375, - "learning_rate": 1.164e-05, - "loss": 209.5779, + "epoch": 0.011756768221980712, + "grad_norm": 1139.5133056640625, + "learning_rate": 1.1640000000000002e-06, + "loss": 23.7449, "step": 5820 }, { - "epoch": 0.023553937709329058, - "grad_norm": 689.0086669921875, - "learning_rate": 1.166e-05, - "loss": 165.4548, + "epoch": 0.011776968854664529, + "grad_norm": 370.25567626953125, + "learning_rate": 1.1660000000000001e-06, + "loss": 17.9951, "step": 5830 }, { - "epoch": 0.023594338974696687, - "grad_norm": 1105.93212890625, - "learning_rate": 1.168e-05, - "loss": 275.4574, + "epoch": 0.011797169487348344, + "grad_norm": 194.2151336669922, + "learning_rate": 1.168e-06, + "loss": 23.3352, "step": 5840 }, { - "epoch": 0.02363474024006432, - "grad_norm": 2241.417236328125, - "learning_rate": 1.1700000000000001e-05, - "loss": 359.449, + "epoch": 0.01181737012003216, + "grad_norm": 812.2650756835938, + "learning_rate": 1.1700000000000002e-06, + "loss": 21.2374, "step": 5850 }, { - "epoch": 0.02367514150543195, - "grad_norm": 1302.2642822265625, - "learning_rate": 1.172e-05, - "loss": 259.1196, + "epoch": 0.011837570752715975, + "grad_norm": 422.30401611328125, + "learning_rate": 1.1720000000000002e-06, + "loss": 19.3065, "step": 5860 }, { - "epoch": 0.023715542770799583, - "grad_norm": 1307.6990966796875, - "learning_rate": 1.1740000000000001e-05, - "loss": 215.3923, + "epoch": 0.011857771385399792, + "grad_norm": 1042.8720703125, + "learning_rate": 1.1740000000000001e-06, + "loss": 38.1709, "step": 5870 }, { - "epoch": 0.023755944036167213, - "grad_norm": 523.6953735351562, - "learning_rate": 1.1760000000000001e-05, - "loss": 189.7961, + "epoch": 0.011877972018083606, + "grad_norm": 463.48712158203125, + "learning_rate": 1.176e-06, + "loss": 20.1112, "step": 5880 }, { - "epoch": 0.023796345301534846, - "grad_norm": 1645.471923828125, - "learning_rate": 1.178e-05, - "loss": 197.7059, + "epoch": 0.011898172650767423, + "grad_norm": 251.95362854003906, + "learning_rate": 1.1780000000000002e-06, + "loss": 19.2058, "step": 5890 }, { - "epoch": 0.023836746566902475, - "grad_norm": 4609.2265625, - "learning_rate": 1.18e-05, - "loss": 312.7925, + "epoch": 0.011918373283451238, + "grad_norm": 644.2528076171875, + "learning_rate": 1.1800000000000001e-06, + "loss": 55.8893, "step": 5900 }, { - "epoch": 0.02387714783227011, - "grad_norm": 886.41015625, - "learning_rate": 1.182e-05, - "loss": 227.9965, + "epoch": 0.011938573916135054, + "grad_norm": 546.8013305664062, + "learning_rate": 1.182e-06, + "loss": 65.9524, "step": 5910 }, { - "epoch": 0.023917549097637738, - "grad_norm": 1345.154052734375, - "learning_rate": 1.1840000000000002e-05, - "loss": 143.6501, + "epoch": 0.011958774548818869, + "grad_norm": 649.1343994140625, + "learning_rate": 1.1840000000000002e-06, + "loss": 27.965, "step": 5920 }, { - "epoch": 0.02395795036300537, - "grad_norm": 2758.603515625, - "learning_rate": 1.186e-05, - "loss": 189.3002, + "epoch": 0.011978975181502685, + "grad_norm": 526.4266357421875, + "learning_rate": 1.1860000000000002e-06, + "loss": 34.2897, "step": 5930 }, { - "epoch": 0.023998351628373, - "grad_norm": 933.2481079101562, - "learning_rate": 1.1880000000000001e-05, - "loss": 170.3514, + "epoch": 0.0119991758141865, + "grad_norm": 1568.1280517578125, + "learning_rate": 1.188e-06, + "loss": 35.1816, "step": 5940 }, { - "epoch": 0.024038752893740634, - "grad_norm": 1366.0477294921875, - "learning_rate": 1.19e-05, - "loss": 204.9711, + "epoch": 0.012019376446870317, + "grad_norm": 318.4168395996094, + "learning_rate": 1.19e-06, + "loss": 22.1523, "step": 5950 }, { - "epoch": 0.024079154159108263, - "grad_norm": 870.7377319335938, - "learning_rate": 1.1920000000000001e-05, - "loss": 182.9471, + "epoch": 0.012039577079554132, + "grad_norm": 402.7890930175781, + "learning_rate": 1.1920000000000002e-06, + "loss": 33.6881, "step": 5960 }, { - "epoch": 0.024119555424475896, - "grad_norm": 1715.831298828125, - "learning_rate": 1.1940000000000001e-05, - "loss": 206.0364, + "epoch": 0.012059777712237948, + "grad_norm": 497.604736328125, + "learning_rate": 1.1940000000000001e-06, + "loss": 32.794, "step": 5970 }, { - "epoch": 0.024159956689843526, - "grad_norm": 1668.3841552734375, - "learning_rate": 1.196e-05, - "loss": 226.4282, + "epoch": 0.012079978344921763, + "grad_norm": 444.6767883300781, + "learning_rate": 1.196e-06, + "loss": 37.6437, "step": 5980 }, { - "epoch": 0.02420035795521116, - "grad_norm": 1048.4296875, - "learning_rate": 1.198e-05, - "loss": 109.3125, + "epoch": 0.01210017897760558, + "grad_norm": 863.6239624023438, + "learning_rate": 1.1980000000000002e-06, + "loss": 26.2152, "step": 5990 }, { - "epoch": 0.02424075922057879, - "grad_norm": 879.1497192382812, - "learning_rate": 1.2e-05, - "loss": 201.5203, + "epoch": 0.012120379610289394, + "grad_norm": 592.9383544921875, + "learning_rate": 1.2000000000000002e-06, + "loss": 14.3755, "step": 6000 }, { - "epoch": 0.02428116048594642, - "grad_norm": 1213.1204833984375, - "learning_rate": 1.202e-05, - "loss": 163.2346, + "epoch": 0.01214058024297321, + "grad_norm": 1376.8350830078125, + "learning_rate": 1.202e-06, + "loss": 36.9957, "step": 6010 }, { - "epoch": 0.02432156175131405, - "grad_norm": 1427.281494140625, - "learning_rate": 1.204e-05, - "loss": 174.4642, + "epoch": 0.012160780875657026, + "grad_norm": 894.4027709960938, + "learning_rate": 1.204e-06, + "loss": 25.3654, "step": 6020 }, { - "epoch": 0.024361963016681684, - "grad_norm": 1276.7529296875, - "learning_rate": 1.206e-05, - "loss": 204.9076, + "epoch": 0.012180981508340842, + "grad_norm": 587.0653686523438, + "learning_rate": 1.2060000000000002e-06, + "loss": 29.8984, "step": 6030 }, { - "epoch": 0.024402364282049314, - "grad_norm": 2190.304443359375, - "learning_rate": 1.2080000000000001e-05, - "loss": 159.9595, + "epoch": 0.012201182141024657, + "grad_norm": 121.9262466430664, + "learning_rate": 1.2080000000000001e-06, + "loss": 20.7306, "step": 6040 }, { - "epoch": 0.024442765547416947, - "grad_norm": 785.987060546875, - "learning_rate": 1.2100000000000001e-05, - "loss": 209.5406, + "epoch": 0.012221382773708473, + "grad_norm": 488.919677734375, + "learning_rate": 1.21e-06, + "loss": 27.6776, "step": 6050 }, { - "epoch": 0.024483166812784576, - "grad_norm": 611.1929931640625, - "learning_rate": 1.2120000000000001e-05, - "loss": 221.088, + "epoch": 0.012241583406392288, + "grad_norm": 461.9215393066406, + "learning_rate": 1.2120000000000002e-06, + "loss": 29.0396, "step": 6060 }, { - "epoch": 0.02452356807815221, - "grad_norm": 1893.1925048828125, - "learning_rate": 1.214e-05, - "loss": 215.4067, + "epoch": 0.012261784039076105, + "grad_norm": 399.4093322753906, + "learning_rate": 1.214e-06, + "loss": 60.5062, "step": 6070 }, { - "epoch": 0.02456396934351984, - "grad_norm": 4432.23095703125, - "learning_rate": 1.216e-05, - "loss": 221.237, + "epoch": 0.01228198467175992, + "grad_norm": 167.42552185058594, + "learning_rate": 1.216e-06, + "loss": 29.8078, "step": 6080 }, { - "epoch": 0.02460437060888747, - "grad_norm": 1176.1290283203125, - "learning_rate": 1.2180000000000002e-05, - "loss": 220.3091, + "epoch": 0.012302185304443734, + "grad_norm": 412.9476623535156, + "learning_rate": 1.2180000000000002e-06, + "loss": 25.1903, "step": 6090 }, { - "epoch": 0.0246447718742551, - "grad_norm": 1318.6954345703125, - "learning_rate": 1.22e-05, - "loss": 250.0935, + "epoch": 0.01232238593712755, + "grad_norm": 402.8238220214844, + "learning_rate": 1.2200000000000002e-06, + "loss": 39.7322, "step": 6100 }, { - "epoch": 0.02468517313962273, - "grad_norm": 1313.9632568359375, - "learning_rate": 1.2220000000000002e-05, - "loss": 226.1311, + "epoch": 0.012342586569811366, + "grad_norm": 503.3050537109375, + "learning_rate": 1.2220000000000001e-06, + "loss": 19.9504, "step": 6110 }, { - "epoch": 0.024725574404990364, - "grad_norm": 1210.4168701171875, - "learning_rate": 1.224e-05, - "loss": 348.8165, + "epoch": 0.012362787202495182, + "grad_norm": 251.07025146484375, + "learning_rate": 1.224e-06, + "loss": 29.8188, "step": 6120 }, { - "epoch": 0.024765975670357994, - "grad_norm": 1387.2384033203125, - "learning_rate": 1.2260000000000001e-05, - "loss": 208.124, + "epoch": 0.012382987835178997, + "grad_norm": 1289.874755859375, + "learning_rate": 1.2260000000000002e-06, + "loss": 26.9208, "step": 6130 }, { - "epoch": 0.024806376935725627, - "grad_norm": 1911.8060302734375, - "learning_rate": 1.2280000000000001e-05, - "loss": 208.8157, + "epoch": 0.012403188467862813, + "grad_norm": 839.939453125, + "learning_rate": 1.2280000000000001e-06, + "loss": 29.809, "step": 6140 }, { - "epoch": 0.024846778201093257, - "grad_norm": 770.5685424804688, - "learning_rate": 1.23e-05, - "loss": 205.734, + "epoch": 0.012423389100546628, + "grad_norm": 0.0, + "learning_rate": 1.23e-06, + "loss": 10.9996, "step": 6150 }, { - "epoch": 0.02488717946646089, - "grad_norm": 718.35498046875, - "learning_rate": 1.232e-05, - "loss": 200.1725, + "epoch": 0.012443589733230445, + "grad_norm": 687.5811157226562, + "learning_rate": 1.2320000000000002e-06, + "loss": 30.5085, "step": 6160 }, { - "epoch": 0.02492758073182852, - "grad_norm": 1146.25244140625, - "learning_rate": 1.234e-05, - "loss": 197.4777, + "epoch": 0.01246379036591426, + "grad_norm": 127.76237487792969, + "learning_rate": 1.234e-06, + "loss": 29.9668, "step": 6170 }, { - "epoch": 0.024967981997196152, - "grad_norm": 773.2630004882812, - "learning_rate": 1.236e-05, - "loss": 217.971, + "epoch": 0.012483990998598076, + "grad_norm": 1359.7757568359375, + "learning_rate": 1.2360000000000001e-06, + "loss": 30.7715, "step": 6180 }, { - "epoch": 0.025008383262563782, - "grad_norm": 888.3221435546875, - "learning_rate": 1.238e-05, - "loss": 198.487, + "epoch": 0.012504191631281891, + "grad_norm": 436.6647644042969, + "learning_rate": 1.238e-06, + "loss": 33.9171, "step": 6190 }, { - "epoch": 0.025048784527931415, - "grad_norm": 971.0499267578125, - "learning_rate": 1.24e-05, - "loss": 239.6828, + "epoch": 0.012524392263965707, + "grad_norm": 281.2616882324219, + "learning_rate": 1.2400000000000002e-06, + "loss": 41.9189, "step": 6200 }, { - "epoch": 0.025089185793299044, - "grad_norm": 10692.5146484375, - "learning_rate": 1.2420000000000001e-05, - "loss": 258.4363, + "epoch": 0.012544592896649522, + "grad_norm": 758.2962036132812, + "learning_rate": 1.2420000000000001e-06, + "loss": 33.9747, "step": 6210 }, { - "epoch": 0.025129587058666678, - "grad_norm": 791.8817138671875, - "learning_rate": 1.244e-05, - "loss": 213.3801, + "epoch": 0.012564793529333339, + "grad_norm": 684.6299438476562, + "learning_rate": 1.244e-06, + "loss": 35.7783, "step": 6220 }, { - "epoch": 0.025169988324034307, - "grad_norm": 1000.6807861328125, - "learning_rate": 1.2460000000000001e-05, - "loss": 181.9501, + "epoch": 0.012584994162017154, + "grad_norm": 1113.127197265625, + "learning_rate": 1.2460000000000002e-06, + "loss": 48.0597, "step": 6230 }, { - "epoch": 0.02521038958940194, - "grad_norm": 892.7822875976562, - "learning_rate": 1.248e-05, - "loss": 167.6201, + "epoch": 0.01260519479470097, + "grad_norm": 840.9987182617188, + "learning_rate": 1.248e-06, + "loss": 22.5444, "step": 6240 }, { - "epoch": 0.02525079085476957, - "grad_norm": 537.8553466796875, - "learning_rate": 1.25e-05, - "loss": 242.3135, + "epoch": 0.012625395427384785, + "grad_norm": 520.2071533203125, + "learning_rate": 1.25e-06, + "loss": 15.9313, "step": 6250 }, { - "epoch": 0.025291192120137203, - "grad_norm": 1086.2513427734375, - "learning_rate": 1.252e-05, - "loss": 222.7271, + "epoch": 0.012645596060068601, + "grad_norm": 556.5119018554688, + "learning_rate": 1.2520000000000003e-06, + "loss": 32.9128, "step": 6260 }, { - "epoch": 0.025331593385504832, - "grad_norm": 4348.87353515625, - "learning_rate": 1.2540000000000002e-05, - "loss": 316.0617, + "epoch": 0.012665796692752416, + "grad_norm": 228.95762634277344, + "learning_rate": 1.2540000000000002e-06, + "loss": 16.7764, "step": 6270 }, { - "epoch": 0.025371994650872465, - "grad_norm": 1642.019287109375, - "learning_rate": 1.256e-05, - "loss": 202.9976, + "epoch": 0.012685997325436233, + "grad_norm": 316.0266418457031, + "learning_rate": 1.256e-06, + "loss": 25.5342, "step": 6280 }, { - "epoch": 0.025412395916240095, - "grad_norm": 685.2844848632812, - "learning_rate": 1.258e-05, - "loss": 147.7885, + "epoch": 0.012706197958120048, + "grad_norm": 18.748266220092773, + "learning_rate": 1.258e-06, + "loss": 42.3436, "step": 6290 }, { - "epoch": 0.025452797181607728, - "grad_norm": 2664.384765625, - "learning_rate": 1.2600000000000001e-05, - "loss": 178.4338, + "epoch": 0.012726398590803864, + "grad_norm": 1323.001953125, + "learning_rate": 1.26e-06, + "loss": 32.5139, "step": 6300 }, { - "epoch": 0.025493198446975358, - "grad_norm": 727.255126953125, - "learning_rate": 1.2620000000000001e-05, - "loss": 142.211, + "epoch": 0.012746599223487679, + "grad_norm": 942.7252197265625, + "learning_rate": 1.2620000000000002e-06, + "loss": 29.162, "step": 6310 }, { - "epoch": 0.02553359971234299, - "grad_norm": 2616.48486328125, - "learning_rate": 1.2640000000000003e-05, - "loss": 215.0363, + "epoch": 0.012766799856171495, + "grad_norm": 1211.36962890625, + "learning_rate": 1.2640000000000003e-06, + "loss": 29.0307, "step": 6320 }, { - "epoch": 0.02557400097771062, - "grad_norm": 1724.4073486328125, - "learning_rate": 1.2659999999999999e-05, - "loss": 268.9592, + "epoch": 0.01278700048885531, + "grad_norm": 802.3575439453125, + "learning_rate": 1.266e-06, + "loss": 21.0279, "step": 6330 }, { - "epoch": 0.025614402243078253, - "grad_norm": 757.4139404296875, - "learning_rate": 1.268e-05, - "loss": 245.0375, + "epoch": 0.012807201121539127, + "grad_norm": 740.271728515625, + "learning_rate": 1.268e-06, + "loss": 20.676, "step": 6340 }, { - "epoch": 0.025654803508445883, - "grad_norm": 804.900634765625, - "learning_rate": 1.27e-05, - "loss": 258.7294, + "epoch": 0.012827401754222941, + "grad_norm": 419.27020263671875, + "learning_rate": 1.2700000000000001e-06, + "loss": 51.1324, "step": 6350 }, { - "epoch": 0.025695204773813516, - "grad_norm": 1432.1217041015625, - "learning_rate": 1.2720000000000002e-05, - "loss": 226.9255, + "epoch": 0.012847602386906758, + "grad_norm": 692.2057495117188, + "learning_rate": 1.2720000000000003e-06, + "loss": 36.6126, "step": 6360 }, { - "epoch": 0.025735606039181146, - "grad_norm": 1246.9285888671875, - "learning_rate": 1.2740000000000002e-05, - "loss": 190.6593, + "epoch": 0.012867803019590573, + "grad_norm": 132.77894592285156, + "learning_rate": 1.2740000000000002e-06, + "loss": 21.4999, "step": 6370 }, { - "epoch": 0.02577600730454878, - "grad_norm": 1320.610107421875, - "learning_rate": 1.276e-05, - "loss": 254.1634, + "epoch": 0.01288800365227439, + "grad_norm": 595.9766845703125, + "learning_rate": 1.276e-06, + "loss": 28.293, "step": 6380 }, { - "epoch": 0.025816408569916408, - "grad_norm": 5738.62939453125, - "learning_rate": 1.278e-05, - "loss": 214.6841, + "epoch": 0.012908204284958204, + "grad_norm": 1459.1014404296875, + "learning_rate": 1.278e-06, + "loss": 21.5185, "step": 6390 }, { - "epoch": 0.02585680983528404, - "grad_norm": 649.080810546875, - "learning_rate": 1.2800000000000001e-05, - "loss": 170.7529, + "epoch": 0.01292840491764202, + "grad_norm": 1731.2054443359375, + "learning_rate": 1.28e-06, + "loss": 47.9539, "step": 6400 }, { - "epoch": 0.02589721110065167, - "grad_norm": 756.0393676757812, - "learning_rate": 1.2820000000000001e-05, - "loss": 186.7894, + "epoch": 0.012948605550325835, + "grad_norm": 750.8800048828125, + "learning_rate": 1.2820000000000002e-06, + "loss": 42.8562, "step": 6410 }, { - "epoch": 0.025937612366019304, - "grad_norm": 2048.561279296875, - "learning_rate": 1.2839999999999999e-05, - "loss": 182.1876, + "epoch": 0.012968806183009652, + "grad_norm": 646.1731567382812, + "learning_rate": 1.284e-06, + "loss": 32.3806, "step": 6420 }, { - "epoch": 0.025978013631386934, - "grad_norm": 805.5757446289062, - "learning_rate": 1.286e-05, - "loss": 196.0959, + "epoch": 0.012989006815693467, + "grad_norm": 306.4696044921875, + "learning_rate": 1.286e-06, + "loss": 24.9085, "step": 6430 }, { - "epoch": 0.026018414896754567, - "grad_norm": 5938.0439453125, - "learning_rate": 1.288e-05, - "loss": 254.1976, + "epoch": 0.013009207448377283, + "grad_norm": 169.34942626953125, + "learning_rate": 1.288e-06, + "loss": 32.6633, "step": 6440 }, { - "epoch": 0.026058816162122196, - "grad_norm": 1488.987060546875, - "learning_rate": 1.29e-05, - "loss": 250.4606, + "epoch": 0.013029408081061098, + "grad_norm": 469.367919921875, + "learning_rate": 1.2900000000000001e-06, + "loss": 27.3972, "step": 6450 }, { - "epoch": 0.02609921742748983, - "grad_norm": 1574.7130126953125, - "learning_rate": 1.2920000000000002e-05, - "loss": 157.1144, + "epoch": 0.013049608713744915, + "grad_norm": 553.5028686523438, + "learning_rate": 1.2920000000000003e-06, + "loss": 35.9573, "step": 6460 }, { - "epoch": 0.02613961869285746, - "grad_norm": 736.7561645507812, - "learning_rate": 1.294e-05, - "loss": 220.315, + "epoch": 0.01306980934642873, + "grad_norm": 262.7359924316406, + "learning_rate": 1.294e-06, + "loss": 30.1157, "step": 6470 }, { - "epoch": 0.026180019958225092, - "grad_norm": 972.5496826171875, - "learning_rate": 1.296e-05, - "loss": 257.4698, + "epoch": 0.013090009979112546, + "grad_norm": 839.3622436523438, + "learning_rate": 1.296e-06, + "loss": 38.7697, "step": 6480 }, { - "epoch": 0.02622042122359272, - "grad_norm": 4659.64794921875, - "learning_rate": 1.2980000000000001e-05, - "loss": 201.6754, + "epoch": 0.01311021061179636, + "grad_norm": 469.48895263671875, + "learning_rate": 1.2980000000000001e-06, + "loss": 33.9329, "step": 6490 }, { - "epoch": 0.026260822488960354, - "grad_norm": 803.3303833007812, - "learning_rate": 1.3000000000000001e-05, - "loss": 227.2734, + "epoch": 0.013130411244480177, + "grad_norm": 435.9288635253906, + "learning_rate": 1.3e-06, + "loss": 30.6598, "step": 6500 }, { - "epoch": 0.026301223754327984, - "grad_norm": 900.7424926757812, - "learning_rate": 1.3020000000000002e-05, - "loss": 204.3565, + "epoch": 0.013150611877163992, + "grad_norm": 1100.7294921875, + "learning_rate": 1.3020000000000002e-06, + "loss": 38.6149, "step": 6510 }, { - "epoch": 0.026341625019695617, - "grad_norm": 880.3982543945312, - "learning_rate": 1.3039999999999999e-05, - "loss": 213.0109, + "epoch": 0.013170812509847809, + "grad_norm": 978.08349609375, + "learning_rate": 1.304e-06, + "loss": 29.158, "step": 6520 }, { - "epoch": 0.026382026285063247, - "grad_norm": 1765.91357421875, - "learning_rate": 1.306e-05, - "loss": 238.4321, + "epoch": 0.013191013142531623, + "grad_norm": 440.118408203125, + "learning_rate": 1.306e-06, + "loss": 51.669, "step": 6530 }, { - "epoch": 0.02642242755043088, - "grad_norm": 2665.921875, - "learning_rate": 1.308e-05, - "loss": 227.712, + "epoch": 0.01321121377521544, + "grad_norm": 217.9273223876953, + "learning_rate": 1.308e-06, + "loss": 24.4326, "step": 6540 }, { - "epoch": 0.02646282881579851, - "grad_norm": 2605.635009765625, - "learning_rate": 1.3100000000000002e-05, - "loss": 162.2536, + "epoch": 0.013231414407899255, + "grad_norm": 356.3488464355469, + "learning_rate": 1.3100000000000002e-06, + "loss": 26.9776, "step": 6550 }, { - "epoch": 0.026503230081166142, - "grad_norm": 793.8833618164062, - "learning_rate": 1.3120000000000001e-05, - "loss": 192.5132, + "epoch": 0.013251615040583071, + "grad_norm": 340.18060302734375, + "learning_rate": 1.3120000000000003e-06, + "loss": 39.9926, "step": 6560 }, { - "epoch": 0.026543631346533772, - "grad_norm": 2946.634765625, - "learning_rate": 1.314e-05, - "loss": 159.2729, + "epoch": 0.013271815673266886, + "grad_norm": 984.8867797851562, + "learning_rate": 1.314e-06, + "loss": 54.8693, "step": 6570 }, { - "epoch": 0.026584032611901405, - "grad_norm": 910.0969848632812, - "learning_rate": 1.316e-05, - "loss": 191.1152, + "epoch": 0.013292016305950703, + "grad_norm": 1010.78466796875, + "learning_rate": 1.316e-06, + "loss": 23.7788, "step": 6580 }, { - "epoch": 0.026624433877269035, - "grad_norm": 808.4738159179688, - "learning_rate": 1.3180000000000001e-05, - "loss": 117.279, + "epoch": 0.013312216938634517, + "grad_norm": 733.6953125, + "learning_rate": 1.3180000000000001e-06, + "loss": 34.9994, "step": 6590 }, { - "epoch": 0.026664835142636668, - "grad_norm": 716.1993408203125, - "learning_rate": 1.32e-05, - "loss": 246.6991, + "epoch": 0.013332417571318334, + "grad_norm": 373.2271728515625, + "learning_rate": 1.32e-06, + "loss": 37.3207, "step": 6600 }, { - "epoch": 0.026705236408004297, - "grad_norm": 1362.393310546875, - "learning_rate": 1.3220000000000002e-05, - "loss": 233.1123, + "epoch": 0.013352618204002149, + "grad_norm": 171.13934326171875, + "learning_rate": 1.3220000000000002e-06, + "loss": 12.7663, "step": 6610 }, { - "epoch": 0.02674563767337193, - "grad_norm": 1083.3197021484375, - "learning_rate": 1.324e-05, - "loss": 169.5999, + "epoch": 0.013372818836685965, + "grad_norm": 357.4339599609375, + "learning_rate": 1.324e-06, + "loss": 35.8358, "step": 6620 }, { - "epoch": 0.02678603893873956, - "grad_norm": 754.7849731445312, - "learning_rate": 1.326e-05, - "loss": 306.0971, + "epoch": 0.01339301946936978, + "grad_norm": 220.25674438476562, + "learning_rate": 1.326e-06, + "loss": 15.3749, "step": 6630 }, { - "epoch": 0.026826440204107193, - "grad_norm": 968.374755859375, - "learning_rate": 1.3280000000000002e-05, - "loss": 159.3224, + "epoch": 0.013413220102053596, + "grad_norm": 462.05841064453125, + "learning_rate": 1.328e-06, + "loss": 38.8295, "step": 6640 }, { - "epoch": 0.026866841469474823, - "grad_norm": 670.0838623046875, - "learning_rate": 1.3300000000000001e-05, - "loss": 155.3162, + "epoch": 0.013433420734737411, + "grad_norm": 469.8040466308594, + "learning_rate": 1.3300000000000002e-06, + "loss": 22.9475, "step": 6650 }, { - "epoch": 0.026907242734842456, - "grad_norm": 2666.84326171875, - "learning_rate": 1.3320000000000001e-05, - "loss": 173.0536, + "epoch": 0.013453621367421228, + "grad_norm": 971.557373046875, + "learning_rate": 1.3320000000000003e-06, + "loss": 44.1314, "step": 6660 }, { - "epoch": 0.026947644000210085, - "grad_norm": 1368.1116943359375, - "learning_rate": 1.334e-05, - "loss": 197.1025, + "epoch": 0.013473822000105043, + "grad_norm": 298.933837890625, + "learning_rate": 1.334e-06, + "loss": 28.8329, "step": 6670 }, { - "epoch": 0.026988045265577718, - "grad_norm": 1038.586181640625, - "learning_rate": 1.336e-05, - "loss": 156.9807, + "epoch": 0.013494022632788859, + "grad_norm": 623.2842407226562, + "learning_rate": 1.336e-06, + "loss": 35.835, "step": 6680 }, { - "epoch": 0.027028446530945348, - "grad_norm": 1009.5262451171875, - "learning_rate": 1.338e-05, - "loss": 204.5245, + "epoch": 0.013514223265472674, + "grad_norm": 441.5866394042969, + "learning_rate": 1.3380000000000001e-06, + "loss": 19.0923, "step": 6690 }, { - "epoch": 0.02706884779631298, - "grad_norm": 1839.5418701171875, - "learning_rate": 1.3400000000000002e-05, - "loss": 266.4659, + "epoch": 0.01353442389815649, + "grad_norm": 1859.7860107421875, + "learning_rate": 1.34e-06, + "loss": 35.8646, "step": 6700 }, { - "epoch": 0.02710924906168061, - "grad_norm": 829.3477783203125, - "learning_rate": 1.3420000000000002e-05, - "loss": 240.5496, + "epoch": 0.013554624530840305, + "grad_norm": 64.71363067626953, + "learning_rate": 1.3420000000000002e-06, + "loss": 15.234, "step": 6710 }, { - "epoch": 0.027149650327048244, - "grad_norm": 2405.634033203125, - "learning_rate": 1.344e-05, - "loss": 190.1635, + "epoch": 0.013574825163524122, + "grad_norm": 333.45794677734375, + "learning_rate": 1.344e-06, + "loss": 22.3284, "step": 6720 }, { - "epoch": 0.027190051592415873, - "grad_norm": 3415.678466796875, - "learning_rate": 1.346e-05, - "loss": 256.6028, + "epoch": 0.013595025796207937, + "grad_norm": 393.3450622558594, + "learning_rate": 1.3460000000000001e-06, + "loss": 54.5108, "step": 6730 }, { - "epoch": 0.027230452857783506, - "grad_norm": 1140.322998046875, - "learning_rate": 1.3480000000000001e-05, - "loss": 213.0321, + "epoch": 0.013615226428891753, + "grad_norm": 587.0012817382812, + "learning_rate": 1.348e-06, + "loss": 31.177, "step": 6740 }, { - "epoch": 0.027270854123151136, - "grad_norm": 1814.612548828125, - "learning_rate": 1.3500000000000001e-05, - "loss": 289.2976, + "epoch": 0.013635427061575568, + "grad_norm": 309.5393371582031, + "learning_rate": 1.3500000000000002e-06, + "loss": 13.8727, "step": 6750 }, { - "epoch": 0.02731125538851877, - "grad_norm": 5082.0244140625, - "learning_rate": 1.352e-05, - "loss": 213.1429, + "epoch": 0.013655627694259384, + "grad_norm": 468.2892150878906, + "learning_rate": 1.352e-06, + "loss": 16.0866, "step": 6760 }, { - "epoch": 0.0273516566538864, - "grad_norm": 791.2843627929688, - "learning_rate": 1.3539999999999999e-05, - "loss": 174.2937, + "epoch": 0.0136758283269432, + "grad_norm": 878.9414672851562, + "learning_rate": 1.354e-06, + "loss": 31.6945, "step": 6770 }, { - "epoch": 0.02739205791925403, - "grad_norm": 1486.5364990234375, - "learning_rate": 1.356e-05, - "loss": 211.7225, + "epoch": 0.013696028959627016, + "grad_norm": 963.8592529296875, + "learning_rate": 1.356e-06, + "loss": 26.1409, "step": 6780 }, { - "epoch": 0.02743245918462166, - "grad_norm": 2779.84765625, - "learning_rate": 1.358e-05, - "loss": 206.9393, + "epoch": 0.01371622959231083, + "grad_norm": 742.5006713867188, + "learning_rate": 1.3580000000000002e-06, + "loss": 20.0253, "step": 6790 }, { - "epoch": 0.027472860449989294, - "grad_norm": 2755.901611328125, - "learning_rate": 1.3600000000000002e-05, - "loss": 244.6609, + "epoch": 0.013736430224994647, + "grad_norm": 302.4109802246094, + "learning_rate": 1.3600000000000001e-06, + "loss": 30.4902, "step": 6800 }, { - "epoch": 0.027513261715356924, - "grad_norm": 1252.9466552734375, - "learning_rate": 1.362e-05, - "loss": 185.0354, + "epoch": 0.013756630857678462, + "grad_norm": 703.2691650390625, + "learning_rate": 1.362e-06, + "loss": 101.9395, "step": 6810 }, { - "epoch": 0.027553662980724557, - "grad_norm": 803.9773559570312, - "learning_rate": 1.364e-05, - "loss": 163.7829, + "epoch": 0.013776831490362278, + "grad_norm": 1015.4454956054688, + "learning_rate": 1.364e-06, + "loss": 22.2598, "step": 6820 }, { - "epoch": 0.027594064246092186, - "grad_norm": 3479.50341796875, - "learning_rate": 1.3660000000000001e-05, - "loss": 199.5576, + "epoch": 0.013797032123046093, + "grad_norm": 394.2761535644531, + "learning_rate": 1.3660000000000001e-06, + "loss": 36.6149, "step": 6830 }, { - "epoch": 0.02763446551145982, - "grad_norm": 4041.821533203125, - "learning_rate": 1.3680000000000001e-05, - "loss": 146.2625, + "epoch": 0.01381723275572991, + "grad_norm": 867.6441650390625, + "learning_rate": 1.368e-06, + "loss": 32.4753, "step": 6840 }, { - "epoch": 0.02767486677682745, - "grad_norm": 951.2683715820312, - "learning_rate": 1.3700000000000001e-05, - "loss": 242.1677, + "epoch": 0.013837433388413724, + "grad_norm": 386.0970458984375, + "learning_rate": 1.3700000000000002e-06, + "loss": 31.9341, "step": 6850 }, { - "epoch": 0.027715268042195082, - "grad_norm": 2521.80224609375, - "learning_rate": 1.3719999999999999e-05, - "loss": 162.2531, + "epoch": 0.013857634021097541, + "grad_norm": 650.8311767578125, + "learning_rate": 1.372e-06, + "loss": 25.4292, "step": 6860 }, { - "epoch": 0.02775566930756271, - "grad_norm": 2120.54150390625, - "learning_rate": 1.374e-05, - "loss": 218.3948, + "epoch": 0.013877834653781356, + "grad_norm": 281.85302734375, + "learning_rate": 1.374e-06, + "loss": 15.7737, "step": 6870 }, { - "epoch": 0.027796070572930345, - "grad_norm": 890.4383544921875, - "learning_rate": 1.376e-05, - "loss": 237.4137, + "epoch": 0.013898035286465172, + "grad_norm": 494.3153991699219, + "learning_rate": 1.376e-06, + "loss": 12.811, "step": 6880 }, { - "epoch": 0.027836471838297974, - "grad_norm": 1014.8067626953125, - "learning_rate": 1.3780000000000002e-05, - "loss": 271.9613, + "epoch": 0.013918235919148987, + "grad_norm": 103.57233428955078, + "learning_rate": 1.3780000000000002e-06, + "loss": 24.2835, "step": 6890 }, { - "epoch": 0.027876873103665607, - "grad_norm": 2228.795654296875, - "learning_rate": 1.3800000000000002e-05, - "loss": 160.2129, + "epoch": 0.013938436551832804, + "grad_norm": 313.1036071777344, + "learning_rate": 1.3800000000000001e-06, + "loss": 26.341, "step": 6900 }, { - "epoch": 0.027917274369033237, - "grad_norm": 1605.4246826171875, - "learning_rate": 1.382e-05, - "loss": 192.7938, + "epoch": 0.013958637184516618, + "grad_norm": 364.7337951660156, + "learning_rate": 1.382e-06, + "loss": 19.7548, "step": 6910 }, { - "epoch": 0.02795767563440087, - "grad_norm": 726.9761352539062, - "learning_rate": 1.384e-05, - "loss": 166.0171, + "epoch": 0.013978837817200435, + "grad_norm": 708.6334838867188, + "learning_rate": 1.384e-06, + "loss": 24.823, "step": 6920 }, { - "epoch": 0.0279980768997685, - "grad_norm": 1300.53076171875, - "learning_rate": 1.3860000000000001e-05, - "loss": 155.9882, + "epoch": 0.01399903844988425, + "grad_norm": 370.78387451171875, + "learning_rate": 1.3860000000000002e-06, + "loss": 22.2033, "step": 6930 }, { - "epoch": 0.028038478165136133, - "grad_norm": 1467.7305908203125, - "learning_rate": 1.3880000000000001e-05, - "loss": 175.5876, + "epoch": 0.014019239082568066, + "grad_norm": 584.7822875976562, + "learning_rate": 1.388e-06, + "loss": 21.2259, "step": 6940 }, { - "epoch": 0.028078879430503762, - "grad_norm": 653.6290283203125, - "learning_rate": 1.3900000000000002e-05, - "loss": 177.4526, + "epoch": 0.014039439715251881, + "grad_norm": 434.93896484375, + "learning_rate": 1.3900000000000002e-06, + "loss": 17.3879, "step": 6950 }, { - "epoch": 0.028119280695871395, - "grad_norm": 1209.3408203125, - "learning_rate": 1.3919999999999999e-05, - "loss": 139.5799, + "epoch": 0.014059640347935698, + "grad_norm": 1412.916015625, + "learning_rate": 1.392e-06, + "loss": 43.3578, "step": 6960 }, { - "epoch": 0.028159681961239025, - "grad_norm": 0.0, - "learning_rate": 1.394e-05, - "loss": 114.6302, + "epoch": 0.014079840980619512, + "grad_norm": 690.4659423828125, + "learning_rate": 1.3940000000000001e-06, + "loss": 29.9748, "step": 6970 }, { - "epoch": 0.028200083226606658, - "grad_norm": 2785.091796875, - "learning_rate": 1.396e-05, - "loss": 197.9756, + "epoch": 0.014100041613303329, + "grad_norm": 631.6376953125, + "learning_rate": 1.396e-06, + "loss": 27.4844, "step": 6980 }, { - "epoch": 0.028240484491974287, - "grad_norm": 1010.1685180664062, - "learning_rate": 1.3980000000000002e-05, - "loss": 194.4374, + "epoch": 0.014120242245987144, + "grad_norm": 136.48374938964844, + "learning_rate": 1.3980000000000002e-06, + "loss": 14.787, "step": 6990 }, { - "epoch": 0.02828088575734192, - "grad_norm": 810.1452026367188, - "learning_rate": 1.4000000000000001e-05, - "loss": 185.4311, + "epoch": 0.01414044287867096, + "grad_norm": 419.7654724121094, + "learning_rate": 1.4000000000000001e-06, + "loss": 29.0004, "step": 7000 }, { - "epoch": 0.02832128702270955, - "grad_norm": 793.3577270507812, - "learning_rate": 1.402e-05, - "loss": 186.9826, + "epoch": 0.014160643511354775, + "grad_norm": 468.74468994140625, + "learning_rate": 1.402e-06, + "loss": 19.4345, "step": 7010 }, { - "epoch": 0.028361688288077183, - "grad_norm": 1589.5145263671875, - "learning_rate": 1.4040000000000001e-05, - "loss": 224.4251, + "epoch": 0.014180844144038592, + "grad_norm": 468.9650573730469, + "learning_rate": 1.404e-06, + "loss": 26.9539, "step": 7020 }, { - "epoch": 0.028402089553444813, - "grad_norm": 730.669189453125, - "learning_rate": 1.4060000000000001e-05, - "loss": 129.3718, + "epoch": 0.014201044776722406, + "grad_norm": 256.4075622558594, + "learning_rate": 1.4060000000000002e-06, + "loss": 21.5345, "step": 7030 }, { - "epoch": 0.028442490818812446, - "grad_norm": 2354.304443359375, - "learning_rate": 1.408e-05, - "loss": 176.9131, + "epoch": 0.014221245409406223, + "grad_norm": 363.6224060058594, + "learning_rate": 1.4080000000000001e-06, + "loss": 28.2582, "step": 7040 }, { - "epoch": 0.028482892084180075, - "grad_norm": 1103.234619140625, - "learning_rate": 1.4099999999999999e-05, - "loss": 198.4438, + "epoch": 0.014241446042090038, + "grad_norm": 2213.649169921875, + "learning_rate": 1.41e-06, + "loss": 29.7299, "step": 7050 }, { - "epoch": 0.02852329334954771, - "grad_norm": 1036.2913818359375, - "learning_rate": 1.412e-05, - "loss": 228.0053, + "epoch": 0.014261646674773854, + "grad_norm": 525.2031860351562, + "learning_rate": 1.412e-06, + "loss": 15.0173, "step": 7060 }, { - "epoch": 0.028563694614915338, - "grad_norm": 3621.711181640625, - "learning_rate": 1.414e-05, - "loss": 140.6286, + "epoch": 0.014281847307457669, + "grad_norm": 571.482666015625, + "learning_rate": 1.4140000000000001e-06, + "loss": 22.69, "step": 7070 }, { - "epoch": 0.02860409588028297, - "grad_norm": 1558.1011962890625, - "learning_rate": 1.4160000000000002e-05, - "loss": 187.4032, + "epoch": 0.014302047940141486, + "grad_norm": 432.14385986328125, + "learning_rate": 1.416e-06, + "loss": 21.501, "step": 7080 }, { - "epoch": 0.0286444971456506, - "grad_norm": 2425.656494140625, - "learning_rate": 1.4180000000000001e-05, - "loss": 198.9401, + "epoch": 0.0143222485728253, + "grad_norm": 230.9308319091797, + "learning_rate": 1.4180000000000002e-06, + "loss": 23.7384, "step": 7090 }, { - "epoch": 0.028684898411018234, - "grad_norm": 878.8297729492188, - "learning_rate": 1.42e-05, - "loss": 248.0995, + "epoch": 0.014342449205509117, + "grad_norm": 689.4318237304688, + "learning_rate": 1.42e-06, + "loss": 16.4564, "step": 7100 }, { - "epoch": 0.028725299676385863, - "grad_norm": 791.4844360351562, - "learning_rate": 1.422e-05, - "loss": 223.9328, + "epoch": 0.014362649838192932, + "grad_norm": 833.6482543945312, + "learning_rate": 1.4220000000000001e-06, + "loss": 42.0199, "step": 7110 }, { - "epoch": 0.028765700941753496, - "grad_norm": 984.3999633789062, - "learning_rate": 1.4240000000000001e-05, - "loss": 207.1025, + "epoch": 0.014382850470876748, + "grad_norm": 263.74810791015625, + "learning_rate": 1.424e-06, + "loss": 16.752, "step": 7120 }, { - "epoch": 0.028806102207121126, - "grad_norm": 1250.86669921875, - "learning_rate": 1.426e-05, - "loss": 168.4282, + "epoch": 0.014403051103560563, + "grad_norm": 69.91686248779297, + "learning_rate": 1.4260000000000002e-06, + "loss": 11.7691, "step": 7130 }, { - "epoch": 0.02884650347248876, - "grad_norm": 993.6478881835938, - "learning_rate": 1.4280000000000002e-05, - "loss": 204.2866, + "epoch": 0.01442325173624438, + "grad_norm": 802.9617309570312, + "learning_rate": 1.4280000000000001e-06, + "loss": 29.6307, "step": 7140 }, { - "epoch": 0.02888690473785639, - "grad_norm": 1394.8699951171875, - "learning_rate": 1.43e-05, - "loss": 321.6906, + "epoch": 0.014443452368928194, + "grad_norm": 1388.6160888671875, + "learning_rate": 1.43e-06, + "loss": 17.46, "step": 7150 }, { - "epoch": 0.02892730600322402, - "grad_norm": 1557.498291015625, - "learning_rate": 1.432e-05, - "loss": 240.2873, + "epoch": 0.01446365300161201, + "grad_norm": 202.5712127685547, + "learning_rate": 1.432e-06, + "loss": 14.3374, "step": 7160 }, { - "epoch": 0.02896770726859165, - "grad_norm": 3150.471435546875, - "learning_rate": 1.434e-05, - "loss": 173.3683, + "epoch": 0.014483853634295826, + "grad_norm": 547.8307495117188, + "learning_rate": 1.4340000000000002e-06, + "loss": 30.6452, "step": 7170 }, { - "epoch": 0.029008108533959284, - "grad_norm": 40070.46875, - "learning_rate": 1.4360000000000001e-05, - "loss": 300.3157, + "epoch": 0.014504054266979642, + "grad_norm": 211.10414123535156, + "learning_rate": 1.436e-06, + "loss": 13.0712, "step": 7180 }, { - "epoch": 0.029048509799326914, - "grad_norm": 1962.33447265625, - "learning_rate": 1.4380000000000001e-05, - "loss": 240.5531, + "epoch": 0.014524254899663457, + "grad_norm": 756.9300537109375, + "learning_rate": 1.4380000000000003e-06, + "loss": 23.8358, "step": 7190 }, { - "epoch": 0.029088911064694547, - "grad_norm": 9524.1953125, - "learning_rate": 1.44e-05, - "loss": 197.5709, + "epoch": 0.014544455532347273, + "grad_norm": 378.86663818359375, + "learning_rate": 1.44e-06, + "loss": 26.3164, "step": 7200 }, { - "epoch": 0.029129312330062176, - "grad_norm": 1291.194091796875, - "learning_rate": 1.4420000000000001e-05, - "loss": 173.9855, + "epoch": 0.014564656165031088, + "grad_norm": 337.78131103515625, + "learning_rate": 1.4420000000000001e-06, + "loss": 30.7286, "step": 7210 }, { - "epoch": 0.02916971359542981, - "grad_norm": 508.72991943359375, - "learning_rate": 1.444e-05, - "loss": 198.4389, + "epoch": 0.014584856797714905, + "grad_norm": 1024.835205078125, + "learning_rate": 1.444e-06, + "loss": 29.8971, "step": 7220 }, { - "epoch": 0.02921011486079744, - "grad_norm": 3161.301513671875, - "learning_rate": 1.4460000000000002e-05, - "loss": 232.296, + "epoch": 0.01460505743039872, + "grad_norm": 938.3273315429688, + "learning_rate": 1.4460000000000002e-06, + "loss": 27.8823, "step": 7230 }, { - "epoch": 0.029250516126165072, - "grad_norm": 888.337646484375, - "learning_rate": 1.4480000000000002e-05, - "loss": 165.0789, + "epoch": 0.014625258063082536, + "grad_norm": 552.2940063476562, + "learning_rate": 1.4480000000000002e-06, + "loss": 25.7623, "step": 7240 }, { - "epoch": 0.029290917391532702, - "grad_norm": 1378.0032958984375, - "learning_rate": 1.45e-05, - "loss": 199.7313, + "epoch": 0.014645458695766351, + "grad_norm": 352.24383544921875, + "learning_rate": 1.45e-06, + "loss": 12.5614, "step": 7250 }, { - "epoch": 0.029331318656900335, - "grad_norm": 2439.7119140625, - "learning_rate": 1.452e-05, - "loss": 201.1323, + "epoch": 0.014665659328450167, + "grad_norm": 380.1072082519531, + "learning_rate": 1.452e-06, + "loss": 21.0116, "step": 7260 }, { - "epoch": 0.029371719922267964, - "grad_norm": 0.0, - "learning_rate": 1.4540000000000001e-05, - "loss": 139.8308, + "epoch": 0.014685859961133982, + "grad_norm": 89.25300598144531, + "learning_rate": 1.4540000000000002e-06, + "loss": 37.3816, "step": 7270 }, { - "epoch": 0.029412121187635597, - "grad_norm": 697.4395751953125, - "learning_rate": 1.4560000000000001e-05, - "loss": 215.4804, + "epoch": 0.014706060593817799, + "grad_norm": 742.039306640625, + "learning_rate": 1.4560000000000001e-06, + "loss": 20.3916, "step": 7280 }, { - "epoch": 0.029452522453003227, - "grad_norm": 1335.80859375, - "learning_rate": 1.4580000000000003e-05, - "loss": 206.551, + "epoch": 0.014726261226501614, + "grad_norm": 408.6346435546875, + "learning_rate": 1.4580000000000003e-06, + "loss": 22.624, "step": 7290 }, { - "epoch": 0.02949292371837086, - "grad_norm": 3926.513427734375, - "learning_rate": 1.4599999999999999e-05, - "loss": 161.3456, + "epoch": 0.01474646185918543, + "grad_norm": 489.7460632324219, + "learning_rate": 1.46e-06, + "loss": 51.9042, "step": 7300 }, { - "epoch": 0.02953332498373849, - "grad_norm": 2071.3857421875, - "learning_rate": 1.462e-05, - "loss": 245.7546, + "epoch": 0.014766662491869245, + "grad_norm": 378.1903991699219, + "learning_rate": 1.4620000000000001e-06, + "loss": 22.1225, "step": 7310 }, { - "epoch": 0.029573726249106123, - "grad_norm": 1162.638916015625, - "learning_rate": 1.464e-05, - "loss": 247.4758, + "epoch": 0.014786863124553061, + "grad_norm": 118.77698516845703, + "learning_rate": 1.464e-06, + "loss": 27.0962, "step": 7320 }, { - "epoch": 0.029614127514473752, - "grad_norm": 1904.095947265625, - "learning_rate": 1.4660000000000002e-05, - "loss": 209.4585, + "epoch": 0.014807063757236876, + "grad_norm": 497.64141845703125, + "learning_rate": 1.4660000000000002e-06, + "loss": 23.5584, "step": 7330 }, { - "epoch": 0.029654528779841385, - "grad_norm": 2012.481689453125, - "learning_rate": 1.4680000000000002e-05, - "loss": 177.5473, + "epoch": 0.014827264389920693, + "grad_norm": 26.968568801879883, + "learning_rate": 1.4680000000000002e-06, + "loss": 46.2282, "step": 7340 }, { - "epoch": 0.029694930045209015, - "grad_norm": 827.6367797851562, - "learning_rate": 1.47e-05, - "loss": 132.3128, + "epoch": 0.014847465022604507, + "grad_norm": 621.72314453125, + "learning_rate": 1.4700000000000001e-06, + "loss": 33.6739, "step": 7350 }, { - "epoch": 0.029735331310576648, - "grad_norm": 1051.8994140625, - "learning_rate": 1.472e-05, - "loss": 201.6083, + "epoch": 0.014867665655288324, + "grad_norm": 1012.1883544921875, + "learning_rate": 1.472e-06, + "loss": 30.1342, "step": 7360 }, { - "epoch": 0.029775732575944278, - "grad_norm": 2179.918701171875, - "learning_rate": 1.4740000000000001e-05, - "loss": 142.6359, + "epoch": 0.014887866287972139, + "grad_norm": 164.61184692382812, + "learning_rate": 1.4740000000000002e-06, + "loss": 22.5415, "step": 7370 }, { - "epoch": 0.02981613384131191, - "grad_norm": 1439.194091796875, - "learning_rate": 1.4760000000000001e-05, - "loss": 170.9291, + "epoch": 0.014908066920655955, + "grad_norm": 382.0716552734375, + "learning_rate": 1.4760000000000001e-06, + "loss": 63.1735, "step": 7380 }, { - "epoch": 0.02985653510667954, - "grad_norm": 1795.2176513671875, - "learning_rate": 1.4779999999999999e-05, - "loss": 205.2162, + "epoch": 0.01492826755333977, + "grad_norm": 807.2562866210938, + "learning_rate": 1.478e-06, + "loss": 51.1082, "step": 7390 }, { - "epoch": 0.029896936372047173, - "grad_norm": 1061.8426513671875, - "learning_rate": 1.48e-05, - "loss": 291.1986, + "epoch": 0.014948468186023587, + "grad_norm": 600.788330078125, + "learning_rate": 1.48e-06, + "loss": 41.2442, "step": 7400 }, { - "epoch": 0.029937337637414803, - "grad_norm": 1072.127685546875, - "learning_rate": 1.482e-05, - "loss": 190.8879, + "epoch": 0.014968668818707401, + "grad_norm": 547.8676147460938, + "learning_rate": 1.4820000000000002e-06, + "loss": 33.4473, "step": 7410 }, { - "epoch": 0.029977738902782436, - "grad_norm": 1368.1405029296875, - "learning_rate": 1.4840000000000002e-05, - "loss": 196.2706, + "epoch": 0.014988869451391218, + "grad_norm": 785.7941284179688, + "learning_rate": 1.4840000000000001e-06, + "loss": 21.1182, "step": 7420 }, { - "epoch": 0.030018140168150065, - "grad_norm": 1639.9654541015625, - "learning_rate": 1.4860000000000002e-05, - "loss": 129.1992, + "epoch": 0.015009070084075033, + "grad_norm": 373.2097473144531, + "learning_rate": 1.4860000000000003e-06, + "loss": 19.4519, "step": 7430 }, { - "epoch": 0.0300585414335177, - "grad_norm": 1488.4791259765625, - "learning_rate": 1.488e-05, - "loss": 151.4941, + "epoch": 0.01502927071675885, + "grad_norm": 283.32928466796875, + "learning_rate": 1.488e-06, + "loss": 33.4474, "step": 7440 }, { - "epoch": 0.030098942698885328, - "grad_norm": 864.4747924804688, - "learning_rate": 1.49e-05, - "loss": 199.6265, + "epoch": 0.015049471349442664, + "grad_norm": 601.2964477539062, + "learning_rate": 1.4900000000000001e-06, + "loss": 37.3402, "step": 7450 }, { - "epoch": 0.03013934396425296, - "grad_norm": 1255.5771484375, - "learning_rate": 1.4920000000000001e-05, - "loss": 226.9639, + "epoch": 0.01506967198212648, + "grad_norm": 461.42108154296875, + "learning_rate": 1.492e-06, + "loss": 33.2678, "step": 7460 }, { - "epoch": 0.03017974522962059, - "grad_norm": 1635.7647705078125, - "learning_rate": 1.4940000000000001e-05, - "loss": 174.848, + "epoch": 0.015089872614810295, + "grad_norm": 301.6255187988281, + "learning_rate": 1.4940000000000002e-06, + "loss": 21.1947, "step": 7470 }, { - "epoch": 0.030220146494988224, - "grad_norm": 1959.9365234375, - "learning_rate": 1.4960000000000002e-05, - "loss": 215.0835, + "epoch": 0.015110073247494112, + "grad_norm": 528.6672973632812, + "learning_rate": 1.4960000000000002e-06, + "loss": 30.9436, "step": 7480 }, { - "epoch": 0.030260547760355853, - "grad_norm": 1300.870361328125, - "learning_rate": 1.4979999999999999e-05, - "loss": 187.4061, + "epoch": 0.015130273880177927, + "grad_norm": 429.56304931640625, + "learning_rate": 1.498e-06, + "loss": 43.7286, "step": 7490 }, { - "epoch": 0.030300949025723486, - "grad_norm": 5733.919921875, - "learning_rate": 1.5e-05, - "loss": 203.0896, + "epoch": 0.015150474512861743, + "grad_norm": 477.3526306152344, + "learning_rate": 1.5e-06, + "loss": 44.6997, "step": 7500 }, { - "epoch": 0.030341350291091116, - "grad_norm": 6152.53173828125, - "learning_rate": 1.502e-05, - "loss": 292.1695, + "epoch": 0.015170675145545558, + "grad_norm": 694.835205078125, + "learning_rate": 1.5020000000000002e-06, + "loss": 33.5782, "step": 7510 }, { - "epoch": 0.03038175155645875, - "grad_norm": 1118.759033203125, - "learning_rate": 1.5040000000000002e-05, - "loss": 179.8201, + "epoch": 0.015190875778229375, + "grad_norm": 437.0287780761719, + "learning_rate": 1.5040000000000001e-06, + "loss": 14.402, "step": 7520 }, { - "epoch": 0.03042215282182638, - "grad_norm": 1003.1773681640625, - "learning_rate": 1.5060000000000001e-05, - "loss": 168.1782, + "epoch": 0.01521107641091319, + "grad_norm": 432.5226135253906, + "learning_rate": 1.5060000000000003e-06, + "loss": 54.8615, "step": 7530 }, { - "epoch": 0.030462554087194012, - "grad_norm": 1641.7908935546875, - "learning_rate": 1.508e-05, - "loss": 234.2666, + "epoch": 0.015231277043597006, + "grad_norm": 739.1048583984375, + "learning_rate": 1.508e-06, + "loss": 28.0186, "step": 7540 }, { - "epoch": 0.03050295535256164, - "grad_norm": 6260.85791015625, - "learning_rate": 1.51e-05, - "loss": 151.5416, + "epoch": 0.01525147767628082, + "grad_norm": 751.8126220703125, + "learning_rate": 1.5100000000000002e-06, + "loss": 14.3841, "step": 7550 }, { - "epoch": 0.030543356617929274, - "grad_norm": 1593.7576904296875, - "learning_rate": 1.5120000000000001e-05, - "loss": 159.2535, + "epoch": 0.015271678308964637, + "grad_norm": 699.0371704101562, + "learning_rate": 1.512e-06, + "loss": 26.7135, "step": 7560 }, { - "epoch": 0.030583757883296904, - "grad_norm": 1710.3023681640625, - "learning_rate": 1.514e-05, - "loss": 128.8493, + "epoch": 0.015291878941648452, + "grad_norm": 580.2321166992188, + "learning_rate": 1.5140000000000002e-06, + "loss": 43.0058, "step": 7570 }, { - "epoch": 0.030624159148664537, - "grad_norm": 1299.740966796875, - "learning_rate": 1.5160000000000002e-05, - "loss": 291.4667, + "epoch": 0.015312079574332269, + "grad_norm": 953.4240112304688, + "learning_rate": 1.5160000000000002e-06, + "loss": 30.3746, "step": 7580 }, { - "epoch": 0.030664560414032167, - "grad_norm": 772.3851318359375, - "learning_rate": 1.518e-05, - "loss": 254.7446, + "epoch": 0.015332280207016083, + "grad_norm": 498.05792236328125, + "learning_rate": 1.5180000000000001e-06, + "loss": 19.2205, "step": 7590 }, { - "epoch": 0.0307049616793998, - "grad_norm": 4841.77294921875, - "learning_rate": 1.52e-05, - "loss": 261.962, + "epoch": 0.0153524808396999, + "grad_norm": 510.3348083496094, + "learning_rate": 1.52e-06, + "loss": 15.5117, "step": 7600 }, { - "epoch": 0.03074536294476743, - "grad_norm": 1306.54296875, - "learning_rate": 1.5220000000000002e-05, - "loss": 251.6979, + "epoch": 0.015372681472383715, + "grad_norm": 1451.5672607421875, + "learning_rate": 1.5220000000000002e-06, + "loss": 29.8484, "step": 7610 }, { - "epoch": 0.030785764210135062, - "grad_norm": 1031.349365234375, - "learning_rate": 1.5240000000000001e-05, - "loss": 180.2815, + "epoch": 0.015392882105067531, + "grad_norm": 402.60919189453125, + "learning_rate": 1.5240000000000001e-06, + "loss": 28.366, "step": 7620 }, { - "epoch": 0.030826165475502692, - "grad_norm": 1182.68994140625, - "learning_rate": 1.5260000000000003e-05, - "loss": 202.774, + "epoch": 0.015413082737751346, + "grad_norm": 191.9103240966797, + "learning_rate": 1.5260000000000003e-06, + "loss": 35.3297, "step": 7630 }, { - "epoch": 0.030866566740870325, - "grad_norm": 524.015869140625, - "learning_rate": 1.528e-05, - "loss": 148.2353, + "epoch": 0.015433283370435162, + "grad_norm": 371.203125, + "learning_rate": 1.528e-06, + "loss": 10.1235, "step": 7640 }, { - "epoch": 0.030906968006237955, - "grad_norm": 1932.3292236328125, - "learning_rate": 1.53e-05, - "loss": 169.3441, + "epoch": 0.015453484003118977, + "grad_norm": 465.6565856933594, + "learning_rate": 1.5300000000000002e-06, + "loss": 21.8699, "step": 7650 }, { - "epoch": 0.030947369271605588, - "grad_norm": 840.219970703125, - "learning_rate": 1.5320000000000002e-05, - "loss": 129.2729, + "epoch": 0.015473684635802794, + "grad_norm": 703.3648681640625, + "learning_rate": 1.5320000000000001e-06, + "loss": 32.0473, "step": 7660 }, { - "epoch": 0.030987770536973217, - "grad_norm": 1622.7508544921875, - "learning_rate": 1.5340000000000002e-05, - "loss": 160.2887, + "epoch": 0.015493885268486609, + "grad_norm": 468.8616638183594, + "learning_rate": 1.5340000000000003e-06, + "loss": 21.0127, "step": 7670 }, { - "epoch": 0.03102817180234085, - "grad_norm": 2095.8623046875, - "learning_rate": 1.536e-05, - "loss": 245.4731, + "epoch": 0.015514085901170425, + "grad_norm": 1880.29248046875, + "learning_rate": 1.536e-06, + "loss": 26.7551, "step": 7680 }, { - "epoch": 0.03106857306770848, - "grad_norm": 3175.37646484375, - "learning_rate": 1.538e-05, - "loss": 208.5475, + "epoch": 0.01553428653385424, + "grad_norm": 274.2958068847656, + "learning_rate": 1.5380000000000001e-06, + "loss": 19.2041, "step": 7690 }, { - "epoch": 0.031108974333076113, - "grad_norm": 924.9345092773438, - "learning_rate": 1.54e-05, - "loss": 170.3805, + "epoch": 0.015554487166538056, + "grad_norm": 539.3201904296875, + "learning_rate": 1.54e-06, + "loss": 32.8616, "step": 7700 }, { - "epoch": 0.031149375598443742, - "grad_norm": 1739.8531494140625, - "learning_rate": 1.542e-05, - "loss": 194.0813, + "epoch": 0.015574687799221871, + "grad_norm": 87.47315216064453, + "learning_rate": 1.5420000000000002e-06, + "loss": 22.3236, "step": 7710 }, { - "epoch": 0.031189776863811376, - "grad_norm": 1644.390869140625, - "learning_rate": 1.544e-05, - "loss": 136.579, + "epoch": 0.015594888431905688, + "grad_norm": 319.38226318359375, + "learning_rate": 1.5440000000000002e-06, + "loss": 30.9488, "step": 7720 }, { - "epoch": 0.031230178129179005, - "grad_norm": 789.648193359375, - "learning_rate": 1.546e-05, - "loss": 187.856, + "epoch": 0.015615089064589503, + "grad_norm": 1636.626220703125, + "learning_rate": 1.546e-06, + "loss": 30.0025, "step": 7730 }, { - "epoch": 0.03127057939454664, - "grad_norm": 1162.065185546875, - "learning_rate": 1.548e-05, - "loss": 114.7405, + "epoch": 0.01563528969727332, + "grad_norm": 128.85711669921875, + "learning_rate": 1.548e-06, + "loss": 15.6842, "step": 7740 }, { - "epoch": 0.03131098065991427, - "grad_norm": 1140.320556640625, - "learning_rate": 1.55e-05, - "loss": 181.2212, + "epoch": 0.015655490329957136, + "grad_norm": 682.8628540039062, + "learning_rate": 1.5500000000000002e-06, + "loss": 14.3482, "step": 7750 }, { - "epoch": 0.0313513819252819, - "grad_norm": 2704.695556640625, - "learning_rate": 1.552e-05, - "loss": 241.535, + "epoch": 0.01567569096264095, + "grad_norm": 274.7837829589844, + "learning_rate": 1.5520000000000001e-06, + "loss": 26.8116, "step": 7760 }, { - "epoch": 0.03139178319064953, - "grad_norm": 939.2614135742188, - "learning_rate": 1.554e-05, - "loss": 151.1776, + "epoch": 0.015695891595324765, + "grad_norm": 698.118408203125, + "learning_rate": 1.5540000000000003e-06, + "loss": 40.2702, "step": 7770 }, { - "epoch": 0.03143218445601716, - "grad_norm": 1043.3226318359375, - "learning_rate": 1.556e-05, - "loss": 200.2128, + "epoch": 0.01571609222800858, + "grad_norm": 505.4995422363281, + "learning_rate": 1.556e-06, + "loss": 19.0552, "step": 7780 }, { - "epoch": 0.031472585721384796, - "grad_norm": 1068.1412353515625, - "learning_rate": 1.558e-05, - "loss": 181.7895, + "epoch": 0.015736292860692398, + "grad_norm": 604.3401489257812, + "learning_rate": 1.5580000000000002e-06, + "loss": 25.9554, "step": 7790 }, { - "epoch": 0.03151298698675242, - "grad_norm": 1479.6229248046875, - "learning_rate": 1.56e-05, - "loss": 196.9196, + "epoch": 0.01575649349337621, + "grad_norm": 551.531494140625, + "learning_rate": 1.56e-06, + "loss": 31.4826, "step": 7800 }, { - "epoch": 0.031553388252120056, - "grad_norm": 701.58544921875, - "learning_rate": 1.5620000000000003e-05, - "loss": 137.6865, + "epoch": 0.015776694126060028, + "grad_norm": 597.856689453125, + "learning_rate": 1.5620000000000002e-06, + "loss": 33.1883, "step": 7810 }, { - "epoch": 0.03159378951748769, - "grad_norm": 1822.54345703125, - "learning_rate": 1.5640000000000003e-05, - "loss": 227.0767, + "epoch": 0.015796894758743844, + "grad_norm": 255.5839385986328, + "learning_rate": 1.5640000000000002e-06, + "loss": 16.2388, "step": 7820 }, { - "epoch": 0.03163419078285532, - "grad_norm": 546.575439453125, - "learning_rate": 1.566e-05, - "loss": 146.3474, + "epoch": 0.01581709539142766, + "grad_norm": 638.1707763671875, + "learning_rate": 1.566e-06, + "loss": 12.3245, "step": 7830 }, { - "epoch": 0.03167459204822295, - "grad_norm": 1540.7342529296875, - "learning_rate": 1.568e-05, - "loss": 192.144, + "epoch": 0.015837296024111474, + "grad_norm": 561.0966796875, + "learning_rate": 1.568e-06, + "loss": 34.3022, "step": 7840 }, { - "epoch": 0.03171499331359058, - "grad_norm": 1766.8616943359375, - "learning_rate": 1.5700000000000002e-05, - "loss": 194.4416, + "epoch": 0.01585749665679529, + "grad_norm": 181.12254333496094, + "learning_rate": 1.5700000000000002e-06, + "loss": 14.8665, "step": 7850 }, { - "epoch": 0.031755394578958214, - "grad_norm": 1534.951416015625, - "learning_rate": 1.5720000000000002e-05, - "loss": 158.2836, + "epoch": 0.015877697289479107, + "grad_norm": 320.9978332519531, + "learning_rate": 1.5720000000000002e-06, + "loss": 40.0919, "step": 7860 }, { - "epoch": 0.03179579584432585, - "grad_norm": 980.215576171875, - "learning_rate": 1.5740000000000002e-05, - "loss": 166.7142, + "epoch": 0.015897897922162924, + "grad_norm": 488.3760681152344, + "learning_rate": 1.5740000000000003e-06, + "loss": 23.0373, "step": 7870 }, { - "epoch": 0.03183619710969347, - "grad_norm": 5774.56396484375, - "learning_rate": 1.5759999999999998e-05, - "loss": 305.0332, + "epoch": 0.015918098554846737, + "grad_norm": 725.4952392578125, + "learning_rate": 1.576e-06, + "loss": 17.3022, "step": 7880 }, { - "epoch": 0.031876598375061106, - "grad_norm": 1329.786865234375, - "learning_rate": 1.578e-05, - "loss": 137.7941, + "epoch": 0.015938299187530553, + "grad_norm": 807.7362060546875, + "learning_rate": 1.5780000000000002e-06, + "loss": 29.6214, "step": 7890 }, { - "epoch": 0.03191699964042874, - "grad_norm": 1180.7379150390625, - "learning_rate": 1.58e-05, - "loss": 122.4153, + "epoch": 0.01595849982021437, + "grad_norm": 402.11663818359375, + "learning_rate": 1.5800000000000001e-06, + "loss": 37.3078, "step": 7900 }, { - "epoch": 0.03195740090579637, - "grad_norm": 867.472900390625, - "learning_rate": 1.582e-05, - "loss": 210.3185, + "epoch": 0.015978700452898186, + "grad_norm": 416.4609680175781, + "learning_rate": 1.5820000000000003e-06, + "loss": 24.7349, "step": 7910 }, { - "epoch": 0.031997802171164, - "grad_norm": 1057.9029541015625, - "learning_rate": 1.584e-05, - "loss": 209.8075, + "epoch": 0.015998901085582, + "grad_norm": 794.6650390625, + "learning_rate": 1.5840000000000002e-06, + "loss": 38.0387, "step": 7920 }, { - "epoch": 0.03203820343653163, - "grad_norm": 1288.350341796875, - "learning_rate": 1.586e-05, - "loss": 198.4188, + "epoch": 0.016019101718265816, + "grad_norm": 593.4451293945312, + "learning_rate": 1.586e-06, + "loss": 23.7971, "step": 7930 }, { - "epoch": 0.032078604701899265, - "grad_norm": 0.0, - "learning_rate": 1.588e-05, - "loss": 145.5795, + "epoch": 0.016039302350949632, + "grad_norm": 258.35321044921875, + "learning_rate": 1.588e-06, + "loss": 27.5747, "step": 7940 }, { - "epoch": 0.0321190059672669, - "grad_norm": 983.6244506835938, - "learning_rate": 1.59e-05, - "loss": 227.0045, + "epoch": 0.01605950298363345, + "grad_norm": 773.8060302734375, + "learning_rate": 1.5900000000000002e-06, + "loss": 38.5026, "step": 7950 }, { - "epoch": 0.032159407232634524, - "grad_norm": 2391.579345703125, - "learning_rate": 1.592e-05, - "loss": 261.5729, + "epoch": 0.016079703616317262, + "grad_norm": 408.9706115722656, + "learning_rate": 1.5920000000000002e-06, + "loss": 16.2559, "step": 7960 }, { - "epoch": 0.03219980849800216, - "grad_norm": 2793.90234375, - "learning_rate": 1.594e-05, - "loss": 247.0734, + "epoch": 0.01609990424900108, + "grad_norm": 763.1421508789062, + "learning_rate": 1.594e-06, + "loss": 29.3452, "step": 7970 }, { - "epoch": 0.03224020976336979, - "grad_norm": 1532.1849365234375, - "learning_rate": 1.596e-05, - "loss": 199.9248, + "epoch": 0.016120104881684895, + "grad_norm": 139.5187530517578, + "learning_rate": 1.596e-06, + "loss": 21.015, "step": 7980 }, { - "epoch": 0.03228061102873742, - "grad_norm": 1343.6947021484375, - "learning_rate": 1.598e-05, - "loss": 167.6554, + "epoch": 0.01614030551436871, + "grad_norm": 920.0736083984375, + "learning_rate": 1.5980000000000002e-06, + "loss": 27.6155, "step": 7990 }, { - "epoch": 0.03232101229410505, - "grad_norm": 717.1405639648438, - "learning_rate": 1.6000000000000003e-05, - "loss": 218.2069, + "epoch": 0.016160506147052525, + "grad_norm": 302.7886657714844, + "learning_rate": 1.6000000000000001e-06, + "loss": 38.6215, "step": 8000 - }, - { - "epoch": 0.03236141355947268, - "grad_norm": 2144.71337890625, - "learning_rate": 1.6020000000000002e-05, - "loss": 210.7316, - "step": 8010 - }, - { - "epoch": 0.032401814824840315, - "grad_norm": 2510.060546875, - "learning_rate": 1.604e-05, - "loss": 300.955, - "step": 8020 - }, - { - "epoch": 0.03244221609020795, - "grad_norm": 1254.329833984375, - "learning_rate": 1.606e-05, - "loss": 224.6631, - "step": 8030 - }, - { - "epoch": 0.032482617355575574, - "grad_norm": 533.80517578125, - "learning_rate": 1.6080000000000002e-05, - "loss": 195.5154, - "step": 8040 - }, - { - "epoch": 0.03252301862094321, - "grad_norm": 932.7647705078125, - "learning_rate": 1.6100000000000002e-05, - "loss": 173.2508, - "step": 8050 - }, - { - "epoch": 0.03256341988631084, - "grad_norm": 1513.1566162109375, - "learning_rate": 1.612e-05, - "loss": 252.4677, - "step": 8060 - }, - { - "epoch": 0.03260382115167847, - "grad_norm": 8973.974609375, - "learning_rate": 1.6139999999999998e-05, - "loss": 160.9031, - "step": 8070 - }, - { - "epoch": 0.0326442224170461, - "grad_norm": 1346.521728515625, - "learning_rate": 1.616e-05, - "loss": 190.9977, - "step": 8080 - }, - { - "epoch": 0.03268462368241373, - "grad_norm": 696.8346557617188, - "learning_rate": 1.618e-05, - "loss": 148.2339, - "step": 8090 - }, - { - "epoch": 0.032725024947781366, - "grad_norm": 2406.046142578125, - "learning_rate": 1.62e-05, - "loss": 129.0067, - "step": 8100 - }, - { - "epoch": 0.032765426213149, - "grad_norm": 1443.768310546875, - "learning_rate": 1.622e-05, - "loss": 206.6445, - "step": 8110 - }, - { - "epoch": 0.032805827478516625, - "grad_norm": 2201.40087890625, - "learning_rate": 1.624e-05, - "loss": 188.9112, - "step": 8120 - }, - { - "epoch": 0.03284622874388426, - "grad_norm": 699.2227783203125, - "learning_rate": 1.626e-05, - "loss": 193.334, - "step": 8130 - }, - { - "epoch": 0.03288663000925189, - "grad_norm": 545.5186767578125, - "learning_rate": 1.628e-05, - "loss": 140.6468, - "step": 8140 - }, - { - "epoch": 0.032927031274619524, - "grad_norm": 611.3945922851562, - "learning_rate": 1.63e-05, - "loss": 131.4079, - "step": 8150 - }, - { - "epoch": 0.03296743253998715, - "grad_norm": 1427.6285400390625, - "learning_rate": 1.6320000000000003e-05, - "loss": 186.4774, - "step": 8160 - }, - { - "epoch": 0.03300783380535478, - "grad_norm": 769.5264892578125, - "learning_rate": 1.634e-05, - "loss": 140.9033, - "step": 8170 - }, - { - "epoch": 0.033048235070722416, - "grad_norm": 988.6732788085938, - "learning_rate": 1.636e-05, - "loss": 193.9264, - "step": 8180 - }, - { - "epoch": 0.03308863633609005, - "grad_norm": 1023.501708984375, - "learning_rate": 1.6380000000000002e-05, - "loss": 173.0703, - "step": 8190 - }, - { - "epoch": 0.033129037601457675, - "grad_norm": 1767.6136474609375, - "learning_rate": 1.6400000000000002e-05, - "loss": 235.5256, - "step": 8200 - }, - { - "epoch": 0.03316943886682531, - "grad_norm": 1146.1134033203125, - "learning_rate": 1.6420000000000002e-05, - "loss": 174.4802, - "step": 8210 - }, - { - "epoch": 0.03320984013219294, - "grad_norm": 723.232666015625, - "learning_rate": 1.644e-05, - "loss": 126.8088, - "step": 8220 - }, - { - "epoch": 0.033250241397560575, - "grad_norm": 939.8434448242188, - "learning_rate": 1.646e-05, - "loss": 169.1382, - "step": 8230 - }, - { - "epoch": 0.0332906426629282, - "grad_norm": 986.3238525390625, - "learning_rate": 1.648e-05, - "loss": 111.5822, - "step": 8240 - }, - { - "epoch": 0.033331043928295834, - "grad_norm": 1208.2476806640625, - "learning_rate": 1.65e-05, - "loss": 191.2895, - "step": 8250 - }, - { - "epoch": 0.03337144519366347, - "grad_norm": 827.257568359375, - "learning_rate": 1.652e-05, - "loss": 175.8226, - "step": 8260 - }, - { - "epoch": 0.0334118464590311, - "grad_norm": 1213.089111328125, - "learning_rate": 1.654e-05, - "loss": 214.4393, - "step": 8270 - }, - { - "epoch": 0.033452247724398726, - "grad_norm": 985.6383666992188, - "learning_rate": 1.656e-05, - "loss": 240.4976, - "step": 8280 - }, - { - "epoch": 0.03349264898976636, - "grad_norm": 1016.156494140625, - "learning_rate": 1.658e-05, - "loss": 211.4707, - "step": 8290 - }, - { - "epoch": 0.03353305025513399, - "grad_norm": 590.7755737304688, - "learning_rate": 1.66e-05, - "loss": 173.2719, - "step": 8300 - }, - { - "epoch": 0.033573451520501625, - "grad_norm": 850.2467041015625, - "learning_rate": 1.662e-05, - "loss": 169.5091, - "step": 8310 - }, - { - "epoch": 0.03361385278586925, - "grad_norm": 6493.62109375, - "learning_rate": 1.664e-05, - "loss": 206.8252, - "step": 8320 - }, - { - "epoch": 0.033654254051236884, - "grad_norm": 721.207275390625, - "learning_rate": 1.666e-05, - "loss": 172.7568, - "step": 8330 - }, - { - "epoch": 0.03369465531660452, - "grad_norm": 941.2609252929688, - "learning_rate": 1.668e-05, - "loss": 169.4456, - "step": 8340 - }, - { - "epoch": 0.03373505658197215, - "grad_norm": 2292.378173828125, - "learning_rate": 1.6700000000000003e-05, - "loss": 217.5546, - "step": 8350 - }, - { - "epoch": 0.033775457847339777, - "grad_norm": 752.173583984375, - "learning_rate": 1.672e-05, - "loss": 186.4127, - "step": 8360 - }, - { - "epoch": 0.03381585911270741, - "grad_norm": 1046.0406494140625, - "learning_rate": 1.674e-05, - "loss": 156.5079, - "step": 8370 - }, - { - "epoch": 0.03385626037807504, - "grad_norm": 926.9135131835938, - "learning_rate": 1.6760000000000002e-05, - "loss": 145.1286, - "step": 8380 - }, - { - "epoch": 0.033896661643442676, - "grad_norm": 2465.574951171875, - "learning_rate": 1.6780000000000002e-05, - "loss": 230.1866, - "step": 8390 - }, - { - "epoch": 0.0339370629088103, - "grad_norm": 1376.998779296875, - "learning_rate": 1.6800000000000002e-05, - "loss": 149.4985, - "step": 8400 - }, - { - "epoch": 0.033977464174177935, - "grad_norm": 1141.4237060546875, - "learning_rate": 1.6819999999999998e-05, - "loss": 199.0205, - "step": 8410 - }, - { - "epoch": 0.03401786543954557, - "grad_norm": 823.1524047851562, - "learning_rate": 1.684e-05, - "loss": 160.2142, - "step": 8420 - }, - { - "epoch": 0.0340582667049132, - "grad_norm": 557.1250610351562, - "learning_rate": 1.686e-05, - "loss": 177.5694, - "step": 8430 - }, - { - "epoch": 0.03409866797028083, - "grad_norm": 841.5819702148438, - "learning_rate": 1.688e-05, - "loss": 165.8316, - "step": 8440 - }, - { - "epoch": 0.03413906923564846, - "grad_norm": 949.8247680664062, - "learning_rate": 1.69e-05, - "loss": 109.1915, - "step": 8450 - }, - { - "epoch": 0.03417947050101609, - "grad_norm": 1175.8765869140625, - "learning_rate": 1.692e-05, - "loss": 186.9094, - "step": 8460 - }, - { - "epoch": 0.034219871766383726, - "grad_norm": 601.2833862304688, - "learning_rate": 1.694e-05, - "loss": 173.7073, - "step": 8470 - }, - { - "epoch": 0.03426027303175135, - "grad_norm": 1607.2193603515625, - "learning_rate": 1.696e-05, - "loss": 193.047, - "step": 8480 - }, - { - "epoch": 0.034300674297118985, - "grad_norm": 794.0247802734375, - "learning_rate": 1.698e-05, - "loss": 230.634, - "step": 8490 - }, - { - "epoch": 0.03434107556248662, - "grad_norm": 506.083740234375, - "learning_rate": 1.7000000000000003e-05, - "loss": 194.7215, - "step": 8500 - }, - { - "epoch": 0.03438147682785425, - "grad_norm": 1354.748046875, - "learning_rate": 1.702e-05, - "loss": 237.6669, - "step": 8510 - }, - { - "epoch": 0.03442187809322188, - "grad_norm": 903.7907104492188, - "learning_rate": 1.704e-05, - "loss": 176.5095, - "step": 8520 - }, - { - "epoch": 0.03446227935858951, - "grad_norm": 1148.167236328125, - "learning_rate": 1.706e-05, - "loss": 178.5096, - "step": 8530 - }, - { - "epoch": 0.034502680623957144, - "grad_norm": 2201.17919921875, - "learning_rate": 1.7080000000000002e-05, - "loss": 149.1055, - "step": 8540 - }, - { - "epoch": 0.03454308188932478, - "grad_norm": 2470.737060546875, - "learning_rate": 1.7100000000000002e-05, - "loss": 239.2134, - "step": 8550 - }, - { - "epoch": 0.0345834831546924, - "grad_norm": 1102.5830078125, - "learning_rate": 1.712e-05, - "loss": 116.4302, - "step": 8560 - }, - { - "epoch": 0.034623884420060036, - "grad_norm": 1073.0867919921875, - "learning_rate": 1.7140000000000002e-05, - "loss": 208.7902, - "step": 8570 - }, - { - "epoch": 0.03466428568542767, - "grad_norm": 1288.13818359375, - "learning_rate": 1.7160000000000002e-05, - "loss": 177.9251, - "step": 8580 - }, - { - "epoch": 0.0347046869507953, - "grad_norm": 3063.13525390625, - "learning_rate": 1.718e-05, - "loss": 132.933, - "step": 8590 - }, - { - "epoch": 0.03474508821616293, - "grad_norm": 817.6849365234375, - "learning_rate": 1.7199999999999998e-05, - "loss": 329.9039, - "step": 8600 - }, - { - "epoch": 0.03478548948153056, - "grad_norm": 871.9563598632812, - "learning_rate": 1.722e-05, - "loss": 198.9892, - "step": 8610 - }, - { - "epoch": 0.034825890746898194, - "grad_norm": 2624.832275390625, - "learning_rate": 1.724e-05, - "loss": 134.0866, - "step": 8620 - }, - { - "epoch": 0.03486629201226583, - "grad_norm": 1550.62548828125, - "learning_rate": 1.726e-05, - "loss": 195.2643, - "step": 8630 - }, - { - "epoch": 0.034906693277633453, - "grad_norm": 2074.622314453125, - "learning_rate": 1.728e-05, - "loss": 202.7179, - "step": 8640 - }, - { - "epoch": 0.03494709454300109, - "grad_norm": 1240.3668212890625, - "learning_rate": 1.73e-05, - "loss": 164.9288, - "step": 8650 - }, - { - "epoch": 0.03498749580836872, - "grad_norm": 1201.8397216796875, - "learning_rate": 1.732e-05, - "loss": 173.0605, - "step": 8660 - }, - { - "epoch": 0.03502789707373635, - "grad_norm": 1204.3741455078125, - "learning_rate": 1.734e-05, - "loss": 161.6967, - "step": 8670 - }, - { - "epoch": 0.03506829833910398, - "grad_norm": 1461.201904296875, - "learning_rate": 1.736e-05, - "loss": 91.053, - "step": 8680 - }, - { - "epoch": 0.03510869960447161, - "grad_norm": 669.2667846679688, - "learning_rate": 1.7380000000000003e-05, - "loss": 125.9261, - "step": 8690 - }, - { - "epoch": 0.035149100869839245, - "grad_norm": 902.3343505859375, - "learning_rate": 1.74e-05, - "loss": 250.4465, - "step": 8700 - }, - { - "epoch": 0.03518950213520688, - "grad_norm": 2767.30224609375, - "learning_rate": 1.742e-05, - "loss": 194.7519, - "step": 8710 - }, - { - "epoch": 0.035229903400574504, - "grad_norm": 778.5036010742188, - "learning_rate": 1.7440000000000002e-05, - "loss": 205.9209, - "step": 8720 - }, - { - "epoch": 0.03527030466594214, - "grad_norm": 2534.48681640625, - "learning_rate": 1.7460000000000002e-05, - "loss": 201.9174, - "step": 8730 - }, - { - "epoch": 0.03531070593130977, - "grad_norm": 3830.944580078125, - "learning_rate": 1.7480000000000002e-05, - "loss": 249.3413, - "step": 8740 - }, - { - "epoch": 0.0353511071966774, - "grad_norm": 1058.1517333984375, - "learning_rate": 1.75e-05, - "loss": 156.2294, - "step": 8750 - }, - { - "epoch": 0.03539150846204503, - "grad_norm": 1754.2557373046875, - "learning_rate": 1.752e-05, - "loss": 169.7632, - "step": 8760 - }, - { - "epoch": 0.03543190972741266, - "grad_norm": 631.4237060546875, - "learning_rate": 1.754e-05, - "loss": 143.3636, - "step": 8770 - }, - { - "epoch": 0.035472310992780295, - "grad_norm": 1334.2506103515625, - "learning_rate": 1.756e-05, - "loss": 172.2922, - "step": 8780 - }, - { - "epoch": 0.03551271225814793, - "grad_norm": 1203.375, - "learning_rate": 1.758e-05, - "loss": 174.6774, - "step": 8790 - }, - { - "epoch": 0.035553113523515555, - "grad_norm": 1072.609375, - "learning_rate": 1.76e-05, - "loss": 177.6043, - "step": 8800 - }, - { - "epoch": 0.03559351478888319, - "grad_norm": 1734.830810546875, - "learning_rate": 1.762e-05, - "loss": 213.749, - "step": 8810 - }, - { - "epoch": 0.03563391605425082, - "grad_norm": 1706.71875, - "learning_rate": 1.764e-05, - "loss": 141.3769, - "step": 8820 - }, - { - "epoch": 0.035674317319618454, - "grad_norm": 823.6603393554688, - "learning_rate": 1.766e-05, - "loss": 211.0458, - "step": 8830 - }, - { - "epoch": 0.03571471858498608, - "grad_norm": 1883.2462158203125, - "learning_rate": 1.7680000000000004e-05, - "loss": 147.5754, - "step": 8840 - }, - { - "epoch": 0.03575511985035371, - "grad_norm": 1167.08935546875, - "learning_rate": 1.77e-05, - "loss": 158.7962, - "step": 8850 - }, - { - "epoch": 0.035795521115721346, - "grad_norm": 653.7176513671875, - "learning_rate": 1.772e-05, - "loss": 162.9139, - "step": 8860 - }, - { - "epoch": 0.03583592238108898, - "grad_norm": 1610.84912109375, - "learning_rate": 1.774e-05, - "loss": 147.6589, - "step": 8870 - }, - { - "epoch": 0.035876323646456605, - "grad_norm": 764.0466918945312, - "learning_rate": 1.7760000000000003e-05, - "loss": 188.6739, - "step": 8880 - }, - { - "epoch": 0.03591672491182424, - "grad_norm": 1810.3558349609375, - "learning_rate": 1.7780000000000003e-05, - "loss": 205.71, - "step": 8890 - }, - { - "epoch": 0.03595712617719187, - "grad_norm": 968.5712280273438, - "learning_rate": 1.78e-05, - "loss": 164.1197, - "step": 8900 - }, - { - "epoch": 0.035997527442559504, - "grad_norm": 2819.0068359375, - "learning_rate": 1.7820000000000002e-05, - "loss": 162.7462, - "step": 8910 - }, - { - "epoch": 0.03603792870792713, - "grad_norm": 2304.89306640625, - "learning_rate": 1.7840000000000002e-05, - "loss": 166.9301, - "step": 8920 - }, - { - "epoch": 0.036078329973294763, - "grad_norm": 1451.4320068359375, - "learning_rate": 1.7860000000000002e-05, - "loss": 199.7026, - "step": 8930 - }, - { - "epoch": 0.0361187312386624, - "grad_norm": 1088.584228515625, - "learning_rate": 1.7879999999999998e-05, - "loss": 122.1127, - "step": 8940 - }, - { - "epoch": 0.03615913250403003, - "grad_norm": 1242.6817626953125, - "learning_rate": 1.79e-05, - "loss": 205.5091, - "step": 8950 - }, - { - "epoch": 0.036199533769397656, - "grad_norm": 3576.470458984375, - "learning_rate": 1.792e-05, - "loss": 222.864, - "step": 8960 - }, - { - "epoch": 0.03623993503476529, - "grad_norm": 7665.30419921875, - "learning_rate": 1.794e-05, - "loss": 177.141, - "step": 8970 - }, - { - "epoch": 0.03628033630013292, - "grad_norm": 874.79052734375, - "learning_rate": 1.796e-05, - "loss": 151.6607, - "step": 8980 - }, - { - "epoch": 0.036320737565500555, - "grad_norm": 2215.158203125, - "learning_rate": 1.798e-05, - "loss": 187.3096, - "step": 8990 - }, - { - "epoch": 0.03636113883086818, - "grad_norm": 2184.711669921875, - "learning_rate": 1.8e-05, - "loss": 192.0445, - "step": 9000 - }, - { - "epoch": 0.036401540096235814, - "grad_norm": 1615.7005615234375, - "learning_rate": 1.802e-05, - "loss": 200.9116, - "step": 9010 - }, - { - "epoch": 0.03644194136160345, - "grad_norm": 1396.12109375, - "learning_rate": 1.804e-05, - "loss": 116.5097, - "step": 9020 - }, - { - "epoch": 0.03648234262697108, - "grad_norm": 2052.5712890625, - "learning_rate": 1.8060000000000003e-05, - "loss": 91.6937, - "step": 9030 - }, - { - "epoch": 0.036522743892338706, - "grad_norm": 1198.88232421875, - "learning_rate": 1.808e-05, - "loss": 162.9016, - "step": 9040 - }, - { - "epoch": 0.03656314515770634, - "grad_norm": 1004.2339477539062, - "learning_rate": 1.81e-05, - "loss": 165.1726, - "step": 9050 - }, - { - "epoch": 0.03660354642307397, - "grad_norm": 2106.733642578125, - "learning_rate": 1.812e-05, - "loss": 293.9893, - "step": 9060 - }, - { - "epoch": 0.036643947688441605, - "grad_norm": 1582.510986328125, - "learning_rate": 1.8140000000000003e-05, - "loss": 190.1606, - "step": 9070 - }, - { - "epoch": 0.03668434895380923, - "grad_norm": 905.4036254882812, - "learning_rate": 1.8160000000000002e-05, - "loss": 141.9137, - "step": 9080 - }, - { - "epoch": 0.036724750219176865, - "grad_norm": 411.4434509277344, - "learning_rate": 1.818e-05, - "loss": 135.4074, - "step": 9090 - }, - { - "epoch": 0.0367651514845445, - "grad_norm": 2590.279541015625, - "learning_rate": 1.8200000000000002e-05, - "loss": 237.3702, - "step": 9100 - }, - { - "epoch": 0.03680555274991213, - "grad_norm": 966.46435546875, - "learning_rate": 1.8220000000000002e-05, - "loss": 99.4366, - "step": 9110 - }, - { - "epoch": 0.03684595401527976, - "grad_norm": 1426.09375, - "learning_rate": 1.824e-05, - "loss": 128.6301, - "step": 9120 - }, - { - "epoch": 0.03688635528064739, - "grad_norm": 661.1119384765625, - "learning_rate": 1.826e-05, - "loss": 119.7455, - "step": 9130 - }, - { - "epoch": 0.03692675654601502, - "grad_norm": 1419.5643310546875, - "learning_rate": 1.828e-05, - "loss": 157.1782, - "step": 9140 - }, - { - "epoch": 0.03696715781138265, - "grad_norm": 875.4746704101562, - "learning_rate": 1.83e-05, - "loss": 167.5964, - "step": 9150 - }, - { - "epoch": 0.03700755907675028, - "grad_norm": 914.225341796875, - "learning_rate": 1.832e-05, - "loss": 229.4271, - "step": 9160 - }, - { - "epoch": 0.037047960342117915, - "grad_norm": 1139.4866943359375, - "learning_rate": 1.834e-05, - "loss": 176.9276, - "step": 9170 - }, - { - "epoch": 0.03708836160748555, - "grad_norm": 1400.506103515625, - "learning_rate": 1.8360000000000004e-05, - "loss": 137.9502, - "step": 9180 - }, - { - "epoch": 0.037128762872853174, - "grad_norm": 1892.8255615234375, - "learning_rate": 1.838e-05, - "loss": 177.446, - "step": 9190 - }, - { - "epoch": 0.03716916413822081, - "grad_norm": 6680.482421875, - "learning_rate": 1.84e-05, - "loss": 177.2692, - "step": 9200 - }, - { - "epoch": 0.03720956540358844, - "grad_norm": 5937.34521484375, - "learning_rate": 1.842e-05, - "loss": 209.457, - "step": 9210 - }, - { - "epoch": 0.037249966668956074, - "grad_norm": 724.9736938476562, - "learning_rate": 1.8440000000000003e-05, - "loss": 144.7552, - "step": 9220 - }, - { - "epoch": 0.0372903679343237, - "grad_norm": 1280.418212890625, - "learning_rate": 1.846e-05, - "loss": 177.4063, - "step": 9230 - }, - { - "epoch": 0.03733076919969133, - "grad_norm": 4052.574462890625, - "learning_rate": 1.848e-05, - "loss": 192.6805, - "step": 9240 - }, - { - "epoch": 0.037371170465058966, - "grad_norm": 1359.7706298828125, - "learning_rate": 1.85e-05, - "loss": 187.3014, - "step": 9250 - }, - { - "epoch": 0.0374115717304266, - "grad_norm": 3640.478515625, - "learning_rate": 1.8520000000000002e-05, - "loss": 134.8755, - "step": 9260 - }, - { - "epoch": 0.037451972995794225, - "grad_norm": 495.39373779296875, - "learning_rate": 1.8540000000000002e-05, - "loss": 199.3222, - "step": 9270 - }, - { - "epoch": 0.03749237426116186, - "grad_norm": 614.9301147460938, - "learning_rate": 1.856e-05, - "loss": 172.9295, - "step": 9280 - }, - { - "epoch": 0.03753277552652949, - "grad_norm": 1786.66552734375, - "learning_rate": 1.858e-05, - "loss": 170.4556, - "step": 9290 - }, - { - "epoch": 0.037573176791897124, - "grad_norm": 1209.83740234375, - "learning_rate": 1.86e-05, - "loss": 179.2815, - "step": 9300 - }, - { - "epoch": 0.03761357805726475, - "grad_norm": 935.8829345703125, - "learning_rate": 1.862e-05, - "loss": 153.5103, - "step": 9310 - }, - { - "epoch": 0.03765397932263238, - "grad_norm": 1041.441162109375, - "learning_rate": 1.864e-05, - "loss": 220.6227, - "step": 9320 - }, - { - "epoch": 0.037694380588000016, - "grad_norm": 1657.690673828125, - "learning_rate": 1.866e-05, - "loss": 238.1361, - "step": 9330 - }, - { - "epoch": 0.03773478185336765, - "grad_norm": 1077.7347412109375, - "learning_rate": 1.868e-05, - "loss": 156.283, - "step": 9340 - }, - { - "epoch": 0.037775183118735275, - "grad_norm": 834.681396484375, - "learning_rate": 1.87e-05, - "loss": 94.2577, - "step": 9350 - }, - { - "epoch": 0.03781558438410291, - "grad_norm": 1160.058349609375, - "learning_rate": 1.872e-05, - "loss": 231.7988, - "step": 9360 - }, - { - "epoch": 0.03785598564947054, - "grad_norm": 997.937255859375, - "learning_rate": 1.8740000000000004e-05, - "loss": 134.2654, - "step": 9370 - }, - { - "epoch": 0.037896386914838175, - "grad_norm": 882.4503784179688, - "learning_rate": 1.876e-05, - "loss": 193.2273, - "step": 9380 - }, - { - "epoch": 0.0379367881802058, - "grad_norm": 3181.698974609375, - "learning_rate": 1.878e-05, - "loss": 132.2644, - "step": 9390 - }, - { - "epoch": 0.037977189445573434, - "grad_norm": 725.4539184570312, - "learning_rate": 1.88e-05, - "loss": 153.5085, - "step": 9400 - }, - { - "epoch": 0.03801759071094107, - "grad_norm": 1430.8912353515625, - "learning_rate": 1.8820000000000003e-05, - "loss": 196.1231, - "step": 9410 - }, - { - "epoch": 0.0380579919763087, - "grad_norm": 1368.989501953125, - "learning_rate": 1.8840000000000003e-05, - "loss": 139.1376, - "step": 9420 - }, - { - "epoch": 0.038098393241676326, - "grad_norm": 838.9828491210938, - "learning_rate": 1.886e-05, - "loss": 131.1992, - "step": 9430 - }, - { - "epoch": 0.03813879450704396, - "grad_norm": 3527.458251953125, - "learning_rate": 1.888e-05, - "loss": 187.7699, - "step": 9440 - }, - { - "epoch": 0.03817919577241159, - "grad_norm": 2998.174072265625, - "learning_rate": 1.8900000000000002e-05, - "loss": 175.4584, - "step": 9450 - }, - { - "epoch": 0.038219597037779225, - "grad_norm": 1297.8941650390625, - "learning_rate": 1.8920000000000002e-05, - "loss": 210.6279, - "step": 9460 - }, - { - "epoch": 0.03825999830314685, - "grad_norm": 3371.50244140625, - "learning_rate": 1.894e-05, - "loss": 217.5112, - "step": 9470 - }, - { - "epoch": 0.038300399568514484, - "grad_norm": 1407.463623046875, - "learning_rate": 1.896e-05, - "loss": 192.8969, - "step": 9480 - }, - { - "epoch": 0.03834080083388212, - "grad_norm": 1015.240966796875, - "learning_rate": 1.898e-05, - "loss": 173.7605, - "step": 9490 - }, - { - "epoch": 0.03838120209924975, - "grad_norm": 731.1430053710938, - "learning_rate": 1.9e-05, - "loss": 191.5719, - "step": 9500 - }, - { - "epoch": 0.03842160336461738, - "grad_norm": 849.3814086914062, - "learning_rate": 1.902e-05, - "loss": 91.8381, - "step": 9510 - }, - { - "epoch": 0.03846200462998501, - "grad_norm": 1715.9556884765625, - "learning_rate": 1.904e-05, - "loss": 175.3195, - "step": 9520 - }, - { - "epoch": 0.03850240589535264, - "grad_norm": 7559.9091796875, - "learning_rate": 1.906e-05, - "loss": 175.7841, - "step": 9530 - }, - { - "epoch": 0.038542807160720276, - "grad_norm": 689.8999633789062, - "learning_rate": 1.908e-05, - "loss": 116.3339, - "step": 9540 - }, - { - "epoch": 0.0385832084260879, - "grad_norm": 873.10009765625, - "learning_rate": 1.91e-05, - "loss": 140.4007, - "step": 9550 - }, - { - "epoch": 0.038623609691455535, - "grad_norm": 2358.655517578125, - "learning_rate": 1.9120000000000003e-05, - "loss": 168.361, - "step": 9560 - }, - { - "epoch": 0.03866401095682317, - "grad_norm": 5055.4892578125, - "learning_rate": 1.914e-05, - "loss": 174.0588, - "step": 9570 - }, - { - "epoch": 0.0387044122221908, - "grad_norm": 897.6202392578125, - "learning_rate": 1.916e-05, - "loss": 153.3665, - "step": 9580 - }, - { - "epoch": 0.03874481348755843, - "grad_norm": 729.1337280273438, - "learning_rate": 1.918e-05, - "loss": 131.9806, - "step": 9590 - }, - { - "epoch": 0.03878521475292606, - "grad_norm": 795.0892944335938, - "learning_rate": 1.9200000000000003e-05, - "loss": 166.5195, - "step": 9600 - }, - { - "epoch": 0.03882561601829369, - "grad_norm": 644.804443359375, - "learning_rate": 1.9220000000000002e-05, - "loss": 129.34, - "step": 9610 - }, - { - "epoch": 0.038866017283661326, - "grad_norm": 1547.818359375, - "learning_rate": 1.924e-05, - "loss": 165.855, - "step": 9620 - }, - { - "epoch": 0.03890641854902895, - "grad_norm": 0.0, - "learning_rate": 1.9260000000000002e-05, - "loss": 181.7364, - "step": 9630 - }, - { - "epoch": 0.038946819814396585, - "grad_norm": 2220.852294921875, - "learning_rate": 1.9280000000000002e-05, - "loss": 154.0527, - "step": 9640 - }, - { - "epoch": 0.03898722107976422, - "grad_norm": 1048.51123046875, - "learning_rate": 1.93e-05, - "loss": 160.1465, - "step": 9650 - }, - { - "epoch": 0.03902762234513185, - "grad_norm": 0.0, - "learning_rate": 1.932e-05, - "loss": 149.3159, - "step": 9660 - }, - { - "epoch": 0.03906802361049948, - "grad_norm": 1180.42919921875, - "learning_rate": 1.934e-05, - "loss": 145.5787, - "step": 9670 - }, - { - "epoch": 0.03910842487586711, - "grad_norm": 1122.5113525390625, - "learning_rate": 1.936e-05, - "loss": 124.5202, - "step": 9680 - }, - { - "epoch": 0.039148826141234744, - "grad_norm": 1447.1502685546875, - "learning_rate": 1.938e-05, - "loss": 178.7178, - "step": 9690 - }, - { - "epoch": 0.03918922740660238, - "grad_norm": 1735.78125, - "learning_rate": 1.94e-05, - "loss": 184.0263, - "step": 9700 - }, - { - "epoch": 0.03922962867197, - "grad_norm": 660.9119873046875, - "learning_rate": 1.942e-05, - "loss": 127.7493, - "step": 9710 - }, - { - "epoch": 0.039270029937337636, - "grad_norm": 1763.235107421875, - "learning_rate": 1.944e-05, - "loss": 176.0283, - "step": 9720 - }, - { - "epoch": 0.03931043120270527, - "grad_norm": 2768.470947265625, - "learning_rate": 1.946e-05, - "loss": 107.1719, - "step": 9730 - }, - { - "epoch": 0.0393508324680729, - "grad_norm": 600.2669067382812, - "learning_rate": 1.948e-05, - "loss": 162.8209, - "step": 9740 - }, - { - "epoch": 0.03939123373344053, - "grad_norm": 3563.5947265625, - "learning_rate": 1.9500000000000003e-05, - "loss": 197.7512, - "step": 9750 - }, - { - "epoch": 0.03943163499880816, - "grad_norm": 2130.047607421875, - "learning_rate": 1.9520000000000003e-05, - "loss": 160.2494, - "step": 9760 - }, - { - "epoch": 0.039472036264175794, - "grad_norm": 2377.900146484375, - "learning_rate": 1.954e-05, - "loss": 171.3171, - "step": 9770 - }, - { - "epoch": 0.03951243752954343, - "grad_norm": 1052.0291748046875, - "learning_rate": 1.956e-05, - "loss": 113.5263, - "step": 9780 - }, - { - "epoch": 0.039552838794911054, - "grad_norm": 2078.976318359375, - "learning_rate": 1.9580000000000002e-05, - "loss": 191.9397, - "step": 9790 - }, - { - "epoch": 0.03959324006027869, - "grad_norm": 773.3158569335938, - "learning_rate": 1.9600000000000002e-05, - "loss": 126.11, - "step": 9800 - }, - { - "epoch": 0.03963364132564632, - "grad_norm": 1939.7193603515625, - "learning_rate": 1.9620000000000002e-05, - "loss": 210.1035, - "step": 9810 - }, - { - "epoch": 0.03967404259101395, - "grad_norm": 531.8984985351562, - "learning_rate": 1.9640000000000002e-05, - "loss": 161.9592, - "step": 9820 - }, - { - "epoch": 0.03971444385638158, - "grad_norm": 441.3695983886719, - "learning_rate": 1.966e-05, - "loss": 131.7898, - "step": 9830 - }, - { - "epoch": 0.03975484512174921, - "grad_norm": 2530.7509765625, - "learning_rate": 1.968e-05, - "loss": 190.4342, - "step": 9840 - }, - { - "epoch": 0.039795246387116845, - "grad_norm": 1077.1409912109375, - "learning_rate": 1.97e-05, - "loss": 141.0169, - "step": 9850 - }, - { - "epoch": 0.03983564765248448, - "grad_norm": 1837.5992431640625, - "learning_rate": 1.972e-05, - "loss": 197.5706, - "step": 9860 - }, - { - "epoch": 0.039876048917852104, - "grad_norm": 1183.7669677734375, - "learning_rate": 1.974e-05, - "loss": 186.1175, - "step": 9870 - }, - { - "epoch": 0.03991645018321974, - "grad_norm": 584.4487915039062, - "learning_rate": 1.976e-05, - "loss": 128.3029, - "step": 9880 - }, - { - "epoch": 0.03995685144858737, - "grad_norm": 731.9237060546875, - "learning_rate": 1.978e-05, - "loss": 97.8851, - "step": 9890 - }, - { - "epoch": 0.039997252713955, - "grad_norm": 734.3873901367188, - "learning_rate": 1.9800000000000004e-05, - "loss": 170.2385, - "step": 9900 - }, - { - "epoch": 0.04003765397932263, - "grad_norm": 530.0155029296875, - "learning_rate": 1.982e-05, - "loss": 140.1505, - "step": 9910 - }, - { - "epoch": 0.04007805524469026, - "grad_norm": 1368.81298828125, - "learning_rate": 1.984e-05, - "loss": 188.7803, - "step": 9920 - }, - { - "epoch": 0.040118456510057895, - "grad_norm": 2284.725341796875, - "learning_rate": 1.986e-05, - "loss": 213.4412, - "step": 9930 - }, - { - "epoch": 0.04015885777542553, - "grad_norm": 953.0332641601562, - "learning_rate": 1.9880000000000003e-05, - "loss": 81.9998, - "step": 9940 - }, - { - "epoch": 0.040199259040793155, - "grad_norm": 2036.2686767578125, - "learning_rate": 1.9900000000000003e-05, - "loss": 124.487, - "step": 9950 - }, - { - "epoch": 0.04023966030616079, - "grad_norm": 1137.9842529296875, - "learning_rate": 1.992e-05, - "loss": 166.7126, - "step": 9960 - }, - { - "epoch": 0.04028006157152842, - "grad_norm": 906.6007080078125, - "learning_rate": 1.994e-05, - "loss": 188.0849, - "step": 9970 - }, - { - "epoch": 0.040320462836896054, - "grad_norm": 1181.5977783203125, - "learning_rate": 1.9960000000000002e-05, - "loss": 168.7636, - "step": 9980 - }, - { - "epoch": 0.04036086410226368, - "grad_norm": 1022.9138793945312, - "learning_rate": 1.9980000000000002e-05, - "loss": 168.0031, - "step": 9990 - }, - { - "epoch": 0.04040126536763131, - "grad_norm": 717.4224243164062, - "learning_rate": 2e-05, - "loss": 122.0701, - "step": 10000 - }, - { - "epoch": 0.040441666632998946, - "grad_norm": 595.6052856445312, - "learning_rate": 2.002e-05, - "loss": 143.9377, - "step": 10010 - }, - { - "epoch": 0.04048206789836658, - "grad_norm": 1366.42431640625, - "learning_rate": 2.004e-05, - "loss": 167.5448, - "step": 10020 - }, - { - "epoch": 0.040522469163734205, - "grad_norm": 1101.7801513671875, - "learning_rate": 2.006e-05, - "loss": 192.1521, - "step": 10030 - }, - { - "epoch": 0.04056287042910184, - "grad_norm": 1699.7630615234375, - "learning_rate": 2.008e-05, - "loss": 154.8807, - "step": 10040 - }, - { - "epoch": 0.04060327169446947, - "grad_norm": 1256.3902587890625, - "learning_rate": 2.01e-05, - "loss": 134.1945, - "step": 10050 - }, - { - "epoch": 0.040643672959837104, - "grad_norm": 0.0, - "learning_rate": 2.012e-05, - "loss": 131.3899, - "step": 10060 - }, - { - "epoch": 0.04068407422520473, - "grad_norm": 1447.4774169921875, - "learning_rate": 2.014e-05, - "loss": 159.1337, - "step": 10070 - }, - { - "epoch": 0.040724475490572364, - "grad_norm": 4435.76904296875, - "learning_rate": 2.016e-05, - "loss": 198.5379, - "step": 10080 - }, - { - "epoch": 0.04076487675594, - "grad_norm": 852.9769287109375, - "learning_rate": 2.0180000000000003e-05, - "loss": 160.13, - "step": 10090 - }, - { - "epoch": 0.04080527802130763, - "grad_norm": 1151.4747314453125, - "learning_rate": 2.0200000000000003e-05, - "loss": 162.3118, - "step": 10100 - }, - { - "epoch": 0.040845679286675256, - "grad_norm": 1879.0950927734375, - "learning_rate": 2.022e-05, - "loss": 172.1667, - "step": 10110 - }, - { - "epoch": 0.04088608055204289, - "grad_norm": 953.0318603515625, - "learning_rate": 2.024e-05, - "loss": 152.1001, - "step": 10120 - }, - { - "epoch": 0.04092648181741052, - "grad_norm": 834.5413208007812, - "learning_rate": 2.0260000000000003e-05, - "loss": 166.7469, - "step": 10130 - }, - { - "epoch": 0.040966883082778155, - "grad_norm": 1954.07080078125, - "learning_rate": 2.0280000000000002e-05, - "loss": 170.3608, - "step": 10140 - }, - { - "epoch": 0.04100728434814578, - "grad_norm": 1222.4248046875, - "learning_rate": 2.0300000000000002e-05, - "loss": 168.8605, - "step": 10150 - }, - { - "epoch": 0.041047685613513414, - "grad_norm": 2559.77099609375, - "learning_rate": 2.032e-05, - "loss": 169.7865, - "step": 10160 - }, - { - "epoch": 0.04108808687888105, - "grad_norm": 2988.0048828125, - "learning_rate": 2.0340000000000002e-05, - "loss": 175.7436, - "step": 10170 - }, - { - "epoch": 0.04112848814424868, - "grad_norm": 647.4199829101562, - "learning_rate": 2.036e-05, - "loss": 161.4738, - "step": 10180 - }, - { - "epoch": 0.041168889409616306, - "grad_norm": 1833.9415283203125, - "learning_rate": 2.038e-05, - "loss": 198.7381, - "step": 10190 - }, - { - "epoch": 0.04120929067498394, - "grad_norm": 734.6459350585938, - "learning_rate": 2.04e-05, - "loss": 145.9311, - "step": 10200 - }, - { - "epoch": 0.04124969194035157, - "grad_norm": 1672.8916015625, - "learning_rate": 2.042e-05, - "loss": 149.8653, - "step": 10210 - }, - { - "epoch": 0.041290093205719205, - "grad_norm": 1029.32421875, - "learning_rate": 2.044e-05, - "loss": 178.9316, - "step": 10220 - }, - { - "epoch": 0.04133049447108683, - "grad_norm": 714.8513793945312, - "learning_rate": 2.046e-05, - "loss": 112.8458, - "step": 10230 - }, - { - "epoch": 0.041370895736454465, - "grad_norm": 1445.0704345703125, - "learning_rate": 2.048e-05, - "loss": 164.2769, - "step": 10240 - }, - { - "epoch": 0.0414112970018221, - "grad_norm": 2015.292236328125, - "learning_rate": 2.05e-05, - "loss": 258.4405, - "step": 10250 - }, - { - "epoch": 0.04145169826718973, - "grad_norm": 820.0475463867188, - "learning_rate": 2.052e-05, - "loss": 139.5321, - "step": 10260 - }, - { - "epoch": 0.04149209953255736, - "grad_norm": 886.0709838867188, - "learning_rate": 2.054e-05, - "loss": 148.4284, - "step": 10270 - }, - { - "epoch": 0.04153250079792499, - "grad_norm": 1072.3577880859375, - "learning_rate": 2.0560000000000003e-05, - "loss": 192.8738, - "step": 10280 - }, - { - "epoch": 0.04157290206329262, - "grad_norm": 747.4786987304688, - "learning_rate": 2.0580000000000003e-05, - "loss": 101.708, - "step": 10290 - }, - { - "epoch": 0.041613303328660256, - "grad_norm": 5367.84521484375, - "learning_rate": 2.06e-05, - "loss": 194.0073, - "step": 10300 - }, - { - "epoch": 0.04165370459402788, - "grad_norm": 1179.1737060546875, - "learning_rate": 2.062e-05, - "loss": 121.6645, - "step": 10310 - }, - { - "epoch": 0.041694105859395515, - "grad_norm": 779.1863403320312, - "learning_rate": 2.0640000000000002e-05, - "loss": 156.9918, - "step": 10320 - }, - { - "epoch": 0.04173450712476315, - "grad_norm": 609.2747192382812, - "learning_rate": 2.0660000000000002e-05, - "loss": 121.481, - "step": 10330 - }, - { - "epoch": 0.04177490839013078, - "grad_norm": 856.9229736328125, - "learning_rate": 2.0680000000000002e-05, - "loss": 126.45, - "step": 10340 - }, - { - "epoch": 0.04181530965549841, - "grad_norm": 3409.32568359375, - "learning_rate": 2.07e-05, - "loss": 135.8212, - "step": 10350 - }, - { - "epoch": 0.04185571092086604, - "grad_norm": 2779.28369140625, - "learning_rate": 2.072e-05, - "loss": 180.5799, - "step": 10360 - }, - { - "epoch": 0.041896112186233674, - "grad_norm": 2200.98583984375, - "learning_rate": 2.074e-05, - "loss": 228.0418, - "step": 10370 - }, - { - "epoch": 0.04193651345160131, - "grad_norm": 864.7232055664062, - "learning_rate": 2.076e-05, - "loss": 200.8099, - "step": 10380 - }, - { - "epoch": 0.04197691471696893, - "grad_norm": 373.90771484375, - "learning_rate": 2.078e-05, - "loss": 123.1325, - "step": 10390 - }, - { - "epoch": 0.042017315982336566, - "grad_norm": 865.7789916992188, - "learning_rate": 2.08e-05, - "loss": 177.7623, - "step": 10400 - }, - { - "epoch": 0.0420577172477042, - "grad_norm": 1141.6817626953125, - "learning_rate": 2.082e-05, - "loss": 150.2677, - "step": 10410 - }, - { - "epoch": 0.04209811851307183, - "grad_norm": 720.0677490234375, - "learning_rate": 2.084e-05, - "loss": 146.5872, - "step": 10420 - }, - { - "epoch": 0.04213851977843946, - "grad_norm": 1222.44775390625, - "learning_rate": 2.086e-05, - "loss": 162.8006, - "step": 10430 - }, - { - "epoch": 0.04217892104380709, - "grad_norm": 909.986083984375, - "learning_rate": 2.0880000000000003e-05, - "loss": 193.938, - "step": 10440 - }, - { - "epoch": 0.042219322309174724, - "grad_norm": 1559.7752685546875, - "learning_rate": 2.09e-05, - "loss": 167.0518, - "step": 10450 - }, - { - "epoch": 0.04225972357454236, - "grad_norm": 2482.876953125, - "learning_rate": 2.092e-05, - "loss": 138.0267, - "step": 10460 - }, - { - "epoch": 0.04230012483990998, - "grad_norm": 1091.695556640625, - "learning_rate": 2.0940000000000003e-05, - "loss": 103.3883, - "step": 10470 - }, - { - "epoch": 0.042340526105277616, - "grad_norm": 806.2134399414062, - "learning_rate": 2.0960000000000003e-05, - "loss": 124.8749, - "step": 10480 - }, - { - "epoch": 0.04238092737064525, - "grad_norm": 1276.008544921875, - "learning_rate": 2.098e-05, - "loss": 163.3479, - "step": 10490 - }, - { - "epoch": 0.04242132863601288, - "grad_norm": 4489.3603515625, - "learning_rate": 2.1e-05, - "loss": 171.6303, - "step": 10500 - }, - { - "epoch": 0.04246172990138051, - "grad_norm": 838.2408447265625, - "learning_rate": 2.1020000000000002e-05, - "loss": 150.1332, - "step": 10510 - }, - { - "epoch": 0.04250213116674814, - "grad_norm": 1553.7894287109375, - "learning_rate": 2.1040000000000002e-05, - "loss": 161.144, - "step": 10520 - }, - { - "epoch": 0.042542532432115775, - "grad_norm": 1126.568115234375, - "learning_rate": 2.106e-05, - "loss": 152.1566, - "step": 10530 - }, - { - "epoch": 0.04258293369748341, - "grad_norm": 832.1701049804688, - "learning_rate": 2.1079999999999998e-05, - "loss": 139.9062, - "step": 10540 - }, - { - "epoch": 0.042623334962851034, - "grad_norm": 519.9462890625, - "learning_rate": 2.11e-05, - "loss": 108.9907, - "step": 10550 - }, - { - "epoch": 0.04266373622821867, - "grad_norm": 2303.817626953125, - "learning_rate": 2.112e-05, - "loss": 150.1262, - "step": 10560 - }, - { - "epoch": 0.0427041374935863, - "grad_norm": 1118.6610107421875, - "learning_rate": 2.114e-05, - "loss": 127.0093, - "step": 10570 - }, - { - "epoch": 0.04274453875895393, - "grad_norm": 1355.4498291015625, - "learning_rate": 2.116e-05, - "loss": 165.2866, - "step": 10580 - }, - { - "epoch": 0.04278494002432156, - "grad_norm": 2492.5751953125, - "learning_rate": 2.118e-05, - "loss": 167.1242, - "step": 10590 - }, - { - "epoch": 0.04282534128968919, - "grad_norm": 3182.501953125, - "learning_rate": 2.12e-05, - "loss": 146.6266, - "step": 10600 - }, - { - "epoch": 0.042865742555056825, - "grad_norm": 1109.6849365234375, - "learning_rate": 2.122e-05, - "loss": 195.9611, - "step": 10610 - }, - { - "epoch": 0.04290614382042446, - "grad_norm": 2110.990234375, - "learning_rate": 2.124e-05, - "loss": 199.2886, - "step": 10620 - }, - { - "epoch": 0.042946545085792084, - "grad_norm": 1315.0323486328125, - "learning_rate": 2.1260000000000003e-05, - "loss": 136.6857, - "step": 10630 - }, - { - "epoch": 0.04298694635115972, - "grad_norm": 1708.447998046875, - "learning_rate": 2.128e-05, - "loss": 173.377, - "step": 10640 - }, - { - "epoch": 0.04302734761652735, - "grad_norm": 1813.0264892578125, - "learning_rate": 2.13e-05, - "loss": 166.6691, - "step": 10650 - }, - { - "epoch": 0.043067748881894984, - "grad_norm": 1098.9163818359375, - "learning_rate": 2.1320000000000003e-05, - "loss": 187.8283, - "step": 10660 - }, - { - "epoch": 0.04310815014726261, - "grad_norm": 608.4671630859375, - "learning_rate": 2.1340000000000002e-05, - "loss": 141.6912, - "step": 10670 - }, - { - "epoch": 0.04314855141263024, - "grad_norm": 830.4765625, - "learning_rate": 2.1360000000000002e-05, - "loss": 142.6794, - "step": 10680 - }, - { - "epoch": 0.043188952677997876, - "grad_norm": 600.3438110351562, - "learning_rate": 2.138e-05, - "loss": 130.3432, - "step": 10690 - }, - { - "epoch": 0.04322935394336551, - "grad_norm": 1370.6168212890625, - "learning_rate": 2.1400000000000002e-05, - "loss": 209.9451, - "step": 10700 - }, - { - "epoch": 0.043269755208733135, - "grad_norm": 622.1041870117188, - "learning_rate": 2.142e-05, - "loss": 169.206, - "step": 10710 - }, - { - "epoch": 0.04331015647410077, - "grad_norm": 2445.648681640625, - "learning_rate": 2.144e-05, - "loss": 200.1026, - "step": 10720 - }, - { - "epoch": 0.0433505577394684, - "grad_norm": 1902.2943115234375, - "learning_rate": 2.146e-05, - "loss": 187.5802, - "step": 10730 - }, - { - "epoch": 0.043390959004836034, - "grad_norm": 1008.3157348632812, - "learning_rate": 2.148e-05, - "loss": 143.9581, - "step": 10740 - }, - { - "epoch": 0.04343136027020366, - "grad_norm": 594.2481079101562, - "learning_rate": 2.15e-05, - "loss": 186.3464, - "step": 10750 - }, - { - "epoch": 0.04347176153557129, - "grad_norm": 13106.4541015625, - "learning_rate": 2.152e-05, - "loss": 218.3369, - "step": 10760 - }, - { - "epoch": 0.043512162800938926, - "grad_norm": 2128.98291015625, - "learning_rate": 2.154e-05, - "loss": 170.2695, - "step": 10770 - }, - { - "epoch": 0.04355256406630656, - "grad_norm": 1214.33642578125, - "learning_rate": 2.1560000000000004e-05, - "loss": 147.123, - "step": 10780 - }, - { - "epoch": 0.043592965331674186, - "grad_norm": 1065.6256103515625, - "learning_rate": 2.158e-05, - "loss": 168.7387, - "step": 10790 - }, - { - "epoch": 0.04363336659704182, - "grad_norm": 1667.8955078125, - "learning_rate": 2.16e-05, - "loss": 152.4974, - "step": 10800 - }, - { - "epoch": 0.04367376786240945, - "grad_norm": 1360.904052734375, - "learning_rate": 2.162e-05, - "loss": 254.4096, - "step": 10810 - }, - { - "epoch": 0.043714169127777085, - "grad_norm": 1592.132080078125, - "learning_rate": 2.1640000000000003e-05, - "loss": 174.9611, - "step": 10820 - }, - { - "epoch": 0.04375457039314471, - "grad_norm": 776.6605834960938, - "learning_rate": 2.166e-05, - "loss": 184.3046, - "step": 10830 - }, - { - "epoch": 0.043794971658512344, - "grad_norm": 2525.784423828125, - "learning_rate": 2.168e-05, - "loss": 164.588, - "step": 10840 - }, - { - "epoch": 0.04383537292387998, - "grad_norm": 1608.6549072265625, - "learning_rate": 2.1700000000000002e-05, - "loss": 196.8271, - "step": 10850 - }, - { - "epoch": 0.04387577418924761, - "grad_norm": 892.496826171875, - "learning_rate": 2.1720000000000002e-05, - "loss": 148.6461, - "step": 10860 - }, - { - "epoch": 0.043916175454615236, - "grad_norm": 1432.486572265625, - "learning_rate": 2.1740000000000002e-05, - "loss": 182.0831, - "step": 10870 - }, - { - "epoch": 0.04395657671998287, - "grad_norm": 1320.6700439453125, - "learning_rate": 2.176e-05, - "loss": 180.9173, - "step": 10880 - }, - { - "epoch": 0.0439969779853505, - "grad_norm": 1471.670166015625, - "learning_rate": 2.178e-05, - "loss": 178.3413, - "step": 10890 - }, - { - "epoch": 0.044037379250718135, - "grad_norm": 1130.7198486328125, - "learning_rate": 2.18e-05, - "loss": 148.5332, - "step": 10900 - }, - { - "epoch": 0.04407778051608576, - "grad_norm": 1092.3216552734375, - "learning_rate": 2.182e-05, - "loss": 114.7338, - "step": 10910 - }, - { - "epoch": 0.044118181781453394, - "grad_norm": 1079.041259765625, - "learning_rate": 2.184e-05, - "loss": 132.0338, - "step": 10920 - }, - { - "epoch": 0.04415858304682103, - "grad_norm": 2612.80517578125, - "learning_rate": 2.186e-05, - "loss": 138.605, - "step": 10930 - }, - { - "epoch": 0.04419898431218866, - "grad_norm": 1243.3662109375, - "learning_rate": 2.188e-05, - "loss": 157.8075, - "step": 10940 - }, - { - "epoch": 0.04423938557755629, - "grad_norm": 1165.291015625, - "learning_rate": 2.19e-05, - "loss": 123.9359, - "step": 10950 - }, - { - "epoch": 0.04427978684292392, - "grad_norm": 970.86083984375, - "learning_rate": 2.192e-05, - "loss": 99.9449, - "step": 10960 - }, - { - "epoch": 0.04432018810829155, - "grad_norm": 282.0602722167969, - "learning_rate": 2.1940000000000003e-05, - "loss": 126.6935, - "step": 10970 - }, - { - "epoch": 0.044360589373659186, - "grad_norm": 2742.96044921875, - "learning_rate": 2.196e-05, - "loss": 216.6111, - "step": 10980 - }, - { - "epoch": 0.04440099063902681, - "grad_norm": 725.5316772460938, - "learning_rate": 2.198e-05, - "loss": 283.1829, - "step": 10990 - }, - { - "epoch": 0.044441391904394445, - "grad_norm": 1572.414794921875, - "learning_rate": 2.2000000000000003e-05, - "loss": 171.7036, - "step": 11000 - }, - { - "epoch": 0.04448179316976208, - "grad_norm": 0.0, - "learning_rate": 2.2020000000000003e-05, - "loss": 158.3066, - "step": 11010 - }, - { - "epoch": 0.04452219443512971, - "grad_norm": 1373.786376953125, - "learning_rate": 2.2040000000000002e-05, - "loss": 125.9849, - "step": 11020 - }, - { - "epoch": 0.04456259570049734, - "grad_norm": 1483.981689453125, - "learning_rate": 2.206e-05, - "loss": 176.718, - "step": 11030 - }, - { - "epoch": 0.04460299696586497, - "grad_norm": 997.575927734375, - "learning_rate": 2.2080000000000002e-05, - "loss": 164.0399, - "step": 11040 - }, - { - "epoch": 0.0446433982312326, - "grad_norm": 550.5761108398438, - "learning_rate": 2.2100000000000002e-05, - "loss": 135.1882, - "step": 11050 - }, - { - "epoch": 0.044683799496600236, - "grad_norm": 733.0974731445312, - "learning_rate": 2.212e-05, - "loss": 112.1112, - "step": 11060 - }, - { - "epoch": 0.04472420076196786, - "grad_norm": 856.7572021484375, - "learning_rate": 2.214e-05, - "loss": 113.8155, - "step": 11070 - }, - { - "epoch": 0.044764602027335496, - "grad_norm": 1171.519287109375, - "learning_rate": 2.216e-05, - "loss": 144.4537, - "step": 11080 - }, - { - "epoch": 0.04480500329270313, - "grad_norm": 1287.0703125, - "learning_rate": 2.218e-05, - "loss": 122.8268, - "step": 11090 - }, - { - "epoch": 0.04484540455807076, - "grad_norm": 1202.9896240234375, - "learning_rate": 2.22e-05, - "loss": 206.9014, - "step": 11100 - }, - { - "epoch": 0.04488580582343839, - "grad_norm": 1963.7164306640625, - "learning_rate": 2.222e-05, - "loss": 128.2789, - "step": 11110 - }, - { - "epoch": 0.04492620708880602, - "grad_norm": 2504.82958984375, - "learning_rate": 2.224e-05, - "loss": 124.5799, - "step": 11120 - }, - { - "epoch": 0.044966608354173654, - "grad_norm": 1163.331298828125, - "learning_rate": 2.226e-05, - "loss": 91.1706, - "step": 11130 - }, - { - "epoch": 0.04500700961954129, - "grad_norm": 10312.1435546875, - "learning_rate": 2.228e-05, - "loss": 200.2307, - "step": 11140 - }, - { - "epoch": 0.04504741088490891, - "grad_norm": 1264.3104248046875, - "learning_rate": 2.23e-05, - "loss": 161.8118, - "step": 11150 - }, - { - "epoch": 0.045087812150276546, - "grad_norm": 8854.8544921875, - "learning_rate": 2.2320000000000003e-05, - "loss": 162.5124, - "step": 11160 - }, - { - "epoch": 0.04512821341564418, - "grad_norm": 4647.8427734375, - "learning_rate": 2.234e-05, - "loss": 137.7809, - "step": 11170 - }, - { - "epoch": 0.04516861468101181, - "grad_norm": 1007.0010375976562, - "learning_rate": 2.236e-05, - "loss": 128.0276, - "step": 11180 - }, - { - "epoch": 0.04520901594637944, - "grad_norm": 2929.648681640625, - "learning_rate": 2.2380000000000003e-05, - "loss": 132.0453, - "step": 11190 - }, - { - "epoch": 0.04524941721174707, - "grad_norm": 2017.7265625, - "learning_rate": 2.2400000000000002e-05, - "loss": 242.607, - "step": 11200 - }, - { - "epoch": 0.045289818477114704, - "grad_norm": 783.3958129882812, - "learning_rate": 2.2420000000000002e-05, - "loss": 146.9575, - "step": 11210 - }, - { - "epoch": 0.04533021974248234, - "grad_norm": 1278.9561767578125, - "learning_rate": 2.244e-05, - "loss": 183.9778, - "step": 11220 - }, - { - "epoch": 0.045370621007849964, - "grad_norm": 879.9774169921875, - "learning_rate": 2.2460000000000002e-05, - "loss": 139.0227, - "step": 11230 - }, - { - "epoch": 0.0454110222732176, - "grad_norm": 1722.348876953125, - "learning_rate": 2.248e-05, - "loss": 229.9668, - "step": 11240 - }, - { - "epoch": 0.04545142353858523, - "grad_norm": 652.4034423828125, - "learning_rate": 2.25e-05, - "loss": 129.4687, - "step": 11250 - }, - { - "epoch": 0.04549182480395286, - "grad_norm": 1721.231689453125, - "learning_rate": 2.252e-05, - "loss": 168.8096, - "step": 11260 - }, - { - "epoch": 0.04553222606932049, - "grad_norm": 1650.2803955078125, - "learning_rate": 2.254e-05, - "loss": 162.424, - "step": 11270 - }, - { - "epoch": 0.04557262733468812, - "grad_norm": 954.37890625, - "learning_rate": 2.256e-05, - "loss": 197.3434, - "step": 11280 - }, - { - "epoch": 0.045613028600055755, - "grad_norm": 4419.76123046875, - "learning_rate": 2.258e-05, - "loss": 210.9756, - "step": 11290 - }, - { - "epoch": 0.04565342986542339, - "grad_norm": 1498.54296875, - "learning_rate": 2.26e-05, - "loss": 134.9217, - "step": 11300 - }, - { - "epoch": 0.045693831130791014, - "grad_norm": 987.8052368164062, - "learning_rate": 2.2620000000000004e-05, - "loss": 163.9031, - "step": 11310 - }, - { - "epoch": 0.04573423239615865, - "grad_norm": 1421.3912353515625, - "learning_rate": 2.264e-05, - "loss": 175.6454, - "step": 11320 - }, - { - "epoch": 0.04577463366152628, - "grad_norm": 4952.677734375, - "learning_rate": 2.266e-05, - "loss": 191.8143, - "step": 11330 - }, - { - "epoch": 0.04581503492689391, - "grad_norm": 1982.3878173828125, - "learning_rate": 2.268e-05, - "loss": 163.27, - "step": 11340 - }, - { - "epoch": 0.04585543619226154, - "grad_norm": 1355.781005859375, - "learning_rate": 2.2700000000000003e-05, - "loss": 169.8846, - "step": 11350 - }, - { - "epoch": 0.04589583745762917, - "grad_norm": 1866.7637939453125, - "learning_rate": 2.2720000000000003e-05, - "loss": 195.6275, - "step": 11360 - }, - { - "epoch": 0.045936238722996806, - "grad_norm": 1269.4019775390625, - "learning_rate": 2.274e-05, - "loss": 136.8505, - "step": 11370 - }, - { - "epoch": 0.04597663998836444, - "grad_norm": 712.604736328125, - "learning_rate": 2.2760000000000002e-05, - "loss": 142.4693, - "step": 11380 - }, - { - "epoch": 0.046017041253732065, - "grad_norm": 730.568115234375, - "learning_rate": 2.2780000000000002e-05, - "loss": 123.7075, - "step": 11390 - }, - { - "epoch": 0.0460574425190997, - "grad_norm": 748.4280395507812, - "learning_rate": 2.2800000000000002e-05, - "loss": 143.0049, - "step": 11400 - }, - { - "epoch": 0.04609784378446733, - "grad_norm": 1697.1856689453125, - "learning_rate": 2.282e-05, - "loss": 126.3755, - "step": 11410 - }, - { - "epoch": 0.046138245049834964, - "grad_norm": 2124.771240234375, - "learning_rate": 2.284e-05, - "loss": 172.3426, - "step": 11420 - }, - { - "epoch": 0.04617864631520259, - "grad_norm": 1425.5914306640625, - "learning_rate": 2.286e-05, - "loss": 152.4624, - "step": 11430 - }, - { - "epoch": 0.04621904758057022, - "grad_norm": 1039.5628662109375, - "learning_rate": 2.288e-05, - "loss": 168.2912, - "step": 11440 - }, - { - "epoch": 0.046259448845937856, - "grad_norm": 956.6918334960938, - "learning_rate": 2.29e-05, - "loss": 153.5313, - "step": 11450 - }, - { - "epoch": 0.04629985011130549, - "grad_norm": 1411.84423828125, - "learning_rate": 2.292e-05, - "loss": 180.7848, - "step": 11460 - }, - { - "epoch": 0.046340251376673115, - "grad_norm": 1310.616455078125, - "learning_rate": 2.294e-05, - "loss": 156.0493, - "step": 11470 - }, - { - "epoch": 0.04638065264204075, - "grad_norm": 1483.5418701171875, - "learning_rate": 2.296e-05, - "loss": 194.3713, - "step": 11480 - }, - { - "epoch": 0.04642105390740838, - "grad_norm": 1165.32763671875, - "learning_rate": 2.298e-05, - "loss": 143.955, - "step": 11490 - }, - { - "epoch": 0.046461455172776014, - "grad_norm": 1113.7464599609375, - "learning_rate": 2.3000000000000003e-05, - "loss": 151.1558, - "step": 11500 - }, - { - "epoch": 0.04650185643814364, - "grad_norm": 591.7924194335938, - "learning_rate": 2.302e-05, - "loss": 147.8064, - "step": 11510 - }, - { - "epoch": 0.046542257703511274, - "grad_norm": 375.6275329589844, - "learning_rate": 2.304e-05, - "loss": 147.5092, - "step": 11520 - }, - { - "epoch": 0.04658265896887891, - "grad_norm": 1058.679443359375, - "learning_rate": 2.306e-05, - "loss": 129.6008, - "step": 11530 - }, - { - "epoch": 0.04662306023424654, - "grad_norm": 2693.062255859375, - "learning_rate": 2.3080000000000003e-05, - "loss": 168.9607, - "step": 11540 - }, - { - "epoch": 0.046663461499614166, - "grad_norm": 1006.8905029296875, - "learning_rate": 2.3100000000000002e-05, - "loss": 186.7646, - "step": 11550 - }, - { - "epoch": 0.0467038627649818, - "grad_norm": 1659.6192626953125, - "learning_rate": 2.312e-05, - "loss": 245.8637, - "step": 11560 - }, - { - "epoch": 0.04674426403034943, - "grad_norm": 1359.0103759765625, - "learning_rate": 2.3140000000000002e-05, - "loss": 150.9513, - "step": 11570 - }, - { - "epoch": 0.046784665295717065, - "grad_norm": 627.9051513671875, - "learning_rate": 2.3160000000000002e-05, - "loss": 96.2488, - "step": 11580 - }, - { - "epoch": 0.04682506656108469, - "grad_norm": 1331.7342529296875, - "learning_rate": 2.318e-05, - "loss": 139.2599, - "step": 11590 - }, - { - "epoch": 0.046865467826452324, - "grad_norm": 1107.905517578125, - "learning_rate": 2.32e-05, - "loss": 127.3733, - "step": 11600 - }, - { - "epoch": 0.04690586909181996, - "grad_norm": 1476.7293701171875, - "learning_rate": 2.322e-05, - "loss": 88.5844, - "step": 11610 - }, - { - "epoch": 0.04694627035718759, - "grad_norm": 1438.66748046875, - "learning_rate": 2.324e-05, - "loss": 146.773, - "step": 11620 - }, - { - "epoch": 0.046986671622555216, - "grad_norm": 1060.174560546875, - "learning_rate": 2.326e-05, - "loss": 185.9057, - "step": 11630 - }, - { - "epoch": 0.04702707288792285, - "grad_norm": 1116.5933837890625, - "learning_rate": 2.328e-05, - "loss": 161.711, - "step": 11640 - }, - { - "epoch": 0.04706747415329048, - "grad_norm": 1230.9957275390625, - "learning_rate": 2.3300000000000004e-05, - "loss": 161.603, - "step": 11650 - }, - { - "epoch": 0.047107875418658116, - "grad_norm": 1050.322265625, - "learning_rate": 2.332e-05, - "loss": 87.1483, - "step": 11660 - }, - { - "epoch": 0.04714827668402574, - "grad_norm": 561.3192138671875, - "learning_rate": 2.334e-05, - "loss": 72.6746, - "step": 11670 - }, - { - "epoch": 0.047188677949393375, - "grad_norm": 768.3349609375, - "learning_rate": 2.336e-05, - "loss": 245.4826, - "step": 11680 - }, - { - "epoch": 0.04722907921476101, - "grad_norm": 1956.8682861328125, - "learning_rate": 2.3380000000000003e-05, - "loss": 246.9039, - "step": 11690 - }, - { - "epoch": 0.04726948048012864, - "grad_norm": 1342.6962890625, - "learning_rate": 2.3400000000000003e-05, - "loss": 143.455, - "step": 11700 - }, - { - "epoch": 0.04730988174549627, - "grad_norm": 1271.6512451171875, - "learning_rate": 2.342e-05, - "loss": 119.4163, - "step": 11710 - }, - { - "epoch": 0.0473502830108639, - "grad_norm": 973.38427734375, - "learning_rate": 2.344e-05, - "loss": 142.8418, - "step": 11720 - }, - { - "epoch": 0.04739068427623153, - "grad_norm": 725.7868041992188, - "learning_rate": 2.3460000000000002e-05, - "loss": 151.6831, - "step": 11730 - }, - { - "epoch": 0.047431085541599166, - "grad_norm": 1243.775146484375, - "learning_rate": 2.3480000000000002e-05, - "loss": 129.1701, - "step": 11740 - }, - { - "epoch": 0.04747148680696679, - "grad_norm": 1194.3792724609375, - "learning_rate": 2.35e-05, - "loss": 179.6937, - "step": 11750 - }, - { - "epoch": 0.047511888072334425, - "grad_norm": 836.063720703125, - "learning_rate": 2.3520000000000002e-05, - "loss": 96.7801, - "step": 11760 - }, - { - "epoch": 0.04755228933770206, - "grad_norm": 6927.7421875, - "learning_rate": 2.354e-05, - "loss": 170.788, - "step": 11770 - }, - { - "epoch": 0.04759269060306969, - "grad_norm": 1191.6163330078125, - "learning_rate": 2.356e-05, - "loss": 95.2363, - "step": 11780 - }, - { - "epoch": 0.04763309186843732, - "grad_norm": 4237.19384765625, - "learning_rate": 2.358e-05, - "loss": 149.3932, - "step": 11790 - }, - { - "epoch": 0.04767349313380495, - "grad_norm": 770.5933227539062, - "learning_rate": 2.36e-05, - "loss": 163.007, - "step": 11800 - }, - { - "epoch": 0.047713894399172584, - "grad_norm": 393.3005065917969, - "learning_rate": 2.362e-05, - "loss": 169.8844, - "step": 11810 - }, - { - "epoch": 0.04775429566454022, - "grad_norm": 1076.961181640625, - "learning_rate": 2.364e-05, - "loss": 144.8054, - "step": 11820 - }, - { - "epoch": 0.04779469692990784, - "grad_norm": 881.7391967773438, - "learning_rate": 2.366e-05, - "loss": 157.4215, - "step": 11830 - }, - { - "epoch": 0.047835098195275476, - "grad_norm": 1243.5374755859375, - "learning_rate": 2.3680000000000004e-05, - "loss": 159.1609, - "step": 11840 - }, - { - "epoch": 0.04787549946064311, - "grad_norm": 1366.5406494140625, - "learning_rate": 2.37e-05, - "loss": 132.554, - "step": 11850 - }, - { - "epoch": 0.04791590072601074, - "grad_norm": 1306.282470703125, - "learning_rate": 2.372e-05, - "loss": 133.127, - "step": 11860 - }, - { - "epoch": 0.04795630199137837, - "grad_norm": 1843.0394287109375, - "learning_rate": 2.374e-05, - "loss": 167.0897, - "step": 11870 - }, - { - "epoch": 0.047996703256746, - "grad_norm": 1241.4002685546875, - "learning_rate": 2.3760000000000003e-05, - "loss": 195.5516, - "step": 11880 - }, - { - "epoch": 0.048037104522113634, - "grad_norm": 1342.593017578125, - "learning_rate": 2.3780000000000003e-05, - "loss": 188.283, - "step": 11890 - }, - { - "epoch": 0.04807750578748127, - "grad_norm": 6172.35498046875, - "learning_rate": 2.38e-05, - "loss": 202.552, - "step": 11900 - }, - { - "epoch": 0.04811790705284889, - "grad_norm": 979.6281127929688, - "learning_rate": 2.3820000000000002e-05, - "loss": 126.3583, - "step": 11910 - }, - { - "epoch": 0.048158308318216526, - "grad_norm": 3780.92236328125, - "learning_rate": 2.3840000000000002e-05, - "loss": 125.3818, - "step": 11920 - }, - { - "epoch": 0.04819870958358416, - "grad_norm": 1145.655029296875, - "learning_rate": 2.3860000000000002e-05, - "loss": 135.4183, - "step": 11930 - }, - { - "epoch": 0.04823911084895179, - "grad_norm": 3658.722412109375, - "learning_rate": 2.3880000000000002e-05, - "loss": 180.5494, - "step": 11940 - }, - { - "epoch": 0.04827951211431942, - "grad_norm": 1273.83740234375, - "learning_rate": 2.39e-05, - "loss": 208.1243, - "step": 11950 - }, - { - "epoch": 0.04831991337968705, - "grad_norm": 701.6416015625, - "learning_rate": 2.392e-05, - "loss": 119.6717, - "step": 11960 - }, - { - "epoch": 0.048360314645054685, - "grad_norm": 598.38818359375, - "learning_rate": 2.394e-05, - "loss": 106.7641, - "step": 11970 - }, - { - "epoch": 0.04840071591042232, - "grad_norm": 697.3867797851562, - "learning_rate": 2.396e-05, - "loss": 157.0546, - "step": 11980 - }, - { - "epoch": 0.048441117175789944, - "grad_norm": 2313.9013671875, - "learning_rate": 2.398e-05, - "loss": 199.3208, - "step": 11990 - }, - { - "epoch": 0.04848151844115758, - "grad_norm": 712.7728881835938, - "learning_rate": 2.4e-05, - "loss": 163.3351, - "step": 12000 - }, - { - "epoch": 0.04852191970652521, - "grad_norm": 2287.64794921875, - "learning_rate": 2.402e-05, - "loss": 156.7126, - "step": 12010 - }, - { - "epoch": 0.04856232097189284, - "grad_norm": 1171.4212646484375, - "learning_rate": 2.404e-05, - "loss": 154.6671, - "step": 12020 - }, - { - "epoch": 0.04860272223726047, - "grad_norm": 928.2921142578125, - "learning_rate": 2.4060000000000003e-05, - "loss": 189.5635, - "step": 12030 - }, - { - "epoch": 0.0486431235026281, - "grad_norm": 2594.64794921875, - "learning_rate": 2.408e-05, - "loss": 122.9152, - "step": 12040 - }, - { - "epoch": 0.048683524767995735, - "grad_norm": 625.3611450195312, - "learning_rate": 2.41e-05, - "loss": 157.0619, - "step": 12050 - }, - { - "epoch": 0.04872392603336337, - "grad_norm": 1715.5015869140625, - "learning_rate": 2.412e-05, - "loss": 146.0929, - "step": 12060 - }, - { - "epoch": 0.048764327298730994, - "grad_norm": 1481.7435302734375, - "learning_rate": 2.4140000000000003e-05, - "loss": 150.0942, - "step": 12070 - }, - { - "epoch": 0.04880472856409863, - "grad_norm": 1766.0885009765625, - "learning_rate": 2.4160000000000002e-05, - "loss": 122.9469, - "step": 12080 - }, - { - "epoch": 0.04884512982946626, - "grad_norm": 1821.820556640625, - "learning_rate": 2.418e-05, - "loss": 236.4261, - "step": 12090 - }, - { - "epoch": 0.048885531094833894, - "grad_norm": 1121.892333984375, - "learning_rate": 2.4200000000000002e-05, - "loss": 194.8495, - "step": 12100 - }, - { - "epoch": 0.04892593236020152, - "grad_norm": 2687.8505859375, - "learning_rate": 2.4220000000000002e-05, - "loss": 199.661, - "step": 12110 - }, - { - "epoch": 0.04896633362556915, - "grad_norm": 1251.5821533203125, - "learning_rate": 2.4240000000000002e-05, - "loss": 188.0581, - "step": 12120 - }, - { - "epoch": 0.049006734890936786, - "grad_norm": 1508.91455078125, - "learning_rate": 2.426e-05, - "loss": 155.5656, - "step": 12130 - }, - { - "epoch": 0.04904713615630442, - "grad_norm": 1015.0963745117188, - "learning_rate": 2.428e-05, - "loss": 156.3022, - "step": 12140 - }, - { - "epoch": 0.049087537421672045, - "grad_norm": 992.3895874023438, - "learning_rate": 2.43e-05, - "loss": 173.4455, - "step": 12150 - }, - { - "epoch": 0.04912793868703968, - "grad_norm": 5113.12451171875, - "learning_rate": 2.432e-05, - "loss": 201.295, - "step": 12160 - }, - { - "epoch": 0.04916833995240731, - "grad_norm": 1270.9464111328125, - "learning_rate": 2.434e-05, - "loss": 185.7967, - "step": 12170 - }, - { - "epoch": 0.04920874121777494, - "grad_norm": 1565.5892333984375, - "learning_rate": 2.4360000000000004e-05, - "loss": 170.4479, - "step": 12180 - }, - { - "epoch": 0.04924914248314257, - "grad_norm": 1304.619140625, - "learning_rate": 2.438e-05, - "loss": 173.9784, - "step": 12190 - }, - { - "epoch": 0.0492895437485102, - "grad_norm": 1918.267822265625, - "learning_rate": 2.44e-05, - "loss": 100.1017, - "step": 12200 - }, - { - "epoch": 0.049329945013877836, - "grad_norm": 3872.838134765625, - "learning_rate": 2.442e-05, - "loss": 128.8565, - "step": 12210 - }, - { - "epoch": 0.04937034627924546, - "grad_norm": 1211.8031005859375, - "learning_rate": 2.4440000000000003e-05, - "loss": 130.2835, - "step": 12220 - }, - { - "epoch": 0.049410747544613096, - "grad_norm": 1228.3138427734375, - "learning_rate": 2.4460000000000003e-05, - "loss": 191.7344, - "step": 12230 - }, - { - "epoch": 0.04945114880998073, - "grad_norm": 931.3762817382812, - "learning_rate": 2.448e-05, - "loss": 188.0092, - "step": 12240 - }, - { - "epoch": 0.04949155007534836, - "grad_norm": 3841.882568359375, - "learning_rate": 2.45e-05, - "loss": 176.4354, - "step": 12250 - }, - { - "epoch": 0.04953195134071599, - "grad_norm": 868.33935546875, - "learning_rate": 2.4520000000000002e-05, - "loss": 211.7608, - "step": 12260 - }, - { - "epoch": 0.04957235260608362, - "grad_norm": 1211.6044921875, - "learning_rate": 2.4540000000000002e-05, - "loss": 130.6806, - "step": 12270 - }, - { - "epoch": 0.049612753871451254, - "grad_norm": 1216.9310302734375, - "learning_rate": 2.4560000000000002e-05, - "loss": 163.6193, - "step": 12280 - }, - { - "epoch": 0.04965315513681889, - "grad_norm": 745.621337890625, - "learning_rate": 2.4580000000000002e-05, - "loss": 108.5664, - "step": 12290 - }, - { - "epoch": 0.04969355640218651, - "grad_norm": 737.6925048828125, - "learning_rate": 2.46e-05, - "loss": 163.5928, - "step": 12300 - }, - { - "epoch": 0.049733957667554146, - "grad_norm": 1154.443115234375, - "learning_rate": 2.462e-05, - "loss": 114.6066, - "step": 12310 - }, - { - "epoch": 0.04977435893292178, - "grad_norm": 1484.0311279296875, - "learning_rate": 2.464e-05, - "loss": 141.8092, - "step": 12320 - }, - { - "epoch": 0.04981476019828941, - "grad_norm": 1149.4248046875, - "learning_rate": 2.466e-05, - "loss": 142.7456, - "step": 12330 - }, - { - "epoch": 0.04985516146365704, - "grad_norm": 1364.304931640625, - "learning_rate": 2.468e-05, - "loss": 207.4074, - "step": 12340 - }, - { - "epoch": 0.04989556272902467, - "grad_norm": 802.7201538085938, - "learning_rate": 2.47e-05, - "loss": 162.2597, - "step": 12350 - }, - { - "epoch": 0.049935963994392304, - "grad_norm": 1634.9937744140625, - "learning_rate": 2.472e-05, - "loss": 158.4684, - "step": 12360 - }, - { - "epoch": 0.04997636525975994, - "grad_norm": 2859.273681640625, - "learning_rate": 2.4740000000000004e-05, - "loss": 171.345, - "step": 12370 - }, - { - "epoch": 0.050016766525127564, - "grad_norm": 1695.544189453125, - "learning_rate": 2.476e-05, - "loss": 153.0457, - "step": 12380 - }, - { - "epoch": 0.0500571677904952, - "grad_norm": 1252.4989013671875, - "learning_rate": 2.478e-05, - "loss": 223.5171, - "step": 12390 - }, - { - "epoch": 0.05009756905586283, - "grad_norm": 0.0, - "learning_rate": 2.48e-05, - "loss": 124.2808, - "step": 12400 - }, - { - "epoch": 0.05013797032123046, - "grad_norm": 1869.185546875, - "learning_rate": 2.4820000000000003e-05, - "loss": 176.1387, - "step": 12410 - }, - { - "epoch": 0.05017837158659809, - "grad_norm": 2657.873291015625, - "learning_rate": 2.4840000000000003e-05, - "loss": 136.5703, - "step": 12420 - }, - { - "epoch": 0.05021877285196572, - "grad_norm": 1434.21337890625, - "learning_rate": 2.486e-05, - "loss": 185.8033, - "step": 12430 - }, - { - "epoch": 0.050259174117333355, - "grad_norm": 4622.36328125, - "learning_rate": 2.488e-05, - "loss": 238.0834, - "step": 12440 - }, - { - "epoch": 0.05029957538270099, - "grad_norm": 2024.818359375, - "learning_rate": 2.4900000000000002e-05, - "loss": 134.9255, - "step": 12450 - }, - { - "epoch": 0.050339976648068614, - "grad_norm": 2336.821044921875, - "learning_rate": 2.4920000000000002e-05, - "loss": 112.3334, - "step": 12460 - }, - { - "epoch": 0.05038037791343625, - "grad_norm": 617.8527221679688, - "learning_rate": 2.4940000000000002e-05, - "loss": 69.1087, - "step": 12470 - }, - { - "epoch": 0.05042077917880388, - "grad_norm": 871.863037109375, - "learning_rate": 2.496e-05, - "loss": 201.0841, - "step": 12480 - }, - { - "epoch": 0.05046118044417151, - "grad_norm": 1000.557373046875, - "learning_rate": 2.498e-05, - "loss": 167.9705, - "step": 12490 - }, - { - "epoch": 0.05050158170953914, - "grad_norm": 1009.2303466796875, - "learning_rate": 2.5e-05, - "loss": 119.5033, - "step": 12500 - }, - { - "epoch": 0.05054198297490677, - "grad_norm": 2307.47607421875, - "learning_rate": 2.5019999999999998e-05, - "loss": 138.7849, - "step": 12510 - }, - { - "epoch": 0.050582384240274406, - "grad_norm": 2979.96533203125, - "learning_rate": 2.504e-05, - "loss": 132.3597, - "step": 12520 - }, - { - "epoch": 0.05062278550564204, - "grad_norm": 1333.7869873046875, - "learning_rate": 2.506e-05, - "loss": 114.2453, - "step": 12530 - }, - { - "epoch": 0.050663186771009665, - "grad_norm": 2323.83935546875, - "learning_rate": 2.5080000000000004e-05, - "loss": 199.8365, - "step": 12540 - }, - { - "epoch": 0.0507035880363773, - "grad_norm": 988.7455444335938, - "learning_rate": 2.51e-05, - "loss": 158.2298, - "step": 12550 - }, - { - "epoch": 0.05074398930174493, - "grad_norm": 1229.513427734375, - "learning_rate": 2.512e-05, - "loss": 177.6321, - "step": 12560 - }, - { - "epoch": 0.050784390567112564, - "grad_norm": 598.7608032226562, - "learning_rate": 2.5140000000000003e-05, - "loss": 205.6968, - "step": 12570 - }, - { - "epoch": 0.05082479183248019, - "grad_norm": 476.6854553222656, - "learning_rate": 2.516e-05, - "loss": 135.5016, - "step": 12580 - }, - { - "epoch": 0.05086519309784782, - "grad_norm": 441.1506652832031, - "learning_rate": 2.5180000000000003e-05, - "loss": 183.0943, - "step": 12590 - }, - { - "epoch": 0.050905594363215456, - "grad_norm": 861.925537109375, - "learning_rate": 2.5200000000000003e-05, - "loss": 110.7362, - "step": 12600 - }, - { - "epoch": 0.05094599562858309, - "grad_norm": 1135.808837890625, - "learning_rate": 2.522e-05, - "loss": 157.3768, - "step": 12610 - }, - { - "epoch": 0.050986396893950715, - "grad_norm": 764.6640014648438, - "learning_rate": 2.5240000000000002e-05, - "loss": 197.2682, - "step": 12620 - }, - { - "epoch": 0.05102679815931835, - "grad_norm": 1185.298583984375, - "learning_rate": 2.526e-05, - "loss": 154.5542, - "step": 12630 - }, - { - "epoch": 0.05106719942468598, - "grad_norm": 572.9112548828125, - "learning_rate": 2.5280000000000005e-05, - "loss": 185.382, - "step": 12640 - }, - { - "epoch": 0.051107600690053615, - "grad_norm": 0.0, - "learning_rate": 2.5300000000000002e-05, - "loss": 155.8, - "step": 12650 - }, - { - "epoch": 0.05114800195542124, - "grad_norm": 1676.220703125, - "learning_rate": 2.5319999999999998e-05, - "loss": 153.2075, - "step": 12660 - }, - { - "epoch": 0.051188403220788874, - "grad_norm": 3056.052001953125, - "learning_rate": 2.534e-05, - "loss": 160.0424, - "step": 12670 - }, - { - "epoch": 0.05122880448615651, - "grad_norm": 3853.407958984375, - "learning_rate": 2.536e-05, - "loss": 142.0041, - "step": 12680 - }, - { - "epoch": 0.05126920575152414, - "grad_norm": 1022.783447265625, - "learning_rate": 2.5380000000000004e-05, - "loss": 173.1114, - "step": 12690 - }, - { - "epoch": 0.051309607016891766, - "grad_norm": 609.7467041015625, - "learning_rate": 2.54e-05, - "loss": 124.7601, - "step": 12700 - }, - { - "epoch": 0.0513500082822594, - "grad_norm": 871.4917602539062, - "learning_rate": 2.542e-05, - "loss": 107.3807, - "step": 12710 - }, - { - "epoch": 0.05139040954762703, - "grad_norm": 1300.36279296875, - "learning_rate": 2.5440000000000004e-05, - "loss": 128.8927, - "step": 12720 - }, - { - "epoch": 0.051430810812994665, - "grad_norm": 1378.396484375, - "learning_rate": 2.546e-05, - "loss": 179.4028, - "step": 12730 - }, - { - "epoch": 0.05147121207836229, - "grad_norm": 5043.6845703125, - "learning_rate": 2.5480000000000003e-05, - "loss": 145.2417, - "step": 12740 - }, - { - "epoch": 0.051511613343729924, - "grad_norm": 956.9037475585938, - "learning_rate": 2.5500000000000003e-05, - "loss": 122.089, - "step": 12750 - }, - { - "epoch": 0.05155201460909756, - "grad_norm": 655.8726806640625, - "learning_rate": 2.552e-05, - "loss": 172.3547, - "step": 12760 - }, - { - "epoch": 0.05159241587446519, - "grad_norm": 6090.908203125, - "learning_rate": 2.5540000000000003e-05, - "loss": 200.5886, - "step": 12770 - }, - { - "epoch": 0.051632817139832816, - "grad_norm": 760.5556640625, - "learning_rate": 2.556e-05, - "loss": 168.6275, - "step": 12780 - }, - { - "epoch": 0.05167321840520045, - "grad_norm": 1935.444091796875, - "learning_rate": 2.5580000000000002e-05, - "loss": 173.0487, - "step": 12790 - }, - { - "epoch": 0.05171361967056808, - "grad_norm": 1355.29931640625, - "learning_rate": 2.5600000000000002e-05, - "loss": 169.9729, - "step": 12800 - }, - { - "epoch": 0.051754020935935716, - "grad_norm": 1155.65380859375, - "learning_rate": 2.562e-05, - "loss": 230.3313, - "step": 12810 - }, - { - "epoch": 0.05179442220130334, - "grad_norm": 1020.41064453125, - "learning_rate": 2.5640000000000002e-05, - "loss": 151.9028, - "step": 12820 - }, - { - "epoch": 0.051834823466670975, - "grad_norm": 1026.9921875, - "learning_rate": 2.566e-05, - "loss": 194.6138, - "step": 12830 - }, - { - "epoch": 0.05187522473203861, - "grad_norm": 859.4483032226562, - "learning_rate": 2.5679999999999998e-05, - "loss": 137.587, - "step": 12840 - }, - { - "epoch": 0.05191562599740624, - "grad_norm": 1088.9954833984375, - "learning_rate": 2.57e-05, - "loss": 137.7595, - "step": 12850 - }, - { - "epoch": 0.05195602726277387, - "grad_norm": 576.4170532226562, - "learning_rate": 2.572e-05, - "loss": 112.6303, - "step": 12860 - }, - { - "epoch": 0.0519964285281415, - "grad_norm": 1509.91943359375, - "learning_rate": 2.5740000000000004e-05, - "loss": 152.7388, - "step": 12870 - }, - { - "epoch": 0.05203682979350913, - "grad_norm": 1924.8671875, - "learning_rate": 2.576e-05, - "loss": 150.297, - "step": 12880 - }, - { - "epoch": 0.052077231058876766, - "grad_norm": 4206.75048828125, - "learning_rate": 2.5779999999999997e-05, - "loss": 153.3789, - "step": 12890 - }, - { - "epoch": 0.05211763232424439, - "grad_norm": 732.2725219726562, - "learning_rate": 2.58e-05, - "loss": 146.1245, - "step": 12900 - }, - { - "epoch": 0.052158033589612025, - "grad_norm": 804.0054321289062, - "learning_rate": 2.582e-05, - "loss": 155.7175, - "step": 12910 - }, - { - "epoch": 0.05219843485497966, - "grad_norm": 1622.7066650390625, - "learning_rate": 2.5840000000000003e-05, - "loss": 181.8288, - "step": 12920 - }, - { - "epoch": 0.05223883612034729, - "grad_norm": 1736.3221435546875, - "learning_rate": 2.586e-05, - "loss": 156.0094, - "step": 12930 - }, - { - "epoch": 0.05227923738571492, - "grad_norm": 1899.74072265625, - "learning_rate": 2.588e-05, - "loss": 161.6015, - "step": 12940 - }, - { - "epoch": 0.05231963865108255, - "grad_norm": 2343.95556640625, - "learning_rate": 2.5900000000000003e-05, - "loss": 170.2185, - "step": 12950 - }, - { - "epoch": 0.052360039916450184, - "grad_norm": 1888.6707763671875, - "learning_rate": 2.592e-05, - "loss": 204.9056, - "step": 12960 - }, - { - "epoch": 0.05240044118181782, - "grad_norm": 498.68780517578125, - "learning_rate": 2.5940000000000002e-05, - "loss": 142.0052, - "step": 12970 - }, - { - "epoch": 0.05244084244718544, - "grad_norm": 838.1019287109375, - "learning_rate": 2.5960000000000002e-05, - "loss": 113.003, - "step": 12980 - }, - { - "epoch": 0.052481243712553076, - "grad_norm": 636.7835083007812, - "learning_rate": 2.598e-05, - "loss": 96.236, - "step": 12990 - }, - { - "epoch": 0.05252164497792071, - "grad_norm": 1207.0146484375, - "learning_rate": 2.6000000000000002e-05, - "loss": 128.6797, - "step": 13000 - }, - { - "epoch": 0.05256204624328834, - "grad_norm": 2183.191650390625, - "learning_rate": 2.602e-05, - "loss": 135.0814, - "step": 13010 - }, - { - "epoch": 0.05260244750865597, - "grad_norm": 1018.5341796875, - "learning_rate": 2.6040000000000005e-05, - "loss": 83.9305, - "step": 13020 - }, - { - "epoch": 0.0526428487740236, - "grad_norm": 437.8675537109375, - "learning_rate": 2.606e-05, - "loss": 141.287, - "step": 13030 - }, - { - "epoch": 0.052683250039391234, - "grad_norm": 2055.59228515625, - "learning_rate": 2.6079999999999998e-05, - "loss": 131.6824, - "step": 13040 - }, - { - "epoch": 0.05272365130475887, - "grad_norm": 1526.861572265625, - "learning_rate": 2.61e-05, - "loss": 104.1964, - "step": 13050 - }, - { - "epoch": 0.05276405257012649, - "grad_norm": 1107.270263671875, - "learning_rate": 2.612e-05, - "loss": 119.3528, - "step": 13060 - }, - { - "epoch": 0.052804453835494126, - "grad_norm": 0.0, - "learning_rate": 2.6140000000000004e-05, - "loss": 209.7389, - "step": 13070 - }, - { - "epoch": 0.05284485510086176, - "grad_norm": 887.7672729492188, - "learning_rate": 2.616e-05, - "loss": 134.3163, - "step": 13080 - }, - { - "epoch": 0.05288525636622939, - "grad_norm": 600.7372436523438, - "learning_rate": 2.618e-05, - "loss": 120.8824, - "step": 13090 - }, - { - "epoch": 0.05292565763159702, - "grad_norm": 2771.222900390625, - "learning_rate": 2.6200000000000003e-05, - "loss": 116.1366, - "step": 13100 - }, - { - "epoch": 0.05296605889696465, - "grad_norm": 1079.399658203125, - "learning_rate": 2.622e-05, - "loss": 120.0783, - "step": 13110 - }, - { - "epoch": 0.053006460162332285, - "grad_norm": 3635.591796875, - "learning_rate": 2.6240000000000003e-05, - "loss": 114.0672, - "step": 13120 - }, - { - "epoch": 0.05304686142769992, - "grad_norm": 967.0631713867188, - "learning_rate": 2.6260000000000003e-05, - "loss": 197.697, - "step": 13130 - }, - { - "epoch": 0.053087262693067544, - "grad_norm": 540.1041259765625, - "learning_rate": 2.628e-05, - "loss": 169.7453, - "step": 13140 - }, - { - "epoch": 0.05312766395843518, - "grad_norm": 1378.034912109375, - "learning_rate": 2.6300000000000002e-05, - "loss": 152.1042, - "step": 13150 - }, - { - "epoch": 0.05316806522380281, - "grad_norm": 2139.15234375, - "learning_rate": 2.632e-05, - "loss": 219.7844, - "step": 13160 - }, - { - "epoch": 0.05320846648917044, - "grad_norm": 2143.552734375, - "learning_rate": 2.6340000000000002e-05, - "loss": 152.3353, - "step": 13170 - }, - { - "epoch": 0.05324886775453807, - "grad_norm": 1372.249755859375, - "learning_rate": 2.6360000000000002e-05, - "loss": 216.3558, - "step": 13180 - }, - { - "epoch": 0.0532892690199057, - "grad_norm": 723.7531127929688, - "learning_rate": 2.6379999999999998e-05, - "loss": 106.8604, - "step": 13190 - }, - { - "epoch": 0.053329670285273335, - "grad_norm": 1488.490234375, - "learning_rate": 2.64e-05, - "loss": 128.0357, - "step": 13200 - }, - { - "epoch": 0.05337007155064097, - "grad_norm": 962.85498046875, - "learning_rate": 2.642e-05, - "loss": 214.9135, - "step": 13210 - }, - { - "epoch": 0.053410472816008595, - "grad_norm": 916.577392578125, - "learning_rate": 2.6440000000000004e-05, - "loss": 100.3119, - "step": 13220 - }, - { - "epoch": 0.05345087408137623, - "grad_norm": 520.3284912109375, - "learning_rate": 2.646e-05, - "loss": 159.4224, - "step": 13230 - }, - { - "epoch": 0.05349127534674386, - "grad_norm": 1644.9588623046875, - "learning_rate": 2.648e-05, - "loss": 153.6036, - "step": 13240 - }, - { - "epoch": 0.053531676612111494, - "grad_norm": 834.4176025390625, - "learning_rate": 2.6500000000000004e-05, - "loss": 179.9941, - "step": 13250 - }, - { - "epoch": 0.05357207787747912, - "grad_norm": 753.1478881835938, - "learning_rate": 2.652e-05, - "loss": 114.1454, - "step": 13260 - }, - { - "epoch": 0.05361247914284675, - "grad_norm": 1375.6973876953125, - "learning_rate": 2.6540000000000003e-05, - "loss": 121.5828, - "step": 13270 - }, - { - "epoch": 0.053652880408214386, - "grad_norm": 837.9177856445312, - "learning_rate": 2.6560000000000003e-05, - "loss": 162.0316, - "step": 13280 - }, - { - "epoch": 0.05369328167358202, - "grad_norm": 1957.6124267578125, - "learning_rate": 2.658e-05, - "loss": 123.4925, - "step": 13290 - }, - { - "epoch": 0.053733682938949645, - "grad_norm": 2115.5908203125, - "learning_rate": 2.6600000000000003e-05, - "loss": 163.0786, - "step": 13300 - }, - { - "epoch": 0.05377408420431728, - "grad_norm": 767.2696533203125, - "learning_rate": 2.662e-05, - "loss": 99.3506, - "step": 13310 - }, - { - "epoch": 0.05381448546968491, - "grad_norm": 1096.5877685546875, - "learning_rate": 2.6640000000000002e-05, - "loss": 130.1019, - "step": 13320 - }, - { - "epoch": 0.053854886735052544, - "grad_norm": 990.4303588867188, - "learning_rate": 2.6660000000000002e-05, - "loss": 218.8272, - "step": 13330 - }, - { - "epoch": 0.05389528800042017, - "grad_norm": 1289.1722412109375, - "learning_rate": 2.668e-05, - "loss": 181.8528, - "step": 13340 - }, - { - "epoch": 0.0539356892657878, - "grad_norm": 779.4696655273438, - "learning_rate": 2.6700000000000002e-05, - "loss": 129.5975, - "step": 13350 - }, - { - "epoch": 0.053976090531155436, - "grad_norm": 928.5990600585938, - "learning_rate": 2.672e-05, - "loss": 186.8758, - "step": 13360 - }, - { - "epoch": 0.05401649179652307, - "grad_norm": 3074.220947265625, - "learning_rate": 2.6740000000000005e-05, - "loss": 182.7136, - "step": 13370 - }, - { - "epoch": 0.054056893061890696, - "grad_norm": 1353.44970703125, - "learning_rate": 2.676e-05, - "loss": 115.3092, - "step": 13380 - }, - { - "epoch": 0.05409729432725833, - "grad_norm": 1833.181396484375, - "learning_rate": 2.678e-05, - "loss": 136.1055, - "step": 13390 - }, - { - "epoch": 0.05413769559262596, - "grad_norm": 671.771484375, - "learning_rate": 2.6800000000000004e-05, - "loss": 101.5414, - "step": 13400 - }, - { - "epoch": 0.054178096857993595, - "grad_norm": 1407.6787109375, - "learning_rate": 2.682e-05, - "loss": 140.1277, - "step": 13410 - }, - { - "epoch": 0.05421849812336122, - "grad_norm": 999.6378784179688, - "learning_rate": 2.6840000000000004e-05, - "loss": 173.1913, - "step": 13420 - }, - { - "epoch": 0.054258899388728854, - "grad_norm": 1126.044189453125, - "learning_rate": 2.686e-05, - "loss": 100.9075, - "step": 13430 - }, - { - "epoch": 0.05429930065409649, - "grad_norm": 995.6368408203125, - "learning_rate": 2.688e-05, - "loss": 152.3883, - "step": 13440 - }, - { - "epoch": 0.05433970191946412, - "grad_norm": 1857.345947265625, - "learning_rate": 2.6900000000000003e-05, - "loss": 164.2901, - "step": 13450 - }, - { - "epoch": 0.054380103184831746, - "grad_norm": 1917.1810302734375, - "learning_rate": 2.692e-05, - "loss": 117.7432, - "step": 13460 - }, - { - "epoch": 0.05442050445019938, - "grad_norm": 1812.2779541015625, - "learning_rate": 2.694e-05, - "loss": 158.6927, - "step": 13470 - }, - { - "epoch": 0.05446090571556701, - "grad_norm": 823.9198608398438, - "learning_rate": 2.6960000000000003e-05, - "loss": 147.1588, - "step": 13480 - }, - { - "epoch": 0.054501306980934645, - "grad_norm": 1157.1685791015625, - "learning_rate": 2.698e-05, - "loss": 167.8175, - "step": 13490 - }, - { - "epoch": 0.05454170824630227, - "grad_norm": 1123.951416015625, - "learning_rate": 2.7000000000000002e-05, - "loss": 194.7915, - "step": 13500 - }, - { - "epoch": 0.054582109511669905, - "grad_norm": 1402.1549072265625, - "learning_rate": 2.7020000000000002e-05, - "loss": 137.6079, - "step": 13510 - }, - { - "epoch": 0.05462251077703754, - "grad_norm": 786.1826782226562, - "learning_rate": 2.704e-05, - "loss": 113.2719, - "step": 13520 - }, - { - "epoch": 0.05466291204240517, - "grad_norm": 1365.093017578125, - "learning_rate": 2.7060000000000002e-05, - "loss": 104.7612, - "step": 13530 - }, - { - "epoch": 0.0547033133077728, - "grad_norm": 1461.1785888671875, - "learning_rate": 2.7079999999999998e-05, - "loss": 168.2114, - "step": 13540 - }, - { - "epoch": 0.05474371457314043, - "grad_norm": 1377.26806640625, - "learning_rate": 2.7100000000000005e-05, - "loss": 184.7103, - "step": 13550 - }, - { - "epoch": 0.05478411583850806, - "grad_norm": 1345.474853515625, - "learning_rate": 2.712e-05, - "loss": 137.9565, - "step": 13560 - }, - { - "epoch": 0.054824517103875696, - "grad_norm": 1087.7122802734375, - "learning_rate": 2.7139999999999998e-05, - "loss": 152.1091, - "step": 13570 - }, - { - "epoch": 0.05486491836924332, - "grad_norm": 0.0, - "learning_rate": 2.716e-05, - "loss": 82.2364, - "step": 13580 - }, - { - "epoch": 0.054905319634610955, - "grad_norm": 730.9200439453125, - "learning_rate": 2.718e-05, - "loss": 165.8833, - "step": 13590 - }, - { - "epoch": 0.05494572089997859, - "grad_norm": 1368.674072265625, - "learning_rate": 2.7200000000000004e-05, - "loss": 121.1773, - "step": 13600 - }, - { - "epoch": 0.05498612216534622, - "grad_norm": 1869.849853515625, - "learning_rate": 2.722e-05, - "loss": 231.0817, - "step": 13610 - }, - { - "epoch": 0.05502652343071385, - "grad_norm": 976.0435180664062, - "learning_rate": 2.724e-05, - "loss": 111.1706, - "step": 13620 - }, - { - "epoch": 0.05506692469608148, - "grad_norm": 1996.230224609375, - "learning_rate": 2.7260000000000003e-05, - "loss": 189.7502, - "step": 13630 - }, - { - "epoch": 0.05510732596144911, - "grad_norm": 971.2109985351562, - "learning_rate": 2.728e-05, - "loss": 117.1644, - "step": 13640 - }, - { - "epoch": 0.055147727226816746, - "grad_norm": 1315.8665771484375, - "learning_rate": 2.7300000000000003e-05, - "loss": 185.0801, - "step": 13650 - }, - { - "epoch": 0.05518812849218437, - "grad_norm": 600.6631469726562, - "learning_rate": 2.7320000000000003e-05, - "loss": 157.1564, - "step": 13660 - }, - { - "epoch": 0.055228529757552006, - "grad_norm": 888.3346557617188, - "learning_rate": 2.734e-05, - "loss": 117.6915, - "step": 13670 - }, - { - "epoch": 0.05526893102291964, - "grad_norm": 808.2348022460938, - "learning_rate": 2.7360000000000002e-05, - "loss": 99.7357, - "step": 13680 - }, - { - "epoch": 0.05530933228828727, - "grad_norm": 1063.2569580078125, - "learning_rate": 2.738e-05, - "loss": 143.7845, - "step": 13690 - }, - { - "epoch": 0.0553497335536549, - "grad_norm": 1069.0965576171875, - "learning_rate": 2.7400000000000002e-05, - "loss": 155.6282, - "step": 13700 - }, - { - "epoch": 0.05539013481902253, - "grad_norm": 1055.5029296875, - "learning_rate": 2.7420000000000002e-05, - "loss": 173.6117, - "step": 13710 - }, - { - "epoch": 0.055430536084390164, - "grad_norm": 940.4775390625, - "learning_rate": 2.7439999999999998e-05, - "loss": 120.3455, - "step": 13720 - }, - { - "epoch": 0.0554709373497578, - "grad_norm": 1010.4285888671875, - "learning_rate": 2.746e-05, - "loss": 142.2922, - "step": 13730 - }, - { - "epoch": 0.05551133861512542, - "grad_norm": 1600.6968994140625, - "learning_rate": 2.748e-05, - "loss": 157.9479, - "step": 13740 - }, - { - "epoch": 0.055551739880493056, - "grad_norm": 1101.0772705078125, - "learning_rate": 2.7500000000000004e-05, - "loss": 137.5087, - "step": 13750 - }, - { - "epoch": 0.05559214114586069, - "grad_norm": 2070.27978515625, - "learning_rate": 2.752e-05, - "loss": 125.7709, - "step": 13760 - }, - { - "epoch": 0.05563254241122832, - "grad_norm": 869.279052734375, - "learning_rate": 2.754e-05, - "loss": 135.6122, - "step": 13770 - }, - { - "epoch": 0.05567294367659595, - "grad_norm": 635.1331787109375, - "learning_rate": 2.7560000000000004e-05, - "loss": 120.2631, - "step": 13780 - }, - { - "epoch": 0.05571334494196358, - "grad_norm": 712.9791870117188, - "learning_rate": 2.758e-05, - "loss": 132.9366, - "step": 13790 - }, - { - "epoch": 0.055753746207331215, - "grad_norm": 1173.3760986328125, - "learning_rate": 2.7600000000000003e-05, - "loss": 174.3566, - "step": 13800 - }, - { - "epoch": 0.05579414747269885, - "grad_norm": 1058.1973876953125, - "learning_rate": 2.762e-05, - "loss": 101.473, - "step": 13810 - }, - { - "epoch": 0.055834548738066474, - "grad_norm": 1767.0400390625, - "learning_rate": 2.764e-05, - "loss": 149.575, - "step": 13820 - }, - { - "epoch": 0.05587495000343411, - "grad_norm": 438.71697998046875, - "learning_rate": 2.7660000000000003e-05, - "loss": 150.794, - "step": 13830 - }, - { - "epoch": 0.05591535126880174, - "grad_norm": 1495.567626953125, - "learning_rate": 2.768e-05, - "loss": 116.9854, - "step": 13840 - }, - { - "epoch": 0.05595575253416937, - "grad_norm": 1169.4012451171875, - "learning_rate": 2.7700000000000002e-05, - "loss": 158.6589, - "step": 13850 - }, - { - "epoch": 0.055996153799537, - "grad_norm": 1633.1995849609375, - "learning_rate": 2.7720000000000002e-05, - "loss": 126.1669, - "step": 13860 - }, - { - "epoch": 0.05603655506490463, - "grad_norm": 1052.2532958984375, - "learning_rate": 2.774e-05, - "loss": 180.1025, - "step": 13870 - }, - { - "epoch": 0.056076956330272265, - "grad_norm": 1021.2620239257812, - "learning_rate": 2.7760000000000002e-05, - "loss": 136.4397, - "step": 13880 - }, - { - "epoch": 0.0561173575956399, - "grad_norm": 886.8902587890625, - "learning_rate": 2.778e-05, - "loss": 91.0113, - "step": 13890 - }, - { - "epoch": 0.056157758861007524, - "grad_norm": 1001.5513305664062, - "learning_rate": 2.7800000000000005e-05, - "loss": 110.4852, - "step": 13900 - }, - { - "epoch": 0.05619816012637516, - "grad_norm": 668.9522094726562, - "learning_rate": 2.782e-05, - "loss": 126.04, - "step": 13910 - }, - { - "epoch": 0.05623856139174279, - "grad_norm": 3181.50146484375, - "learning_rate": 2.7839999999999998e-05, - "loss": 175.8362, - "step": 13920 - }, - { - "epoch": 0.05627896265711042, - "grad_norm": 1067.25732421875, - "learning_rate": 2.7860000000000004e-05, - "loss": 124.6091, - "step": 13930 - }, - { - "epoch": 0.05631936392247805, - "grad_norm": 758.8274536132812, - "learning_rate": 2.788e-05, - "loss": 129.1508, - "step": 13940 - }, - { - "epoch": 0.05635976518784568, - "grad_norm": 845.1571044921875, - "learning_rate": 2.7900000000000004e-05, - "loss": 164.5396, - "step": 13950 - }, - { - "epoch": 0.056400166453213316, - "grad_norm": 812.546875, - "learning_rate": 2.792e-05, - "loss": 144.2103, - "step": 13960 - }, - { - "epoch": 0.05644056771858095, - "grad_norm": 1005.88720703125, - "learning_rate": 2.794e-05, - "loss": 131.6613, - "step": 13970 - }, - { - "epoch": 0.056480968983948575, - "grad_norm": 2054.029541015625, - "learning_rate": 2.7960000000000003e-05, - "loss": 127.4749, - "step": 13980 - }, - { - "epoch": 0.05652137024931621, - "grad_norm": 466.14068603515625, - "learning_rate": 2.798e-05, - "loss": 163.5435, - "step": 13990 - }, - { - "epoch": 0.05656177151468384, - "grad_norm": 599.2301025390625, - "learning_rate": 2.8000000000000003e-05, - "loss": 107.817, - "step": 14000 - }, - { - "epoch": 0.056602172780051474, - "grad_norm": 2105.31640625, - "learning_rate": 2.8020000000000003e-05, - "loss": 197.3428, - "step": 14010 - }, - { - "epoch": 0.0566425740454191, - "grad_norm": 953.76025390625, - "learning_rate": 2.804e-05, - "loss": 147.2875, - "step": 14020 - }, - { - "epoch": 0.05668297531078673, - "grad_norm": 495.1427001953125, - "learning_rate": 2.8060000000000002e-05, - "loss": 132.651, - "step": 14030 - }, - { - "epoch": 0.056723376576154366, - "grad_norm": 1009.431884765625, - "learning_rate": 2.8080000000000002e-05, - "loss": 145.7106, - "step": 14040 - }, - { - "epoch": 0.056763777841522, - "grad_norm": 939.3583374023438, - "learning_rate": 2.8100000000000005e-05, - "loss": 139.3796, - "step": 14050 - }, - { - "epoch": 0.056804179106889625, - "grad_norm": 1285.1646728515625, - "learning_rate": 2.8120000000000002e-05, - "loss": 142.4715, - "step": 14060 - }, - { - "epoch": 0.05684458037225726, - "grad_norm": 1945.31494140625, - "learning_rate": 2.8139999999999998e-05, - "loss": 181.1451, - "step": 14070 - }, - { - "epoch": 0.05688498163762489, - "grad_norm": 811.0607299804688, - "learning_rate": 2.816e-05, - "loss": 126.362, - "step": 14080 - }, - { - "epoch": 0.056925382902992525, - "grad_norm": 4073.4345703125, - "learning_rate": 2.818e-05, - "loss": 116.4024, - "step": 14090 - }, - { - "epoch": 0.05696578416836015, - "grad_norm": 3792.585205078125, - "learning_rate": 2.8199999999999998e-05, - "loss": 192.2604, - "step": 14100 - }, - { - "epoch": 0.057006185433727784, - "grad_norm": 2066.12060546875, - "learning_rate": 2.822e-05, - "loss": 119.7598, - "step": 14110 - }, - { - "epoch": 0.05704658669909542, - "grad_norm": 938.1848754882812, - "learning_rate": 2.824e-05, - "loss": 127.1674, - "step": 14120 - }, - { - "epoch": 0.05708698796446305, - "grad_norm": 1010.5850830078125, - "learning_rate": 2.8260000000000004e-05, - "loss": 144.3951, - "step": 14130 - }, - { - "epoch": 0.057127389229830676, - "grad_norm": 2251.332275390625, - "learning_rate": 2.828e-05, - "loss": 202.1318, - "step": 14140 - }, - { - "epoch": 0.05716779049519831, - "grad_norm": 701.1483154296875, - "learning_rate": 2.83e-05, - "loss": 138.087, - "step": 14150 - }, - { - "epoch": 0.05720819176056594, - "grad_norm": 646.7645263671875, - "learning_rate": 2.8320000000000003e-05, - "loss": 114.6259, - "step": 14160 - }, - { - "epoch": 0.057248593025933575, - "grad_norm": 1258.2159423828125, - "learning_rate": 2.834e-05, - "loss": 134.9526, - "step": 14170 - }, - { - "epoch": 0.0572889942913012, - "grad_norm": 994.9586181640625, - "learning_rate": 2.8360000000000003e-05, - "loss": 116.3534, - "step": 14180 - }, - { - "epoch": 0.057329395556668834, - "grad_norm": 1028.46240234375, - "learning_rate": 2.8380000000000003e-05, - "loss": 140.937, - "step": 14190 - }, - { - "epoch": 0.05736979682203647, - "grad_norm": 814.48681640625, - "learning_rate": 2.84e-05, - "loss": 117.722, - "step": 14200 - }, - { - "epoch": 0.0574101980874041, - "grad_norm": 1542.7169189453125, - "learning_rate": 2.8420000000000002e-05, - "loss": 115.8539, - "step": 14210 - }, - { - "epoch": 0.057450599352771727, - "grad_norm": 576.6505737304688, - "learning_rate": 2.844e-05, - "loss": 111.9559, - "step": 14220 - }, - { - "epoch": 0.05749100061813936, - "grad_norm": 1681.9095458984375, - "learning_rate": 2.8460000000000002e-05, - "loss": 145.2461, - "step": 14230 - }, - { - "epoch": 0.05753140188350699, - "grad_norm": 1178.3514404296875, - "learning_rate": 2.8480000000000002e-05, - "loss": 178.6759, - "step": 14240 - }, - { - "epoch": 0.057571803148874626, - "grad_norm": 1500.439208984375, - "learning_rate": 2.8499999999999998e-05, - "loss": 131.998, - "step": 14250 - }, - { - "epoch": 0.05761220441424225, - "grad_norm": 1533.505126953125, - "learning_rate": 2.852e-05, - "loss": 147.4367, - "step": 14260 - }, - { - "epoch": 0.057652605679609885, - "grad_norm": 495.7720947265625, - "learning_rate": 2.854e-05, - "loss": 115.4109, - "step": 14270 - }, - { - "epoch": 0.05769300694497752, - "grad_norm": 561.2579956054688, - "learning_rate": 2.8560000000000004e-05, - "loss": 147.6145, - "step": 14280 - }, - { - "epoch": 0.05773340821034515, - "grad_norm": 571.2040405273438, - "learning_rate": 2.858e-05, - "loss": 197.6198, - "step": 14290 - }, - { - "epoch": 0.05777380947571278, - "grad_norm": 2524.6904296875, - "learning_rate": 2.86e-05, - "loss": 171.4188, - "step": 14300 - }, - { - "epoch": 0.05781421074108041, - "grad_norm": 2208.06396484375, - "learning_rate": 2.8620000000000004e-05, - "loss": 124.7192, - "step": 14310 - }, - { - "epoch": 0.05785461200644804, - "grad_norm": 1544.6385498046875, - "learning_rate": 2.864e-05, - "loss": 145.8727, - "step": 14320 - }, - { - "epoch": 0.057895013271815676, - "grad_norm": 953.2141723632812, - "learning_rate": 2.8660000000000003e-05, - "loss": 98.7387, - "step": 14330 - }, - { - "epoch": 0.0579354145371833, - "grad_norm": 497.53021240234375, - "learning_rate": 2.868e-05, - "loss": 167.9052, - "step": 14340 - }, - { - "epoch": 0.057975815802550935, - "grad_norm": 441.49951171875, - "learning_rate": 2.87e-05, - "loss": 166.814, - "step": 14350 - }, - { - "epoch": 0.05801621706791857, - "grad_norm": 2393.28271484375, - "learning_rate": 2.8720000000000003e-05, - "loss": 155.3981, - "step": 14360 - }, - { - "epoch": 0.0580566183332862, - "grad_norm": 5085.328125, - "learning_rate": 2.874e-05, - "loss": 126.6876, - "step": 14370 - }, - { - "epoch": 0.05809701959865383, - "grad_norm": 1251.988525390625, - "learning_rate": 2.8760000000000002e-05, - "loss": 161.9475, - "step": 14380 - }, - { - "epoch": 0.05813742086402146, - "grad_norm": 2122.52392578125, - "learning_rate": 2.8780000000000002e-05, - "loss": 177.3616, - "step": 14390 - }, - { - "epoch": 0.058177822129389094, - "grad_norm": 886.3876342773438, - "learning_rate": 2.88e-05, - "loss": 102.448, - "step": 14400 - }, - { - "epoch": 0.05821822339475673, - "grad_norm": 1496.39697265625, - "learning_rate": 2.8820000000000002e-05, - "loss": 177.4485, - "step": 14410 - }, - { - "epoch": 0.05825862466012435, - "grad_norm": 1387.0819091796875, - "learning_rate": 2.8840000000000002e-05, - "loss": 134.0211, - "step": 14420 - }, - { - "epoch": 0.058299025925491986, - "grad_norm": 571.4561157226562, - "learning_rate": 2.8860000000000005e-05, - "loss": 109.2302, - "step": 14430 - }, - { - "epoch": 0.05833942719085962, - "grad_norm": 679.628173828125, - "learning_rate": 2.888e-05, - "loss": 124.9287, - "step": 14440 - }, - { - "epoch": 0.05837982845622725, - "grad_norm": 1030.914306640625, - "learning_rate": 2.8899999999999998e-05, - "loss": 141.9723, - "step": 14450 - }, - { - "epoch": 0.05842022972159488, - "grad_norm": 1213.34033203125, - "learning_rate": 2.8920000000000004e-05, - "loss": 133.645, - "step": 14460 - }, - { - "epoch": 0.05846063098696251, - "grad_norm": 867.3865356445312, - "learning_rate": 2.894e-05, - "loss": 109.0037, - "step": 14470 - }, - { - "epoch": 0.058501032252330144, - "grad_norm": 1019.241943359375, - "learning_rate": 2.8960000000000004e-05, - "loss": 179.6844, - "step": 14480 - }, - { - "epoch": 0.05854143351769778, - "grad_norm": 3133.170166015625, - "learning_rate": 2.898e-05, - "loss": 141.274, - "step": 14490 - }, - { - "epoch": 0.058581834783065403, - "grad_norm": 675.6682739257812, - "learning_rate": 2.9e-05, - "loss": 116.3539, - "step": 14500 - }, - { - "epoch": 0.05862223604843304, - "grad_norm": 915.9588012695312, - "learning_rate": 2.9020000000000003e-05, - "loss": 95.1154, - "step": 14510 - }, - { - "epoch": 0.05866263731380067, - "grad_norm": 1290.10546875, - "learning_rate": 2.904e-05, - "loss": 179.5605, - "step": 14520 - }, - { - "epoch": 0.0587030385791683, - "grad_norm": 712.0923461914062, - "learning_rate": 2.9060000000000003e-05, - "loss": 146.6162, - "step": 14530 - }, - { - "epoch": 0.05874343984453593, - "grad_norm": 893.6064453125, - "learning_rate": 2.9080000000000003e-05, - "loss": 104.5347, - "step": 14540 - }, - { - "epoch": 0.05878384110990356, - "grad_norm": 1025.2301025390625, - "learning_rate": 2.91e-05, - "loss": 104.5477, - "step": 14550 - }, - { - "epoch": 0.058824242375271195, - "grad_norm": 2338.9072265625, - "learning_rate": 2.9120000000000002e-05, - "loss": 142.9934, - "step": 14560 - }, - { - "epoch": 0.05886464364063883, - "grad_norm": 633.3606567382812, - "learning_rate": 2.9140000000000002e-05, - "loss": 98.545, - "step": 14570 - }, - { - "epoch": 0.058905044906006454, - "grad_norm": 1361.783935546875, - "learning_rate": 2.9160000000000005e-05, - "loss": 113.2945, - "step": 14580 - }, - { - "epoch": 0.05894544617137409, - "grad_norm": 1828.3387451171875, - "learning_rate": 2.9180000000000002e-05, - "loss": 119.5862, - "step": 14590 - }, - { - "epoch": 0.05898584743674172, - "grad_norm": 2555.387939453125, - "learning_rate": 2.9199999999999998e-05, - "loss": 177.2158, - "step": 14600 - }, - { - "epoch": 0.05902624870210935, - "grad_norm": 2425.751220703125, - "learning_rate": 2.922e-05, - "loss": 118.8401, - "step": 14610 - }, - { - "epoch": 0.05906664996747698, - "grad_norm": 758.7974243164062, - "learning_rate": 2.924e-05, - "loss": 136.7834, - "step": 14620 - }, - { - "epoch": 0.05910705123284461, - "grad_norm": 2093.38720703125, - "learning_rate": 2.9260000000000004e-05, - "loss": 91.4101, - "step": 14630 - }, - { - "epoch": 0.059147452498212245, - "grad_norm": 1140.317138671875, - "learning_rate": 2.928e-05, - "loss": 142.2483, - "step": 14640 - }, - { - "epoch": 0.05918785376357988, - "grad_norm": 764.8568115234375, - "learning_rate": 2.93e-05, - "loss": 105.0373, - "step": 14650 - }, - { - "epoch": 0.059228255028947505, - "grad_norm": 562.0804443359375, - "learning_rate": 2.9320000000000004e-05, - "loss": 140.6483, - "step": 14660 - }, - { - "epoch": 0.05926865629431514, - "grad_norm": 1717.2381591796875, - "learning_rate": 2.934e-05, - "loss": 120.7989, - "step": 14670 - }, - { - "epoch": 0.05930905755968277, - "grad_norm": 1295.689208984375, - "learning_rate": 2.9360000000000003e-05, - "loss": 145.4503, - "step": 14680 - }, - { - "epoch": 0.059349458825050404, - "grad_norm": 1297.196533203125, - "learning_rate": 2.9380000000000003e-05, - "loss": 127.0184, - "step": 14690 - }, - { - "epoch": 0.05938986009041803, - "grad_norm": 483.24420166015625, - "learning_rate": 2.94e-05, - "loss": 153.2117, - "step": 14700 - }, - { - "epoch": 0.05943026135578566, - "grad_norm": 777.18408203125, - "learning_rate": 2.9420000000000003e-05, - "loss": 126.1312, - "step": 14710 - }, - { - "epoch": 0.059470662621153296, - "grad_norm": 1387.6092529296875, - "learning_rate": 2.944e-05, - "loss": 158.3849, - "step": 14720 - }, - { - "epoch": 0.05951106388652093, - "grad_norm": 3654.914306640625, - "learning_rate": 2.946e-05, - "loss": 177.4717, - "step": 14730 - }, - { - "epoch": 0.059551465151888555, - "grad_norm": 2103.89599609375, - "learning_rate": 2.9480000000000002e-05, - "loss": 133.8275, - "step": 14740 - }, - { - "epoch": 0.05959186641725619, - "grad_norm": 506.49200439453125, - "learning_rate": 2.95e-05, - "loss": 121.0761, - "step": 14750 - }, - { - "epoch": 0.05963226768262382, - "grad_norm": 1055.931396484375, - "learning_rate": 2.9520000000000002e-05, - "loss": 143.0577, - "step": 14760 - }, - { - "epoch": 0.059672668947991454, - "grad_norm": 745.9114379882812, - "learning_rate": 2.9540000000000002e-05, - "loss": 140.8658, - "step": 14770 - }, - { - "epoch": 0.05971307021335908, - "grad_norm": 809.5819702148438, - "learning_rate": 2.9559999999999998e-05, - "loss": 104.4415, - "step": 14780 - }, - { - "epoch": 0.059753471478726713, - "grad_norm": 1688.5380859375, - "learning_rate": 2.958e-05, - "loss": 152.457, - "step": 14790 - }, - { - "epoch": 0.05979387274409435, - "grad_norm": 910.4462280273438, - "learning_rate": 2.96e-05, - "loss": 149.1814, - "step": 14800 - }, - { - "epoch": 0.05983427400946198, - "grad_norm": 753.3027954101562, - "learning_rate": 2.9620000000000004e-05, - "loss": 142.1045, - "step": 14810 - }, - { - "epoch": 0.059874675274829606, - "grad_norm": 545.16552734375, - "learning_rate": 2.964e-05, - "loss": 120.4504, - "step": 14820 - }, - { - "epoch": 0.05991507654019724, - "grad_norm": 1327.6309814453125, - "learning_rate": 2.9659999999999997e-05, - "loss": 109.4865, - "step": 14830 - }, - { - "epoch": 0.05995547780556487, - "grad_norm": 867.6209106445312, - "learning_rate": 2.9680000000000004e-05, - "loss": 128.9373, - "step": 14840 - }, - { - "epoch": 0.059995879070932505, - "grad_norm": 552.6983032226562, - "learning_rate": 2.97e-05, - "loss": 148.3395, - "step": 14850 - }, - { - "epoch": 0.06003628033630013, - "grad_norm": 1947.0716552734375, - "learning_rate": 2.9720000000000003e-05, - "loss": 115.2734, - "step": 14860 - }, - { - "epoch": 0.060076681601667764, - "grad_norm": 793.457763671875, - "learning_rate": 2.974e-05, - "loss": 112.6054, - "step": 14870 - }, - { - "epoch": 0.0601170828670354, - "grad_norm": 849.2798461914062, - "learning_rate": 2.976e-05, - "loss": 126.2524, - "step": 14880 - }, - { - "epoch": 0.06015748413240303, - "grad_norm": 1418.7293701171875, - "learning_rate": 2.9780000000000003e-05, - "loss": 135.3141, - "step": 14890 - }, - { - "epoch": 0.060197885397770656, - "grad_norm": 787.8895263671875, - "learning_rate": 2.98e-05, - "loss": 150.2098, - "step": 14900 - }, - { - "epoch": 0.06023828666313829, - "grad_norm": 1818.129150390625, - "learning_rate": 2.9820000000000002e-05, - "loss": 142.2445, - "step": 14910 - }, - { - "epoch": 0.06027868792850592, - "grad_norm": 910.6742553710938, - "learning_rate": 2.9840000000000002e-05, - "loss": 178.141, - "step": 14920 - }, - { - "epoch": 0.060319089193873555, - "grad_norm": 2399.4091796875, - "learning_rate": 2.986e-05, - "loss": 175.9365, - "step": 14930 - }, - { - "epoch": 0.06035949045924118, - "grad_norm": 1148.7041015625, - "learning_rate": 2.9880000000000002e-05, - "loss": 145.5652, - "step": 14940 - }, - { - "epoch": 0.060399891724608815, - "grad_norm": 1392.08740234375, - "learning_rate": 2.9900000000000002e-05, - "loss": 139.3894, - "step": 14950 - }, - { - "epoch": 0.06044029298997645, - "grad_norm": 878.8516235351562, - "learning_rate": 2.9920000000000005e-05, - "loss": 65.7636, - "step": 14960 - }, - { - "epoch": 0.06048069425534408, - "grad_norm": 2386.329833984375, - "learning_rate": 2.994e-05, - "loss": 186.3342, - "step": 14970 - }, - { - "epoch": 0.06052109552071171, - "grad_norm": 365.4548645019531, - "learning_rate": 2.9959999999999998e-05, - "loss": 117.0936, - "step": 14980 - }, - { - "epoch": 0.06056149678607934, - "grad_norm": 557.1009521484375, - "learning_rate": 2.998e-05, - "loss": 98.8324, - "step": 14990 - }, - { - "epoch": 0.06060189805144697, - "grad_norm": 1404.343017578125, - "learning_rate": 3e-05, - "loss": 129.4244, - "step": 15000 - }, - { - "epoch": 0.060642299316814606, - "grad_norm": 1246.394775390625, - "learning_rate": 3.0020000000000004e-05, - "loss": 126.248, - "step": 15010 - }, - { - "epoch": 0.06068270058218223, - "grad_norm": 1066.50048828125, - "learning_rate": 3.004e-05, - "loss": 124.5184, - "step": 15020 - }, - { - "epoch": 0.060723101847549865, - "grad_norm": 705.3773803710938, - "learning_rate": 3.006e-05, - "loss": 133.4564, - "step": 15030 - }, - { - "epoch": 0.0607635031129175, - "grad_norm": 874.49072265625, - "learning_rate": 3.0080000000000003e-05, - "loss": 111.9202, - "step": 15040 - }, - { - "epoch": 0.06080390437828513, - "grad_norm": 725.5115356445312, - "learning_rate": 3.01e-05, - "loss": 116.5958, - "step": 15050 - }, - { - "epoch": 0.06084430564365276, - "grad_norm": 1046.1822509765625, - "learning_rate": 3.0120000000000003e-05, - "loss": 108.4798, - "step": 15060 - }, - { - "epoch": 0.06088470690902039, - "grad_norm": 1189.979248046875, - "learning_rate": 3.0140000000000003e-05, - "loss": 141.0663, - "step": 15070 - }, - { - "epoch": 0.060925108174388024, - "grad_norm": 3112.551513671875, - "learning_rate": 3.016e-05, - "loss": 172.9219, - "step": 15080 - }, - { - "epoch": 0.06096550943975566, - "grad_norm": 2147.462890625, - "learning_rate": 3.0180000000000002e-05, - "loss": 146.7627, - "step": 15090 - }, - { - "epoch": 0.06100591070512328, - "grad_norm": 1260.1737060546875, - "learning_rate": 3.02e-05, - "loss": 151.8984, - "step": 15100 - }, - { - "epoch": 0.061046311970490916, - "grad_norm": 1158.15771484375, - "learning_rate": 3.0220000000000005e-05, - "loss": 116.2214, - "step": 15110 - }, - { - "epoch": 0.06108671323585855, - "grad_norm": 3242.16259765625, - "learning_rate": 3.0240000000000002e-05, - "loss": 153.6299, - "step": 15120 - }, - { - "epoch": 0.06112711450122618, - "grad_norm": 837.7076416015625, - "learning_rate": 3.0259999999999998e-05, - "loss": 84.5595, - "step": 15130 - }, - { - "epoch": 0.06116751576659381, - "grad_norm": 2217.368408203125, - "learning_rate": 3.028e-05, - "loss": 154.1678, - "step": 15140 - }, - { - "epoch": 0.06120791703196144, - "grad_norm": 753.1808471679688, - "learning_rate": 3.03e-05, - "loss": 130.1928, - "step": 15150 - }, - { - "epoch": 0.061248318297329074, - "grad_norm": 1219.47607421875, - "learning_rate": 3.0320000000000004e-05, - "loss": 169.753, - "step": 15160 - }, - { - "epoch": 0.06128871956269671, - "grad_norm": 6474.40478515625, - "learning_rate": 3.034e-05, - "loss": 149.4557, - "step": 15170 - }, - { - "epoch": 0.06132912082806433, - "grad_norm": 5858.99658203125, - "learning_rate": 3.036e-05, - "loss": 191.529, - "step": 15180 - }, - { - "epoch": 0.061369522093431966, - "grad_norm": 1600.056396484375, - "learning_rate": 3.0380000000000004e-05, - "loss": 130.8423, - "step": 15190 - }, - { - "epoch": 0.0614099233587996, - "grad_norm": 814.158935546875, - "learning_rate": 3.04e-05, - "loss": 148.7461, - "step": 15200 - }, - { - "epoch": 0.061450324624167225, - "grad_norm": 844.9783935546875, - "learning_rate": 3.0420000000000004e-05, - "loss": 148.264, - "step": 15210 - }, - { - "epoch": 0.06149072588953486, - "grad_norm": 1373.4136962890625, - "learning_rate": 3.0440000000000003e-05, - "loss": 172.1749, - "step": 15220 - }, - { - "epoch": 0.06153112715490249, - "grad_norm": 897.79248046875, - "learning_rate": 3.046e-05, - "loss": 113.2348, - "step": 15230 - }, - { - "epoch": 0.061571528420270125, - "grad_norm": 1343.7520751953125, - "learning_rate": 3.0480000000000003e-05, - "loss": 98.1409, - "step": 15240 - }, - { - "epoch": 0.06161192968563775, - "grad_norm": 828.697998046875, - "learning_rate": 3.05e-05, - "loss": 162.0212, - "step": 15250 - }, - { - "epoch": 0.061652330951005384, - "grad_norm": 366.743896484375, - "learning_rate": 3.0520000000000006e-05, - "loss": 75.6308, - "step": 15260 - }, - { - "epoch": 0.06169273221637302, - "grad_norm": 2019.9920654296875, - "learning_rate": 3.054e-05, - "loss": 237.3679, - "step": 15270 - }, - { - "epoch": 0.06173313348174065, - "grad_norm": 1361.7977294921875, - "learning_rate": 3.056e-05, - "loss": 108.2089, - "step": 15280 - }, - { - "epoch": 0.061773534747108276, - "grad_norm": 1101.6763916015625, - "learning_rate": 3.058e-05, - "loss": 90.1339, - "step": 15290 - }, - { - "epoch": 0.06181393601247591, - "grad_norm": 1785.1817626953125, - "learning_rate": 3.06e-05, - "loss": 142.6872, - "step": 15300 - }, - { - "epoch": 0.06185433727784354, - "grad_norm": 1439.0386962890625, - "learning_rate": 3.062e-05, - "loss": 135.7678, - "step": 15310 - }, - { - "epoch": 0.061894738543211175, - "grad_norm": 1499.1324462890625, - "learning_rate": 3.0640000000000005e-05, - "loss": 123.4259, - "step": 15320 - }, - { - "epoch": 0.0619351398085788, - "grad_norm": 1074.73681640625, - "learning_rate": 3.066e-05, - "loss": 98.1504, - "step": 15330 - }, - { - "epoch": 0.061975541073946434, - "grad_norm": 1967.539306640625, - "learning_rate": 3.0680000000000004e-05, - "loss": 151.5911, - "step": 15340 - }, - { - "epoch": 0.06201594233931407, - "grad_norm": 485.7192077636719, - "learning_rate": 3.07e-05, - "loss": 137.3458, - "step": 15350 - }, - { - "epoch": 0.0620563436046817, - "grad_norm": 753.7666015625, - "learning_rate": 3.072e-05, - "loss": 103.7652, - "step": 15360 - }, - { - "epoch": 0.06209674487004933, - "grad_norm": 1353.95556640625, - "learning_rate": 3.074e-05, - "loss": 117.3775, - "step": 15370 - }, - { - "epoch": 0.06213714613541696, - "grad_norm": 438.0009765625, - "learning_rate": 3.076e-05, - "loss": 115.8443, - "step": 15380 - }, - { - "epoch": 0.06217754740078459, - "grad_norm": 1873.119140625, - "learning_rate": 3.078e-05, - "loss": 134.3721, - "step": 15390 - }, - { - "epoch": 0.062217948666152226, - "grad_norm": 1559.0321044921875, - "learning_rate": 3.08e-05, - "loss": 146.9015, - "step": 15400 - }, - { - "epoch": 0.06225834993151985, - "grad_norm": 953.1947021484375, - "learning_rate": 3.082e-05, - "loss": 146.0465, - "step": 15410 - }, - { - "epoch": 0.062298751196887485, - "grad_norm": 1465.51806640625, - "learning_rate": 3.084e-05, - "loss": 127.4744, - "step": 15420 - }, - { - "epoch": 0.06233915246225512, - "grad_norm": 871.5877685546875, - "learning_rate": 3.086e-05, - "loss": 110.5899, - "step": 15430 - }, - { - "epoch": 0.06237955372762275, - "grad_norm": 2501.28564453125, - "learning_rate": 3.088e-05, - "loss": 202.4409, - "step": 15440 - }, - { - "epoch": 0.06241995499299038, - "grad_norm": 1591.2879638671875, - "learning_rate": 3.09e-05, - "loss": 129.12, - "step": 15450 - }, - { - "epoch": 0.06246035625835801, - "grad_norm": 838.3685302734375, - "learning_rate": 3.092e-05, - "loss": 102.4406, - "step": 15460 - }, - { - "epoch": 0.06250075752372564, - "grad_norm": 1001.7224731445312, - "learning_rate": 3.0940000000000005e-05, - "loss": 108.1954, - "step": 15470 - }, - { - "epoch": 0.06254115878909328, - "grad_norm": 1513.2293701171875, - "learning_rate": 3.096e-05, - "loss": 103.5236, - "step": 15480 - }, - { - "epoch": 0.0625815600544609, - "grad_norm": 1613.0614013671875, - "learning_rate": 3.0980000000000005e-05, - "loss": 159.1191, - "step": 15490 - }, - { - "epoch": 0.06262196131982854, - "grad_norm": 1024.2308349609375, - "learning_rate": 3.1e-05, - "loss": 131.2479, - "step": 15500 - }, - { - "epoch": 0.06266236258519617, - "grad_norm": 2692.8251953125, - "learning_rate": 3.102e-05, - "loss": 125.2973, - "step": 15510 - }, - { - "epoch": 0.0627027638505638, - "grad_norm": 1083.9881591796875, - "learning_rate": 3.104e-05, - "loss": 121.1962, - "step": 15520 - }, - { - "epoch": 0.06274316511593143, - "grad_norm": 1520.9947509765625, - "learning_rate": 3.106e-05, - "loss": 127.8408, - "step": 15530 - }, - { - "epoch": 0.06278356638129906, - "grad_norm": 871.9234008789062, - "learning_rate": 3.108e-05, - "loss": 107.3415, - "step": 15540 - }, - { - "epoch": 0.06282396764666669, - "grad_norm": 1313.199462890625, - "learning_rate": 3.1100000000000004e-05, - "loss": 124.2825, - "step": 15550 - }, - { - "epoch": 0.06286436891203433, - "grad_norm": 997.6826782226562, - "learning_rate": 3.112e-05, - "loss": 116.8165, - "step": 15560 - }, - { - "epoch": 0.06290477017740195, - "grad_norm": 713.2454833984375, - "learning_rate": 3.1140000000000003e-05, - "loss": 142.1327, - "step": 15570 - }, - { - "epoch": 0.06294517144276959, - "grad_norm": 841.623779296875, - "learning_rate": 3.116e-05, - "loss": 136.2796, - "step": 15580 - }, - { - "epoch": 0.06298557270813722, - "grad_norm": 1948.3192138671875, - "learning_rate": 3.118e-05, - "loss": 192.9212, - "step": 15590 - }, - { - "epoch": 0.06302597397350485, - "grad_norm": 1726.7506103515625, - "learning_rate": 3.12e-05, - "loss": 172.2863, - "step": 15600 - }, - { - "epoch": 0.06306637523887249, - "grad_norm": 1170.4307861328125, - "learning_rate": 3.122e-05, - "loss": 130.6975, - "step": 15610 - }, - { - "epoch": 0.06310677650424011, - "grad_norm": 1774.3511962890625, - "learning_rate": 3.1240000000000006e-05, - "loss": 137.2435, - "step": 15620 - }, - { - "epoch": 0.06314717776960774, - "grad_norm": 4738.48193359375, - "learning_rate": 3.126e-05, - "loss": 140.0143, - "step": 15630 - }, - { - "epoch": 0.06318757903497538, - "grad_norm": 793.2022094726562, - "learning_rate": 3.1280000000000005e-05, - "loss": 163.8472, - "step": 15640 - }, - { - "epoch": 0.063227980300343, - "grad_norm": 0.0, - "learning_rate": 3.13e-05, - "loss": 113.3165, - "step": 15650 - }, - { - "epoch": 0.06326838156571064, - "grad_norm": 1195.0482177734375, - "learning_rate": 3.132e-05, - "loss": 151.147, - "step": 15660 - }, - { - "epoch": 0.06330878283107827, - "grad_norm": 1180.14453125, - "learning_rate": 3.134e-05, - "loss": 100.6629, - "step": 15670 - }, - { - "epoch": 0.0633491840964459, - "grad_norm": 926.8880615234375, - "learning_rate": 3.136e-05, - "loss": 161.5127, - "step": 15680 - }, - { - "epoch": 0.06338958536181354, - "grad_norm": 1517.244873046875, - "learning_rate": 3.138e-05, - "loss": 164.7377, - "step": 15690 - }, - { - "epoch": 0.06342998662718116, - "grad_norm": 942.4938354492188, - "learning_rate": 3.1400000000000004e-05, - "loss": 127.0913, - "step": 15700 - }, - { - "epoch": 0.06347038789254879, - "grad_norm": 519.3255004882812, - "learning_rate": 3.142e-05, - "loss": 110.9436, - "step": 15710 - }, - { - "epoch": 0.06351078915791643, - "grad_norm": 931.0354614257812, - "learning_rate": 3.1440000000000004e-05, - "loss": 155.0716, - "step": 15720 - }, - { - "epoch": 0.06355119042328405, - "grad_norm": 2942.558837890625, - "learning_rate": 3.146e-05, - "loss": 106.2107, - "step": 15730 - }, - { - "epoch": 0.0635915916886517, - "grad_norm": 1383.4794921875, - "learning_rate": 3.1480000000000004e-05, - "loss": 146.2436, - "step": 15740 - }, - { - "epoch": 0.06363199295401932, - "grad_norm": 3386.990966796875, - "learning_rate": 3.15e-05, - "loss": 148.2886, - "step": 15750 - }, - { - "epoch": 0.06367239421938695, - "grad_norm": 1014.3907470703125, - "learning_rate": 3.1519999999999996e-05, - "loss": 120.5053, - "step": 15760 - }, - { - "epoch": 0.06371279548475459, - "grad_norm": 762.3274536132812, - "learning_rate": 3.154e-05, - "loss": 139.9023, - "step": 15770 - }, - { - "epoch": 0.06375319675012221, - "grad_norm": 1421.413330078125, - "learning_rate": 3.156e-05, - "loss": 146.9265, - "step": 15780 - }, - { - "epoch": 0.06379359801548984, - "grad_norm": 1463.6063232421875, - "learning_rate": 3.1580000000000006e-05, - "loss": 134.0383, - "step": 15790 - }, - { - "epoch": 0.06383399928085748, - "grad_norm": 1137.2342529296875, - "learning_rate": 3.16e-05, - "loss": 161.4233, - "step": 15800 - }, - { - "epoch": 0.0638744005462251, - "grad_norm": 1729.1817626953125, - "learning_rate": 3.162e-05, - "loss": 109.8035, - "step": 15810 - }, - { - "epoch": 0.06391480181159274, - "grad_norm": 757.51025390625, - "learning_rate": 3.164e-05, - "loss": 83.0763, - "step": 15820 - }, - { - "epoch": 0.06395520307696037, - "grad_norm": 5324.74169921875, - "learning_rate": 3.166e-05, - "loss": 132.4072, - "step": 15830 - }, - { - "epoch": 0.063995604342328, - "grad_norm": 1629.523681640625, - "learning_rate": 3.168e-05, - "loss": 84.4823, - "step": 15840 - }, - { - "epoch": 0.06403600560769564, - "grad_norm": 1089.279296875, - "learning_rate": 3.1700000000000005e-05, - "loss": 130.4876, - "step": 15850 - }, - { - "epoch": 0.06407640687306326, - "grad_norm": 1170.0994873046875, - "learning_rate": 3.172e-05, - "loss": 144.4939, - "step": 15860 - }, - { - "epoch": 0.06411680813843089, - "grad_norm": 1094.1689453125, - "learning_rate": 3.1740000000000004e-05, - "loss": 124.1748, - "step": 15870 - }, - { - "epoch": 0.06415720940379853, - "grad_norm": 742.95703125, - "learning_rate": 3.176e-05, - "loss": 85.3714, - "step": 15880 - }, - { - "epoch": 0.06419761066916616, - "grad_norm": 1239.1507568359375, - "learning_rate": 3.1780000000000004e-05, - "loss": 215.5378, - "step": 15890 - }, - { - "epoch": 0.0642380119345338, - "grad_norm": 734.71484375, - "learning_rate": 3.18e-05, - "loss": 109.2592, - "step": 15900 - }, - { - "epoch": 0.06427841319990142, - "grad_norm": 1894.3482666015625, - "learning_rate": 3.182e-05, - "loss": 115.1967, - "step": 15910 - }, - { - "epoch": 0.06431881446526905, - "grad_norm": 626.6444091796875, - "learning_rate": 3.184e-05, - "loss": 85.9778, - "step": 15920 - }, - { - "epoch": 0.06435921573063669, - "grad_norm": 1511.507568359375, - "learning_rate": 3.186e-05, - "loss": 182.5304, - "step": 15930 - }, - { - "epoch": 0.06439961699600431, - "grad_norm": 693.2582397460938, - "learning_rate": 3.188e-05, - "loss": 90.9457, - "step": 15940 - }, - { - "epoch": 0.06444001826137194, - "grad_norm": 683.79150390625, - "learning_rate": 3.19e-05, - "loss": 98.913, - "step": 15950 - }, - { - "epoch": 0.06448041952673958, - "grad_norm": 1719.1845703125, - "learning_rate": 3.192e-05, - "loss": 129.3339, - "step": 15960 - }, - { - "epoch": 0.0645208207921072, - "grad_norm": 1166.0206298828125, - "learning_rate": 3.194e-05, - "loss": 153.576, - "step": 15970 - }, - { - "epoch": 0.06456122205747485, - "grad_norm": 1580.4215087890625, - "learning_rate": 3.196e-05, - "loss": 170.2588, - "step": 15980 - }, - { - "epoch": 0.06460162332284247, - "grad_norm": 984.2314453125, - "learning_rate": 3.198e-05, - "loss": 156.6156, - "step": 15990 - }, - { - "epoch": 0.0646420245882101, - "grad_norm": 1595.7958984375, - "learning_rate": 3.2000000000000005e-05, - "loss": 174.3226, - "step": 16000 - }, - { - "epoch": 0.06468242585357774, - "grad_norm": 12306.052734375, - "learning_rate": 3.202e-05, - "loss": 203.855, - "step": 16010 - }, - { - "epoch": 0.06472282711894536, - "grad_norm": 2370.59765625, - "learning_rate": 3.2040000000000005e-05, - "loss": 144.5134, - "step": 16020 - }, - { - "epoch": 0.06476322838431299, - "grad_norm": 1784.508056640625, - "learning_rate": 3.206e-05, - "loss": 159.992, - "step": 16030 - }, - { - "epoch": 0.06480362964968063, - "grad_norm": 894.4352416992188, - "learning_rate": 3.208e-05, - "loss": 162.522, - "step": 16040 - }, - { - "epoch": 0.06484403091504826, - "grad_norm": 1397.84814453125, - "learning_rate": 3.21e-05, - "loss": 113.1159, - "step": 16050 - }, - { - "epoch": 0.0648844321804159, - "grad_norm": 2406.892578125, - "learning_rate": 3.212e-05, - "loss": 104.4846, - "step": 16060 - }, - { - "epoch": 0.06492483344578352, - "grad_norm": 1434.818115234375, - "learning_rate": 3.214e-05, - "loss": 142.6461, - "step": 16070 - }, - { - "epoch": 0.06496523471115115, - "grad_norm": 3077.646240234375, - "learning_rate": 3.2160000000000004e-05, - "loss": 101.5211, - "step": 16080 - }, - { - "epoch": 0.06500563597651879, - "grad_norm": 1358.7418212890625, - "learning_rate": 3.218e-05, - "loss": 129.6278, - "step": 16090 - }, - { - "epoch": 0.06504603724188641, - "grad_norm": 11825.310546875, - "learning_rate": 3.2200000000000003e-05, - "loss": 145.8424, - "step": 16100 - }, - { - "epoch": 0.06508643850725404, - "grad_norm": 1621.6112060546875, - "learning_rate": 3.222e-05, - "loss": 117.4145, - "step": 16110 - }, - { - "epoch": 0.06512683977262168, - "grad_norm": 1896.356689453125, - "learning_rate": 3.224e-05, - "loss": 103.7541, - "step": 16120 - }, - { - "epoch": 0.0651672410379893, - "grad_norm": 8822.8671875, - "learning_rate": 3.226e-05, - "loss": 174.6907, - "step": 16130 - }, - { - "epoch": 0.06520764230335695, - "grad_norm": 1795.896728515625, - "learning_rate": 3.2279999999999996e-05, - "loss": 187.9359, - "step": 16140 - }, - { - "epoch": 0.06524804356872457, - "grad_norm": 870.3344116210938, - "learning_rate": 3.2300000000000006e-05, - "loss": 113.2782, - "step": 16150 - }, - { - "epoch": 0.0652884448340922, - "grad_norm": 1710.718505859375, - "learning_rate": 3.232e-05, - "loss": 114.0318, - "step": 16160 - }, - { - "epoch": 0.06532884609945984, - "grad_norm": 879.309326171875, - "learning_rate": 3.2340000000000005e-05, - "loss": 159.0129, - "step": 16170 - }, - { - "epoch": 0.06536924736482747, - "grad_norm": 1179.8787841796875, - "learning_rate": 3.236e-05, - "loss": 108.0792, - "step": 16180 - }, - { - "epoch": 0.06540964863019509, - "grad_norm": 1053.493896484375, - "learning_rate": 3.238e-05, - "loss": 162.0999, - "step": 16190 - }, - { - "epoch": 0.06545004989556273, - "grad_norm": 1169.240478515625, - "learning_rate": 3.24e-05, - "loss": 117.8543, - "step": 16200 - }, - { - "epoch": 0.06549045116093036, - "grad_norm": 1077.4576416015625, - "learning_rate": 3.242e-05, - "loss": 161.1472, - "step": 16210 - }, - { - "epoch": 0.065530852426298, - "grad_norm": 827.2643432617188, - "learning_rate": 3.244e-05, - "loss": 122.5282, - "step": 16220 - }, - { - "epoch": 0.06557125369166562, - "grad_norm": 1683.487060546875, - "learning_rate": 3.2460000000000004e-05, - "loss": 163.201, - "step": 16230 - }, - { - "epoch": 0.06561165495703325, - "grad_norm": 1506.306884765625, - "learning_rate": 3.248e-05, - "loss": 123.3369, - "step": 16240 - }, - { - "epoch": 0.06565205622240089, - "grad_norm": 410.9195861816406, - "learning_rate": 3.2500000000000004e-05, - "loss": 102.7201, - "step": 16250 - }, - { - "epoch": 0.06569245748776852, - "grad_norm": 484.0295104980469, - "learning_rate": 3.252e-05, - "loss": 191.6438, - "step": 16260 - }, - { - "epoch": 0.06573285875313614, - "grad_norm": 649.029541015625, - "learning_rate": 3.2540000000000004e-05, - "loss": 139.1572, - "step": 16270 - }, - { - "epoch": 0.06577326001850378, - "grad_norm": 2727.947509765625, - "learning_rate": 3.256e-05, - "loss": 198.9751, - "step": 16280 - }, - { - "epoch": 0.06581366128387141, - "grad_norm": 879.7364501953125, - "learning_rate": 3.2579999999999996e-05, - "loss": 119.9532, - "step": 16290 - }, - { - "epoch": 0.06585406254923905, - "grad_norm": 2567.528564453125, - "learning_rate": 3.26e-05, - "loss": 182.2825, - "step": 16300 - }, - { - "epoch": 0.06589446381460667, - "grad_norm": 1746.7139892578125, - "learning_rate": 3.262e-05, - "loss": 109.7434, - "step": 16310 - }, - { - "epoch": 0.0659348650799743, - "grad_norm": 1511.91845703125, - "learning_rate": 3.2640000000000006e-05, - "loss": 117.7299, - "step": 16320 - }, - { - "epoch": 0.06597526634534194, - "grad_norm": 626.0679321289062, - "learning_rate": 3.266e-05, - "loss": 154.5593, - "step": 16330 - }, - { - "epoch": 0.06601566761070957, - "grad_norm": 1950.873779296875, - "learning_rate": 3.268e-05, - "loss": 134.659, - "step": 16340 - }, - { - "epoch": 0.06605606887607719, - "grad_norm": 2815.42919921875, - "learning_rate": 3.27e-05, - "loss": 189.5417, - "step": 16350 - }, - { - "epoch": 0.06609647014144483, - "grad_norm": 791.9197387695312, - "learning_rate": 3.272e-05, - "loss": 104.6571, - "step": 16360 - }, - { - "epoch": 0.06613687140681246, - "grad_norm": 961.1111450195312, - "learning_rate": 3.274e-05, - "loss": 110.3847, - "step": 16370 - }, - { - "epoch": 0.0661772726721801, - "grad_norm": 1224.700927734375, - "learning_rate": 3.2760000000000005e-05, - "loss": 129.7902, - "step": 16380 - }, - { - "epoch": 0.06621767393754772, - "grad_norm": 892.2734985351562, - "learning_rate": 3.278e-05, - "loss": 131.0276, - "step": 16390 - }, - { - "epoch": 0.06625807520291535, - "grad_norm": 2211.9521484375, - "learning_rate": 3.2800000000000004e-05, - "loss": 102.1123, - "step": 16400 - }, - { - "epoch": 0.06629847646828299, - "grad_norm": 2223.035888671875, - "learning_rate": 3.282e-05, - "loss": 130.2351, - "step": 16410 - }, - { - "epoch": 0.06633887773365062, - "grad_norm": 896.9553833007812, - "learning_rate": 3.2840000000000004e-05, - "loss": 113.7288, - "step": 16420 - }, - { - "epoch": 0.06637927899901824, - "grad_norm": 0.0, - "learning_rate": 3.286e-05, - "loss": 70.063, - "step": 16430 - }, - { - "epoch": 0.06641968026438588, - "grad_norm": 665.3837890625, - "learning_rate": 3.288e-05, - "loss": 102.8864, - "step": 16440 - }, - { - "epoch": 0.06646008152975351, - "grad_norm": 529.5450439453125, - "learning_rate": 3.29e-05, - "loss": 107.0171, - "step": 16450 - }, - { - "epoch": 0.06650048279512115, - "grad_norm": 445.30950927734375, - "learning_rate": 3.292e-05, - "loss": 118.4387, - "step": 16460 - }, - { - "epoch": 0.06654088406048878, - "grad_norm": 1200.6650390625, - "learning_rate": 3.2940000000000006e-05, - "loss": 115.6011, - "step": 16470 - }, - { - "epoch": 0.0665812853258564, - "grad_norm": 997.8995971679688, - "learning_rate": 3.296e-05, - "loss": 101.1121, - "step": 16480 - }, - { - "epoch": 0.06662168659122404, - "grad_norm": 1181.9188232421875, - "learning_rate": 3.298e-05, - "loss": 94.8608, - "step": 16490 - }, - { - "epoch": 0.06666208785659167, - "grad_norm": 816.2405395507812, - "learning_rate": 3.3e-05, - "loss": 125.1284, - "step": 16500 - }, - { - "epoch": 0.0667024891219593, - "grad_norm": 703.4663696289062, - "learning_rate": 3.302e-05, - "loss": 123.1175, - "step": 16510 - }, - { - "epoch": 0.06674289038732693, - "grad_norm": 671.1204833984375, - "learning_rate": 3.304e-05, - "loss": 133.0177, - "step": 16520 - }, - { - "epoch": 0.06678329165269456, - "grad_norm": 635.96728515625, - "learning_rate": 3.3060000000000005e-05, - "loss": 117.4494, - "step": 16530 - }, - { - "epoch": 0.0668236929180622, - "grad_norm": 0.0, - "learning_rate": 3.308e-05, - "loss": 84.0806, - "step": 16540 - }, - { - "epoch": 0.06686409418342983, - "grad_norm": 1110.686279296875, - "learning_rate": 3.3100000000000005e-05, - "loss": 149.0371, - "step": 16550 - }, - { - "epoch": 0.06690449544879745, - "grad_norm": 1048.002197265625, - "learning_rate": 3.312e-05, - "loss": 118.8765, - "step": 16560 - }, - { - "epoch": 0.06694489671416509, - "grad_norm": 532.6653442382812, - "learning_rate": 3.314e-05, - "loss": 115.488, - "step": 16570 - }, - { - "epoch": 0.06698529797953272, - "grad_norm": 1357.1689453125, - "learning_rate": 3.316e-05, - "loss": 121.7834, - "step": 16580 - }, - { - "epoch": 0.06702569924490034, - "grad_norm": 3220.8486328125, - "learning_rate": 3.318e-05, - "loss": 146.6197, - "step": 16590 - }, - { - "epoch": 0.06706610051026798, - "grad_norm": 809.4137573242188, - "learning_rate": 3.32e-05, - "loss": 157.1161, - "step": 16600 - }, - { - "epoch": 0.06710650177563561, - "grad_norm": 479.6476135253906, - "learning_rate": 3.3220000000000004e-05, - "loss": 123.665, - "step": 16610 - }, - { - "epoch": 0.06714690304100325, - "grad_norm": 1147.013916015625, - "learning_rate": 3.324e-05, - "loss": 151.3696, - "step": 16620 - }, - { - "epoch": 0.06718730430637088, - "grad_norm": 1325.5657958984375, - "learning_rate": 3.3260000000000003e-05, - "loss": 228.0247, - "step": 16630 - }, - { - "epoch": 0.0672277055717385, - "grad_norm": 1421.17578125, - "learning_rate": 3.328e-05, - "loss": 174.1318, - "step": 16640 - }, - { - "epoch": 0.06726810683710614, - "grad_norm": 872.6542358398438, - "learning_rate": 3.33e-05, - "loss": 118.2037, - "step": 16650 - }, - { - "epoch": 0.06730850810247377, - "grad_norm": 936.040283203125, - "learning_rate": 3.332e-05, - "loss": 113.6235, - "step": 16660 - }, - { - "epoch": 0.0673489093678414, - "grad_norm": 829.9392700195312, - "learning_rate": 3.3339999999999996e-05, - "loss": 124.7012, - "step": 16670 - }, - { - "epoch": 0.06738931063320903, - "grad_norm": 829.8602294921875, - "learning_rate": 3.336e-05, - "loss": 122.1225, - "step": 16680 - }, - { - "epoch": 0.06742971189857666, - "grad_norm": 591.7499389648438, - "learning_rate": 3.338e-05, - "loss": 117.5648, - "step": 16690 - }, - { - "epoch": 0.0674701131639443, - "grad_norm": 585.739013671875, - "learning_rate": 3.3400000000000005e-05, - "loss": 120.4557, - "step": 16700 - }, - { - "epoch": 0.06751051442931193, - "grad_norm": 1151.9459228515625, - "learning_rate": 3.342e-05, - "loss": 122.6646, - "step": 16710 - }, - { - "epoch": 0.06755091569467955, - "grad_norm": 1100.1302490234375, - "learning_rate": 3.344e-05, - "loss": 124.8112, - "step": 16720 - }, - { - "epoch": 0.06759131696004719, - "grad_norm": 1049.39501953125, - "learning_rate": 3.346e-05, - "loss": 131.1703, - "step": 16730 - }, - { - "epoch": 0.06763171822541482, - "grad_norm": 976.2811889648438, - "learning_rate": 3.348e-05, - "loss": 104.992, - "step": 16740 - }, - { - "epoch": 0.06767211949078245, - "grad_norm": 1059.683837890625, - "learning_rate": 3.35e-05, - "loss": 109.1284, - "step": 16750 - }, - { - "epoch": 0.06771252075615009, - "grad_norm": 1005.8837280273438, - "learning_rate": 3.3520000000000004e-05, - "loss": 107.3349, - "step": 16760 - }, - { - "epoch": 0.06775292202151771, - "grad_norm": 850.0352783203125, - "learning_rate": 3.354e-05, - "loss": 126.588, - "step": 16770 - }, - { - "epoch": 0.06779332328688535, - "grad_norm": 1083.05712890625, - "learning_rate": 3.3560000000000004e-05, - "loss": 139.6427, - "step": 16780 - }, - { - "epoch": 0.06783372455225298, - "grad_norm": 1554.07080078125, - "learning_rate": 3.358e-05, - "loss": 145.7689, - "step": 16790 - }, - { - "epoch": 0.0678741258176206, - "grad_norm": 3450.517333984375, - "learning_rate": 3.3600000000000004e-05, - "loss": 140.9184, - "step": 16800 - }, - { - "epoch": 0.06791452708298824, - "grad_norm": 1706.325439453125, - "learning_rate": 3.362e-05, - "loss": 199.2792, - "step": 16810 - }, - { - "epoch": 0.06795492834835587, - "grad_norm": 1185.1922607421875, - "learning_rate": 3.3639999999999996e-05, - "loss": 140.3882, - "step": 16820 - }, - { - "epoch": 0.0679953296137235, - "grad_norm": 1169.992919921875, - "learning_rate": 3.366e-05, - "loss": 123.5538, - "step": 16830 - }, - { - "epoch": 0.06803573087909114, - "grad_norm": 1750.4180908203125, - "learning_rate": 3.368e-05, - "loss": 159.0448, - "step": 16840 - }, - { - "epoch": 0.06807613214445876, - "grad_norm": 1771.9908447265625, - "learning_rate": 3.3700000000000006e-05, - "loss": 153.4333, - "step": 16850 - }, - { - "epoch": 0.0681165334098264, - "grad_norm": 2022.061279296875, - "learning_rate": 3.372e-05, - "loss": 139.1972, - "step": 16860 - }, - { - "epoch": 0.06815693467519403, - "grad_norm": 1393.0201416015625, - "learning_rate": 3.374e-05, - "loss": 152.1366, - "step": 16870 - }, - { - "epoch": 0.06819733594056165, - "grad_norm": 2371.81494140625, - "learning_rate": 3.376e-05, - "loss": 142.447, - "step": 16880 - }, - { - "epoch": 0.0682377372059293, - "grad_norm": 1049.164794921875, - "learning_rate": 3.378e-05, - "loss": 115.6586, - "step": 16890 - }, - { - "epoch": 0.06827813847129692, - "grad_norm": 1043.5615234375, - "learning_rate": 3.38e-05, - "loss": 174.1537, - "step": 16900 - }, - { - "epoch": 0.06831853973666455, - "grad_norm": 1559.866455078125, - "learning_rate": 3.3820000000000005e-05, - "loss": 108.5037, - "step": 16910 - }, - { - "epoch": 0.06835894100203219, - "grad_norm": 2484.740478515625, - "learning_rate": 3.384e-05, - "loss": 184.837, - "step": 16920 - }, - { - "epoch": 0.06839934226739981, - "grad_norm": 775.7838745117188, - "learning_rate": 3.3860000000000004e-05, - "loss": 126.9209, - "step": 16930 - }, - { - "epoch": 0.06843974353276745, - "grad_norm": 471.7488708496094, - "learning_rate": 3.388e-05, - "loss": 135.7176, - "step": 16940 - }, - { - "epoch": 0.06848014479813508, - "grad_norm": 969.9213256835938, - "learning_rate": 3.3900000000000004e-05, - "loss": 95.2846, - "step": 16950 - }, - { - "epoch": 0.0685205460635027, - "grad_norm": 1296.1826171875, - "learning_rate": 3.392e-05, - "loss": 159.3641, - "step": 16960 - }, - { - "epoch": 0.06856094732887034, - "grad_norm": 2220.822021484375, - "learning_rate": 3.394e-05, - "loss": 171.076, - "step": 16970 - }, - { - "epoch": 0.06860134859423797, - "grad_norm": 764.9531860351562, - "learning_rate": 3.396e-05, - "loss": 139.6084, - "step": 16980 - }, - { - "epoch": 0.0686417498596056, - "grad_norm": 2453.2900390625, - "learning_rate": 3.398e-05, - "loss": 162.632, - "step": 16990 - }, - { - "epoch": 0.06868215112497324, - "grad_norm": 1016.1632690429688, - "learning_rate": 3.4000000000000007e-05, - "loss": 236.2789, - "step": 17000 - }, - { - "epoch": 0.06872255239034086, - "grad_norm": 1268.531494140625, - "learning_rate": 3.402e-05, - "loss": 119.6548, - "step": 17010 - }, - { - "epoch": 0.0687629536557085, - "grad_norm": 0.0, - "learning_rate": 3.404e-05, - "loss": 115.3437, - "step": 17020 - }, - { - "epoch": 0.06880335492107613, - "grad_norm": 867.4444580078125, - "learning_rate": 3.406e-05, - "loss": 173.6102, - "step": 17030 - }, - { - "epoch": 0.06884375618644376, - "grad_norm": 1306.5975341796875, - "learning_rate": 3.408e-05, - "loss": 147.4384, - "step": 17040 - }, - { - "epoch": 0.0688841574518114, - "grad_norm": 701.0490112304688, - "learning_rate": 3.41e-05, - "loss": 138.4343, - "step": 17050 - }, - { - "epoch": 0.06892455871717902, - "grad_norm": 1092.9249267578125, - "learning_rate": 3.412e-05, - "loss": 112.9486, - "step": 17060 - }, - { - "epoch": 0.06896495998254665, - "grad_norm": 1403.239990234375, - "learning_rate": 3.414e-05, - "loss": 106.8123, - "step": 17070 - }, - { - "epoch": 0.06900536124791429, - "grad_norm": 2368.7109375, - "learning_rate": 3.4160000000000005e-05, - "loss": 90.9559, - "step": 17080 - }, - { - "epoch": 0.06904576251328191, - "grad_norm": 723.0379028320312, - "learning_rate": 3.418e-05, - "loss": 92.075, - "step": 17090 - }, - { - "epoch": 0.06908616377864955, - "grad_norm": 1454.345703125, - "learning_rate": 3.4200000000000005e-05, - "loss": 122.138, - "step": 17100 - }, - { - "epoch": 0.06912656504401718, - "grad_norm": 849.4160766601562, - "learning_rate": 3.422e-05, - "loss": 132.3787, - "step": 17110 - }, - { - "epoch": 0.0691669663093848, - "grad_norm": 1275.969482421875, - "learning_rate": 3.424e-05, - "loss": 80.5995, - "step": 17120 - }, - { - "epoch": 0.06920736757475245, - "grad_norm": 977.9779052734375, - "learning_rate": 3.426e-05, - "loss": 126.0984, - "step": 17130 - }, - { - "epoch": 0.06924776884012007, - "grad_norm": 1691.71826171875, - "learning_rate": 3.4280000000000004e-05, - "loss": 123.841, - "step": 17140 - }, - { - "epoch": 0.0692881701054877, - "grad_norm": 3872.140869140625, - "learning_rate": 3.430000000000001e-05, - "loss": 148.4945, - "step": 17150 - }, - { - "epoch": 0.06932857137085534, - "grad_norm": 3100.976318359375, - "learning_rate": 3.4320000000000003e-05, - "loss": 117.4309, - "step": 17160 - }, - { - "epoch": 0.06936897263622296, - "grad_norm": 1637.591552734375, - "learning_rate": 3.434e-05, - "loss": 119.0649, - "step": 17170 - }, - { - "epoch": 0.0694093739015906, - "grad_norm": 8878.0693359375, - "learning_rate": 3.436e-05, - "loss": 140.0768, - "step": 17180 - }, - { - "epoch": 0.06944977516695823, - "grad_norm": 959.3743286132812, - "learning_rate": 3.438e-05, - "loss": 172.569, - "step": 17190 - }, - { - "epoch": 0.06949017643232586, - "grad_norm": 520.9686889648438, - "learning_rate": 3.4399999999999996e-05, - "loss": 93.1335, - "step": 17200 - }, - { - "epoch": 0.0695305776976935, - "grad_norm": 1169.4296875, - "learning_rate": 3.442e-05, - "loss": 109.4803, - "step": 17210 - }, - { - "epoch": 0.06957097896306112, - "grad_norm": 2471.82568359375, - "learning_rate": 3.444e-05, - "loss": 149.8675, - "step": 17220 - }, - { - "epoch": 0.06961138022842875, - "grad_norm": 1193.904541015625, - "learning_rate": 3.4460000000000005e-05, - "loss": 137.5065, - "step": 17230 - }, - { - "epoch": 0.06965178149379639, - "grad_norm": 796.9132080078125, - "learning_rate": 3.448e-05, - "loss": 89.8475, - "step": 17240 - }, - { - "epoch": 0.06969218275916401, - "grad_norm": 688.8414916992188, - "learning_rate": 3.45e-05, - "loss": 125.9576, - "step": 17250 - }, - { - "epoch": 0.06973258402453165, - "grad_norm": 480.16009521484375, - "learning_rate": 3.452e-05, - "loss": 78.7464, - "step": 17260 - }, - { - "epoch": 0.06977298528989928, - "grad_norm": 783.6867065429688, - "learning_rate": 3.454e-05, - "loss": 108.8815, - "step": 17270 - }, - { - "epoch": 0.06981338655526691, - "grad_norm": 0.0, - "learning_rate": 3.456e-05, - "loss": 116.7318, - "step": 17280 - }, - { - "epoch": 0.06985378782063455, - "grad_norm": 972.9898681640625, - "learning_rate": 3.4580000000000004e-05, - "loss": 163.7325, - "step": 17290 - }, - { - "epoch": 0.06989418908600217, - "grad_norm": 2305.32470703125, - "learning_rate": 3.46e-05, - "loss": 102.6796, - "step": 17300 - }, - { - "epoch": 0.0699345903513698, - "grad_norm": 985.5175170898438, - "learning_rate": 3.4620000000000004e-05, - "loss": 133.6453, - "step": 17310 - }, - { - "epoch": 0.06997499161673744, - "grad_norm": 966.200439453125, - "learning_rate": 3.464e-05, - "loss": 133.9981, - "step": 17320 - }, - { - "epoch": 0.07001539288210507, - "grad_norm": 582.17041015625, - "learning_rate": 3.4660000000000004e-05, - "loss": 119.5202, - "step": 17330 - }, - { - "epoch": 0.0700557941474727, - "grad_norm": 841.1046752929688, - "learning_rate": 3.468e-05, - "loss": 133.1583, - "step": 17340 - }, - { - "epoch": 0.07009619541284033, - "grad_norm": 675.8413696289062, - "learning_rate": 3.4699999999999996e-05, - "loss": 139.8855, - "step": 17350 - }, - { - "epoch": 0.07013659667820796, - "grad_norm": 957.8898315429688, - "learning_rate": 3.472e-05, - "loss": 137.3781, - "step": 17360 - }, - { - "epoch": 0.0701769979435756, - "grad_norm": 3106.9208984375, - "learning_rate": 3.474e-05, - "loss": 167.9274, - "step": 17370 - }, - { - "epoch": 0.07021739920894322, - "grad_norm": 1493.3551025390625, - "learning_rate": 3.4760000000000006e-05, - "loss": 163.2431, - "step": 17380 - }, - { - "epoch": 0.07025780047431085, - "grad_norm": 993.3938598632812, - "learning_rate": 3.478e-05, - "loss": 114.7312, - "step": 17390 - }, - { - "epoch": 0.07029820173967849, - "grad_norm": 1034.116943359375, - "learning_rate": 3.48e-05, - "loss": 70.717, - "step": 17400 - }, - { - "epoch": 0.07033860300504612, - "grad_norm": 921.6234741210938, - "learning_rate": 3.482e-05, - "loss": 185.619, - "step": 17410 - }, - { - "epoch": 0.07037900427041376, - "grad_norm": 1703.0343017578125, - "learning_rate": 3.484e-05, - "loss": 142.7865, - "step": 17420 - }, - { - "epoch": 0.07041940553578138, - "grad_norm": 1368.336181640625, - "learning_rate": 3.486e-05, - "loss": 116.2519, - "step": 17430 - }, - { - "epoch": 0.07045980680114901, - "grad_norm": 1559.697265625, - "learning_rate": 3.4880000000000005e-05, - "loss": 110.2219, - "step": 17440 - }, - { - "epoch": 0.07050020806651665, - "grad_norm": 3143.30517578125, - "learning_rate": 3.49e-05, - "loss": 91.7808, - "step": 17450 - }, - { - "epoch": 0.07054060933188427, - "grad_norm": 1277.235595703125, - "learning_rate": 3.4920000000000004e-05, - "loss": 143.4617, - "step": 17460 - }, - { - "epoch": 0.0705810105972519, - "grad_norm": 2107.630615234375, - "learning_rate": 3.494e-05, - "loss": 141.62, - "step": 17470 - }, - { - "epoch": 0.07062141186261954, - "grad_norm": 916.9269409179688, - "learning_rate": 3.4960000000000004e-05, - "loss": 124.4125, - "step": 17480 - }, - { - "epoch": 0.07066181312798717, - "grad_norm": 495.5371398925781, - "learning_rate": 3.498e-05, - "loss": 113.1787, - "step": 17490 - }, - { - "epoch": 0.0707022143933548, - "grad_norm": 722.7784423828125, - "learning_rate": 3.5e-05, - "loss": 113.6147, - "step": 17500 - }, - { - "epoch": 0.07074261565872243, - "grad_norm": 8005.68017578125, - "learning_rate": 3.502e-05, - "loss": 160.4929, - "step": 17510 - }, - { - "epoch": 0.07078301692409006, - "grad_norm": 836.4596557617188, - "learning_rate": 3.504e-05, - "loss": 93.0578, - "step": 17520 - }, - { - "epoch": 0.0708234181894577, - "grad_norm": 1631.076171875, - "learning_rate": 3.5060000000000007e-05, - "loss": 126.6171, - "step": 17530 - }, - { - "epoch": 0.07086381945482532, - "grad_norm": 639.418212890625, - "learning_rate": 3.508e-05, - "loss": 105.7042, - "step": 17540 - }, - { - "epoch": 0.07090422072019295, - "grad_norm": 1241.2578125, - "learning_rate": 3.51e-05, - "loss": 201.5257, - "step": 17550 - }, - { - "epoch": 0.07094462198556059, - "grad_norm": 835.3104248046875, - "learning_rate": 3.512e-05, - "loss": 159.556, - "step": 17560 - }, - { - "epoch": 0.07098502325092822, - "grad_norm": 565.4751586914062, - "learning_rate": 3.514e-05, - "loss": 93.872, - "step": 17570 - }, - { - "epoch": 0.07102542451629586, - "grad_norm": 599.105712890625, - "learning_rate": 3.516e-05, - "loss": 132.7931, - "step": 17580 - }, - { - "epoch": 0.07106582578166348, - "grad_norm": 1342.3175048828125, - "learning_rate": 3.518e-05, - "loss": 121.4404, - "step": 17590 - }, - { - "epoch": 0.07110622704703111, - "grad_norm": 853.9281616210938, - "learning_rate": 3.52e-05, - "loss": 185.0158, - "step": 17600 - }, - { - "epoch": 0.07114662831239875, - "grad_norm": 1526.38427734375, - "learning_rate": 3.5220000000000005e-05, - "loss": 130.5014, - "step": 17610 - }, - { - "epoch": 0.07118702957776638, - "grad_norm": 0.0, - "learning_rate": 3.524e-05, - "loss": 155.2073, - "step": 17620 - }, - { - "epoch": 0.071227430843134, - "grad_norm": 722.3214111328125, - "learning_rate": 3.5260000000000005e-05, - "loss": 98.7197, - "step": 17630 - }, - { - "epoch": 0.07126783210850164, - "grad_norm": 769.6561889648438, - "learning_rate": 3.528e-05, - "loss": 98.4344, - "step": 17640 - }, - { - "epoch": 0.07130823337386927, - "grad_norm": 1916.5062255859375, - "learning_rate": 3.53e-05, - "loss": 169.392, - "step": 17650 - }, - { - "epoch": 0.07134863463923691, - "grad_norm": 790.8156127929688, - "learning_rate": 3.532e-05, - "loss": 112.2699, - "step": 17660 - }, - { - "epoch": 0.07138903590460453, - "grad_norm": 0.0, - "learning_rate": 3.5340000000000004e-05, - "loss": 153.1781, - "step": 17670 - }, - { - "epoch": 0.07142943716997216, - "grad_norm": 1370.72216796875, - "learning_rate": 3.536000000000001e-05, - "loss": 170.2639, - "step": 17680 - }, - { - "epoch": 0.0714698384353398, - "grad_norm": 466.6986999511719, - "learning_rate": 3.5380000000000003e-05, - "loss": 140.3446, - "step": 17690 - }, - { - "epoch": 0.07151023970070743, - "grad_norm": 1512.586181640625, - "learning_rate": 3.54e-05, - "loss": 123.2379, - "step": 17700 - }, - { - "epoch": 0.07155064096607505, - "grad_norm": 1187.423828125, - "learning_rate": 3.542e-05, - "loss": 121.9325, - "step": 17710 - }, - { - "epoch": 0.07159104223144269, - "grad_norm": 1491.8526611328125, - "learning_rate": 3.544e-05, - "loss": 97.88, - "step": 17720 - }, - { - "epoch": 0.07163144349681032, - "grad_norm": 819.60400390625, - "learning_rate": 3.546e-05, - "loss": 136.5916, - "step": 17730 - }, - { - "epoch": 0.07167184476217796, - "grad_norm": 650.4725952148438, - "learning_rate": 3.548e-05, - "loss": 100.4026, - "step": 17740 - }, - { - "epoch": 0.07171224602754558, - "grad_norm": 822.773681640625, - "learning_rate": 3.55e-05, - "loss": 103.9474, - "step": 17750 - }, - { - "epoch": 0.07175264729291321, - "grad_norm": 879.6776123046875, - "learning_rate": 3.5520000000000006e-05, - "loss": 90.2404, - "step": 17760 - }, - { - "epoch": 0.07179304855828085, - "grad_norm": 1487.169921875, - "learning_rate": 3.554e-05, - "loss": 172.1061, - "step": 17770 - }, - { - "epoch": 0.07183344982364848, - "grad_norm": 953.589111328125, - "learning_rate": 3.5560000000000005e-05, - "loss": 150.0098, - "step": 17780 - }, - { - "epoch": 0.0718738510890161, - "grad_norm": 688.1337890625, - "learning_rate": 3.558e-05, - "loss": 78.3569, - "step": 17790 - }, - { - "epoch": 0.07191425235438374, - "grad_norm": 789.90283203125, - "learning_rate": 3.56e-05, - "loss": 106.5888, - "step": 17800 - }, - { - "epoch": 0.07195465361975137, - "grad_norm": 934.8272094726562, - "learning_rate": 3.562e-05, - "loss": 111.0842, - "step": 17810 - }, - { - "epoch": 0.07199505488511901, - "grad_norm": 336.4629211425781, - "learning_rate": 3.5640000000000004e-05, - "loss": 99.3697, - "step": 17820 - }, - { - "epoch": 0.07203545615048663, - "grad_norm": 3090.107666015625, - "learning_rate": 3.566e-05, - "loss": 97.4285, - "step": 17830 - }, - { - "epoch": 0.07207585741585426, - "grad_norm": 1428.7044677734375, - "learning_rate": 3.5680000000000004e-05, - "loss": 118.1064, - "step": 17840 - }, - { - "epoch": 0.0721162586812219, - "grad_norm": 2355.272705078125, - "learning_rate": 3.57e-05, - "loss": 151.7737, - "step": 17850 - }, - { - "epoch": 0.07215665994658953, - "grad_norm": 1052.816650390625, - "learning_rate": 3.5720000000000004e-05, - "loss": 86.3124, - "step": 17860 - }, - { - "epoch": 0.07219706121195715, - "grad_norm": 683.0614624023438, - "learning_rate": 3.574e-05, - "loss": 120.8411, - "step": 17870 - }, - { - "epoch": 0.0722374624773248, - "grad_norm": 1161.9776611328125, - "learning_rate": 3.5759999999999996e-05, - "loss": 145.4579, - "step": 17880 - }, - { - "epoch": 0.07227786374269242, - "grad_norm": 1213.32861328125, - "learning_rate": 3.578e-05, - "loss": 160.1318, - "step": 17890 - }, - { - "epoch": 0.07231826500806006, - "grad_norm": 1079.72119140625, - "learning_rate": 3.58e-05, - "loss": 125.1568, - "step": 17900 - }, - { - "epoch": 0.07235866627342769, - "grad_norm": 960.3634643554688, - "learning_rate": 3.5820000000000006e-05, - "loss": 116.5516, - "step": 17910 - }, - { - "epoch": 0.07239906753879531, - "grad_norm": 1030.158447265625, - "learning_rate": 3.584e-05, - "loss": 78.2271, - "step": 17920 - }, - { - "epoch": 0.07243946880416295, - "grad_norm": 812.413818359375, - "learning_rate": 3.586e-05, - "loss": 122.26, - "step": 17930 - }, - { - "epoch": 0.07247987006953058, - "grad_norm": 2242.89794921875, - "learning_rate": 3.588e-05, - "loss": 150.4362, - "step": 17940 - }, - { - "epoch": 0.0725202713348982, - "grad_norm": 1707.64306640625, - "learning_rate": 3.59e-05, - "loss": 107.4575, - "step": 17950 - }, - { - "epoch": 0.07256067260026584, - "grad_norm": 1091.1968994140625, - "learning_rate": 3.592e-05, - "loss": 176.9033, - "step": 17960 - }, - { - "epoch": 0.07260107386563347, - "grad_norm": 660.7951049804688, - "learning_rate": 3.594e-05, - "loss": 120.4594, - "step": 17970 - }, - { - "epoch": 0.07264147513100111, - "grad_norm": 1081.6932373046875, - "learning_rate": 3.596e-05, - "loss": 101.9581, - "step": 17980 - }, - { - "epoch": 0.07268187639636874, - "grad_norm": 1247.1197509765625, - "learning_rate": 3.5980000000000004e-05, - "loss": 90.3047, - "step": 17990 - }, - { - "epoch": 0.07272227766173636, - "grad_norm": 2352.89208984375, - "learning_rate": 3.6e-05, - "loss": 121.5598, - "step": 18000 - }, - { - "epoch": 0.072762678927104, - "grad_norm": 774.28466796875, - "learning_rate": 3.6020000000000004e-05, - "loss": 126.1789, - "step": 18010 - }, - { - "epoch": 0.07280308019247163, - "grad_norm": 821.19384765625, - "learning_rate": 3.604e-05, - "loss": 100.4537, - "step": 18020 - }, - { - "epoch": 0.07284348145783925, - "grad_norm": 919.6599731445312, - "learning_rate": 3.606e-05, - "loss": 85.4199, - "step": 18030 - }, - { - "epoch": 0.0728838827232069, - "grad_norm": 805.6433715820312, - "learning_rate": 3.608e-05, - "loss": 76.5743, - "step": 18040 - }, - { - "epoch": 0.07292428398857452, - "grad_norm": 1283.3468017578125, - "learning_rate": 3.61e-05, - "loss": 128.92, - "step": 18050 - }, - { - "epoch": 0.07296468525394216, - "grad_norm": 2695.972900390625, - "learning_rate": 3.6120000000000007e-05, - "loss": 131.2778, - "step": 18060 - }, - { - "epoch": 0.07300508651930979, - "grad_norm": 2971.26904296875, - "learning_rate": 3.614e-05, - "loss": 103.6507, - "step": 18070 - }, - { - "epoch": 0.07304548778467741, - "grad_norm": 1631.7698974609375, - "learning_rate": 3.616e-05, - "loss": 101.8744, - "step": 18080 - }, - { - "epoch": 0.07308588905004505, - "grad_norm": 707.8176879882812, - "learning_rate": 3.618e-05, - "loss": 106.0584, - "step": 18090 - }, - { - "epoch": 0.07312629031541268, - "grad_norm": 497.0185546875, - "learning_rate": 3.62e-05, - "loss": 74.8409, - "step": 18100 - }, - { - "epoch": 0.0731666915807803, - "grad_norm": 1669.726806640625, - "learning_rate": 3.622e-05, - "loss": 105.9762, - "step": 18110 - }, - { - "epoch": 0.07320709284614794, - "grad_norm": 557.0554809570312, - "learning_rate": 3.624e-05, - "loss": 116.4344, - "step": 18120 - }, - { - "epoch": 0.07324749411151557, - "grad_norm": 2284.02099609375, - "learning_rate": 3.626e-05, - "loss": 122.7274, - "step": 18130 - }, - { - "epoch": 0.07328789537688321, - "grad_norm": 1483.209228515625, - "learning_rate": 3.6280000000000005e-05, - "loss": 100.2296, - "step": 18140 - }, - { - "epoch": 0.07332829664225084, - "grad_norm": 2553.381591796875, - "learning_rate": 3.63e-05, - "loss": 134.3823, - "step": 18150 - }, - { - "epoch": 0.07336869790761846, - "grad_norm": 465.40289306640625, - "learning_rate": 3.6320000000000005e-05, - "loss": 104.473, - "step": 18160 - }, - { - "epoch": 0.0734090991729861, - "grad_norm": 2143.05419921875, - "learning_rate": 3.634e-05, - "loss": 133.1543, - "step": 18170 - }, - { - "epoch": 0.07344950043835373, - "grad_norm": 854.1031494140625, - "learning_rate": 3.636e-05, - "loss": 139.8475, - "step": 18180 - }, - { - "epoch": 0.07348990170372136, - "grad_norm": 812.4703979492188, - "learning_rate": 3.638e-05, - "loss": 124.2388, - "step": 18190 - }, - { - "epoch": 0.073530302969089, - "grad_norm": 693.6560668945312, - "learning_rate": 3.6400000000000004e-05, - "loss": 125.4126, - "step": 18200 - }, - { - "epoch": 0.07357070423445662, - "grad_norm": 1367.3717041015625, - "learning_rate": 3.642000000000001e-05, - "loss": 141.6495, - "step": 18210 - }, - { - "epoch": 0.07361110549982426, - "grad_norm": 1077.287353515625, - "learning_rate": 3.6440000000000003e-05, - "loss": 130.6979, - "step": 18220 - }, - { - "epoch": 0.07365150676519189, - "grad_norm": 2508.4072265625, - "learning_rate": 3.646e-05, - "loss": 125.5967, - "step": 18230 - }, - { - "epoch": 0.07369190803055951, - "grad_norm": 2781.412841796875, - "learning_rate": 3.648e-05, - "loss": 149.0504, - "step": 18240 - }, - { - "epoch": 0.07373230929592715, - "grad_norm": 1022.107177734375, - "learning_rate": 3.65e-05, - "loss": 128.4223, - "step": 18250 - }, - { - "epoch": 0.07377271056129478, - "grad_norm": 700.0909423828125, - "learning_rate": 3.652e-05, - "loss": 95.3364, - "step": 18260 - }, - { - "epoch": 0.0738131118266624, - "grad_norm": 992.6243896484375, - "learning_rate": 3.654e-05, - "loss": 130.7981, - "step": 18270 - }, - { - "epoch": 0.07385351309203005, - "grad_norm": 1268.7449951171875, - "learning_rate": 3.656e-05, - "loss": 116.5302, - "step": 18280 - }, - { - "epoch": 0.07389391435739767, - "grad_norm": 915.7352905273438, - "learning_rate": 3.6580000000000006e-05, - "loss": 141.5618, - "step": 18290 - }, - { - "epoch": 0.0739343156227653, - "grad_norm": 3412.534912109375, - "learning_rate": 3.66e-05, - "loss": 188.1361, - "step": 18300 - }, - { - "epoch": 0.07397471688813294, - "grad_norm": 1475.6259765625, - "learning_rate": 3.6620000000000005e-05, - "loss": 125.9999, - "step": 18310 - }, - { - "epoch": 0.07401511815350056, - "grad_norm": 838.94140625, - "learning_rate": 3.664e-05, - "loss": 142.8819, - "step": 18320 - }, - { - "epoch": 0.0740555194188682, - "grad_norm": 2300.423583984375, - "learning_rate": 3.666e-05, - "loss": 168.7538, - "step": 18330 - }, - { - "epoch": 0.07409592068423583, - "grad_norm": 2023.9940185546875, - "learning_rate": 3.668e-05, - "loss": 126.4141, - "step": 18340 - }, - { - "epoch": 0.07413632194960346, - "grad_norm": 884.4683227539062, - "learning_rate": 3.6700000000000004e-05, - "loss": 64.7707, - "step": 18350 - }, - { - "epoch": 0.0741767232149711, - "grad_norm": 598.0726928710938, - "learning_rate": 3.672000000000001e-05, - "loss": 90.2243, - "step": 18360 - }, - { - "epoch": 0.07421712448033872, - "grad_norm": 852.4734497070312, - "learning_rate": 3.6740000000000004e-05, - "loss": 193.19, - "step": 18370 - }, - { - "epoch": 0.07425752574570635, - "grad_norm": 926.2811889648438, - "learning_rate": 3.676e-05, - "loss": 113.1817, - "step": 18380 - }, - { - "epoch": 0.07429792701107399, - "grad_norm": 840.8629760742188, - "learning_rate": 3.6780000000000004e-05, - "loss": 128.7378, - "step": 18390 - }, - { - "epoch": 0.07433832827644161, - "grad_norm": 1284.182373046875, - "learning_rate": 3.68e-05, - "loss": 119.9407, - "step": 18400 - }, - { - "epoch": 0.07437872954180925, - "grad_norm": 3427.597900390625, - "learning_rate": 3.682e-05, - "loss": 168.6104, - "step": 18410 - }, - { - "epoch": 0.07441913080717688, - "grad_norm": 1045.6478271484375, - "learning_rate": 3.684e-05, - "loss": 92.3087, - "step": 18420 - }, - { - "epoch": 0.07445953207254451, - "grad_norm": 1453.0216064453125, - "learning_rate": 3.686e-05, - "loss": 128.912, - "step": 18430 - }, - { - "epoch": 0.07449993333791215, - "grad_norm": 1257.60986328125, - "learning_rate": 3.6880000000000006e-05, - "loss": 67.0141, - "step": 18440 - }, - { - "epoch": 0.07454033460327977, - "grad_norm": 900.5206298828125, - "learning_rate": 3.69e-05, - "loss": 94.6233, - "step": 18450 - }, - { - "epoch": 0.0745807358686474, - "grad_norm": 769.1461181640625, - "learning_rate": 3.692e-05, - "loss": 128.7582, - "step": 18460 - }, - { - "epoch": 0.07462113713401504, - "grad_norm": 1010.8888549804688, - "learning_rate": 3.694e-05, - "loss": 115.287, - "step": 18470 - }, - { - "epoch": 0.07466153839938267, - "grad_norm": 974.634765625, - "learning_rate": 3.696e-05, - "loss": 97.2538, - "step": 18480 - }, - { - "epoch": 0.0747019396647503, - "grad_norm": 998.1504516601562, - "learning_rate": 3.698e-05, - "loss": 123.2764, - "step": 18490 - }, - { - "epoch": 0.07474234093011793, - "grad_norm": 1043.8447265625, - "learning_rate": 3.7e-05, - "loss": 105.7509, - "step": 18500 - }, - { - "epoch": 0.07478274219548556, - "grad_norm": 551.1446533203125, - "learning_rate": 3.702e-05, - "loss": 92.3631, - "step": 18510 - }, - { - "epoch": 0.0748231434608532, - "grad_norm": 677.5746459960938, - "learning_rate": 3.7040000000000005e-05, - "loss": 92.7502, - "step": 18520 - }, - { - "epoch": 0.07486354472622082, - "grad_norm": 935.2184448242188, - "learning_rate": 3.706e-05, - "loss": 94.4942, - "step": 18530 - }, - { - "epoch": 0.07490394599158845, - "grad_norm": 4195.58837890625, - "learning_rate": 3.7080000000000004e-05, - "loss": 180.4082, - "step": 18540 - }, - { - "epoch": 0.07494434725695609, - "grad_norm": 553.2954711914062, - "learning_rate": 3.71e-05, - "loss": 136.8525, - "step": 18550 - }, - { - "epoch": 0.07498474852232372, - "grad_norm": 1196.5914306640625, - "learning_rate": 3.712e-05, - "loss": 87.8535, - "step": 18560 - }, - { - "epoch": 0.07502514978769136, - "grad_norm": 925.3016967773438, - "learning_rate": 3.714e-05, - "loss": 99.2028, - "step": 18570 - }, - { - "epoch": 0.07506555105305898, - "grad_norm": 743.458984375, - "learning_rate": 3.716e-05, - "loss": 115.926, - "step": 18580 - }, - { - "epoch": 0.07510595231842661, - "grad_norm": 452.0498962402344, - "learning_rate": 3.7180000000000007e-05, - "loss": 72.2173, - "step": 18590 - }, - { - "epoch": 0.07514635358379425, - "grad_norm": 1277.8033447265625, - "learning_rate": 3.72e-05, - "loss": 196.1923, - "step": 18600 - }, - { - "epoch": 0.07518675484916187, - "grad_norm": 613.0692138671875, - "learning_rate": 3.722e-05, - "loss": 165.0734, - "step": 18610 - }, - { - "epoch": 0.0752271561145295, - "grad_norm": 707.7522583007812, - "learning_rate": 3.724e-05, - "loss": 146.3568, - "step": 18620 - }, - { - "epoch": 0.07526755737989714, - "grad_norm": 892.3648681640625, - "learning_rate": 3.726e-05, - "loss": 119.203, - "step": 18630 - }, - { - "epoch": 0.07530795864526477, - "grad_norm": 763.5734252929688, - "learning_rate": 3.728e-05, - "loss": 154.0482, - "step": 18640 - }, - { - "epoch": 0.0753483599106324, - "grad_norm": 779.2640991210938, - "learning_rate": 3.73e-05, - "loss": 73.6996, - "step": 18650 - }, - { - "epoch": 0.07538876117600003, - "grad_norm": 2393.424560546875, - "learning_rate": 3.732e-05, - "loss": 129.6301, - "step": 18660 - }, - { - "epoch": 0.07542916244136766, - "grad_norm": 2221.367919921875, - "learning_rate": 3.7340000000000005e-05, - "loss": 132.7365, - "step": 18670 - }, - { - "epoch": 0.0754695637067353, - "grad_norm": 1009.402587890625, - "learning_rate": 3.736e-05, - "loss": 110.5086, - "step": 18680 - }, - { - "epoch": 0.07550996497210292, - "grad_norm": 5788.82275390625, - "learning_rate": 3.7380000000000005e-05, - "loss": 93.0837, - "step": 18690 - }, - { - "epoch": 0.07555036623747055, - "grad_norm": 989.4839477539062, - "learning_rate": 3.74e-05, - "loss": 120.5923, - "step": 18700 - }, - { - "epoch": 0.07559076750283819, - "grad_norm": 1187.691650390625, - "learning_rate": 3.742e-05, - "loss": 191.3571, - "step": 18710 - }, - { - "epoch": 0.07563116876820582, - "grad_norm": 1042.4420166015625, - "learning_rate": 3.744e-05, - "loss": 123.3321, - "step": 18720 - }, - { - "epoch": 0.07567157003357346, - "grad_norm": 593.2315063476562, - "learning_rate": 3.7460000000000004e-05, - "loss": 124.6118, - "step": 18730 - }, - { - "epoch": 0.07571197129894108, - "grad_norm": 1148.8316650390625, - "learning_rate": 3.748000000000001e-05, - "loss": 129.3611, - "step": 18740 - }, - { - "epoch": 0.07575237256430871, - "grad_norm": 721.3247680664062, - "learning_rate": 3.7500000000000003e-05, - "loss": 115.121, - "step": 18750 - }, - { - "epoch": 0.07579277382967635, - "grad_norm": 1153.5718994140625, - "learning_rate": 3.752e-05, - "loss": 96.7754, - "step": 18760 - }, - { - "epoch": 0.07583317509504398, - "grad_norm": 559.9420166015625, - "learning_rate": 3.754e-05, - "loss": 117.4526, - "step": 18770 - }, - { - "epoch": 0.0758735763604116, - "grad_norm": 1740.1900634765625, - "learning_rate": 3.756e-05, - "loss": 103.682, - "step": 18780 - }, - { - "epoch": 0.07591397762577924, - "grad_norm": 0.0, - "learning_rate": 3.758e-05, - "loss": 74.3376, - "step": 18790 - }, - { - "epoch": 0.07595437889114687, - "grad_norm": 1305.6412353515625, - "learning_rate": 3.76e-05, - "loss": 114.3259, - "step": 18800 - }, - { - "epoch": 0.07599478015651451, - "grad_norm": 1425.0380859375, - "learning_rate": 3.762e-05, - "loss": 171.961, - "step": 18810 - }, - { - "epoch": 0.07603518142188213, - "grad_norm": 1266.49072265625, - "learning_rate": 3.7640000000000006e-05, - "loss": 166.5889, - "step": 18820 - }, - { - "epoch": 0.07607558268724976, - "grad_norm": 1901.879638671875, - "learning_rate": 3.766e-05, - "loss": 132.9882, - "step": 18830 - }, - { - "epoch": 0.0761159839526174, - "grad_norm": 690.49951171875, - "learning_rate": 3.7680000000000005e-05, - "loss": 84.5623, - "step": 18840 - }, - { - "epoch": 0.07615638521798503, - "grad_norm": 650.4954833984375, - "learning_rate": 3.77e-05, - "loss": 135.0686, - "step": 18850 - }, - { - "epoch": 0.07619678648335265, - "grad_norm": 2350.57373046875, - "learning_rate": 3.772e-05, - "loss": 133.9776, - "step": 18860 - }, - { - "epoch": 0.07623718774872029, - "grad_norm": 1563.426513671875, - "learning_rate": 3.774e-05, - "loss": 101.9517, - "step": 18870 - }, - { - "epoch": 0.07627758901408792, - "grad_norm": 797.4249267578125, - "learning_rate": 3.776e-05, - "loss": 120.9917, - "step": 18880 - }, - { - "epoch": 0.07631799027945556, - "grad_norm": 1120.25732421875, - "learning_rate": 3.778000000000001e-05, - "loss": 115.9363, - "step": 18890 - }, - { - "epoch": 0.07635839154482318, - "grad_norm": 1334.6461181640625, - "learning_rate": 3.7800000000000004e-05, - "loss": 147.4801, - "step": 18900 - }, - { - "epoch": 0.07639879281019081, - "grad_norm": 841.9830322265625, - "learning_rate": 3.782e-05, - "loss": 127.5146, - "step": 18910 - }, - { - "epoch": 0.07643919407555845, - "grad_norm": 951.7286987304688, - "learning_rate": 3.7840000000000004e-05, - "loss": 222.0843, - "step": 18920 - }, - { - "epoch": 0.07647959534092608, - "grad_norm": 1344.3798828125, - "learning_rate": 3.786e-05, - "loss": 118.1746, - "step": 18930 - }, - { - "epoch": 0.0765199966062937, - "grad_norm": 763.527587890625, - "learning_rate": 3.788e-05, - "loss": 74.0978, - "step": 18940 - }, - { - "epoch": 0.07656039787166134, - "grad_norm": 800.6107177734375, - "learning_rate": 3.79e-05, - "loss": 105.7953, - "step": 18950 - }, - { - "epoch": 0.07660079913702897, - "grad_norm": 1480.68505859375, - "learning_rate": 3.792e-05, - "loss": 127.3082, - "step": 18960 - }, - { - "epoch": 0.07664120040239661, - "grad_norm": 334.6526184082031, - "learning_rate": 3.7940000000000006e-05, - "loss": 85.6625, - "step": 18970 - }, - { - "epoch": 0.07668160166776423, - "grad_norm": 1548.9031982421875, - "learning_rate": 3.796e-05, - "loss": 131.035, - "step": 18980 - }, - { - "epoch": 0.07672200293313186, - "grad_norm": 1739.5743408203125, - "learning_rate": 3.7980000000000006e-05, - "loss": 172.2465, - "step": 18990 - }, - { - "epoch": 0.0767624041984995, - "grad_norm": 3091.291015625, - "learning_rate": 3.8e-05, - "loss": 160.8068, - "step": 19000 - }, - { - "epoch": 0.07680280546386713, - "grad_norm": 1500.0257568359375, - "learning_rate": 3.802e-05, - "loss": 76.5043, - "step": 19010 - }, - { - "epoch": 0.07684320672923475, - "grad_norm": 1217.782958984375, - "learning_rate": 3.804e-05, - "loss": 102.2421, - "step": 19020 - }, - { - "epoch": 0.0768836079946024, - "grad_norm": 1387.4803466796875, - "learning_rate": 3.806e-05, - "loss": 94.5826, - "step": 19030 - }, - { - "epoch": 0.07692400925997002, - "grad_norm": 1300.2745361328125, - "learning_rate": 3.808e-05, - "loss": 130.5162, - "step": 19040 - }, - { - "epoch": 0.07696441052533766, - "grad_norm": 1954.7742919921875, - "learning_rate": 3.8100000000000005e-05, - "loss": 157.192, - "step": 19050 - }, - { - "epoch": 0.07700481179070529, - "grad_norm": 2402.571533203125, - "learning_rate": 3.812e-05, - "loss": 181.6468, - "step": 19060 - }, - { - "epoch": 0.07704521305607291, - "grad_norm": 1889.8359375, - "learning_rate": 3.8140000000000004e-05, - "loss": 112.0516, - "step": 19070 - }, - { - "epoch": 0.07708561432144055, - "grad_norm": 834.8114624023438, - "learning_rate": 3.816e-05, - "loss": 104.1592, - "step": 19080 - }, - { - "epoch": 0.07712601558680818, - "grad_norm": 1373.436279296875, - "learning_rate": 3.818e-05, - "loss": 121.8585, - "step": 19090 - }, - { - "epoch": 0.0771664168521758, - "grad_norm": 1009.5123901367188, - "learning_rate": 3.82e-05, - "loss": 156.3473, - "step": 19100 - }, - { - "epoch": 0.07720681811754344, - "grad_norm": 986.4491577148438, - "learning_rate": 3.822e-05, - "loss": 123.8396, - "step": 19110 - }, - { - "epoch": 0.07724721938291107, - "grad_norm": 690.7348022460938, - "learning_rate": 3.8240000000000007e-05, - "loss": 81.2586, - "step": 19120 - }, - { - "epoch": 0.07728762064827871, - "grad_norm": 603.4257202148438, - "learning_rate": 3.826e-05, - "loss": 148.1277, - "step": 19130 - }, - { - "epoch": 0.07732802191364634, - "grad_norm": 610.265869140625, - "learning_rate": 3.828e-05, - "loss": 107.6773, - "step": 19140 - }, - { - "epoch": 0.07736842317901396, - "grad_norm": 596.2022094726562, - "learning_rate": 3.83e-05, - "loss": 81.1043, - "step": 19150 - }, - { - "epoch": 0.0774088244443816, - "grad_norm": 630.5447387695312, - "learning_rate": 3.832e-05, - "loss": 75.0851, - "step": 19160 - }, - { - "epoch": 0.07744922570974923, - "grad_norm": 967.669921875, - "learning_rate": 3.834e-05, - "loss": 68.293, - "step": 19170 - }, - { - "epoch": 0.07748962697511685, - "grad_norm": 7127.17236328125, - "learning_rate": 3.836e-05, - "loss": 117.3339, - "step": 19180 - }, - { - "epoch": 0.0775300282404845, - "grad_norm": 978.7310180664062, - "learning_rate": 3.838e-05, - "loss": 169.6352, - "step": 19190 - }, - { - "epoch": 0.07757042950585212, - "grad_norm": 1465.735595703125, - "learning_rate": 3.8400000000000005e-05, - "loss": 133.1634, - "step": 19200 - }, - { - "epoch": 0.07761083077121976, - "grad_norm": 1282.474609375, - "learning_rate": 3.842e-05, - "loss": 142.1115, - "step": 19210 - }, - { - "epoch": 0.07765123203658739, - "grad_norm": 576.253173828125, - "learning_rate": 3.8440000000000005e-05, - "loss": 167.8867, - "step": 19220 - }, - { - "epoch": 0.07769163330195501, - "grad_norm": 911.6886596679688, - "learning_rate": 3.846e-05, - "loss": 106.331, - "step": 19230 - }, - { - "epoch": 0.07773203456732265, - "grad_norm": 1108.574462890625, - "learning_rate": 3.848e-05, - "loss": 159.7625, - "step": 19240 - }, - { - "epoch": 0.07777243583269028, - "grad_norm": 1084.5352783203125, - "learning_rate": 3.85e-05, - "loss": 102.5427, - "step": 19250 - }, - { - "epoch": 0.0778128370980579, - "grad_norm": 2015.353515625, - "learning_rate": 3.8520000000000004e-05, - "loss": 152.4607, - "step": 19260 - }, - { - "epoch": 0.07785323836342554, - "grad_norm": 2406.21484375, - "learning_rate": 3.854000000000001e-05, - "loss": 115.7677, - "step": 19270 - }, - { - "epoch": 0.07789363962879317, - "grad_norm": 1180.568115234375, - "learning_rate": 3.8560000000000004e-05, - "loss": 87.3702, - "step": 19280 - }, - { - "epoch": 0.07793404089416081, - "grad_norm": 546.580810546875, - "learning_rate": 3.858e-05, - "loss": 71.9833, - "step": 19290 - }, - { - "epoch": 0.07797444215952844, - "grad_norm": 1000.6842651367188, - "learning_rate": 3.86e-05, - "loss": 127.7328, - "step": 19300 - }, - { - "epoch": 0.07801484342489606, - "grad_norm": 717.3246459960938, - "learning_rate": 3.862e-05, - "loss": 95.6857, - "step": 19310 - }, - { - "epoch": 0.0780552446902637, - "grad_norm": 4112.51220703125, - "learning_rate": 3.864e-05, - "loss": 199.442, - "step": 19320 - }, - { - "epoch": 0.07809564595563133, - "grad_norm": 972.7607421875, - "learning_rate": 3.866e-05, - "loss": 168.3066, - "step": 19330 - }, - { - "epoch": 0.07813604722099896, - "grad_norm": 1725.4652099609375, - "learning_rate": 3.868e-05, - "loss": 125.3838, - "step": 19340 - }, - { - "epoch": 0.0781764484863666, - "grad_norm": 991.9950561523438, - "learning_rate": 3.8700000000000006e-05, - "loss": 110.0063, - "step": 19350 - }, - { - "epoch": 0.07821684975173422, - "grad_norm": 570.45751953125, - "learning_rate": 3.872e-05, - "loss": 148.7821, - "step": 19360 - }, - { - "epoch": 0.07825725101710186, - "grad_norm": 998.0093994140625, - "learning_rate": 3.8740000000000005e-05, - "loss": 106.5317, - "step": 19370 - }, - { - "epoch": 0.07829765228246949, - "grad_norm": 2785.006103515625, - "learning_rate": 3.876e-05, - "loss": 174.5811, - "step": 19380 - }, - { - "epoch": 0.07833805354783711, - "grad_norm": 1203.6185302734375, - "learning_rate": 3.878e-05, - "loss": 159.747, - "step": 19390 - }, - { - "epoch": 0.07837845481320475, - "grad_norm": 1574.8548583984375, - "learning_rate": 3.88e-05, - "loss": 161.0951, - "step": 19400 - }, - { - "epoch": 0.07841885607857238, - "grad_norm": 3679.686279296875, - "learning_rate": 3.882e-05, - "loss": 140.7081, - "step": 19410 - }, - { - "epoch": 0.07845925734394, - "grad_norm": 3366.86669921875, - "learning_rate": 3.884e-05, - "loss": 129.2223, - "step": 19420 - }, - { - "epoch": 0.07849965860930765, - "grad_norm": 979.6845703125, - "learning_rate": 3.8860000000000004e-05, - "loss": 123.001, - "step": 19430 - }, - { - "epoch": 0.07854005987467527, - "grad_norm": 706.135498046875, - "learning_rate": 3.888e-05, - "loss": 107.5821, - "step": 19440 - }, - { - "epoch": 0.07858046114004291, - "grad_norm": 3030.436279296875, - "learning_rate": 3.8900000000000004e-05, - "loss": 136.2499, - "step": 19450 - }, - { - "epoch": 0.07862086240541054, - "grad_norm": 341.5601501464844, - "learning_rate": 3.892e-05, - "loss": 87.6859, - "step": 19460 - }, - { - "epoch": 0.07866126367077816, - "grad_norm": 890.729248046875, - "learning_rate": 3.894e-05, - "loss": 130.7491, - "step": 19470 - }, - { - "epoch": 0.0787016649361458, - "grad_norm": 2620.10400390625, - "learning_rate": 3.896e-05, - "loss": 100.2728, - "step": 19480 - }, - { - "epoch": 0.07874206620151343, - "grad_norm": 2374.006103515625, - "learning_rate": 3.898e-05, - "loss": 102.7446, - "step": 19490 - }, - { - "epoch": 0.07878246746688106, - "grad_norm": 661.5973510742188, - "learning_rate": 3.9000000000000006e-05, - "loss": 103.1776, - "step": 19500 - }, - { - "epoch": 0.0788228687322487, - "grad_norm": 496.9710693359375, - "learning_rate": 3.902e-05, - "loss": 104.6827, - "step": 19510 - }, - { - "epoch": 0.07886326999761632, - "grad_norm": 950.9691772460938, - "learning_rate": 3.9040000000000006e-05, - "loss": 104.4576, - "step": 19520 - }, - { - "epoch": 0.07890367126298396, - "grad_norm": 1480.8165283203125, - "learning_rate": 3.906e-05, - "loss": 123.9244, - "step": 19530 - }, - { - "epoch": 0.07894407252835159, - "grad_norm": 1668.803955078125, - "learning_rate": 3.908e-05, - "loss": 114.0788, - "step": 19540 - }, - { - "epoch": 0.07898447379371921, - "grad_norm": 998.9266357421875, - "learning_rate": 3.91e-05, - "loss": 116.9195, - "step": 19550 - }, - { - "epoch": 0.07902487505908685, - "grad_norm": 685.7608032226562, - "learning_rate": 3.912e-05, - "loss": 120.8969, - "step": 19560 - }, - { - "epoch": 0.07906527632445448, - "grad_norm": 1155.7894287109375, - "learning_rate": 3.914e-05, - "loss": 121.0664, - "step": 19570 - }, - { - "epoch": 0.07910567758982211, - "grad_norm": 959.234130859375, - "learning_rate": 3.9160000000000005e-05, - "loss": 96.3062, - "step": 19580 - }, - { - "epoch": 0.07914607885518975, - "grad_norm": 1889.412109375, - "learning_rate": 3.918e-05, - "loss": 81.9331, - "step": 19590 - }, - { - "epoch": 0.07918648012055737, - "grad_norm": 966.55078125, - "learning_rate": 3.9200000000000004e-05, - "loss": 164.067, - "step": 19600 - }, - { - "epoch": 0.07922688138592501, - "grad_norm": 645.4921875, - "learning_rate": 3.922e-05, - "loss": 126.4012, - "step": 19610 - }, - { - "epoch": 0.07926728265129264, - "grad_norm": 1248.7474365234375, - "learning_rate": 3.9240000000000004e-05, - "loss": 109.2031, - "step": 19620 - }, - { - "epoch": 0.07930768391666027, - "grad_norm": 2941.3408203125, - "learning_rate": 3.926e-05, - "loss": 100.5554, - "step": 19630 - }, - { - "epoch": 0.0793480851820279, - "grad_norm": 1487.9503173828125, - "learning_rate": 3.9280000000000003e-05, - "loss": 120.6536, - "step": 19640 - }, - { - "epoch": 0.07938848644739553, - "grad_norm": 956.5473022460938, - "learning_rate": 3.9300000000000007e-05, - "loss": 112.047, - "step": 19650 - }, - { - "epoch": 0.07942888771276316, - "grad_norm": 675.1141967773438, - "learning_rate": 3.932e-05, - "loss": 121.3393, - "step": 19660 - }, - { - "epoch": 0.0794692889781308, - "grad_norm": 1062.9857177734375, - "learning_rate": 3.9340000000000006e-05, - "loss": 118.6263, - "step": 19670 - }, - { - "epoch": 0.07950969024349842, - "grad_norm": 796.38525390625, - "learning_rate": 3.936e-05, - "loss": 93.5364, - "step": 19680 - }, - { - "epoch": 0.07955009150886606, - "grad_norm": 712.4339599609375, - "learning_rate": 3.938e-05, - "loss": 169.9139, - "step": 19690 - }, - { - "epoch": 0.07959049277423369, - "grad_norm": 348.9084777832031, - "learning_rate": 3.94e-05, - "loss": 98.5714, - "step": 19700 - }, - { - "epoch": 0.07963089403960132, - "grad_norm": 1002.7638549804688, - "learning_rate": 3.942e-05, - "loss": 101.6811, - "step": 19710 - }, - { - "epoch": 0.07967129530496896, - "grad_norm": 1141.4864501953125, - "learning_rate": 3.944e-05, - "loss": 99.0602, - "step": 19720 - }, - { - "epoch": 0.07971169657033658, - "grad_norm": 1789.052001953125, - "learning_rate": 3.9460000000000005e-05, - "loss": 146.9852, - "step": 19730 - }, - { - "epoch": 0.07975209783570421, - "grad_norm": 1004.76611328125, - "learning_rate": 3.948e-05, - "loss": 108.5757, - "step": 19740 - }, - { - "epoch": 0.07979249910107185, - "grad_norm": 1779.8487548828125, - "learning_rate": 3.9500000000000005e-05, - "loss": 92.2654, - "step": 19750 - }, - { - "epoch": 0.07983290036643947, - "grad_norm": 1891.206787109375, - "learning_rate": 3.952e-05, - "loss": 130.2949, - "step": 19760 - }, - { - "epoch": 0.07987330163180711, - "grad_norm": 1210.8375244140625, - "learning_rate": 3.954e-05, - "loss": 149.1875, - "step": 19770 - }, - { - "epoch": 0.07991370289717474, - "grad_norm": 1035.046630859375, - "learning_rate": 3.956e-05, - "loss": 90.7728, - "step": 19780 - }, - { - "epoch": 0.07995410416254237, - "grad_norm": 1166.8427734375, - "learning_rate": 3.958e-05, - "loss": 113.0428, - "step": 19790 - }, - { - "epoch": 0.07999450542791, - "grad_norm": 1342.7369384765625, - "learning_rate": 3.960000000000001e-05, - "loss": 84.0289, - "step": 19800 - }, - { - "epoch": 0.08003490669327763, - "grad_norm": 930.572509765625, - "learning_rate": 3.9620000000000004e-05, - "loss": 137.8586, - "step": 19810 - }, - { - "epoch": 0.08007530795864526, - "grad_norm": 537.0020141601562, - "learning_rate": 3.964e-05, - "loss": 136.4454, - "step": 19820 - }, - { - "epoch": 0.0801157092240129, - "grad_norm": 8671.7001953125, - "learning_rate": 3.966e-05, - "loss": 194.5568, - "step": 19830 - }, - { - "epoch": 0.08015611048938052, - "grad_norm": 1121.135986328125, - "learning_rate": 3.968e-05, - "loss": 171.3159, - "step": 19840 - }, - { - "epoch": 0.08019651175474816, - "grad_norm": 875.9639282226562, - "learning_rate": 3.97e-05, - "loss": 138.4204, - "step": 19850 - }, - { - "epoch": 0.08023691302011579, - "grad_norm": 1031.24462890625, - "learning_rate": 3.972e-05, - "loss": 70.7438, - "step": 19860 - }, - { - "epoch": 0.08027731428548342, - "grad_norm": 844.333251953125, - "learning_rate": 3.974e-05, - "loss": 86.4556, - "step": 19870 - }, - { - "epoch": 0.08031771555085106, - "grad_norm": 775.6649780273438, - "learning_rate": 3.9760000000000006e-05, - "loss": 141.9916, - "step": 19880 - }, - { - "epoch": 0.08035811681621868, - "grad_norm": 579.8970947265625, - "learning_rate": 3.978e-05, - "loss": 131.3396, - "step": 19890 - }, - { - "epoch": 0.08039851808158631, - "grad_norm": 0.0, - "learning_rate": 3.9800000000000005e-05, - "loss": 98.0002, - "step": 19900 - }, - { - "epoch": 0.08043891934695395, - "grad_norm": 1350.4500732421875, - "learning_rate": 3.982e-05, - "loss": 221.3071, - "step": 19910 - }, - { - "epoch": 0.08047932061232158, - "grad_norm": 809.1064453125, - "learning_rate": 3.984e-05, - "loss": 82.145, - "step": 19920 - }, - { - "epoch": 0.08051972187768922, - "grad_norm": 1032.8472900390625, - "learning_rate": 3.986e-05, - "loss": 90.5167, - "step": 19930 - }, - { - "epoch": 0.08056012314305684, - "grad_norm": 548.9085693359375, - "learning_rate": 3.988e-05, - "loss": 82.506, - "step": 19940 - }, - { - "epoch": 0.08060052440842447, - "grad_norm": 828.6427001953125, - "learning_rate": 3.99e-05, - "loss": 103.581, - "step": 19950 - }, - { - "epoch": 0.08064092567379211, - "grad_norm": 1131.4522705078125, - "learning_rate": 3.9920000000000004e-05, - "loss": 124.3697, - "step": 19960 - }, - { - "epoch": 0.08068132693915973, - "grad_norm": 3095.481689453125, - "learning_rate": 3.994e-05, - "loss": 180.1047, - "step": 19970 - }, - { - "epoch": 0.08072172820452736, - "grad_norm": 1326.9815673828125, - "learning_rate": 3.9960000000000004e-05, - "loss": 111.703, - "step": 19980 - }, - { - "epoch": 0.080762129469895, - "grad_norm": 563.6680297851562, - "learning_rate": 3.998e-05, - "loss": 86.8286, - "step": 19990 - }, - { - "epoch": 0.08080253073526263, - "grad_norm": 867.2445068359375, - "learning_rate": 4e-05, - "loss": 120.7213, - "step": 20000 - }, - { - "epoch": 0.08084293200063027, - "grad_norm": 3826.01513671875, - "learning_rate": 4.002e-05, - "loss": 188.3878, - "step": 20010 - }, - { - "epoch": 0.08088333326599789, - "grad_norm": 626.7747192382812, - "learning_rate": 4.004e-05, - "loss": 102.346, - "step": 20020 - }, - { - "epoch": 0.08092373453136552, - "grad_norm": 447.5019836425781, - "learning_rate": 4.0060000000000006e-05, - "loss": 112.2855, - "step": 20030 - }, - { - "epoch": 0.08096413579673316, - "grad_norm": 1387.30029296875, - "learning_rate": 4.008e-05, - "loss": 144.4808, - "step": 20040 - }, - { - "epoch": 0.08100453706210078, - "grad_norm": 711.4352416992188, - "learning_rate": 4.0100000000000006e-05, - "loss": 123.1025, - "step": 20050 - }, - { - "epoch": 0.08104493832746841, - "grad_norm": 696.3189086914062, - "learning_rate": 4.012e-05, - "loss": 125.8491, - "step": 20060 - }, - { - "epoch": 0.08108533959283605, - "grad_norm": 1703.4642333984375, - "learning_rate": 4.014e-05, - "loss": 180.9456, - "step": 20070 - }, - { - "epoch": 0.08112574085820368, - "grad_norm": 931.850341796875, - "learning_rate": 4.016e-05, - "loss": 157.8773, - "step": 20080 - }, - { - "epoch": 0.08116614212357132, - "grad_norm": 1737.6793212890625, - "learning_rate": 4.018e-05, - "loss": 108.9433, - "step": 20090 - }, - { - "epoch": 0.08120654338893894, - "grad_norm": 1597.1063232421875, - "learning_rate": 4.02e-05, - "loss": 116.3655, - "step": 20100 - }, - { - "epoch": 0.08124694465430657, - "grad_norm": 1292.120849609375, - "learning_rate": 4.0220000000000005e-05, - "loss": 128.3085, - "step": 20110 - }, - { - "epoch": 0.08128734591967421, - "grad_norm": 784.8877563476562, - "learning_rate": 4.024e-05, - "loss": 142.1472, - "step": 20120 - }, - { - "epoch": 0.08132774718504183, - "grad_norm": 518.2711181640625, - "learning_rate": 4.0260000000000004e-05, - "loss": 138.0721, - "step": 20130 - }, - { - "epoch": 0.08136814845040946, - "grad_norm": 3041.567138671875, - "learning_rate": 4.028e-05, - "loss": 143.0653, - "step": 20140 - }, - { - "epoch": 0.0814085497157771, - "grad_norm": 393.4328918457031, - "learning_rate": 4.0300000000000004e-05, - "loss": 123.1964, - "step": 20150 - }, - { - "epoch": 0.08144895098114473, - "grad_norm": 433.4530334472656, - "learning_rate": 4.032e-05, - "loss": 108.3037, - "step": 20160 - }, - { - "epoch": 0.08148935224651237, - "grad_norm": 1056.2037353515625, - "learning_rate": 4.034e-05, - "loss": 104.124, - "step": 20170 - }, - { - "epoch": 0.08152975351188, - "grad_norm": 1283.22607421875, - "learning_rate": 4.0360000000000007e-05, - "loss": 146.2523, - "step": 20180 - }, - { - "epoch": 0.08157015477724762, - "grad_norm": 1303.0574951171875, - "learning_rate": 4.038e-05, - "loss": 108.7274, - "step": 20190 - }, - { - "epoch": 0.08161055604261526, - "grad_norm": 748.68115234375, - "learning_rate": 4.0400000000000006e-05, - "loss": 118.8553, - "step": 20200 - }, - { - "epoch": 0.08165095730798289, - "grad_norm": 559.082763671875, - "learning_rate": 4.042e-05, - "loss": 104.5537, - "step": 20210 - }, - { - "epoch": 0.08169135857335051, - "grad_norm": 883.0214233398438, - "learning_rate": 4.044e-05, - "loss": 119.6957, - "step": 20220 - }, - { - "epoch": 0.08173175983871815, - "grad_norm": 3064.196533203125, - "learning_rate": 4.046e-05, - "loss": 167.4422, - "step": 20230 - }, - { - "epoch": 0.08177216110408578, - "grad_norm": 0.0, - "learning_rate": 4.048e-05, - "loss": 128.549, - "step": 20240 - }, - { - "epoch": 0.08181256236945342, - "grad_norm": 755.3316040039062, - "learning_rate": 4.05e-05, - "loss": 108.4741, - "step": 20250 - }, - { - "epoch": 0.08185296363482104, - "grad_norm": 1562.85498046875, - "learning_rate": 4.0520000000000005e-05, - "loss": 111.5061, - "step": 20260 - }, - { - "epoch": 0.08189336490018867, - "grad_norm": 871.8521118164062, - "learning_rate": 4.054e-05, - "loss": 110.9914, - "step": 20270 - }, - { - "epoch": 0.08193376616555631, - "grad_norm": 772.6279907226562, - "learning_rate": 4.0560000000000005e-05, - "loss": 145.3599, - "step": 20280 - }, - { - "epoch": 0.08197416743092394, - "grad_norm": 529.8480224609375, - "learning_rate": 4.058e-05, - "loss": 101.3806, - "step": 20290 - }, - { - "epoch": 0.08201456869629156, - "grad_norm": 2998.20556640625, - "learning_rate": 4.0600000000000004e-05, - "loss": 111.4782, - "step": 20300 - }, - { - "epoch": 0.0820549699616592, - "grad_norm": 1405.43505859375, - "learning_rate": 4.062e-05, - "loss": 163.5577, - "step": 20310 - }, - { - "epoch": 0.08209537122702683, - "grad_norm": 1239.15087890625, - "learning_rate": 4.064e-05, - "loss": 94.2629, - "step": 20320 - }, - { - "epoch": 0.08213577249239447, - "grad_norm": 1255.966552734375, - "learning_rate": 4.066e-05, - "loss": 126.8316, - "step": 20330 - }, - { - "epoch": 0.0821761737577621, - "grad_norm": 737.30126953125, - "learning_rate": 4.0680000000000004e-05, - "loss": 113.6555, - "step": 20340 - }, - { - "epoch": 0.08221657502312972, - "grad_norm": 2296.812744140625, - "learning_rate": 4.07e-05, - "loss": 109.0417, - "step": 20350 - }, - { - "epoch": 0.08225697628849736, - "grad_norm": 1186.671875, - "learning_rate": 4.072e-05, - "loss": 86.3001, - "step": 20360 - }, - { - "epoch": 0.08229737755386499, - "grad_norm": 1619.2152099609375, - "learning_rate": 4.074e-05, - "loss": 148.3378, - "step": 20370 - }, - { - "epoch": 0.08233777881923261, - "grad_norm": 3074.595703125, - "learning_rate": 4.076e-05, - "loss": 112.1198, - "step": 20380 - }, - { - "epoch": 0.08237818008460025, - "grad_norm": 2610.1181640625, - "learning_rate": 4.078e-05, - "loss": 118.642, - "step": 20390 - }, - { - "epoch": 0.08241858134996788, - "grad_norm": 944.0482788085938, - "learning_rate": 4.08e-05, - "loss": 118.4644, - "step": 20400 - }, - { - "epoch": 0.08245898261533552, - "grad_norm": 2997.534423828125, - "learning_rate": 4.0820000000000006e-05, - "loss": 95.5434, - "step": 20410 - }, - { - "epoch": 0.08249938388070314, - "grad_norm": 1543.2197265625, - "learning_rate": 4.084e-05, - "loss": 155.0003, - "step": 20420 - }, - { - "epoch": 0.08253978514607077, - "grad_norm": 749.4830932617188, - "learning_rate": 4.0860000000000005e-05, - "loss": 98.9934, - "step": 20430 - }, - { - "epoch": 0.08258018641143841, - "grad_norm": 2779.212646484375, - "learning_rate": 4.088e-05, - "loss": 115.3504, - "step": 20440 - }, - { - "epoch": 0.08262058767680604, - "grad_norm": 2938.130615234375, - "learning_rate": 4.09e-05, - "loss": 121.3548, - "step": 20450 - }, - { - "epoch": 0.08266098894217366, - "grad_norm": 364.253173828125, - "learning_rate": 4.092e-05, - "loss": 118.8423, - "step": 20460 - }, - { - "epoch": 0.0827013902075413, - "grad_norm": 956.0880737304688, - "learning_rate": 4.094e-05, - "loss": 162.3844, - "step": 20470 - }, - { - "epoch": 0.08274179147290893, - "grad_norm": 651.1348266601562, - "learning_rate": 4.096e-05, - "loss": 101.0112, - "step": 20480 - }, - { - "epoch": 0.08278219273827657, - "grad_norm": 1472.0135498046875, - "learning_rate": 4.0980000000000004e-05, - "loss": 140.2291, - "step": 20490 - }, - { - "epoch": 0.0828225940036442, - "grad_norm": 1709.4990234375, - "learning_rate": 4.1e-05, - "loss": 156.1177, - "step": 20500 - }, - { - "epoch": 0.08286299526901182, - "grad_norm": 1078.7275390625, - "learning_rate": 4.1020000000000004e-05, - "loss": 149.2032, - "step": 20510 - }, - { - "epoch": 0.08290339653437946, - "grad_norm": 1183.4559326171875, - "learning_rate": 4.104e-05, - "loss": 85.2116, - "step": 20520 - }, - { - "epoch": 0.08294379779974709, - "grad_norm": 787.4295043945312, - "learning_rate": 4.106e-05, - "loss": 143.7207, - "step": 20530 - }, - { - "epoch": 0.08298419906511471, - "grad_norm": 497.1183776855469, - "learning_rate": 4.108e-05, - "loss": 89.7213, - "step": 20540 - }, - { - "epoch": 0.08302460033048235, - "grad_norm": 1972.8258056640625, - "learning_rate": 4.11e-05, - "loss": 115.1941, - "step": 20550 - }, - { - "epoch": 0.08306500159584998, - "grad_norm": 969.597900390625, - "learning_rate": 4.1120000000000006e-05, - "loss": 102.9652, - "step": 20560 - }, - { - "epoch": 0.08310540286121762, - "grad_norm": 698.786376953125, - "learning_rate": 4.114e-05, - "loss": 63.844, - "step": 20570 - }, - { - "epoch": 0.08314580412658525, - "grad_norm": 1212.814697265625, - "learning_rate": 4.1160000000000006e-05, - "loss": 149.4642, - "step": 20580 - }, - { - "epoch": 0.08318620539195287, - "grad_norm": 552.6875610351562, - "learning_rate": 4.118e-05, - "loss": 140.7715, - "step": 20590 - }, - { - "epoch": 0.08322660665732051, - "grad_norm": 1048.22607421875, - "learning_rate": 4.12e-05, - "loss": 154.9678, - "step": 20600 - }, - { - "epoch": 0.08326700792268814, - "grad_norm": 1216.959716796875, - "learning_rate": 4.122e-05, - "loss": 167.41, - "step": 20610 - }, - { - "epoch": 0.08330740918805576, - "grad_norm": 2889.177490234375, - "learning_rate": 4.124e-05, - "loss": 142.2961, - "step": 20620 - }, - { - "epoch": 0.0833478104534234, - "grad_norm": 1590.9716796875, - "learning_rate": 4.126e-05, - "loss": 158.046, - "step": 20630 - }, - { - "epoch": 0.08338821171879103, - "grad_norm": 1097.50146484375, - "learning_rate": 4.1280000000000005e-05, - "loss": 132.7002, - "step": 20640 - }, - { - "epoch": 0.08342861298415867, - "grad_norm": 2981.930908203125, - "learning_rate": 4.13e-05, - "loss": 95.1602, - "step": 20650 - }, - { - "epoch": 0.0834690142495263, - "grad_norm": 2784.6552734375, - "learning_rate": 4.1320000000000004e-05, - "loss": 173.26, - "step": 20660 - }, - { - "epoch": 0.08350941551489392, - "grad_norm": 1329.2828369140625, - "learning_rate": 4.134e-05, - "loss": 162.6604, - "step": 20670 - }, - { - "epoch": 0.08354981678026156, - "grad_norm": 808.767822265625, - "learning_rate": 4.1360000000000004e-05, - "loss": 112.3144, - "step": 20680 - }, - { - "epoch": 0.08359021804562919, - "grad_norm": 1342.0172119140625, - "learning_rate": 4.138e-05, - "loss": 131.2333, - "step": 20690 - }, - { - "epoch": 0.08363061931099681, - "grad_norm": 848.967529296875, - "learning_rate": 4.14e-05, - "loss": 147.4289, - "step": 20700 - }, - { - "epoch": 0.08367102057636445, - "grad_norm": 566.413818359375, - "learning_rate": 4.142000000000001e-05, - "loss": 150.0216, - "step": 20710 - }, - { - "epoch": 0.08371142184173208, - "grad_norm": 707.4857788085938, - "learning_rate": 4.144e-05, - "loss": 113.268, - "step": 20720 - }, - { - "epoch": 0.08375182310709972, - "grad_norm": 851.73779296875, - "learning_rate": 4.1460000000000006e-05, - "loss": 128.7113, - "step": 20730 - }, - { - "epoch": 0.08379222437246735, - "grad_norm": 28336.75390625, - "learning_rate": 4.148e-05, - "loss": 211.0017, - "step": 20740 - }, - { - "epoch": 0.08383262563783497, - "grad_norm": 1380.7489013671875, - "learning_rate": 4.15e-05, - "loss": 72.6195, - "step": 20750 - }, - { - "epoch": 0.08387302690320261, - "grad_norm": 1030.243408203125, - "learning_rate": 4.152e-05, - "loss": 127.0354, - "step": 20760 - }, - { - "epoch": 0.08391342816857024, - "grad_norm": 7085.2041015625, - "learning_rate": 4.154e-05, - "loss": 119.6303, - "step": 20770 - }, - { - "epoch": 0.08395382943393787, - "grad_norm": 783.239013671875, - "learning_rate": 4.156e-05, - "loss": 123.3007, - "step": 20780 - }, - { - "epoch": 0.0839942306993055, - "grad_norm": 1146.0455322265625, - "learning_rate": 4.1580000000000005e-05, - "loss": 111.6334, - "step": 20790 - }, - { - "epoch": 0.08403463196467313, - "grad_norm": 3808.298583984375, - "learning_rate": 4.16e-05, - "loss": 155.187, - "step": 20800 - }, - { - "epoch": 0.08407503323004077, - "grad_norm": 2816.82275390625, - "learning_rate": 4.1620000000000005e-05, - "loss": 146.4871, - "step": 20810 - }, - { - "epoch": 0.0841154344954084, - "grad_norm": 1327.615966796875, - "learning_rate": 4.164e-05, - "loss": 99.5535, - "step": 20820 - }, - { - "epoch": 0.08415583576077602, - "grad_norm": 759.77490234375, - "learning_rate": 4.1660000000000004e-05, - "loss": 135.0864, - "step": 20830 - }, - { - "epoch": 0.08419623702614366, - "grad_norm": 1578.577392578125, - "learning_rate": 4.168e-05, - "loss": 134.005, - "step": 20840 - }, - { - "epoch": 0.08423663829151129, - "grad_norm": 845.4844970703125, - "learning_rate": 4.17e-05, - "loss": 72.3538, - "step": 20850 - }, - { - "epoch": 0.08427703955687892, - "grad_norm": 681.817626953125, - "learning_rate": 4.172e-05, - "loss": 89.2945, - "step": 20860 - }, - { - "epoch": 0.08431744082224656, - "grad_norm": 610.7493896484375, - "learning_rate": 4.1740000000000004e-05, - "loss": 88.0488, - "step": 20870 - }, - { - "epoch": 0.08435784208761418, - "grad_norm": 1551.756103515625, - "learning_rate": 4.176000000000001e-05, - "loss": 144.3311, - "step": 20880 - }, - { - "epoch": 0.08439824335298182, - "grad_norm": 941.2823486328125, - "learning_rate": 4.178e-05, - "loss": 108.5786, - "step": 20890 - }, - { - "epoch": 0.08443864461834945, - "grad_norm": 2806.956787109375, - "learning_rate": 4.18e-05, - "loss": 139.1996, - "step": 20900 - }, - { - "epoch": 0.08447904588371707, - "grad_norm": 2764.781982421875, - "learning_rate": 4.182e-05, - "loss": 202.5234, - "step": 20910 - }, - { - "epoch": 0.08451944714908471, - "grad_norm": 4884.42431640625, - "learning_rate": 4.184e-05, - "loss": 113.7894, - "step": 20920 - }, - { - "epoch": 0.08455984841445234, - "grad_norm": 752.3662719726562, - "learning_rate": 4.186e-05, - "loss": 108.6684, - "step": 20930 - }, - { - "epoch": 0.08460024967981997, - "grad_norm": 997.4862060546875, - "learning_rate": 4.1880000000000006e-05, - "loss": 119.868, - "step": 20940 - }, - { - "epoch": 0.0846406509451876, - "grad_norm": 1489.1739501953125, - "learning_rate": 4.19e-05, - "loss": 118.152, - "step": 20950 - }, - { - "epoch": 0.08468105221055523, - "grad_norm": 509.3271789550781, - "learning_rate": 4.1920000000000005e-05, - "loss": 95.0759, - "step": 20960 - }, - { - "epoch": 0.08472145347592287, - "grad_norm": 1403.126708984375, - "learning_rate": 4.194e-05, - "loss": 130.0431, - "step": 20970 - }, - { - "epoch": 0.0847618547412905, - "grad_norm": 2763.188232421875, - "learning_rate": 4.196e-05, - "loss": 82.064, - "step": 20980 - }, - { - "epoch": 0.08480225600665812, - "grad_norm": 643.8973388671875, - "learning_rate": 4.198e-05, - "loss": 120.0969, - "step": 20990 - }, - { - "epoch": 0.08484265727202576, - "grad_norm": 953.9916381835938, - "learning_rate": 4.2e-05, - "loss": 99.3082, - "step": 21000 - }, - { - "epoch": 0.08488305853739339, - "grad_norm": 7278.77392578125, - "learning_rate": 4.202e-05, - "loss": 185.1893, - "step": 21010 - }, - { - "epoch": 0.08492345980276102, - "grad_norm": 4063.332763671875, - "learning_rate": 4.2040000000000004e-05, - "loss": 172.6903, - "step": 21020 - }, - { - "epoch": 0.08496386106812866, - "grad_norm": 355.11962890625, - "learning_rate": 4.206e-05, - "loss": 97.8671, - "step": 21030 - }, - { - "epoch": 0.08500426233349628, - "grad_norm": 803.3870239257812, - "learning_rate": 4.2080000000000004e-05, - "loss": 90.9247, - "step": 21040 - }, - { - "epoch": 0.08504466359886392, - "grad_norm": 1197.3392333984375, - "learning_rate": 4.21e-05, - "loss": 111.2428, - "step": 21050 - }, - { - "epoch": 0.08508506486423155, - "grad_norm": 1060.21728515625, - "learning_rate": 4.212e-05, - "loss": 80.9763, - "step": 21060 - }, - { - "epoch": 0.08512546612959918, - "grad_norm": 553.6648559570312, - "learning_rate": 4.214e-05, - "loss": 164.6878, - "step": 21070 - }, - { - "epoch": 0.08516586739496682, - "grad_norm": 1732.870361328125, - "learning_rate": 4.2159999999999996e-05, - "loss": 127.5546, - "step": 21080 - }, - { - "epoch": 0.08520626866033444, - "grad_norm": 3130.48828125, - "learning_rate": 4.2180000000000006e-05, - "loss": 125.8488, - "step": 21090 - }, - { - "epoch": 0.08524666992570207, - "grad_norm": 1232.9683837890625, - "learning_rate": 4.22e-05, - "loss": 110.2355, - "step": 21100 - }, - { - "epoch": 0.08528707119106971, - "grad_norm": 1068.6566162109375, - "learning_rate": 4.2220000000000006e-05, - "loss": 116.9769, - "step": 21110 - }, - { - "epoch": 0.08532747245643733, - "grad_norm": 687.2569580078125, - "learning_rate": 4.224e-05, - "loss": 124.6508, - "step": 21120 - }, - { - "epoch": 0.08536787372180497, - "grad_norm": 680.8721313476562, - "learning_rate": 4.226e-05, - "loss": 123.4505, - "step": 21130 - }, - { - "epoch": 0.0854082749871726, - "grad_norm": 1629.7750244140625, - "learning_rate": 4.228e-05, - "loss": 165.3711, - "step": 21140 - }, - { - "epoch": 0.08544867625254023, - "grad_norm": 1123.4427490234375, - "learning_rate": 4.23e-05, - "loss": 104.645, - "step": 21150 - }, - { - "epoch": 0.08548907751790787, - "grad_norm": 684.88134765625, - "learning_rate": 4.232e-05, - "loss": 87.5831, - "step": 21160 - }, - { - "epoch": 0.08552947878327549, - "grad_norm": 1038.5843505859375, - "learning_rate": 4.2340000000000005e-05, - "loss": 111.1774, - "step": 21170 - }, - { - "epoch": 0.08556988004864312, - "grad_norm": 441.3775634765625, - "learning_rate": 4.236e-05, - "loss": 97.0562, - "step": 21180 - }, - { - "epoch": 0.08561028131401076, - "grad_norm": 1054.5010986328125, - "learning_rate": 4.2380000000000004e-05, - "loss": 137.0658, - "step": 21190 - }, - { - "epoch": 0.08565068257937838, - "grad_norm": 2301.26708984375, - "learning_rate": 4.24e-05, - "loss": 121.703, - "step": 21200 - }, - { - "epoch": 0.08569108384474602, - "grad_norm": 1787.3651123046875, - "learning_rate": 4.2420000000000004e-05, - "loss": 168.9554, - "step": 21210 - }, - { - "epoch": 0.08573148511011365, - "grad_norm": 3590.007568359375, - "learning_rate": 4.244e-05, - "loss": 162.3379, - "step": 21220 - }, - { - "epoch": 0.08577188637548128, - "grad_norm": 1609.46142578125, - "learning_rate": 4.246e-05, - "loss": 124.5303, - "step": 21230 - }, - { - "epoch": 0.08581228764084892, - "grad_norm": 558.2980346679688, - "learning_rate": 4.248e-05, - "loss": 95.647, - "step": 21240 - }, - { - "epoch": 0.08585268890621654, - "grad_norm": 1266.0831298828125, - "learning_rate": 4.25e-05, - "loss": 95.554, - "step": 21250 - }, - { - "epoch": 0.08589309017158417, - "grad_norm": 1427.5911865234375, - "learning_rate": 4.2520000000000006e-05, - "loss": 136.6243, - "step": 21260 - }, - { - "epoch": 0.08593349143695181, - "grad_norm": 1910.5550537109375, - "learning_rate": 4.254e-05, - "loss": 104.6323, - "step": 21270 - }, - { - "epoch": 0.08597389270231943, - "grad_norm": 2682.699951171875, - "learning_rate": 4.256e-05, - "loss": 123.6776, - "step": 21280 - }, - { - "epoch": 0.08601429396768706, - "grad_norm": 7804.3984375, - "learning_rate": 4.258e-05, - "loss": 165.5531, - "step": 21290 - }, - { - "epoch": 0.0860546952330547, - "grad_norm": 790.6614990234375, - "learning_rate": 4.26e-05, - "loss": 114.8768, - "step": 21300 - }, - { - "epoch": 0.08609509649842233, - "grad_norm": 1748.3701171875, - "learning_rate": 4.262e-05, - "loss": 120.4578, - "step": 21310 - }, - { - "epoch": 0.08613549776378997, - "grad_norm": 948.3206176757812, - "learning_rate": 4.2640000000000005e-05, - "loss": 100.5346, - "step": 21320 - }, - { - "epoch": 0.0861758990291576, - "grad_norm": 695.2459106445312, - "learning_rate": 4.266e-05, - "loss": 97.4773, - "step": 21330 - }, - { - "epoch": 0.08621630029452522, - "grad_norm": 2172.08837890625, - "learning_rate": 4.2680000000000005e-05, - "loss": 132.5773, - "step": 21340 - }, - { - "epoch": 0.08625670155989286, - "grad_norm": 867.505859375, - "learning_rate": 4.27e-05, - "loss": 104.199, - "step": 21350 - }, - { - "epoch": 0.08629710282526049, - "grad_norm": 1258.5723876953125, - "learning_rate": 4.2720000000000004e-05, - "loss": 99.1206, - "step": 21360 - }, - { - "epoch": 0.08633750409062811, - "grad_norm": 974.3964233398438, - "learning_rate": 4.274e-05, - "loss": 98.0185, - "step": 21370 - }, - { - "epoch": 0.08637790535599575, - "grad_norm": 1084.6614990234375, - "learning_rate": 4.276e-05, - "loss": 95.9101, - "step": 21380 - }, - { - "epoch": 0.08641830662136338, - "grad_norm": 1883.6488037109375, - "learning_rate": 4.278e-05, - "loss": 125.3519, - "step": 21390 - }, - { - "epoch": 0.08645870788673102, - "grad_norm": 3283.783935546875, - "learning_rate": 4.2800000000000004e-05, - "loss": 166.7042, - "step": 21400 - }, - { - "epoch": 0.08649910915209864, - "grad_norm": 942.7132568359375, - "learning_rate": 4.282000000000001e-05, - "loss": 154.2377, - "step": 21410 - }, - { - "epoch": 0.08653951041746627, - "grad_norm": 1186.5281982421875, - "learning_rate": 4.284e-05, - "loss": 104.8457, - "step": 21420 - }, - { - "epoch": 0.08657991168283391, - "grad_norm": 1021.5708618164062, - "learning_rate": 4.286e-05, - "loss": 100.3719, - "step": 21430 - }, - { - "epoch": 0.08662031294820154, - "grad_norm": 1118.336669921875, - "learning_rate": 4.288e-05, - "loss": 145.7167, - "step": 21440 - }, - { - "epoch": 0.08666071421356916, - "grad_norm": 686.393310546875, - "learning_rate": 4.29e-05, - "loss": 107.4845, - "step": 21450 - }, - { - "epoch": 0.0867011154789368, - "grad_norm": 10137.349609375, - "learning_rate": 4.292e-05, - "loss": 131.5519, - "step": 21460 - }, - { - "epoch": 0.08674151674430443, - "grad_norm": 4428.59521484375, - "learning_rate": 4.2940000000000006e-05, - "loss": 163.3146, - "step": 21470 - }, - { - "epoch": 0.08678191800967207, - "grad_norm": 816.3525390625, - "learning_rate": 4.296e-05, - "loss": 69.2457, - "step": 21480 - }, - { - "epoch": 0.0868223192750397, - "grad_norm": 1754.2156982421875, - "learning_rate": 4.2980000000000005e-05, - "loss": 163.8832, - "step": 21490 - }, - { - "epoch": 0.08686272054040732, - "grad_norm": 1116.2850341796875, - "learning_rate": 4.3e-05, - "loss": 86.4771, - "step": 21500 - }, - { - "epoch": 0.08690312180577496, - "grad_norm": 542.83984375, - "learning_rate": 4.3020000000000005e-05, - "loss": 121.8374, - "step": 21510 - }, - { - "epoch": 0.08694352307114259, - "grad_norm": 1956.363037109375, - "learning_rate": 4.304e-05, - "loss": 108.1872, - "step": 21520 - }, - { - "epoch": 0.08698392433651021, - "grad_norm": 1243.8492431640625, - "learning_rate": 4.306e-05, - "loss": 115.3768, - "step": 21530 - }, - { - "epoch": 0.08702432560187785, - "grad_norm": 1239.7283935546875, - "learning_rate": 4.308e-05, - "loss": 81.8431, - "step": 21540 - }, - { - "epoch": 0.08706472686724548, - "grad_norm": 759.3988037109375, - "learning_rate": 4.3100000000000004e-05, - "loss": 128.8443, - "step": 21550 - }, - { - "epoch": 0.08710512813261312, - "grad_norm": 1724.4061279296875, - "learning_rate": 4.312000000000001e-05, - "loss": 132.2185, - "step": 21560 - }, - { - "epoch": 0.08714552939798074, - "grad_norm": 815.0044555664062, - "learning_rate": 4.3140000000000004e-05, - "loss": 76.6797, - "step": 21570 - }, - { - "epoch": 0.08718593066334837, - "grad_norm": 458.9404602050781, - "learning_rate": 4.316e-05, - "loss": 115.9924, - "step": 21580 - }, - { - "epoch": 0.08722633192871601, - "grad_norm": 553.5025024414062, - "learning_rate": 4.318e-05, - "loss": 121.1555, - "step": 21590 - }, - { - "epoch": 0.08726673319408364, - "grad_norm": 1283.981689453125, - "learning_rate": 4.32e-05, - "loss": 134.989, - "step": 21600 - }, - { - "epoch": 0.08730713445945126, - "grad_norm": 438.9365234375, - "learning_rate": 4.3219999999999996e-05, - "loss": 96.8193, - "step": 21610 - }, - { - "epoch": 0.0873475357248189, - "grad_norm": 963.8663940429688, - "learning_rate": 4.324e-05, - "loss": 109.2355, - "step": 21620 - }, - { - "epoch": 0.08738793699018653, - "grad_norm": 389.078369140625, - "learning_rate": 4.326e-05, - "loss": 179.7937, - "step": 21630 - }, - { - "epoch": 0.08742833825555417, - "grad_norm": 1283.707763671875, - "learning_rate": 4.3280000000000006e-05, - "loss": 93.5443, - "step": 21640 - }, - { - "epoch": 0.0874687395209218, - "grad_norm": 1297.939208984375, - "learning_rate": 4.33e-05, - "loss": 119.5012, - "step": 21650 - }, - { - "epoch": 0.08750914078628942, - "grad_norm": 1461.7098388671875, - "learning_rate": 4.332e-05, - "loss": 112.2618, - "step": 21660 - }, - { - "epoch": 0.08754954205165706, - "grad_norm": 370.33056640625, - "learning_rate": 4.334e-05, - "loss": 118.6215, - "step": 21670 - }, - { - "epoch": 0.08758994331702469, - "grad_norm": 1230.4083251953125, - "learning_rate": 4.336e-05, - "loss": 83.1036, - "step": 21680 - }, - { - "epoch": 0.08763034458239231, - "grad_norm": 713.216796875, - "learning_rate": 4.338e-05, - "loss": 116.6274, - "step": 21690 - }, - { - "epoch": 0.08767074584775995, - "grad_norm": 333.86663818359375, - "learning_rate": 4.3400000000000005e-05, - "loss": 138.7234, - "step": 21700 - }, - { - "epoch": 0.08771114711312758, - "grad_norm": 1813.9105224609375, - "learning_rate": 4.342e-05, - "loss": 124.1437, - "step": 21710 - }, - { - "epoch": 0.08775154837849522, - "grad_norm": 1551.122802734375, - "learning_rate": 4.3440000000000004e-05, - "loss": 117.1614, - "step": 21720 - }, - { - "epoch": 0.08779194964386285, - "grad_norm": 556.9833984375, - "learning_rate": 4.346e-05, - "loss": 85.8124, - "step": 21730 - }, - { - "epoch": 0.08783235090923047, - "grad_norm": 1249.522705078125, - "learning_rate": 4.3480000000000004e-05, - "loss": 82.4492, - "step": 21740 - }, - { - "epoch": 0.08787275217459811, - "grad_norm": 644.7138671875, - "learning_rate": 4.35e-05, - "loss": 103.3423, - "step": 21750 - }, - { - "epoch": 0.08791315343996574, - "grad_norm": 893.0758056640625, - "learning_rate": 4.352e-05, - "loss": 140.0829, - "step": 21760 - }, - { - "epoch": 0.08795355470533336, - "grad_norm": 527.2681274414062, - "learning_rate": 4.354e-05, - "loss": 99.5734, - "step": 21770 - }, - { - "epoch": 0.087993955970701, - "grad_norm": 623.2860717773438, - "learning_rate": 4.356e-05, - "loss": 75.2396, - "step": 21780 - }, - { - "epoch": 0.08803435723606863, - "grad_norm": 590.4302978515625, - "learning_rate": 4.3580000000000006e-05, - "loss": 117.1983, - "step": 21790 - }, - { - "epoch": 0.08807475850143627, - "grad_norm": 945.4894409179688, - "learning_rate": 4.36e-05, - "loss": 156.6693, - "step": 21800 - }, - { - "epoch": 0.0881151597668039, - "grad_norm": 892.6826782226562, - "learning_rate": 4.362e-05, - "loss": 128.0761, - "step": 21810 - }, - { - "epoch": 0.08815556103217152, - "grad_norm": 1329.8853759765625, - "learning_rate": 4.364e-05, - "loss": 81.0603, - "step": 21820 - }, - { - "epoch": 0.08819596229753916, - "grad_norm": 3241.863525390625, - "learning_rate": 4.366e-05, - "loss": 114.7735, - "step": 21830 - }, - { - "epoch": 0.08823636356290679, - "grad_norm": 621.0053100585938, - "learning_rate": 4.368e-05, - "loss": 99.9302, - "step": 21840 - }, - { - "epoch": 0.08827676482827441, - "grad_norm": 571.9194946289062, - "learning_rate": 4.3700000000000005e-05, - "loss": 60.8999, - "step": 21850 - }, - { - "epoch": 0.08831716609364205, - "grad_norm": 1223.1873779296875, - "learning_rate": 4.372e-05, - "loss": 115.3964, - "step": 21860 - }, - { - "epoch": 0.08835756735900968, - "grad_norm": 1025.819580078125, - "learning_rate": 4.3740000000000005e-05, - "loss": 115.1883, - "step": 21870 - }, - { - "epoch": 0.08839796862437732, - "grad_norm": 1899.594970703125, - "learning_rate": 4.376e-05, - "loss": 118.3364, - "step": 21880 - }, - { - "epoch": 0.08843836988974495, - "grad_norm": 1690.5460205078125, - "learning_rate": 4.3780000000000004e-05, - "loss": 88.396, - "step": 21890 - }, - { - "epoch": 0.08847877115511257, - "grad_norm": 1255.8304443359375, - "learning_rate": 4.38e-05, - "loss": 95.6884, - "step": 21900 - }, - { - "epoch": 0.08851917242048021, - "grad_norm": 2578.5478515625, - "learning_rate": 4.382e-05, - "loss": 143.0776, - "step": 21910 - }, - { - "epoch": 0.08855957368584784, - "grad_norm": 4696.71826171875, - "learning_rate": 4.384e-05, - "loss": 95.2831, - "step": 21920 - }, - { - "epoch": 0.08859997495121547, - "grad_norm": 3200.905517578125, - "learning_rate": 4.3860000000000004e-05, - "loss": 98.7196, - "step": 21930 - }, - { - "epoch": 0.0886403762165831, - "grad_norm": 771.8877563476562, - "learning_rate": 4.388000000000001e-05, - "loss": 109.9122, - "step": 21940 - }, - { - "epoch": 0.08868077748195073, - "grad_norm": 2375.28515625, - "learning_rate": 4.39e-05, - "loss": 97.5147, - "step": 21950 - }, - { - "epoch": 0.08872117874731837, - "grad_norm": 1200.1995849609375, - "learning_rate": 4.392e-05, - "loss": 163.8434, - "step": 21960 - }, - { - "epoch": 0.088761580012686, - "grad_norm": 743.7747192382812, - "learning_rate": 4.394e-05, - "loss": 152.2877, - "step": 21970 - }, - { - "epoch": 0.08880198127805362, - "grad_norm": 748.3530883789062, - "learning_rate": 4.396e-05, - "loss": 102.9261, - "step": 21980 - }, - { - "epoch": 0.08884238254342126, - "grad_norm": 0.0, - "learning_rate": 4.398e-05, - "loss": 86.7398, - "step": 21990 - }, - { - "epoch": 0.08888278380878889, - "grad_norm": 641.658935546875, - "learning_rate": 4.4000000000000006e-05, - "loss": 115.4044, - "step": 22000 - }, - { - "epoch": 0.08892318507415652, - "grad_norm": 757.2327880859375, - "learning_rate": 4.402e-05, - "loss": 106.9586, - "step": 22010 - }, - { - "epoch": 0.08896358633952416, - "grad_norm": 1741.6318359375, - "learning_rate": 4.4040000000000005e-05, - "loss": 61.3795, - "step": 22020 - }, - { - "epoch": 0.08900398760489178, - "grad_norm": 3420.192138671875, - "learning_rate": 4.406e-05, - "loss": 150.3777, - "step": 22030 - }, - { - "epoch": 0.08904438887025942, - "grad_norm": 785.1651000976562, - "learning_rate": 4.4080000000000005e-05, - "loss": 112.2344, - "step": 22040 - }, - { - "epoch": 0.08908479013562705, - "grad_norm": 2430.542236328125, - "learning_rate": 4.41e-05, - "loss": 84.5093, - "step": 22050 - }, - { - "epoch": 0.08912519140099467, - "grad_norm": 1095.4105224609375, - "learning_rate": 4.412e-05, - "loss": 129.3121, - "step": 22060 - }, - { - "epoch": 0.08916559266636231, - "grad_norm": 654.0252075195312, - "learning_rate": 4.414e-05, - "loss": 105.0435, - "step": 22070 - }, - { - "epoch": 0.08920599393172994, - "grad_norm": 6289.986328125, - "learning_rate": 4.4160000000000004e-05, - "loss": 141.9109, - "step": 22080 - }, - { - "epoch": 0.08924639519709757, - "grad_norm": 3365.2255859375, - "learning_rate": 4.418000000000001e-05, - "loss": 164.1694, - "step": 22090 - }, - { - "epoch": 0.0892867964624652, - "grad_norm": 1369.172607421875, - "learning_rate": 4.4200000000000004e-05, - "loss": 133.5227, - "step": 22100 - }, - { - "epoch": 0.08932719772783283, - "grad_norm": 2439.97265625, - "learning_rate": 4.422e-05, - "loss": 126.4849, - "step": 22110 - }, - { - "epoch": 0.08936759899320047, - "grad_norm": 1391.3780517578125, - "learning_rate": 4.424e-05, - "loss": 103.1816, - "step": 22120 - }, - { - "epoch": 0.0894080002585681, - "grad_norm": 2001.207275390625, - "learning_rate": 4.426e-05, - "loss": 124.9317, - "step": 22130 - }, - { - "epoch": 0.08944840152393572, - "grad_norm": 1090.6702880859375, - "learning_rate": 4.428e-05, - "loss": 86.8185, - "step": 22140 - }, - { - "epoch": 0.08948880278930336, - "grad_norm": 1352.2353515625, - "learning_rate": 4.43e-05, - "loss": 123.4582, - "step": 22150 - }, - { - "epoch": 0.08952920405467099, - "grad_norm": 1216.326171875, - "learning_rate": 4.432e-05, - "loss": 77.8633, - "step": 22160 - }, - { - "epoch": 0.08956960532003862, - "grad_norm": 805.3572998046875, - "learning_rate": 4.4340000000000006e-05, - "loss": 112.9714, - "step": 22170 - }, - { - "epoch": 0.08961000658540626, - "grad_norm": 558.133544921875, - "learning_rate": 4.436e-05, - "loss": 89.9262, - "step": 22180 - }, - { - "epoch": 0.08965040785077388, - "grad_norm": 2130.873046875, - "learning_rate": 4.438e-05, - "loss": 112.3018, - "step": 22190 - }, - { - "epoch": 0.08969080911614152, - "grad_norm": 1761.7283935546875, - "learning_rate": 4.44e-05, - "loss": 111.0985, - "step": 22200 - }, - { - "epoch": 0.08973121038150915, - "grad_norm": 915.045166015625, - "learning_rate": 4.442e-05, - "loss": 107.6649, - "step": 22210 - }, - { - "epoch": 0.08977161164687678, - "grad_norm": 1191.688232421875, - "learning_rate": 4.444e-05, - "loss": 165.1702, - "step": 22220 - }, - { - "epoch": 0.08981201291224442, - "grad_norm": 496.5788879394531, - "learning_rate": 4.4460000000000005e-05, - "loss": 82.1613, - "step": 22230 - }, - { - "epoch": 0.08985241417761204, - "grad_norm": 500.4427185058594, - "learning_rate": 4.448e-05, - "loss": 154.052, - "step": 22240 - }, - { - "epoch": 0.08989281544297967, - "grad_norm": 1123.475341796875, - "learning_rate": 4.4500000000000004e-05, - "loss": 103.6876, - "step": 22250 - }, - { - "epoch": 0.08993321670834731, - "grad_norm": 763.8755493164062, - "learning_rate": 4.452e-05, - "loss": 105.7942, - "step": 22260 - }, - { - "epoch": 0.08997361797371493, - "grad_norm": 659.6030883789062, - "learning_rate": 4.4540000000000004e-05, - "loss": 111.4345, - "step": 22270 - }, - { - "epoch": 0.09001401923908257, - "grad_norm": 649.8712158203125, - "learning_rate": 4.456e-05, - "loss": 78.0137, - "step": 22280 - }, - { - "epoch": 0.0900544205044502, - "grad_norm": 510.55853271484375, - "learning_rate": 4.458e-05, - "loss": 142.013, - "step": 22290 - }, - { - "epoch": 0.09009482176981783, - "grad_norm": 3399.17236328125, - "learning_rate": 4.46e-05, - "loss": 159.2714, - "step": 22300 - }, - { - "epoch": 0.09013522303518547, - "grad_norm": 998.1200561523438, - "learning_rate": 4.462e-05, - "loss": 100.7152, - "step": 22310 - }, - { - "epoch": 0.09017562430055309, - "grad_norm": 531.780517578125, - "learning_rate": 4.4640000000000006e-05, - "loss": 168.3419, - "step": 22320 - }, - { - "epoch": 0.09021602556592072, - "grad_norm": 910.6287841796875, - "learning_rate": 4.466e-05, - "loss": 98.8065, - "step": 22330 - }, - { - "epoch": 0.09025642683128836, - "grad_norm": 1744.2012939453125, - "learning_rate": 4.468e-05, - "loss": 105.1678, - "step": 22340 - }, - { - "epoch": 0.09029682809665598, - "grad_norm": 597.9017944335938, - "learning_rate": 4.47e-05, - "loss": 112.0376, - "step": 22350 - }, - { - "epoch": 0.09033722936202362, - "grad_norm": 769.6776733398438, - "learning_rate": 4.472e-05, - "loss": 94.9958, - "step": 22360 - }, - { - "epoch": 0.09037763062739125, - "grad_norm": 1181.265625, - "learning_rate": 4.474e-05, - "loss": 110.0878, - "step": 22370 - }, - { - "epoch": 0.09041803189275888, - "grad_norm": 757.1105346679688, - "learning_rate": 4.4760000000000005e-05, - "loss": 100.3439, - "step": 22380 - }, - { - "epoch": 0.09045843315812652, - "grad_norm": 717.3568115234375, - "learning_rate": 4.478e-05, - "loss": 134.2481, - "step": 22390 - }, - { - "epoch": 0.09049883442349414, - "grad_norm": 892.8013305664062, - "learning_rate": 4.4800000000000005e-05, - "loss": 101.8901, - "step": 22400 - }, - { - "epoch": 0.09053923568886177, - "grad_norm": 796.65380859375, - "learning_rate": 4.482e-05, - "loss": 107.8212, - "step": 22410 - }, - { - "epoch": 0.09057963695422941, - "grad_norm": 599.0886840820312, - "learning_rate": 4.4840000000000004e-05, - "loss": 112.4707, - "step": 22420 - }, - { - "epoch": 0.09062003821959703, - "grad_norm": 384.7815246582031, - "learning_rate": 4.486e-05, - "loss": 103.7676, - "step": 22430 - }, - { - "epoch": 0.09066043948496467, - "grad_norm": 0.0, - "learning_rate": 4.488e-05, - "loss": 83.413, - "step": 22440 - }, - { - "epoch": 0.0907008407503323, - "grad_norm": 1596.82373046875, - "learning_rate": 4.49e-05, - "loss": 125.9112, - "step": 22450 - }, - { - "epoch": 0.09074124201569993, - "grad_norm": 8902.1123046875, - "learning_rate": 4.4920000000000004e-05, - "loss": 158.18, - "step": 22460 - }, - { - "epoch": 0.09078164328106757, - "grad_norm": 809.1474609375, - "learning_rate": 4.494000000000001e-05, - "loss": 129.158, - "step": 22470 - }, - { - "epoch": 0.0908220445464352, - "grad_norm": 657.6831665039062, - "learning_rate": 4.496e-05, - "loss": 121.9886, - "step": 22480 - }, - { - "epoch": 0.09086244581180282, - "grad_norm": 866.2117309570312, - "learning_rate": 4.498e-05, - "loss": 123.8151, - "step": 22490 - }, - { - "epoch": 0.09090284707717046, - "grad_norm": 585.983642578125, - "learning_rate": 4.5e-05, - "loss": 74.8665, - "step": 22500 - }, - { - "epoch": 0.09094324834253809, - "grad_norm": 1273.0323486328125, - "learning_rate": 4.502e-05, - "loss": 96.7427, - "step": 22510 - }, - { - "epoch": 0.09098364960790573, - "grad_norm": 1038.0701904296875, - "learning_rate": 4.504e-05, - "loss": 85.5748, - "step": 22520 - }, - { - "epoch": 0.09102405087327335, - "grad_norm": 1610.0634765625, - "learning_rate": 4.506e-05, - "loss": 92.2955, - "step": 22530 - }, - { - "epoch": 0.09106445213864098, - "grad_norm": 2549.61572265625, - "learning_rate": 4.508e-05, - "loss": 147.1345, - "step": 22540 - }, - { - "epoch": 0.09110485340400862, - "grad_norm": 931.28173828125, - "learning_rate": 4.5100000000000005e-05, - "loss": 82.7087, - "step": 22550 - }, - { - "epoch": 0.09114525466937624, - "grad_norm": 765.2784423828125, - "learning_rate": 4.512e-05, - "loss": 135.216, - "step": 22560 - }, - { - "epoch": 0.09118565593474387, - "grad_norm": 597.312255859375, - "learning_rate": 4.5140000000000005e-05, - "loss": 147.5036, - "step": 22570 - }, - { - "epoch": 0.09122605720011151, - "grad_norm": 756.5013427734375, - "learning_rate": 4.516e-05, - "loss": 128.549, - "step": 22580 - }, - { - "epoch": 0.09126645846547914, - "grad_norm": 719.6322631835938, - "learning_rate": 4.518e-05, - "loss": 80.9833, - "step": 22590 - }, - { - "epoch": 0.09130685973084678, - "grad_norm": 721.7781982421875, - "learning_rate": 4.52e-05, - "loss": 144.421, - "step": 22600 - }, - { - "epoch": 0.0913472609962144, - "grad_norm": 1193.63232421875, - "learning_rate": 4.5220000000000004e-05, - "loss": 99.2124, - "step": 22610 - }, - { - "epoch": 0.09138766226158203, - "grad_norm": 886.3824462890625, - "learning_rate": 4.524000000000001e-05, - "loss": 117.6559, - "step": 22620 - }, - { - "epoch": 0.09142806352694967, - "grad_norm": 869.9031372070312, - "learning_rate": 4.5260000000000004e-05, - "loss": 128.7948, - "step": 22630 - }, - { - "epoch": 0.0914684647923173, - "grad_norm": 323.6915283203125, - "learning_rate": 4.528e-05, - "loss": 103.8211, - "step": 22640 - }, - { - "epoch": 0.09150886605768492, - "grad_norm": 648.2799072265625, - "learning_rate": 4.53e-05, - "loss": 124.0231, - "step": 22650 - }, - { - "epoch": 0.09154926732305256, - "grad_norm": 3163.934814453125, - "learning_rate": 4.532e-05, - "loss": 133.5165, - "step": 22660 - }, - { - "epoch": 0.09158966858842019, - "grad_norm": 694.5257568359375, - "learning_rate": 4.534e-05, - "loss": 117.9862, - "step": 22670 - }, - { - "epoch": 0.09163006985378783, - "grad_norm": 1048.819091796875, - "learning_rate": 4.536e-05, - "loss": 110.8099, - "step": 22680 - }, - { - "epoch": 0.09167047111915545, - "grad_norm": 760.2977905273438, - "learning_rate": 4.538e-05, - "loss": 93.8862, - "step": 22690 - }, - { - "epoch": 0.09171087238452308, - "grad_norm": 1749.94580078125, - "learning_rate": 4.5400000000000006e-05, - "loss": 141.0014, - "step": 22700 - }, - { - "epoch": 0.09175127364989072, - "grad_norm": 0.0, - "learning_rate": 4.542e-05, - "loss": 100.4336, - "step": 22710 - }, - { - "epoch": 0.09179167491525834, - "grad_norm": 520.8677978515625, - "learning_rate": 4.5440000000000005e-05, - "loss": 85.8024, - "step": 22720 - }, - { - "epoch": 0.09183207618062597, - "grad_norm": 1674.249755859375, - "learning_rate": 4.546e-05, - "loss": 138.2154, - "step": 22730 - }, - { - "epoch": 0.09187247744599361, - "grad_norm": 917.3933715820312, - "learning_rate": 4.548e-05, - "loss": 103.3672, - "step": 22740 - }, - { - "epoch": 0.09191287871136124, - "grad_norm": 1019.2123413085938, - "learning_rate": 4.55e-05, - "loss": 56.5289, - "step": 22750 - }, - { - "epoch": 0.09195327997672888, - "grad_norm": 2317.489501953125, - "learning_rate": 4.5520000000000005e-05, - "loss": 155.0266, - "step": 22760 - }, - { - "epoch": 0.0919936812420965, - "grad_norm": 819.0970458984375, - "learning_rate": 4.554000000000001e-05, - "loss": 116.7189, - "step": 22770 - }, - { - "epoch": 0.09203408250746413, - "grad_norm": 1225.271240234375, - "learning_rate": 4.5560000000000004e-05, - "loss": 111.6631, - "step": 22780 - }, - { - "epoch": 0.09207448377283177, - "grad_norm": 1159.3927001953125, - "learning_rate": 4.558e-05, - "loss": 66.4148, - "step": 22790 - }, - { - "epoch": 0.0921148850381994, - "grad_norm": 1292.975341796875, - "learning_rate": 4.5600000000000004e-05, - "loss": 105.9983, - "step": 22800 - }, - { - "epoch": 0.09215528630356702, - "grad_norm": 1175.0252685546875, - "learning_rate": 4.562e-05, - "loss": 143.92, - "step": 22810 - }, - { - "epoch": 0.09219568756893466, - "grad_norm": 759.6773681640625, - "learning_rate": 4.564e-05, - "loss": 90.7138, - "step": 22820 - }, - { - "epoch": 0.09223608883430229, - "grad_norm": 857.7040405273438, - "learning_rate": 4.566e-05, - "loss": 92.1247, - "step": 22830 - }, - { - "epoch": 0.09227649009966993, - "grad_norm": 1580.8248291015625, - "learning_rate": 4.568e-05, - "loss": 128.0727, - "step": 22840 - }, - { - "epoch": 0.09231689136503755, - "grad_norm": 567.8139038085938, - "learning_rate": 4.5700000000000006e-05, - "loss": 97.4495, - "step": 22850 - }, - { - "epoch": 0.09235729263040518, - "grad_norm": 1576.912841796875, - "learning_rate": 4.572e-05, - "loss": 128.2519, - "step": 22860 - }, - { - "epoch": 0.09239769389577282, - "grad_norm": 716.1073608398438, - "learning_rate": 4.574e-05, - "loss": 94.8568, - "step": 22870 - }, - { - "epoch": 0.09243809516114045, - "grad_norm": 638.1775512695312, - "learning_rate": 4.576e-05, - "loss": 66.2251, - "step": 22880 - }, - { - "epoch": 0.09247849642650807, - "grad_norm": 2102.469482421875, - "learning_rate": 4.578e-05, - "loss": 128.2732, - "step": 22890 - }, - { - "epoch": 0.09251889769187571, - "grad_norm": 682.6776123046875, - "learning_rate": 4.58e-05, - "loss": 102.548, - "step": 22900 - }, - { - "epoch": 0.09255929895724334, - "grad_norm": 586.2388916015625, - "learning_rate": 4.5820000000000005e-05, - "loss": 148.1805, - "step": 22910 - }, - { - "epoch": 0.09259970022261098, - "grad_norm": 1975.264404296875, - "learning_rate": 4.584e-05, - "loss": 143.5229, - "step": 22920 - }, - { - "epoch": 0.0926401014879786, - "grad_norm": 758.1207885742188, - "learning_rate": 4.5860000000000005e-05, - "loss": 128.5342, - "step": 22930 - }, - { - "epoch": 0.09268050275334623, - "grad_norm": 2084.8974609375, - "learning_rate": 4.588e-05, - "loss": 94.417, - "step": 22940 - }, - { - "epoch": 0.09272090401871387, - "grad_norm": 830.3482055664062, - "learning_rate": 4.5900000000000004e-05, - "loss": 97.3431, - "step": 22950 - }, - { - "epoch": 0.0927613052840815, - "grad_norm": 562.5604858398438, - "learning_rate": 4.592e-05, - "loss": 115.6859, - "step": 22960 - }, - { - "epoch": 0.09280170654944912, - "grad_norm": 1079.0665283203125, - "learning_rate": 4.594e-05, - "loss": 118.9365, - "step": 22970 - }, - { - "epoch": 0.09284210781481676, - "grad_norm": 1732.04638671875, - "learning_rate": 4.596e-05, - "loss": 86.5984, - "step": 22980 - }, - { - "epoch": 0.09288250908018439, - "grad_norm": 1780.00537109375, - "learning_rate": 4.5980000000000004e-05, - "loss": 113.6839, - "step": 22990 - }, - { - "epoch": 0.09292291034555203, - "grad_norm": 1192.3056640625, - "learning_rate": 4.600000000000001e-05, - "loss": 89.4596, - "step": 23000 - }, - { - "epoch": 0.09296331161091966, - "grad_norm": 1134.75830078125, - "learning_rate": 4.602e-05, - "loss": 87.713, - "step": 23010 - }, - { - "epoch": 0.09300371287628728, - "grad_norm": 1336.7357177734375, - "learning_rate": 4.604e-05, - "loss": 65.8475, - "step": 23020 - }, - { - "epoch": 0.09304411414165492, - "grad_norm": 815.8203125, - "learning_rate": 4.606e-05, - "loss": 148.7902, - "step": 23030 - }, - { - "epoch": 0.09308451540702255, - "grad_norm": 1082.975830078125, - "learning_rate": 4.608e-05, - "loss": 114.2601, - "step": 23040 - }, - { - "epoch": 0.09312491667239017, - "grad_norm": 778.52587890625, - "learning_rate": 4.61e-05, - "loss": 284.198, - "step": 23050 - }, - { - "epoch": 0.09316531793775781, - "grad_norm": 591.0042114257812, - "learning_rate": 4.612e-05, - "loss": 106.8146, - "step": 23060 - }, - { - "epoch": 0.09320571920312544, - "grad_norm": 2606.027099609375, - "learning_rate": 4.614e-05, - "loss": 109.8302, - "step": 23070 - }, - { - "epoch": 0.09324612046849308, - "grad_norm": 1184.9754638671875, - "learning_rate": 4.6160000000000005e-05, - "loss": 118.0777, - "step": 23080 - }, - { - "epoch": 0.0932865217338607, - "grad_norm": 1047.3983154296875, - "learning_rate": 4.618e-05, - "loss": 119.2703, - "step": 23090 - }, - { - "epoch": 0.09332692299922833, - "grad_norm": 522.4287719726562, - "learning_rate": 4.6200000000000005e-05, - "loss": 115.0015, - "step": 23100 - }, - { - "epoch": 0.09336732426459597, - "grad_norm": 387.2696838378906, - "learning_rate": 4.622e-05, - "loss": 112.657, - "step": 23110 - }, - { - "epoch": 0.0934077255299636, - "grad_norm": 680.420166015625, - "learning_rate": 4.624e-05, - "loss": 81.0697, - "step": 23120 - }, - { - "epoch": 0.09344812679533122, - "grad_norm": 1640.8831787109375, - "learning_rate": 4.626e-05, - "loss": 128.7615, - "step": 23130 - }, - { - "epoch": 0.09348852806069886, - "grad_norm": 666.759765625, - "learning_rate": 4.6280000000000004e-05, - "loss": 86.7895, - "step": 23140 - }, - { - "epoch": 0.09352892932606649, - "grad_norm": 2630.477783203125, - "learning_rate": 4.630000000000001e-05, - "loss": 174.0495, - "step": 23150 - }, - { - "epoch": 0.09356933059143413, - "grad_norm": 2171.40185546875, - "learning_rate": 4.6320000000000004e-05, - "loss": 126.4433, - "step": 23160 - }, - { - "epoch": 0.09360973185680176, - "grad_norm": 952.8081665039062, - "learning_rate": 4.634e-05, - "loss": 60.8379, - "step": 23170 - }, - { - "epoch": 0.09365013312216938, - "grad_norm": 1052.6181640625, - "learning_rate": 4.636e-05, - "loss": 155.2496, - "step": 23180 - }, - { - "epoch": 0.09369053438753702, - "grad_norm": 1012.393310546875, - "learning_rate": 4.638e-05, - "loss": 137.2208, - "step": 23190 - }, - { - "epoch": 0.09373093565290465, - "grad_norm": 1177.2757568359375, - "learning_rate": 4.64e-05, - "loss": 123.9954, - "step": 23200 - }, - { - "epoch": 0.09377133691827227, - "grad_norm": 1205.6624755859375, - "learning_rate": 4.642e-05, - "loss": 133.0356, - "step": 23210 - }, - { - "epoch": 0.09381173818363991, - "grad_norm": 790.0210571289062, - "learning_rate": 4.644e-05, - "loss": 134.6905, - "step": 23220 - }, - { - "epoch": 0.09385213944900754, - "grad_norm": 1255.626953125, - "learning_rate": 4.6460000000000006e-05, - "loss": 182.1293, - "step": 23230 - }, - { - "epoch": 0.09389254071437518, - "grad_norm": 705.6256103515625, - "learning_rate": 4.648e-05, - "loss": 80.2685, - "step": 23240 - }, - { - "epoch": 0.0939329419797428, - "grad_norm": 1400.9781494140625, - "learning_rate": 4.6500000000000005e-05, - "loss": 65.442, - "step": 23250 - }, - { - "epoch": 0.09397334324511043, - "grad_norm": 475.5975646972656, - "learning_rate": 4.652e-05, - "loss": 104.4814, - "step": 23260 - }, - { - "epoch": 0.09401374451047807, - "grad_norm": 656.1465454101562, - "learning_rate": 4.654e-05, - "loss": 64.6497, - "step": 23270 - }, - { - "epoch": 0.0940541457758457, - "grad_norm": 870.1063232421875, - "learning_rate": 4.656e-05, - "loss": 106.1441, - "step": 23280 - }, - { - "epoch": 0.09409454704121333, - "grad_norm": 461.961181640625, - "learning_rate": 4.6580000000000005e-05, - "loss": 77.1417, - "step": 23290 - }, - { - "epoch": 0.09413494830658097, - "grad_norm": 677.691162109375, - "learning_rate": 4.660000000000001e-05, - "loss": 99.122, - "step": 23300 - }, - { - "epoch": 0.09417534957194859, - "grad_norm": 651.6929931640625, - "learning_rate": 4.6620000000000004e-05, - "loss": 80.5311, - "step": 23310 - }, - { - "epoch": 0.09421575083731623, - "grad_norm": 4944.45703125, - "learning_rate": 4.664e-05, - "loss": 122.0651, - "step": 23320 - }, - { - "epoch": 0.09425615210268386, - "grad_norm": 585.9901733398438, - "learning_rate": 4.6660000000000004e-05, - "loss": 105.5626, - "step": 23330 - }, - { - "epoch": 0.09429655336805148, - "grad_norm": 869.3453979492188, - "learning_rate": 4.668e-05, - "loss": 107.5253, - "step": 23340 - }, - { - "epoch": 0.09433695463341912, - "grad_norm": 1403.1729736328125, - "learning_rate": 4.6700000000000003e-05, - "loss": 82.5326, - "step": 23350 - }, - { - "epoch": 0.09437735589878675, - "grad_norm": 1053.485595703125, - "learning_rate": 4.672e-05, - "loss": 110.7292, - "step": 23360 - }, - { - "epoch": 0.09441775716415438, - "grad_norm": 495.44793701171875, - "learning_rate": 4.674e-05, - "loss": 85.2537, - "step": 23370 - }, - { - "epoch": 0.09445815842952202, - "grad_norm": 1976.839599609375, - "learning_rate": 4.6760000000000006e-05, - "loss": 122.28, - "step": 23380 - }, - { - "epoch": 0.09449855969488964, - "grad_norm": 840.1267700195312, - "learning_rate": 4.678e-05, - "loss": 61.5691, - "step": 23390 - }, - { - "epoch": 0.09453896096025728, - "grad_norm": 1114.9774169921875, - "learning_rate": 4.6800000000000006e-05, - "loss": 179.376, - "step": 23400 - }, - { - "epoch": 0.09457936222562491, - "grad_norm": 2026.411376953125, - "learning_rate": 4.682e-05, - "loss": 117.7227, - "step": 23410 - }, - { - "epoch": 0.09461976349099253, - "grad_norm": 1031.648193359375, - "learning_rate": 4.684e-05, - "loss": 95.3115, - "step": 23420 - }, - { - "epoch": 0.09466016475636017, - "grad_norm": 774.4593505859375, - "learning_rate": 4.686e-05, - "loss": 74.8853, - "step": 23430 - }, - { - "epoch": 0.0947005660217278, - "grad_norm": 799.5897216796875, - "learning_rate": 4.688e-05, - "loss": 106.8804, - "step": 23440 - }, - { - "epoch": 0.09474096728709543, - "grad_norm": 801.3352661132812, - "learning_rate": 4.69e-05, - "loss": 117.8924, - "step": 23450 - }, - { - "epoch": 0.09478136855246307, - "grad_norm": 562.31494140625, - "learning_rate": 4.6920000000000005e-05, - "loss": 70.9862, - "step": 23460 - }, - { - "epoch": 0.09482176981783069, - "grad_norm": 1088.81103515625, - "learning_rate": 4.694e-05, - "loss": 105.8252, - "step": 23470 - }, - { - "epoch": 0.09486217108319833, - "grad_norm": 773.8255004882812, - "learning_rate": 4.6960000000000004e-05, - "loss": 74.9372, - "step": 23480 - }, - { - "epoch": 0.09490257234856596, - "grad_norm": 2185.90771484375, - "learning_rate": 4.698e-05, - "loss": 137.0428, - "step": 23490 - }, - { - "epoch": 0.09494297361393358, - "grad_norm": 1338.7786865234375, - "learning_rate": 4.7e-05, - "loss": 138.3532, - "step": 23500 - }, - { - "epoch": 0.09498337487930122, - "grad_norm": 828.383544921875, - "learning_rate": 4.702e-05, - "loss": 103.3043, - "step": 23510 - }, - { - "epoch": 0.09502377614466885, - "grad_norm": 1320.6363525390625, - "learning_rate": 4.7040000000000004e-05, - "loss": 112.5485, - "step": 23520 - }, - { - "epoch": 0.09506417741003648, - "grad_norm": 1088.3404541015625, - "learning_rate": 4.706000000000001e-05, - "loss": 107.0843, - "step": 23530 - }, - { - "epoch": 0.09510457867540412, - "grad_norm": 697.0968627929688, - "learning_rate": 4.708e-05, - "loss": 86.7037, - "step": 23540 - }, - { - "epoch": 0.09514497994077174, - "grad_norm": 1503.7210693359375, - "learning_rate": 4.71e-05, - "loss": 113.7888, - "step": 23550 - }, - { - "epoch": 0.09518538120613938, - "grad_norm": 840.5352783203125, - "learning_rate": 4.712e-05, - "loss": 107.902, - "step": 23560 - }, - { - "epoch": 0.09522578247150701, - "grad_norm": 1638.9781494140625, - "learning_rate": 4.714e-05, - "loss": 102.9792, - "step": 23570 - }, - { - "epoch": 0.09526618373687464, - "grad_norm": 518.2499389648438, - "learning_rate": 4.716e-05, - "loss": 78.3873, - "step": 23580 - }, - { - "epoch": 0.09530658500224228, - "grad_norm": 837.2974853515625, - "learning_rate": 4.718e-05, - "loss": 71.646, - "step": 23590 - }, - { - "epoch": 0.0953469862676099, - "grad_norm": 560.1998291015625, - "learning_rate": 4.72e-05, - "loss": 131.5562, - "step": 23600 - }, - { - "epoch": 0.09538738753297753, - "grad_norm": 789.7793579101562, - "learning_rate": 4.7220000000000005e-05, - "loss": 95.4459, - "step": 23610 - }, - { - "epoch": 0.09542778879834517, - "grad_norm": 836.2322998046875, - "learning_rate": 4.724e-05, - "loss": 118.2009, - "step": 23620 - }, - { - "epoch": 0.0954681900637128, - "grad_norm": 1441.4818115234375, - "learning_rate": 4.7260000000000005e-05, - "loss": 164.6346, - "step": 23630 - }, - { - "epoch": 0.09550859132908043, - "grad_norm": 965.8220825195312, - "learning_rate": 4.728e-05, - "loss": 109.1539, - "step": 23640 - }, - { - "epoch": 0.09554899259444806, - "grad_norm": 1859.1063232421875, - "learning_rate": 4.73e-05, - "loss": 180.398, - "step": 23650 - }, - { - "epoch": 0.09558939385981569, - "grad_norm": 823.5313110351562, - "learning_rate": 4.732e-05, - "loss": 95.1897, - "step": 23660 - }, - { - "epoch": 0.09562979512518333, - "grad_norm": 880.113037109375, - "learning_rate": 4.7340000000000004e-05, - "loss": 107.1814, - "step": 23670 - }, - { - "epoch": 0.09567019639055095, - "grad_norm": 1045.581298828125, - "learning_rate": 4.736000000000001e-05, - "loss": 151.5835, - "step": 23680 - }, - { - "epoch": 0.09571059765591858, - "grad_norm": 1099.427978515625, - "learning_rate": 4.7380000000000004e-05, - "loss": 93.5232, - "step": 23690 - }, - { - "epoch": 0.09575099892128622, - "grad_norm": 640.9761962890625, - "learning_rate": 4.74e-05, - "loss": 117.26, - "step": 23700 - }, - { - "epoch": 0.09579140018665384, - "grad_norm": 337.58795166015625, - "learning_rate": 4.742e-05, - "loss": 114.4822, - "step": 23710 - }, - { - "epoch": 0.09583180145202148, - "grad_norm": 1089.921875, - "learning_rate": 4.744e-05, - "loss": 104.9418, - "step": 23720 - }, - { - "epoch": 0.09587220271738911, - "grad_norm": 2395.615234375, - "learning_rate": 4.746e-05, - "loss": 99.8505, - "step": 23730 - }, - { - "epoch": 0.09591260398275674, - "grad_norm": 1333.4490966796875, - "learning_rate": 4.748e-05, - "loss": 136.7725, - "step": 23740 - }, - { - "epoch": 0.09595300524812438, - "grad_norm": 1200.8509521484375, - "learning_rate": 4.75e-05, - "loss": 108.3065, - "step": 23750 - }, - { - "epoch": 0.095993406513492, - "grad_norm": 674.5408325195312, - "learning_rate": 4.7520000000000006e-05, - "loss": 104.4726, - "step": 23760 - }, - { - "epoch": 0.09603380777885963, - "grad_norm": 789.4915161132812, - "learning_rate": 4.754e-05, - "loss": 145.3513, - "step": 23770 - }, - { - "epoch": 0.09607420904422727, - "grad_norm": 797.5672607421875, - "learning_rate": 4.7560000000000005e-05, - "loss": 99.0135, - "step": 23780 - }, - { - "epoch": 0.0961146103095949, - "grad_norm": 1080.6739501953125, - "learning_rate": 4.758e-05, - "loss": 128.2187, - "step": 23790 - }, - { - "epoch": 0.09615501157496253, - "grad_norm": 1180.974365234375, - "learning_rate": 4.76e-05, - "loss": 112.952, - "step": 23800 - }, - { - "epoch": 0.09619541284033016, - "grad_norm": 782.664794921875, - "learning_rate": 4.762e-05, - "loss": 88.9206, - "step": 23810 - }, - { - "epoch": 0.09623581410569779, - "grad_norm": 731.3271484375, - "learning_rate": 4.7640000000000005e-05, - "loss": 109.8621, - "step": 23820 - }, - { - "epoch": 0.09627621537106543, - "grad_norm": 1363.64501953125, - "learning_rate": 4.766000000000001e-05, - "loss": 151.2704, - "step": 23830 - }, - { - "epoch": 0.09631661663643305, - "grad_norm": 802.5873413085938, - "learning_rate": 4.7680000000000004e-05, - "loss": 126.6234, - "step": 23840 - }, - { - "epoch": 0.09635701790180068, - "grad_norm": 1359.4644775390625, - "learning_rate": 4.77e-05, - "loss": 73.3318, - "step": 23850 - }, - { - "epoch": 0.09639741916716832, - "grad_norm": 733.4736328125, - "learning_rate": 4.7720000000000004e-05, - "loss": 115.1329, - "step": 23860 - }, - { - "epoch": 0.09643782043253595, - "grad_norm": 620.9826049804688, - "learning_rate": 4.774e-05, - "loss": 86.756, - "step": 23870 - }, - { - "epoch": 0.09647822169790359, - "grad_norm": 508.6841125488281, - "learning_rate": 4.7760000000000004e-05, - "loss": 90.5046, - "step": 23880 - }, - { - "epoch": 0.09651862296327121, - "grad_norm": 756.0390014648438, - "learning_rate": 4.778e-05, - "loss": 125.9883, - "step": 23890 - }, - { - "epoch": 0.09655902422863884, - "grad_norm": 531.0929565429688, - "learning_rate": 4.78e-05, - "loss": 152.4438, - "step": 23900 - }, - { - "epoch": 0.09659942549400648, - "grad_norm": 2243.71630859375, - "learning_rate": 4.7820000000000006e-05, - "loss": 155.722, - "step": 23910 - }, - { - "epoch": 0.0966398267593741, - "grad_norm": 1023.2913818359375, - "learning_rate": 4.784e-05, - "loss": 76.3717, - "step": 23920 - }, - { - "epoch": 0.09668022802474173, - "grad_norm": 896.178955078125, - "learning_rate": 4.7860000000000006e-05, - "loss": 114.2663, - "step": 23930 - }, - { - "epoch": 0.09672062929010937, - "grad_norm": 656.4266967773438, - "learning_rate": 4.788e-05, - "loss": 122.8116, - "step": 23940 - }, - { - "epoch": 0.096761030555477, - "grad_norm": 1333.90234375, - "learning_rate": 4.79e-05, - "loss": 119.1918, - "step": 23950 - }, - { - "epoch": 0.09680143182084464, - "grad_norm": 929.0885620117188, - "learning_rate": 4.792e-05, - "loss": 101.0765, - "step": 23960 - }, - { - "epoch": 0.09684183308621226, - "grad_norm": 783.88134765625, - "learning_rate": 4.794e-05, - "loss": 123.106, - "step": 23970 - }, - { - "epoch": 0.09688223435157989, - "grad_norm": 495.6070861816406, - "learning_rate": 4.796e-05, - "loss": 77.067, - "step": 23980 - }, - { - "epoch": 0.09692263561694753, - "grad_norm": 757.9783325195312, - "learning_rate": 4.7980000000000005e-05, - "loss": 108.6014, - "step": 23990 - }, - { - "epoch": 0.09696303688231515, - "grad_norm": 1557.5858154296875, - "learning_rate": 4.8e-05, - "loss": 132.5803, - "step": 24000 - }, - { - "epoch": 0.09700343814768278, - "grad_norm": 1372.1710205078125, - "learning_rate": 4.8020000000000004e-05, - "loss": 140.0741, - "step": 24010 - }, - { - "epoch": 0.09704383941305042, - "grad_norm": 708.5230712890625, - "learning_rate": 4.804e-05, - "loss": 136.9571, - "step": 24020 - }, - { - "epoch": 0.09708424067841805, - "grad_norm": 949.0543823242188, - "learning_rate": 4.8060000000000004e-05, - "loss": 119.4232, - "step": 24030 - }, - { - "epoch": 0.09712464194378569, - "grad_norm": 779.013916015625, - "learning_rate": 4.808e-05, - "loss": 132.5883, - "step": 24040 - }, - { - "epoch": 0.09716504320915331, - "grad_norm": 1494.10595703125, - "learning_rate": 4.8100000000000004e-05, - "loss": 101.6414, - "step": 24050 - }, - { - "epoch": 0.09720544447452094, - "grad_norm": 628.3251342773438, - "learning_rate": 4.812000000000001e-05, - "loss": 100.0648, - "step": 24060 - }, - { - "epoch": 0.09724584573988858, - "grad_norm": 518.7921752929688, - "learning_rate": 4.814e-05, - "loss": 55.6692, - "step": 24070 - }, - { - "epoch": 0.0972862470052562, - "grad_norm": 1588.790771484375, - "learning_rate": 4.816e-05, - "loss": 133.6212, - "step": 24080 - }, - { - "epoch": 0.09732664827062383, - "grad_norm": 843.473388671875, - "learning_rate": 4.818e-05, - "loss": 87.3461, - "step": 24090 - }, - { - "epoch": 0.09736704953599147, - "grad_norm": 659.4796752929688, - "learning_rate": 4.82e-05, - "loss": 117.1845, - "step": 24100 - }, - { - "epoch": 0.0974074508013591, - "grad_norm": 584.1058959960938, - "learning_rate": 4.822e-05, - "loss": 103.56, - "step": 24110 - }, - { - "epoch": 0.09744785206672674, - "grad_norm": 1724.538330078125, - "learning_rate": 4.824e-05, - "loss": 89.6239, - "step": 24120 - }, - { - "epoch": 0.09748825333209436, - "grad_norm": 693.462646484375, - "learning_rate": 4.826e-05, - "loss": 93.3504, - "step": 24130 - }, - { - "epoch": 0.09752865459746199, - "grad_norm": 1428.2559814453125, - "learning_rate": 4.8280000000000005e-05, - "loss": 112.4596, - "step": 24140 - }, - { - "epoch": 0.09756905586282963, - "grad_norm": 1738.0125732421875, - "learning_rate": 4.83e-05, - "loss": 107.2036, - "step": 24150 - }, - { - "epoch": 0.09760945712819726, - "grad_norm": 590.5738525390625, - "learning_rate": 4.8320000000000005e-05, - "loss": 95.526, - "step": 24160 - }, - { - "epoch": 0.09764985839356488, - "grad_norm": 1145.53515625, - "learning_rate": 4.834e-05, - "loss": 82.7261, - "step": 24170 - }, - { - "epoch": 0.09769025965893252, - "grad_norm": 1240.5450439453125, - "learning_rate": 4.836e-05, - "loss": 138.7817, - "step": 24180 - }, - { - "epoch": 0.09773066092430015, - "grad_norm": 1606.5479736328125, - "learning_rate": 4.838e-05, - "loss": 101.9048, - "step": 24190 - }, - { - "epoch": 0.09777106218966779, - "grad_norm": 1542.466796875, - "learning_rate": 4.8400000000000004e-05, - "loss": 106.2202, - "step": 24200 - }, - { - "epoch": 0.09781146345503541, - "grad_norm": 889.3588256835938, - "learning_rate": 4.842000000000001e-05, - "loss": 144.4749, - "step": 24210 - }, - { - "epoch": 0.09785186472040304, - "grad_norm": 639.1673583984375, - "learning_rate": 4.8440000000000004e-05, - "loss": 103.4213, - "step": 24220 - }, - { - "epoch": 0.09789226598577068, - "grad_norm": 3171.987060546875, - "learning_rate": 4.846e-05, - "loss": 156.6668, - "step": 24230 - }, - { - "epoch": 0.0979326672511383, - "grad_norm": 709.1254272460938, - "learning_rate": 4.8480000000000003e-05, - "loss": 80.9064, - "step": 24240 - }, - { - "epoch": 0.09797306851650593, - "grad_norm": 561.7057495117188, - "learning_rate": 4.85e-05, - "loss": 171.3163, - "step": 24250 - }, - { - "epoch": 0.09801346978187357, - "grad_norm": 929.6841430664062, - "learning_rate": 4.852e-05, - "loss": 157.7511, - "step": 24260 - }, - { - "epoch": 0.0980538710472412, - "grad_norm": 1814.5821533203125, - "learning_rate": 4.854e-05, - "loss": 91.1438, - "step": 24270 - }, - { - "epoch": 0.09809427231260884, - "grad_norm": 4481.90673828125, - "learning_rate": 4.856e-05, - "loss": 116.3378, - "step": 24280 - }, - { - "epoch": 0.09813467357797646, - "grad_norm": 1093.2393798828125, - "learning_rate": 4.8580000000000006e-05, - "loss": 95.4637, - "step": 24290 - }, - { - "epoch": 0.09817507484334409, - "grad_norm": 559.2593383789062, - "learning_rate": 4.86e-05, - "loss": 80.5582, - "step": 24300 - }, - { - "epoch": 0.09821547610871173, - "grad_norm": 538.036865234375, - "learning_rate": 4.8620000000000005e-05, - "loss": 91.296, - "step": 24310 - }, - { - "epoch": 0.09825587737407936, - "grad_norm": 1021.3819580078125, - "learning_rate": 4.864e-05, - "loss": 96.4481, - "step": 24320 - }, - { - "epoch": 0.09829627863944698, - "grad_norm": 905.1778564453125, - "learning_rate": 4.866e-05, - "loss": 93.2619, - "step": 24330 - }, - { - "epoch": 0.09833667990481462, - "grad_norm": 604.1958618164062, - "learning_rate": 4.868e-05, - "loss": 62.2251, - "step": 24340 - }, - { - "epoch": 0.09837708117018225, - "grad_norm": 707.2637939453125, - "learning_rate": 4.87e-05, - "loss": 81.4563, - "step": 24350 - }, - { - "epoch": 0.09841748243554987, - "grad_norm": 1935.6922607421875, - "learning_rate": 4.872000000000001e-05, - "loss": 150.9556, - "step": 24360 - }, - { - "epoch": 0.09845788370091751, - "grad_norm": 529.8268432617188, - "learning_rate": 4.8740000000000004e-05, - "loss": 95.0914, - "step": 24370 - }, - { - "epoch": 0.09849828496628514, - "grad_norm": 1129.89599609375, - "learning_rate": 4.876e-05, - "loss": 128.5505, - "step": 24380 - }, - { - "epoch": 0.09853868623165278, - "grad_norm": 2489.7001953125, - "learning_rate": 4.8780000000000004e-05, - "loss": 138.6563, - "step": 24390 - }, - { - "epoch": 0.0985790874970204, - "grad_norm": 1026.3582763671875, - "learning_rate": 4.88e-05, - "loss": 126.4132, - "step": 24400 - }, - { - "epoch": 0.09861948876238803, - "grad_norm": 2506.63916015625, - "learning_rate": 4.8820000000000004e-05, - "loss": 151.2192, - "step": 24410 - }, - { - "epoch": 0.09865989002775567, - "grad_norm": 1200.955810546875, - "learning_rate": 4.884e-05, - "loss": 131.9323, - "step": 24420 - }, - { - "epoch": 0.0987002912931233, - "grad_norm": 888.474365234375, - "learning_rate": 4.886e-05, - "loss": 80.7957, - "step": 24430 - }, - { - "epoch": 0.09874069255849093, - "grad_norm": 853.1484375, - "learning_rate": 4.8880000000000006e-05, - "loss": 152.3066, - "step": 24440 - }, - { - "epoch": 0.09878109382385857, - "grad_norm": 931.940673828125, - "learning_rate": 4.89e-05, - "loss": 124.5411, - "step": 24450 - }, - { - "epoch": 0.09882149508922619, - "grad_norm": 755.48828125, - "learning_rate": 4.8920000000000006e-05, - "loss": 113.8801, - "step": 24460 - }, - { - "epoch": 0.09886189635459383, - "grad_norm": 1995.7860107421875, - "learning_rate": 4.894e-05, - "loss": 101.5088, - "step": 24470 - }, - { - "epoch": 0.09890229761996146, - "grad_norm": 645.9288940429688, - "learning_rate": 4.896e-05, - "loss": 75.8798, - "step": 24480 - }, - { - "epoch": 0.09894269888532908, - "grad_norm": 1073.67626953125, - "learning_rate": 4.898e-05, - "loss": 161.3777, - "step": 24490 - }, - { - "epoch": 0.09898310015069672, - "grad_norm": 1047.078125, - "learning_rate": 4.9e-05, - "loss": 136.8098, - "step": 24500 - }, - { - "epoch": 0.09902350141606435, - "grad_norm": 1193.5416259765625, - "learning_rate": 4.902e-05, - "loss": 112.7242, - "step": 24510 - }, - { - "epoch": 0.09906390268143198, - "grad_norm": 2082.607421875, - "learning_rate": 4.9040000000000005e-05, - "loss": 104.1718, - "step": 24520 - }, - { - "epoch": 0.09910430394679962, - "grad_norm": 657.3272705078125, - "learning_rate": 4.906e-05, - "loss": 94.1763, - "step": 24530 - }, - { - "epoch": 0.09914470521216724, - "grad_norm": 822.24462890625, - "learning_rate": 4.9080000000000004e-05, - "loss": 120.4415, - "step": 24540 - }, - { - "epoch": 0.09918510647753488, - "grad_norm": 1214.71142578125, - "learning_rate": 4.91e-05, - "loss": 95.7002, - "step": 24550 - }, - { - "epoch": 0.09922550774290251, - "grad_norm": 974.8594360351562, - "learning_rate": 4.9120000000000004e-05, - "loss": 122.8078, - "step": 24560 - }, - { - "epoch": 0.09926590900827013, - "grad_norm": 2204.4921875, - "learning_rate": 4.914e-05, - "loss": 181.9215, - "step": 24570 - }, - { - "epoch": 0.09930631027363777, - "grad_norm": 667.4011840820312, - "learning_rate": 4.9160000000000004e-05, - "loss": 80.917, - "step": 24580 - }, - { - "epoch": 0.0993467115390054, - "grad_norm": 896.2610473632812, - "learning_rate": 4.918000000000001e-05, - "loss": 144.0085, - "step": 24590 - }, - { - "epoch": 0.09938711280437303, - "grad_norm": 0.0, - "learning_rate": 4.92e-05, - "loss": 132.3478, - "step": 24600 - }, - { - "epoch": 0.09942751406974067, - "grad_norm": 941.837646484375, - "learning_rate": 4.9220000000000006e-05, - "loss": 98.3554, - "step": 24610 - }, - { - "epoch": 0.09946791533510829, - "grad_norm": 975.3572387695312, - "learning_rate": 4.924e-05, - "loss": 71.2832, - "step": 24620 - }, - { - "epoch": 0.09950831660047593, - "grad_norm": 467.11871337890625, - "learning_rate": 4.926e-05, - "loss": 84.4188, - "step": 24630 - }, - { - "epoch": 0.09954871786584356, - "grad_norm": 1425.18603515625, - "learning_rate": 4.928e-05, - "loss": 117.7631, - "step": 24640 - }, - { - "epoch": 0.09958911913121118, - "grad_norm": 992.929443359375, - "learning_rate": 4.93e-05, - "loss": 74.3671, - "step": 24650 - }, - { - "epoch": 0.09962952039657882, - "grad_norm": 1589.87646484375, - "learning_rate": 4.932e-05, - "loss": 127.9026, - "step": 24660 - }, - { - "epoch": 0.09966992166194645, - "grad_norm": 726.9409790039062, - "learning_rate": 4.9340000000000005e-05, - "loss": 127.6771, - "step": 24670 - }, - { - "epoch": 0.09971032292731408, - "grad_norm": 1687.093017578125, - "learning_rate": 4.936e-05, - "loss": 112.6813, - "step": 24680 - }, - { - "epoch": 0.09975072419268172, - "grad_norm": 891.7119140625, - "learning_rate": 4.9380000000000005e-05, - "loss": 131.5728, - "step": 24690 - }, - { - "epoch": 0.09979112545804934, - "grad_norm": 601.4475708007812, - "learning_rate": 4.94e-05, - "loss": 80.0845, - "step": 24700 - }, - { - "epoch": 0.09983152672341698, - "grad_norm": 1217.7177734375, - "learning_rate": 4.942e-05, - "loss": 108.1615, - "step": 24710 - }, - { - "epoch": 0.09987192798878461, - "grad_norm": 1115.55517578125, - "learning_rate": 4.944e-05, - "loss": 86.1, - "step": 24720 - }, - { - "epoch": 0.09991232925415224, - "grad_norm": 623.4889526367188, - "learning_rate": 4.946e-05, - "loss": 99.014, - "step": 24730 - }, - { - "epoch": 0.09995273051951988, - "grad_norm": 3404.5966796875, - "learning_rate": 4.948000000000001e-05, - "loss": 111.524, - "step": 24740 - }, - { - "epoch": 0.0999931317848875, - "grad_norm": 1838.208251953125, - "learning_rate": 4.9500000000000004e-05, - "loss": 162.1484, - "step": 24750 - }, - { - "epoch": 0.10003353305025513, - "grad_norm": 371.3464660644531, - "learning_rate": 4.952e-05, - "loss": 85.2928, - "step": 24760 - }, - { - "epoch": 0.10007393431562277, - "grad_norm": 718.6541748046875, - "learning_rate": 4.9540000000000003e-05, - "loss": 86.9755, - "step": 24770 - }, - { - "epoch": 0.1001143355809904, - "grad_norm": 1240.49169921875, - "learning_rate": 4.956e-05, - "loss": 72.236, - "step": 24780 - }, - { - "epoch": 0.10015473684635803, - "grad_norm": 2078.4365234375, - "learning_rate": 4.958e-05, - "loss": 130.2423, - "step": 24790 - }, - { - "epoch": 0.10019513811172566, - "grad_norm": 1719.6787109375, - "learning_rate": 4.96e-05, - "loss": 147.8617, - "step": 24800 - }, - { - "epoch": 0.10023553937709329, - "grad_norm": 1234.83203125, - "learning_rate": 4.962e-05, - "loss": 116.7895, - "step": 24810 - }, - { - "epoch": 0.10027594064246093, - "grad_norm": 1845.6007080078125, - "learning_rate": 4.9640000000000006e-05, - "loss": 123.7702, - "step": 24820 - }, - { - "epoch": 0.10031634190782855, - "grad_norm": 1477.3031005859375, - "learning_rate": 4.966e-05, - "loss": 175.7533, - "step": 24830 - }, - { - "epoch": 0.10035674317319618, - "grad_norm": 753.8565063476562, - "learning_rate": 4.9680000000000005e-05, - "loss": 137.9543, - "step": 24840 - }, - { - "epoch": 0.10039714443856382, - "grad_norm": 458.0389709472656, - "learning_rate": 4.97e-05, - "loss": 99.559, - "step": 24850 - }, - { - "epoch": 0.10043754570393144, - "grad_norm": 1369.2882080078125, - "learning_rate": 4.972e-05, - "loss": 151.6343, - "step": 24860 - }, - { - "epoch": 0.10047794696929908, - "grad_norm": 2486.626220703125, - "learning_rate": 4.974e-05, - "loss": 118.1052, - "step": 24870 - }, - { - "epoch": 0.10051834823466671, - "grad_norm": 1189.0513916015625, - "learning_rate": 4.976e-05, - "loss": 115.5461, - "step": 24880 - }, - { - "epoch": 0.10055874950003434, - "grad_norm": 3800.11083984375, - "learning_rate": 4.978e-05, - "loss": 113.4657, - "step": 24890 - }, - { - "epoch": 0.10059915076540198, - "grad_norm": 588.5828857421875, - "learning_rate": 4.9800000000000004e-05, - "loss": 116.0871, - "step": 24900 - }, - { - "epoch": 0.1006395520307696, - "grad_norm": 748.7584228515625, - "learning_rate": 4.982e-05, - "loss": 96.397, - "step": 24910 - }, - { - "epoch": 0.10067995329613723, - "grad_norm": 883.9682006835938, - "learning_rate": 4.9840000000000004e-05, - "loss": 73.1233, - "step": 24920 - }, - { - "epoch": 0.10072035456150487, - "grad_norm": 747.5608520507812, - "learning_rate": 4.986e-05, - "loss": 101.5735, - "step": 24930 - }, - { - "epoch": 0.1007607558268725, - "grad_norm": 1152.760009765625, - "learning_rate": 4.9880000000000004e-05, - "loss": 106.2813, - "step": 24940 - }, - { - "epoch": 0.10080115709224013, - "grad_norm": 1556.455322265625, - "learning_rate": 4.99e-05, - "loss": 92.2315, - "step": 24950 - }, - { - "epoch": 0.10084155835760776, - "grad_norm": 564.4570922851562, - "learning_rate": 4.992e-05, - "loss": 105.7234, - "step": 24960 - }, - { - "epoch": 0.10088195962297539, - "grad_norm": 582.0642700195312, - "learning_rate": 4.9940000000000006e-05, - "loss": 85.4253, - "step": 24970 - }, - { - "epoch": 0.10092236088834303, - "grad_norm": 2132.204833984375, - "learning_rate": 4.996e-05, - "loss": 145.5456, - "step": 24980 - }, - { - "epoch": 0.10096276215371065, - "grad_norm": 1085.25048828125, - "learning_rate": 4.9980000000000006e-05, - "loss": 105.3973, - "step": 24990 - }, - { - "epoch": 0.10100316341907828, - "grad_norm": 482.8312072753906, - "learning_rate": 5e-05, - "loss": 107.6187, - "step": 25000 - }, - { - "epoch": 0.10104356468444592, - "grad_norm": 12273.1533203125, - "learning_rate": 4.999999975630607e-05, - "loss": 166.6157, - "step": 25010 - }, - { - "epoch": 0.10108396594981355, - "grad_norm": 1100.8514404296875, - "learning_rate": 4.999999902522426e-05, - "loss": 170.2824, - "step": 25020 - }, - { - "epoch": 0.10112436721518119, - "grad_norm": 1492.0712890625, - "learning_rate": 4.9999997806754614e-05, - "loss": 105.2635, - "step": 25030 - }, - { - "epoch": 0.10116476848054881, - "grad_norm": 2506.78173828125, - "learning_rate": 4.9999996100897126e-05, - "loss": 76.4485, - "step": 25040 - }, - { - "epoch": 0.10120516974591644, - "grad_norm": 2876.141845703125, - "learning_rate": 4.999999390765185e-05, - "loss": 115.6973, - "step": 25050 - }, - { - "epoch": 0.10124557101128408, - "grad_norm": 1318.9537353515625, - "learning_rate": 4.999999122701883e-05, - "loss": 120.0407, - "step": 25060 - }, - { - "epoch": 0.1012859722766517, - "grad_norm": 598.3362426757812, - "learning_rate": 4.99999880589981e-05, - "loss": 99.9749, - "step": 25070 - }, - { - "epoch": 0.10132637354201933, - "grad_norm": 680.303466796875, - "learning_rate": 4.999998440358973e-05, - "loss": 86.1489, - "step": 25080 - }, - { - "epoch": 0.10136677480738697, - "grad_norm": 1665.12548828125, - "learning_rate": 4.99999802607938e-05, - "loss": 125.203, - "step": 25090 - }, - { - "epoch": 0.1014071760727546, - "grad_norm": 4897.3798828125, - "learning_rate": 4.999997563061038e-05, - "loss": 99.004, - "step": 25100 - }, - { - "epoch": 0.10144757733812224, - "grad_norm": 561.159423828125, - "learning_rate": 4.999997051303956e-05, - "loss": 128.6623, - "step": 25110 - }, - { - "epoch": 0.10148797860348986, - "grad_norm": 407.36114501953125, - "learning_rate": 4.9999964908081455e-05, - "loss": 107.5606, - "step": 25120 - }, - { - "epoch": 0.10152837986885749, - "grad_norm": 602.0693359375, - "learning_rate": 4.999995881573616e-05, - "loss": 99.1402, - "step": 25130 - }, - { - "epoch": 0.10156878113422513, - "grad_norm": 428.0013427734375, - "learning_rate": 4.999995223600379e-05, - "loss": 176.0661, - "step": 25140 - }, - { - "epoch": 0.10160918239959275, - "grad_norm": 1730.8555908203125, - "learning_rate": 4.999994516888449e-05, - "loss": 143.4497, - "step": 25150 - }, - { - "epoch": 0.10164958366496038, - "grad_norm": 1302.3773193359375, - "learning_rate": 4.999993761437838e-05, - "loss": 137.5996, - "step": 25160 - }, - { - "epoch": 0.10168998493032802, - "grad_norm": 3554.90771484375, - "learning_rate": 4.9999929572485616e-05, - "loss": 139.0566, - "step": 25170 - }, - { - "epoch": 0.10173038619569565, - "grad_norm": 1487.5889892578125, - "learning_rate": 4.999992104320636e-05, - "loss": 108.4844, - "step": 25180 - }, - { - "epoch": 0.10177078746106329, - "grad_norm": 1060.8238525390625, - "learning_rate": 4.999991202654076e-05, - "loss": 110.4729, - "step": 25190 - }, - { - "epoch": 0.10181118872643091, - "grad_norm": 899.1284790039062, - "learning_rate": 4.9999902522489015e-05, - "loss": 117.7502, - "step": 25200 - }, - { - "epoch": 0.10185158999179854, - "grad_norm": 824.8314819335938, - "learning_rate": 4.99998925310513e-05, - "loss": 96.4199, - "step": 25210 - }, - { - "epoch": 0.10189199125716618, - "grad_norm": 729.9413452148438, - "learning_rate": 4.999988205222781e-05, - "loss": 58.8153, - "step": 25220 - }, - { - "epoch": 0.1019323925225338, - "grad_norm": 1475.010498046875, - "learning_rate": 4.999987108601874e-05, - "loss": 118.004, - "step": 25230 - }, - { - "epoch": 0.10197279378790143, - "grad_norm": 3924.0517578125, - "learning_rate": 4.999985963242432e-05, - "loss": 161.4323, - "step": 25240 - }, - { - "epoch": 0.10201319505326907, - "grad_norm": 684.8850708007812, - "learning_rate": 4.999984769144476e-05, - "loss": 93.1785, - "step": 25250 - }, - { - "epoch": 0.1020535963186367, - "grad_norm": 617.2882690429688, - "learning_rate": 4.99998352630803e-05, - "loss": 108.2454, - "step": 25260 - }, - { - "epoch": 0.10209399758400434, - "grad_norm": 713.0025634765625, - "learning_rate": 4.999982234733118e-05, - "loss": 128.2708, - "step": 25270 - }, - { - "epoch": 0.10213439884937196, - "grad_norm": 830.2299194335938, - "learning_rate": 4.9999808944197666e-05, - "loss": 93.2193, - "step": 25280 - }, - { - "epoch": 0.10217480011473959, - "grad_norm": 635.0062255859375, - "learning_rate": 4.999979505367999e-05, - "loss": 95.8657, - "step": 25290 - }, - { - "epoch": 0.10221520138010723, - "grad_norm": 681.8064575195312, - "learning_rate": 4.999978067577844e-05, - "loss": 81.1595, - "step": 25300 - }, - { - "epoch": 0.10225560264547486, - "grad_norm": 1729.1885986328125, - "learning_rate": 4.999976581049331e-05, - "loss": 104.3257, - "step": 25310 - }, - { - "epoch": 0.10229600391084248, - "grad_norm": 499.2225341796875, - "learning_rate": 4.999975045782486e-05, - "loss": 150.2334, - "step": 25320 - }, - { - "epoch": 0.10233640517621012, - "grad_norm": 1919.6650390625, - "learning_rate": 4.9999734617773405e-05, - "loss": 93.5073, - "step": 25330 - }, - { - "epoch": 0.10237680644157775, - "grad_norm": 628.6290283203125, - "learning_rate": 4.9999718290339256e-05, - "loss": 138.5961, - "step": 25340 - }, - { - "epoch": 0.10241720770694539, - "grad_norm": 877.5479125976562, - "learning_rate": 4.999970147552272e-05, - "loss": 106.7546, - "step": 25350 - }, - { - "epoch": 0.10245760897231301, - "grad_norm": 426.84027099609375, - "learning_rate": 4.999968417332415e-05, - "loss": 102.9967, - "step": 25360 - }, - { - "epoch": 0.10249801023768064, - "grad_norm": 1005.803466796875, - "learning_rate": 4.9999666383743854e-05, - "loss": 133.2389, - "step": 25370 - }, - { - "epoch": 0.10253841150304828, - "grad_norm": 973.8045654296875, - "learning_rate": 4.999964810678219e-05, - "loss": 127.7309, - "step": 25380 - }, - { - "epoch": 0.1025788127684159, - "grad_norm": 1207.312255859375, - "learning_rate": 4.9999629342439524e-05, - "loss": 91.1231, - "step": 25390 - }, - { - "epoch": 0.10261921403378353, - "grad_norm": 2072.83837890625, - "learning_rate": 4.999961009071621e-05, - "loss": 154.6031, - "step": 25400 - }, - { - "epoch": 0.10265961529915117, - "grad_norm": 547.6760864257812, - "learning_rate": 4.999959035161263e-05, - "loss": 112.7586, - "step": 25410 - }, - { - "epoch": 0.1027000165645188, - "grad_norm": 784.879150390625, - "learning_rate": 4.999957012512916e-05, - "loss": 104.8253, - "step": 25420 - }, - { - "epoch": 0.10274041782988644, - "grad_norm": 625.8154296875, - "learning_rate": 4.99995494112662e-05, - "loss": 80.9909, - "step": 25430 - }, - { - "epoch": 0.10278081909525406, - "grad_norm": 976.3950805664062, - "learning_rate": 4.999952821002415e-05, - "loss": 121.5834, - "step": 25440 - }, - { - "epoch": 0.10282122036062169, - "grad_norm": 658.44140625, - "learning_rate": 4.999950652140343e-05, - "loss": 152.7022, - "step": 25450 - }, - { - "epoch": 0.10286162162598933, - "grad_norm": 948.6815795898438, - "learning_rate": 4.999948434540446e-05, - "loss": 119.2311, - "step": 25460 - }, - { - "epoch": 0.10290202289135696, - "grad_norm": 2785.1484375, - "learning_rate": 4.999946168202767e-05, - "loss": 123.5097, - "step": 25470 - }, - { - "epoch": 0.10294242415672458, - "grad_norm": 2336.2509765625, - "learning_rate": 4.999943853127351e-05, - "loss": 92.8932, - "step": 25480 - }, - { - "epoch": 0.10298282542209222, - "grad_norm": 324.7106018066406, - "learning_rate": 4.9999414893142425e-05, - "loss": 84.0734, - "step": 25490 - }, - { - "epoch": 0.10302322668745985, - "grad_norm": 1529.5069580078125, - "learning_rate": 4.999939076763487e-05, - "loss": 151.1412, - "step": 25500 - }, - { - "epoch": 0.10306362795282749, - "grad_norm": 3243.043701171875, - "learning_rate": 4.999936615475133e-05, - "loss": 98.3063, - "step": 25510 - }, - { - "epoch": 0.10310402921819511, - "grad_norm": 656.1243286132812, - "learning_rate": 4.9999341054492265e-05, - "loss": 132.48, - "step": 25520 - }, - { - "epoch": 0.10314443048356274, - "grad_norm": 358.87744140625, - "learning_rate": 4.999931546685819e-05, - "loss": 160.6029, - "step": 25530 - }, - { - "epoch": 0.10318483174893038, - "grad_norm": 1219.5975341796875, - "learning_rate": 4.999928939184958e-05, - "loss": 122.454, - "step": 25540 - }, - { - "epoch": 0.103225233014298, - "grad_norm": 1476.14208984375, - "learning_rate": 4.999926282946695e-05, - "loss": 121.1642, - "step": 25550 - }, - { - "epoch": 0.10326563427966563, - "grad_norm": 737.3023071289062, - "learning_rate": 4.9999235779710826e-05, - "loss": 68.9884, - "step": 25560 - }, - { - "epoch": 0.10330603554503327, - "grad_norm": 0.0, - "learning_rate": 4.999920824258173e-05, - "loss": 70.6594, - "step": 25570 - }, - { - "epoch": 0.1033464368104009, - "grad_norm": 585.1388549804688, - "learning_rate": 4.999918021808019e-05, - "loss": 82.4569, - "step": 25580 - }, - { - "epoch": 0.10338683807576854, - "grad_norm": 1436.7415771484375, - "learning_rate": 4.999915170620677e-05, - "loss": 135.1959, - "step": 25590 - }, - { - "epoch": 0.10342723934113617, - "grad_norm": 1880.6636962890625, - "learning_rate": 4.999912270696202e-05, - "loss": 110.9196, - "step": 25600 - }, - { - "epoch": 0.10346764060650379, - "grad_norm": 435.46234130859375, - "learning_rate": 4.9999093220346495e-05, - "loss": 101.5219, - "step": 25610 - }, - { - "epoch": 0.10350804187187143, - "grad_norm": 1327.9521484375, - "learning_rate": 4.9999063246360786e-05, - "loss": 92.5202, - "step": 25620 - }, - { - "epoch": 0.10354844313723906, - "grad_norm": 1072.19775390625, - "learning_rate": 4.9999032785005464e-05, - "loss": 104.1919, - "step": 25630 - }, - { - "epoch": 0.10358884440260668, - "grad_norm": 1694.939697265625, - "learning_rate": 4.999900183628112e-05, - "loss": 165.591, - "step": 25640 - }, - { - "epoch": 0.10362924566797432, - "grad_norm": 773.9560546875, - "learning_rate": 4.999897040018837e-05, - "loss": 104.817, - "step": 25650 - }, - { - "epoch": 0.10366964693334195, - "grad_norm": 1010.9199829101562, - "learning_rate": 4.9998938476727826e-05, - "loss": 114.004, - "step": 25660 - }, - { - "epoch": 0.10371004819870959, - "grad_norm": 2661.3837890625, - "learning_rate": 4.99989060659001e-05, - "loss": 99.0087, - "step": 25670 - }, - { - "epoch": 0.10375044946407722, - "grad_norm": 553.9918823242188, - "learning_rate": 4.999887316770584e-05, - "loss": 72.9581, - "step": 25680 - }, - { - "epoch": 0.10379085072944484, - "grad_norm": 580.9917602539062, - "learning_rate": 4.999883978214567e-05, - "loss": 103.5152, - "step": 25690 - }, - { - "epoch": 0.10383125199481248, - "grad_norm": 4983.98974609375, - "learning_rate": 4.999880590922025e-05, - "loss": 135.9269, - "step": 25700 - }, - { - "epoch": 0.10387165326018011, - "grad_norm": 1303.5921630859375, - "learning_rate": 4.999877154893023e-05, - "loss": 125.694, - "step": 25710 - }, - { - "epoch": 0.10391205452554773, - "grad_norm": 416.0110778808594, - "learning_rate": 4.9998736701276295e-05, - "loss": 61.3954, - "step": 25720 - }, - { - "epoch": 0.10395245579091537, - "grad_norm": 1457.8369140625, - "learning_rate": 4.999870136625912e-05, - "loss": 148.0761, - "step": 25730 - }, - { - "epoch": 0.103992857056283, - "grad_norm": 758.3458251953125, - "learning_rate": 4.999866554387939e-05, - "loss": 94.0594, - "step": 25740 - }, - { - "epoch": 0.10403325832165064, - "grad_norm": 1217.02978515625, - "learning_rate": 4.999862923413781e-05, - "loss": 101.6771, - "step": 25750 - }, - { - "epoch": 0.10407365958701827, - "grad_norm": 1469.3179931640625, - "learning_rate": 4.9998592437035076e-05, - "loss": 119.159, - "step": 25760 - }, - { - "epoch": 0.10411406085238589, - "grad_norm": 2354.025390625, - "learning_rate": 4.9998555152571914e-05, - "loss": 115.2768, - "step": 25770 - }, - { - "epoch": 0.10415446211775353, - "grad_norm": 789.3027954101562, - "learning_rate": 4.999851738074904e-05, - "loss": 73.4068, - "step": 25780 - }, - { - "epoch": 0.10419486338312116, - "grad_norm": 841.7200317382812, - "learning_rate": 4.9998479121567214e-05, - "loss": 134.448, - "step": 25790 - }, - { - "epoch": 0.10423526464848878, - "grad_norm": 431.4704895019531, - "learning_rate": 4.9998440375027166e-05, - "loss": 150.1897, - "step": 25800 - }, - { - "epoch": 0.10427566591385642, - "grad_norm": 508.6786804199219, - "learning_rate": 4.999840114112965e-05, - "loss": 82.7051, - "step": 25810 - }, - { - "epoch": 0.10431606717922405, - "grad_norm": 818.539794921875, - "learning_rate": 4.999836141987543e-05, - "loss": 138.6107, - "step": 25820 - }, - { - "epoch": 0.10435646844459169, - "grad_norm": 503.4788513183594, - "learning_rate": 4.999832121126529e-05, - "loss": 119.86, - "step": 25830 - }, - { - "epoch": 0.10439686970995932, - "grad_norm": 838.92041015625, - "learning_rate": 4.99982805153e-05, - "loss": 121.616, - "step": 25840 - }, - { - "epoch": 0.10443727097532694, - "grad_norm": 631.1499633789062, - "learning_rate": 4.9998239331980366e-05, - "loss": 85.7237, - "step": 25850 - }, - { - "epoch": 0.10447767224069458, - "grad_norm": 566.1675415039062, - "learning_rate": 4.999819766130719e-05, - "loss": 91.9205, - "step": 25860 - }, - { - "epoch": 0.10451807350606221, - "grad_norm": 0.0, - "learning_rate": 4.999815550328128e-05, - "loss": 88.1384, - "step": 25870 - }, - { - "epoch": 0.10455847477142984, - "grad_norm": 735.2154541015625, - "learning_rate": 4.9998112857903454e-05, - "loss": 114.0636, - "step": 25880 - }, - { - "epoch": 0.10459887603679748, - "grad_norm": 1965.4488525390625, - "learning_rate": 4.9998069725174546e-05, - "loss": 117.463, - "step": 25890 - }, - { - "epoch": 0.1046392773021651, - "grad_norm": 954.0660400390625, - "learning_rate": 4.9998026105095405e-05, - "loss": 110.6166, - "step": 25900 - }, - { - "epoch": 0.10467967856753274, - "grad_norm": 2468.442626953125, - "learning_rate": 4.9997981997666874e-05, - "loss": 118.2137, - "step": 25910 - }, - { - "epoch": 0.10472007983290037, - "grad_norm": 1254.0142822265625, - "learning_rate": 4.999793740288982e-05, - "loss": 74.752, - "step": 25920 - }, - { - "epoch": 0.104760481098268, - "grad_norm": 625.595458984375, - "learning_rate": 4.999789232076509e-05, - "loss": 89.1264, - "step": 25930 - }, - { - "epoch": 0.10480088236363563, - "grad_norm": 579.2570190429688, - "learning_rate": 4.999784675129359e-05, - "loss": 128.8163, - "step": 25940 - }, - { - "epoch": 0.10484128362900326, - "grad_norm": 4238.0439453125, - "learning_rate": 4.999780069447619e-05, - "loss": 135.1213, - "step": 25950 - }, - { - "epoch": 0.10488168489437089, - "grad_norm": 2267.60693359375, - "learning_rate": 4.9997754150313815e-05, - "loss": 106.0803, - "step": 25960 - }, - { - "epoch": 0.10492208615973853, - "grad_norm": 1275.7972412109375, - "learning_rate": 4.999770711880734e-05, - "loss": 114.9827, - "step": 25970 - }, - { - "epoch": 0.10496248742510615, - "grad_norm": 1259.9520263671875, - "learning_rate": 4.999765959995769e-05, - "loss": 122.4034, - "step": 25980 - }, - { - "epoch": 0.10500288869047379, - "grad_norm": 1249.4486083984375, - "learning_rate": 4.99976115937658e-05, - "loss": 113.4047, - "step": 25990 - }, - { - "epoch": 0.10504328995584142, - "grad_norm": 829.5191650390625, - "learning_rate": 4.999756310023261e-05, - "loss": 100.0737, - "step": 26000 - }, - { - "epoch": 0.10508369122120904, - "grad_norm": 1288.53515625, - "learning_rate": 4.999751411935905e-05, - "loss": 86.3596, - "step": 26010 - }, - { - "epoch": 0.10512409248657668, - "grad_norm": 1140.1302490234375, - "learning_rate": 4.999746465114609e-05, - "loss": 62.8034, - "step": 26020 - }, - { - "epoch": 0.10516449375194431, - "grad_norm": 694.5878295898438, - "learning_rate": 4.999741469559468e-05, - "loss": 92.5841, - "step": 26030 - }, - { - "epoch": 0.10520489501731194, - "grad_norm": 1500.5892333984375, - "learning_rate": 4.99973642527058e-05, - "loss": 110.6927, - "step": 26040 - }, - { - "epoch": 0.10524529628267958, - "grad_norm": 874.5784301757812, - "learning_rate": 4.999731332248044e-05, - "loss": 63.3448, - "step": 26050 - }, - { - "epoch": 0.1052856975480472, - "grad_norm": 932.64501953125, - "learning_rate": 4.999726190491958e-05, - "loss": 107.8189, - "step": 26060 - }, - { - "epoch": 0.10532609881341484, - "grad_norm": 736.0410766601562, - "learning_rate": 4.9997210000024236e-05, - "loss": 99.0618, - "step": 26070 - }, - { - "epoch": 0.10536650007878247, - "grad_norm": 1410.8321533203125, - "learning_rate": 4.999715760779541e-05, - "loss": 126.3873, - "step": 26080 - }, - { - "epoch": 0.1054069013441501, - "grad_norm": 1724.4503173828125, - "learning_rate": 4.999710472823414e-05, - "loss": 104.2871, - "step": 26090 - }, - { - "epoch": 0.10544730260951773, - "grad_norm": 881.2642822265625, - "learning_rate": 4.9997051361341425e-05, - "loss": 67.8849, - "step": 26100 - }, - { - "epoch": 0.10548770387488536, - "grad_norm": 1165.669921875, - "learning_rate": 4.999699750711833e-05, - "loss": 99.132, - "step": 26110 - }, - { - "epoch": 0.10552810514025299, - "grad_norm": 994.9390869140625, - "learning_rate": 4.9996943165565905e-05, - "loss": 62.6341, - "step": 26120 - }, - { - "epoch": 0.10556850640562063, - "grad_norm": 1422.3941650390625, - "learning_rate": 4.99968883366852e-05, - "loss": 85.9586, - "step": 26130 - }, - { - "epoch": 0.10560890767098825, - "grad_norm": 791.3880615234375, - "learning_rate": 4.9996833020477285e-05, - "loss": 76.5841, - "step": 26140 - }, - { - "epoch": 0.10564930893635589, - "grad_norm": 1224.0389404296875, - "learning_rate": 4.999677721694325e-05, - "loss": 161.3394, - "step": 26150 - }, - { - "epoch": 0.10568971020172352, - "grad_norm": 562.48486328125, - "learning_rate": 4.9996720926084164e-05, - "loss": 117.8859, - "step": 26160 - }, - { - "epoch": 0.10573011146709115, - "grad_norm": 1088.6715087890625, - "learning_rate": 4.999666414790113e-05, - "loss": 119.7692, - "step": 26170 - }, - { - "epoch": 0.10577051273245879, - "grad_norm": 716.9673461914062, - "learning_rate": 4.999660688239527e-05, - "loss": 85.0265, - "step": 26180 - }, - { - "epoch": 0.10581091399782641, - "grad_norm": 469.45477294921875, - "learning_rate": 4.999654912956769e-05, - "loss": 75.7188, - "step": 26190 - }, - { - "epoch": 0.10585131526319404, - "grad_norm": 434.962890625, - "learning_rate": 4.9996490889419514e-05, - "loss": 92.6741, - "step": 26200 - }, - { - "epoch": 0.10589171652856168, - "grad_norm": 1787.8798828125, - "learning_rate": 4.9996432161951875e-05, - "loss": 101.0907, - "step": 26210 - }, - { - "epoch": 0.1059321177939293, - "grad_norm": 619.8673706054688, - "learning_rate": 4.999637294716593e-05, - "loss": 144.3787, - "step": 26220 - }, - { - "epoch": 0.10597251905929694, - "grad_norm": 1517.8831787109375, - "learning_rate": 4.9996313245062823e-05, - "loss": 106.3331, - "step": 26230 - }, - { - "epoch": 0.10601292032466457, - "grad_norm": 7921.58203125, - "learning_rate": 4.999625305564371e-05, - "loss": 83.1559, - "step": 26240 - }, - { - "epoch": 0.1060533215900322, - "grad_norm": 839.67822265625, - "learning_rate": 4.9996192378909786e-05, - "loss": 57.181, - "step": 26250 - }, - { - "epoch": 0.10609372285539984, - "grad_norm": 696.2799682617188, - "learning_rate": 4.999613121486222e-05, - "loss": 49.9272, - "step": 26260 - }, - { - "epoch": 0.10613412412076746, - "grad_norm": 1080.623291015625, - "learning_rate": 4.99960695635022e-05, - "loss": 120.3179, - "step": 26270 - }, - { - "epoch": 0.10617452538613509, - "grad_norm": 499.45166015625, - "learning_rate": 4.999600742483094e-05, - "loss": 85.3591, - "step": 26280 - }, - { - "epoch": 0.10621492665150273, - "grad_norm": 938.3174438476562, - "learning_rate": 4.999594479884965e-05, - "loss": 136.4883, - "step": 26290 - }, - { - "epoch": 0.10625532791687035, - "grad_norm": 1555.7998046875, - "learning_rate": 4.999588168555954e-05, - "loss": 122.8322, - "step": 26300 - }, - { - "epoch": 0.106295729182238, - "grad_norm": 4008.9033203125, - "learning_rate": 4.999581808496185e-05, - "loss": 91.5676, - "step": 26310 - }, - { - "epoch": 0.10633613044760562, - "grad_norm": 2325.766845703125, - "learning_rate": 4.999575399705783e-05, - "loss": 85.4827, - "step": 26320 - }, - { - "epoch": 0.10637653171297325, - "grad_norm": 954.0802612304688, - "learning_rate": 4.999568942184871e-05, - "loss": 161.0085, - "step": 26330 - }, - { - "epoch": 0.10641693297834089, - "grad_norm": 1845.4427490234375, - "learning_rate": 4.999562435933575e-05, - "loss": 106.469, - "step": 26340 - }, - { - "epoch": 0.10645733424370851, - "grad_norm": 1453.1446533203125, - "learning_rate": 4.999555880952023e-05, - "loss": 142.5144, - "step": 26350 - }, - { - "epoch": 0.10649773550907614, - "grad_norm": 824.1065673828125, - "learning_rate": 4.999549277240342e-05, - "loss": 84.602, - "step": 26360 - }, - { - "epoch": 0.10653813677444378, - "grad_norm": 3219.528564453125, - "learning_rate": 4.999542624798661e-05, - "loss": 79.4629, - "step": 26370 - }, - { - "epoch": 0.1065785380398114, - "grad_norm": 655.1124267578125, - "learning_rate": 4.999535923627109e-05, - "loss": 103.1578, - "step": 26380 - }, - { - "epoch": 0.10661893930517904, - "grad_norm": 840.5703125, - "learning_rate": 4.999529173725819e-05, - "loss": 146.742, - "step": 26390 - }, - { - "epoch": 0.10665934057054667, - "grad_norm": 1991.2275390625, - "learning_rate": 4.999522375094919e-05, - "loss": 88.6063, - "step": 26400 - }, - { - "epoch": 0.1066997418359143, - "grad_norm": 1580.2342529296875, - "learning_rate": 4.999515527734545e-05, - "loss": 113.3611, - "step": 26410 - }, - { - "epoch": 0.10674014310128194, - "grad_norm": 1060.3555908203125, - "learning_rate": 4.9995086316448284e-05, - "loss": 176.4278, - "step": 26420 - }, - { - "epoch": 0.10678054436664956, - "grad_norm": 2521.33447265625, - "learning_rate": 4.999501686825904e-05, - "loss": 104.2664, - "step": 26430 - }, - { - "epoch": 0.10682094563201719, - "grad_norm": 1635.3087158203125, - "learning_rate": 4.999494693277907e-05, - "loss": 129.5747, - "step": 26440 - }, - { - "epoch": 0.10686134689738483, - "grad_norm": 0.0, - "learning_rate": 4.999487651000975e-05, - "loss": 83.0078, - "step": 26450 - }, - { - "epoch": 0.10690174816275246, - "grad_norm": 1445.0679931640625, - "learning_rate": 4.9994805599952445e-05, - "loss": 115.5746, - "step": 26460 - }, - { - "epoch": 0.1069421494281201, - "grad_norm": 861.307861328125, - "learning_rate": 4.999473420260853e-05, - "loss": 63.1311, - "step": 26470 - }, - { - "epoch": 0.10698255069348772, - "grad_norm": 2009.19189453125, - "learning_rate": 4.999466231797941e-05, - "loss": 119.9433, - "step": 26480 - }, - { - "epoch": 0.10702295195885535, - "grad_norm": 1041.7818603515625, - "learning_rate": 4.9994589946066475e-05, - "loss": 134.1364, - "step": 26490 - }, - { - "epoch": 0.10706335322422299, - "grad_norm": 717.532470703125, - "learning_rate": 4.999451708687114e-05, - "loss": 99.0042, - "step": 26500 - }, - { - "epoch": 0.10710375448959061, - "grad_norm": 1365.530517578125, - "learning_rate": 4.999444374039483e-05, - "loss": 126.3179, - "step": 26510 - }, - { - "epoch": 0.10714415575495824, - "grad_norm": 570.315673828125, - "learning_rate": 4.999436990663897e-05, - "loss": 75.446, - "step": 26520 - }, - { - "epoch": 0.10718455702032588, - "grad_norm": 1172.0423583984375, - "learning_rate": 4.9994295585605e-05, - "loss": 117.2075, - "step": 26530 - }, - { - "epoch": 0.1072249582856935, - "grad_norm": 773.590087890625, - "learning_rate": 4.9994220777294364e-05, - "loss": 90.5786, - "step": 26540 - }, - { - "epoch": 0.10726535955106115, - "grad_norm": 983.9734497070312, - "learning_rate": 4.999414548170853e-05, - "loss": 89.9236, - "step": 26550 - }, - { - "epoch": 0.10730576081642877, - "grad_norm": 1194.6666259765625, - "learning_rate": 4.999406969884897e-05, - "loss": 64.2603, - "step": 26560 - }, - { - "epoch": 0.1073461620817964, - "grad_norm": 1597.0323486328125, - "learning_rate": 4.9993993428717144e-05, - "loss": 130.4407, - "step": 26570 - }, - { - "epoch": 0.10738656334716404, - "grad_norm": 1673.6080322265625, - "learning_rate": 4.999391667131455e-05, - "loss": 163.3066, - "step": 26580 - }, - { - "epoch": 0.10742696461253166, - "grad_norm": 663.078857421875, - "learning_rate": 4.9993839426642685e-05, - "loss": 130.6437, - "step": 26590 - }, - { - "epoch": 0.10746736587789929, - "grad_norm": 1640.7069091796875, - "learning_rate": 4.999376169470306e-05, - "loss": 131.2924, - "step": 26600 - }, - { - "epoch": 0.10750776714326693, - "grad_norm": 986.1610717773438, - "learning_rate": 4.9993683475497174e-05, - "loss": 115.7689, - "step": 26610 - }, - { - "epoch": 0.10754816840863456, - "grad_norm": 322.355224609375, - "learning_rate": 4.999360476902656e-05, - "loss": 75.3167, - "step": 26620 - }, - { - "epoch": 0.1075885696740022, - "grad_norm": 507.4998779296875, - "learning_rate": 4.999352557529275e-05, - "loss": 96.3537, - "step": 26630 - }, - { - "epoch": 0.10762897093936982, - "grad_norm": 1944.9493408203125, - "learning_rate": 4.99934458942973e-05, - "loss": 145.2764, - "step": 26640 - }, - { - "epoch": 0.10766937220473745, - "grad_norm": 2324.57470703125, - "learning_rate": 4.999336572604175e-05, - "loss": 90.6306, - "step": 26650 - }, - { - "epoch": 0.10770977347010509, - "grad_norm": 1668.3519287109375, - "learning_rate": 4.999328507052768e-05, - "loss": 104.2174, - "step": 26660 - }, - { - "epoch": 0.10775017473547271, - "grad_norm": 3942.664794921875, - "learning_rate": 4.999320392775663e-05, - "loss": 126.4141, - "step": 26670 - }, - { - "epoch": 0.10779057600084034, - "grad_norm": 1391.0018310546875, - "learning_rate": 4.999312229773022e-05, - "loss": 83.7744, - "step": 26680 - }, - { - "epoch": 0.10783097726620798, - "grad_norm": 1609.7559814453125, - "learning_rate": 4.999304018045001e-05, - "loss": 100.6681, - "step": 26690 - }, - { - "epoch": 0.1078713785315756, - "grad_norm": 1184.3446044921875, - "learning_rate": 4.999295757591762e-05, - "loss": 95.7704, - "step": 26700 - }, - { - "epoch": 0.10791177979694325, - "grad_norm": 2358.531494140625, - "learning_rate": 4.9992874484134653e-05, - "loss": 74.604, - "step": 26710 - }, - { - "epoch": 0.10795218106231087, - "grad_norm": 576.525146484375, - "learning_rate": 4.9992790905102734e-05, - "loss": 114.726, - "step": 26720 - }, - { - "epoch": 0.1079925823276785, - "grad_norm": 3263.18701171875, - "learning_rate": 4.999270683882349e-05, - "loss": 147.2892, - "step": 26730 - }, - { - "epoch": 0.10803298359304614, - "grad_norm": 882.455322265625, - "learning_rate": 4.999262228529855e-05, - "loss": 127.5431, - "step": 26740 - }, - { - "epoch": 0.10807338485841377, - "grad_norm": 1100.7166748046875, - "learning_rate": 4.999253724452958e-05, - "loss": 106.7672, - "step": 26750 - }, - { - "epoch": 0.10811378612378139, - "grad_norm": 687.6595458984375, - "learning_rate": 4.999245171651823e-05, - "loss": 89.8419, - "step": 26760 - }, - { - "epoch": 0.10815418738914903, - "grad_norm": 443.3103942871094, - "learning_rate": 4.999236570126616e-05, - "loss": 125.1742, - "step": 26770 - }, - { - "epoch": 0.10819458865451666, - "grad_norm": 1187.34912109375, - "learning_rate": 4.999227919877506e-05, - "loss": 127.4198, - "step": 26780 - }, - { - "epoch": 0.1082349899198843, - "grad_norm": 496.15240478515625, - "learning_rate": 4.9992192209046603e-05, - "loss": 117.4117, - "step": 26790 - }, - { - "epoch": 0.10827539118525192, - "grad_norm": 1453.9019775390625, - "learning_rate": 4.99921047320825e-05, - "loss": 93.6351, - "step": 26800 - }, - { - "epoch": 0.10831579245061955, - "grad_norm": 773.209228515625, - "learning_rate": 4.999201676788445e-05, - "loss": 105.4083, - "step": 26810 - }, - { - "epoch": 0.10835619371598719, - "grad_norm": 1077.73193359375, - "learning_rate": 4.999192831645416e-05, - "loss": 113.7033, - "step": 26820 - }, - { - "epoch": 0.10839659498135482, - "grad_norm": 3824.103515625, - "learning_rate": 4.999183937779336e-05, - "loss": 128.3056, - "step": 26830 - }, - { - "epoch": 0.10843699624672244, - "grad_norm": 1111.69189453125, - "learning_rate": 4.999174995190379e-05, - "loss": 69.169, - "step": 26840 - }, - { - "epoch": 0.10847739751209008, - "grad_norm": 355.9375, - "learning_rate": 4.999166003878718e-05, - "loss": 95.577, - "step": 26850 - }, - { - "epoch": 0.10851779877745771, - "grad_norm": 566.212158203125, - "learning_rate": 4.99915696384453e-05, - "loss": 92.3288, - "step": 26860 - }, - { - "epoch": 0.10855820004282535, - "grad_norm": 433.31170654296875, - "learning_rate": 4.99914787508799e-05, - "loss": 77.2623, - "step": 26870 - }, - { - "epoch": 0.10859860130819297, - "grad_norm": 1091.27734375, - "learning_rate": 4.999138737609276e-05, - "loss": 97.2431, - "step": 26880 - }, - { - "epoch": 0.1086390025735606, - "grad_norm": 779.114501953125, - "learning_rate": 4.9991295514085644e-05, - "loss": 90.7876, - "step": 26890 - }, - { - "epoch": 0.10867940383892824, - "grad_norm": 1826.2213134765625, - "learning_rate": 4.9991203164860365e-05, - "loss": 164.2726, - "step": 26900 - }, - { - "epoch": 0.10871980510429587, - "grad_norm": 1381.0306396484375, - "learning_rate": 4.999111032841871e-05, - "loss": 163.8954, - "step": 26910 - }, - { - "epoch": 0.10876020636966349, - "grad_norm": 535.5016479492188, - "learning_rate": 4.9991017004762496e-05, - "loss": 85.5764, - "step": 26920 - }, - { - "epoch": 0.10880060763503113, - "grad_norm": 822.9321899414062, - "learning_rate": 4.999092319389354e-05, - "loss": 117.8981, - "step": 26930 - }, - { - "epoch": 0.10884100890039876, - "grad_norm": 800.758544921875, - "learning_rate": 4.999082889581367e-05, - "loss": 76.8381, - "step": 26940 - }, - { - "epoch": 0.1088814101657664, - "grad_norm": 557.4238891601562, - "learning_rate": 4.999073411052472e-05, - "loss": 130.8072, - "step": 26950 - }, - { - "epoch": 0.10892181143113402, - "grad_norm": 1034.1680908203125, - "learning_rate": 4.9990638838028546e-05, - "loss": 75.0051, - "step": 26960 - }, - { - "epoch": 0.10896221269650165, - "grad_norm": 460.87469482421875, - "learning_rate": 4.9990543078327e-05, - "loss": 97.5543, - "step": 26970 - }, - { - "epoch": 0.10900261396186929, - "grad_norm": 726.0296630859375, - "learning_rate": 4.9990446831421955e-05, - "loss": 97.844, - "step": 26980 - }, - { - "epoch": 0.10904301522723692, - "grad_norm": 857.0488891601562, - "learning_rate": 4.9990350097315275e-05, - "loss": 110.277, - "step": 26990 - }, - { - "epoch": 0.10908341649260454, - "grad_norm": 494.9609375, - "learning_rate": 4.999025287600886e-05, - "loss": 78.5458, - "step": 27000 - }, - { - "epoch": 0.10912381775797218, - "grad_norm": 893.1571655273438, - "learning_rate": 4.99901551675046e-05, - "loss": 109.9289, - "step": 27010 - }, - { - "epoch": 0.10916421902333981, - "grad_norm": 970.8618774414062, - "learning_rate": 4.99900569718044e-05, - "loss": 116.1883, - "step": 27020 - }, - { - "epoch": 0.10920462028870745, - "grad_norm": 745.3575439453125, - "learning_rate": 4.9989958288910164e-05, - "loss": 161.3541, - "step": 27030 - }, - { - "epoch": 0.10924502155407508, - "grad_norm": 804.6139526367188, - "learning_rate": 4.998985911882384e-05, - "loss": 105.6759, - "step": 27040 - }, - { - "epoch": 0.1092854228194427, - "grad_norm": 988.0171508789062, - "learning_rate": 4.998975946154734e-05, - "loss": 167.7433, - "step": 27050 - }, - { - "epoch": 0.10932582408481034, - "grad_norm": 570.0489501953125, - "learning_rate": 4.998965931708261e-05, - "loss": 102.1875, - "step": 27060 - }, - { - "epoch": 0.10936622535017797, - "grad_norm": 677.1341552734375, - "learning_rate": 4.998955868543161e-05, - "loss": 128.7201, - "step": 27070 - }, - { - "epoch": 0.1094066266155456, - "grad_norm": 905.8904418945312, - "learning_rate": 4.99894575665963e-05, - "loss": 107.9805, - "step": 27080 - }, - { - "epoch": 0.10944702788091323, - "grad_norm": 737.1869506835938, - "learning_rate": 4.9989355960578645e-05, - "loss": 89.4114, - "step": 27090 - }, - { - "epoch": 0.10948742914628086, - "grad_norm": 670.5479125976562, - "learning_rate": 4.998925386738063e-05, - "loss": 175.9301, - "step": 27100 - }, - { - "epoch": 0.1095278304116485, - "grad_norm": 1188.797607421875, - "learning_rate": 4.9989151287004244e-05, - "loss": 90.327, - "step": 27110 - }, - { - "epoch": 0.10956823167701613, - "grad_norm": 1030.21533203125, - "learning_rate": 4.9989048219451495e-05, - "loss": 63.2848, - "step": 27120 - }, - { - "epoch": 0.10960863294238375, - "grad_norm": 539.4580688476562, - "learning_rate": 4.998894466472438e-05, - "loss": 104.4276, - "step": 27130 - }, - { - "epoch": 0.10964903420775139, - "grad_norm": 826.8709106445312, - "learning_rate": 4.998884062282492e-05, - "loss": 92.5154, - "step": 27140 - }, - { - "epoch": 0.10968943547311902, - "grad_norm": 916.9140014648438, - "learning_rate": 4.998873609375516e-05, - "loss": 104.0753, - "step": 27150 - }, - { - "epoch": 0.10972983673848664, - "grad_norm": 478.9193115234375, - "learning_rate": 4.998863107751711e-05, - "loss": 117.0698, - "step": 27160 - }, - { - "epoch": 0.10977023800385428, - "grad_norm": 944.5336303710938, - "learning_rate": 4.9988525574112846e-05, - "loss": 151.4209, - "step": 27170 - }, - { - "epoch": 0.10981063926922191, - "grad_norm": 552.6795654296875, - "learning_rate": 4.99884195835444e-05, - "loss": 88.4628, - "step": 27180 - }, - { - "epoch": 0.10985104053458955, - "grad_norm": 954.2455444335938, - "learning_rate": 4.9988313105813856e-05, - "loss": 112.3373, - "step": 27190 - }, - { - "epoch": 0.10989144179995718, - "grad_norm": 936.270751953125, - "learning_rate": 4.998820614092328e-05, - "loss": 75.6448, - "step": 27200 - }, - { - "epoch": 0.1099318430653248, - "grad_norm": 777.1414184570312, - "learning_rate": 4.9988098688874763e-05, - "loss": 96.6067, - "step": 27210 - }, - { - "epoch": 0.10997224433069244, - "grad_norm": 1599.469482421875, - "learning_rate": 4.9987990749670395e-05, - "loss": 126.3036, - "step": 27220 - }, - { - "epoch": 0.11001264559606007, - "grad_norm": 398.20196533203125, - "learning_rate": 4.9987882323312287e-05, - "loss": 86.396, - "step": 27230 - }, - { - "epoch": 0.1100530468614277, - "grad_norm": 1596.5191650390625, - "learning_rate": 4.998777340980254e-05, - "loss": 93.4679, - "step": 27240 - }, - { - "epoch": 0.11009344812679533, - "grad_norm": 807.2232055664062, - "learning_rate": 4.998766400914329e-05, - "loss": 75.585, - "step": 27250 - }, - { - "epoch": 0.11013384939216296, - "grad_norm": 1436.22216796875, - "learning_rate": 4.9987554121336666e-05, - "loss": 102.0856, - "step": 27260 - }, - { - "epoch": 0.1101742506575306, - "grad_norm": 1310.2745361328125, - "learning_rate": 4.998744374638481e-05, - "loss": 102.2359, - "step": 27270 - }, - { - "epoch": 0.11021465192289823, - "grad_norm": 1259.0927734375, - "learning_rate": 4.998733288428987e-05, - "loss": 82.0602, - "step": 27280 - }, - { - "epoch": 0.11025505318826585, - "grad_norm": 2409.748046875, - "learning_rate": 4.998722153505402e-05, - "loss": 95.3449, - "step": 27290 - }, - { - "epoch": 0.11029545445363349, - "grad_norm": 1046.286865234375, - "learning_rate": 4.998710969867942e-05, - "loss": 113.0733, - "step": 27300 - }, - { - "epoch": 0.11033585571900112, - "grad_norm": 630.7343139648438, - "learning_rate": 4.9986997375168246e-05, - "loss": 68.2742, - "step": 27310 - }, - { - "epoch": 0.11037625698436875, - "grad_norm": 553.9161987304688, - "learning_rate": 4.9986884564522696e-05, - "loss": 145.594, - "step": 27320 - }, - { - "epoch": 0.11041665824973639, - "grad_norm": 823.818603515625, - "learning_rate": 4.998677126674497e-05, - "loss": 106.8122, - "step": 27330 - }, - { - "epoch": 0.11045705951510401, - "grad_norm": 1561.4769287109375, - "learning_rate": 4.9986657481837277e-05, - "loss": 154.897, - "step": 27340 - }, - { - "epoch": 0.11049746078047164, - "grad_norm": 1459.7176513671875, - "learning_rate": 4.9986543209801825e-05, - "loss": 140.0131, - "step": 27350 - }, - { - "epoch": 0.11053786204583928, - "grad_norm": 797.2266235351562, - "learning_rate": 4.998642845064086e-05, - "loss": 117.8721, - "step": 27360 - }, - { - "epoch": 0.1105782633112069, - "grad_norm": 902.2604370117188, - "learning_rate": 4.9986313204356594e-05, - "loss": 169.9797, - "step": 27370 - }, - { - "epoch": 0.11061866457657454, - "grad_norm": 504.2903137207031, - "learning_rate": 4.998619747095129e-05, - "loss": 89.4714, - "step": 27380 - }, - { - "epoch": 0.11065906584194217, - "grad_norm": 713.7076416015625, - "learning_rate": 4.998608125042721e-05, - "loss": 97.6478, - "step": 27390 - }, - { - "epoch": 0.1106994671073098, - "grad_norm": 1151.92822265625, - "learning_rate": 4.9985964542786614e-05, - "loss": 115.0625, - "step": 27400 - }, - { - "epoch": 0.11073986837267744, - "grad_norm": 1444.137451171875, - "learning_rate": 4.9985847348031764e-05, - "loss": 119.7467, - "step": 27410 - }, - { - "epoch": 0.11078026963804506, - "grad_norm": 3067.8701171875, - "learning_rate": 4.998572966616496e-05, - "loss": 119.108, - "step": 27420 - }, - { - "epoch": 0.11082067090341269, - "grad_norm": 759.7105712890625, - "learning_rate": 4.99856114971885e-05, - "loss": 99.5433, - "step": 27430 - }, - { - "epoch": 0.11086107216878033, - "grad_norm": 1036.834716796875, - "learning_rate": 4.998549284110468e-05, - "loss": 85.0693, - "step": 27440 - }, - { - "epoch": 0.11090147343414795, - "grad_norm": 1072.163818359375, - "learning_rate": 4.998537369791581e-05, - "loss": 122.0357, - "step": 27450 - }, - { - "epoch": 0.1109418746995156, - "grad_norm": 1213.7109375, - "learning_rate": 4.9985254067624215e-05, - "loss": 81.71, - "step": 27460 - }, - { - "epoch": 0.11098227596488322, - "grad_norm": 641.5134887695312, - "learning_rate": 4.998513395023223e-05, - "loss": 111.3763, - "step": 27470 - }, - { - "epoch": 0.11102267723025085, - "grad_norm": 1474.4072265625, - "learning_rate": 4.99850133457422e-05, - "loss": 103.8446, - "step": 27480 - }, - { - "epoch": 0.11106307849561849, - "grad_norm": 837.8353271484375, - "learning_rate": 4.9984892254156465e-05, - "loss": 85.3186, - "step": 27490 - }, - { - "epoch": 0.11110347976098611, - "grad_norm": 714.3938598632812, - "learning_rate": 4.99847706754774e-05, - "loss": 116.3188, - "step": 27500 - }, - { - "epoch": 0.11114388102635374, - "grad_norm": 1514.875, - "learning_rate": 4.998464860970736e-05, - "loss": 61.2618, - "step": 27510 - }, - { - "epoch": 0.11118428229172138, - "grad_norm": 122.57615661621094, - "learning_rate": 4.998452605684874e-05, - "loss": 103.8598, - "step": 27520 - }, - { - "epoch": 0.111224683557089, - "grad_norm": 700.09521484375, - "learning_rate": 4.9984403016903915e-05, - "loss": 96.256, - "step": 27530 - }, - { - "epoch": 0.11126508482245664, - "grad_norm": 2113.217041015625, - "learning_rate": 4.998427948987528e-05, - "loss": 94.1343, - "step": 27540 - }, - { - "epoch": 0.11130548608782427, - "grad_norm": 710.7130126953125, - "learning_rate": 4.998415547576527e-05, - "loss": 134.276, - "step": 27550 - }, - { - "epoch": 0.1113458873531919, - "grad_norm": 974.3263549804688, - "learning_rate": 4.9984030974576285e-05, - "loss": 70.3129, - "step": 27560 - }, - { - "epoch": 0.11138628861855954, - "grad_norm": 1219.5396728515625, - "learning_rate": 4.998390598631075e-05, - "loss": 119.3183, - "step": 27570 - }, - { - "epoch": 0.11142668988392716, - "grad_norm": 2306.385498046875, - "learning_rate": 4.998378051097111e-05, - "loss": 96.3586, - "step": 27580 - }, - { - "epoch": 0.11146709114929479, - "grad_norm": 676.72998046875, - "learning_rate": 4.99836545485598e-05, - "loss": 87.2172, - "step": 27590 - }, - { - "epoch": 0.11150749241466243, - "grad_norm": 1688.67578125, - "learning_rate": 4.998352809907928e-05, - "loss": 158.3624, - "step": 27600 - }, - { - "epoch": 0.11154789368003006, - "grad_norm": 3076.226806640625, - "learning_rate": 4.9983401162532025e-05, - "loss": 105.7579, - "step": 27610 - }, - { - "epoch": 0.1115882949453977, - "grad_norm": 789.8070678710938, - "learning_rate": 4.99832737389205e-05, - "loss": 114.1961, - "step": 27620 - }, - { - "epoch": 0.11162869621076532, - "grad_norm": 1299.20166015625, - "learning_rate": 4.998314582824719e-05, - "loss": 85.1794, - "step": 27630 - }, - { - "epoch": 0.11166909747613295, - "grad_norm": 611.26123046875, - "learning_rate": 4.998301743051459e-05, - "loss": 63.9051, - "step": 27640 - }, - { - "epoch": 0.11170949874150059, - "grad_norm": 967.321044921875, - "learning_rate": 4.99828885457252e-05, - "loss": 127.1219, - "step": 27650 - }, - { - "epoch": 0.11174990000686821, - "grad_norm": 1000.7772216796875, - "learning_rate": 4.998275917388154e-05, - "loss": 103.4082, - "step": 27660 - }, - { - "epoch": 0.11179030127223584, - "grad_norm": 627.480224609375, - "learning_rate": 4.9982629314986126e-05, - "loss": 96.7553, - "step": 27670 - }, - { - "epoch": 0.11183070253760348, - "grad_norm": 1001.6890869140625, - "learning_rate": 4.998249896904149e-05, - "loss": 92.7391, - "step": 27680 - }, - { - "epoch": 0.1118711038029711, - "grad_norm": 781.9347534179688, - "learning_rate": 4.998236813605017e-05, - "loss": 86.0648, - "step": 27690 - }, - { - "epoch": 0.11191150506833875, - "grad_norm": 2122.2900390625, - "learning_rate": 4.998223681601473e-05, - "loss": 106.2797, - "step": 27700 - }, - { - "epoch": 0.11195190633370637, - "grad_norm": 2083.68798828125, - "learning_rate": 4.9982105008937726e-05, - "loss": 91.295, - "step": 27710 - }, - { - "epoch": 0.111992307599074, - "grad_norm": 1193.2694091796875, - "learning_rate": 4.998197271482171e-05, - "loss": 69.5858, - "step": 27720 - }, - { - "epoch": 0.11203270886444164, - "grad_norm": 2355.148193359375, - "learning_rate": 4.998183993366928e-05, - "loss": 133.6512, - "step": 27730 - }, - { - "epoch": 0.11207311012980926, - "grad_norm": 745.9656372070312, - "learning_rate": 4.998170666548302e-05, - "loss": 99.9143, - "step": 27740 - }, - { - "epoch": 0.11211351139517689, - "grad_norm": 559.490478515625, - "learning_rate": 4.998157291026553e-05, - "loss": 116.1021, - "step": 27750 - }, - { - "epoch": 0.11215391266054453, - "grad_norm": 613.9931030273438, - "learning_rate": 4.998143866801942e-05, - "loss": 79.9714, - "step": 27760 - }, - { - "epoch": 0.11219431392591216, - "grad_norm": 838.7278442382812, - "learning_rate": 4.9981303938747286e-05, - "loss": 74.1118, - "step": 27770 - }, - { - "epoch": 0.1122347151912798, - "grad_norm": 731.7330322265625, - "learning_rate": 4.9981168722451776e-05, - "loss": 85.4063, - "step": 27780 - }, - { - "epoch": 0.11227511645664742, - "grad_norm": 826.3333129882812, - "learning_rate": 4.998103301913552e-05, - "loss": 83.0917, - "step": 27790 - }, - { - "epoch": 0.11231551772201505, - "grad_norm": 1546.168701171875, - "learning_rate": 4.998089682880117e-05, - "loss": 128.3176, - "step": 27800 - }, - { - "epoch": 0.11235591898738269, - "grad_norm": 1162.286865234375, - "learning_rate": 4.998076015145138e-05, - "loss": 122.4255, - "step": 27810 - }, - { - "epoch": 0.11239632025275031, - "grad_norm": 1132.9892578125, - "learning_rate": 4.9980622987088795e-05, - "loss": 98.7629, - "step": 27820 - }, - { - "epoch": 0.11243672151811794, - "grad_norm": 1079.5322265625, - "learning_rate": 4.9980485335716114e-05, - "loss": 119.6111, - "step": 27830 - }, - { - "epoch": 0.11247712278348558, - "grad_norm": 1292.2762451171875, - "learning_rate": 4.9980347197336005e-05, - "loss": 72.3516, - "step": 27840 - }, - { - "epoch": 0.1125175240488532, - "grad_norm": 767.2022094726562, - "learning_rate": 4.998020857195117e-05, - "loss": 144.7648, - "step": 27850 - }, - { - "epoch": 0.11255792531422085, - "grad_norm": 679.2119750976562, - "learning_rate": 4.998006945956431e-05, - "loss": 117.3672, - "step": 27860 - }, - { - "epoch": 0.11259832657958847, - "grad_norm": 1033.4124755859375, - "learning_rate": 4.997992986017813e-05, - "loss": 91.8273, - "step": 27870 - }, - { - "epoch": 0.1126387278449561, - "grad_norm": 1781.468505859375, - "learning_rate": 4.997978977379536e-05, - "loss": 108.8957, - "step": 27880 - }, - { - "epoch": 0.11267912911032374, - "grad_norm": 875.4254150390625, - "learning_rate": 4.9979649200418735e-05, - "loss": 65.4817, - "step": 27890 - }, - { - "epoch": 0.11271953037569137, - "grad_norm": 760.1170654296875, - "learning_rate": 4.997950814005098e-05, - "loss": 105.1915, - "step": 27900 - }, - { - "epoch": 0.11275993164105899, - "grad_norm": 816.7872924804688, - "learning_rate": 4.997936659269486e-05, - "loss": 72.9768, - "step": 27910 - }, - { - "epoch": 0.11280033290642663, - "grad_norm": 1037.8052978515625, - "learning_rate": 4.997922455835311e-05, - "loss": 84.1713, - "step": 27920 - }, - { - "epoch": 0.11284073417179426, - "grad_norm": 599.27197265625, - "learning_rate": 4.9979082037028535e-05, - "loss": 89.3494, - "step": 27930 - }, - { - "epoch": 0.1128811354371619, - "grad_norm": 2486.75830078125, - "learning_rate": 4.9978939028723894e-05, - "loss": 96.0437, - "step": 27940 - }, - { - "epoch": 0.11292153670252952, - "grad_norm": 933.3982543945312, - "learning_rate": 4.9978795533441966e-05, - "loss": 159.8028, - "step": 27950 - }, - { - "epoch": 0.11296193796789715, - "grad_norm": 783.9051513671875, - "learning_rate": 4.997865155118557e-05, - "loss": 80.7255, - "step": 27960 - }, - { - "epoch": 0.11300233923326479, - "grad_norm": 1356.3843994140625, - "learning_rate": 4.9978507081957494e-05, - "loss": 135.2147, - "step": 27970 - }, - { - "epoch": 0.11304274049863242, - "grad_norm": 2129.216796875, - "learning_rate": 4.997836212576057e-05, - "loss": 73.888, - "step": 27980 - }, - { - "epoch": 0.11308314176400004, - "grad_norm": 959.6722412109375, - "learning_rate": 4.9978216682597614e-05, - "loss": 159.4311, - "step": 27990 - }, - { - "epoch": 0.11312354302936768, - "grad_norm": 2303.733642578125, - "learning_rate": 4.997807075247146e-05, - "loss": 98.653, - "step": 28000 - }, - { - "epoch": 0.11316394429473531, - "grad_norm": 906.4110717773438, - "learning_rate": 4.997792433538496e-05, - "loss": 97.6867, - "step": 28010 - }, - { - "epoch": 0.11320434556010295, - "grad_norm": 814.7039184570312, - "learning_rate": 4.997777743134097e-05, - "loss": 91.2434, - "step": 28020 - }, - { - "epoch": 0.11324474682547057, - "grad_norm": 496.1783752441406, - "learning_rate": 4.9977630040342346e-05, - "loss": 109.5629, - "step": 28030 - }, - { - "epoch": 0.1132851480908382, - "grad_norm": 1374.5599365234375, - "learning_rate": 4.997748216239196e-05, - "loss": 115.9625, - "step": 28040 - }, - { - "epoch": 0.11332554935620584, - "grad_norm": 1610.67529296875, - "learning_rate": 4.997733379749271e-05, - "loss": 138.7809, - "step": 28050 - }, - { - "epoch": 0.11336595062157347, - "grad_norm": 655.5691528320312, - "learning_rate": 4.9977184945647473e-05, - "loss": 72.344, - "step": 28060 - }, - { - "epoch": 0.11340635188694109, - "grad_norm": 1008.762939453125, - "learning_rate": 4.9977035606859156e-05, - "loss": 170.6503, - "step": 28070 - }, - { - "epoch": 0.11344675315230873, - "grad_norm": 692.5789184570312, - "learning_rate": 4.9976885781130665e-05, - "loss": 79.409, - "step": 28080 - }, - { - "epoch": 0.11348715441767636, - "grad_norm": 2432.585693359375, - "learning_rate": 4.9976735468464935e-05, - "loss": 181.1055, - "step": 28090 - }, - { - "epoch": 0.113527555683044, - "grad_norm": 1090.251220703125, - "learning_rate": 4.997658466886489e-05, - "loss": 114.0751, - "step": 28100 - }, - { - "epoch": 0.11356795694841162, - "grad_norm": 861.4591674804688, - "learning_rate": 4.997643338233346e-05, - "loss": 97.7165, - "step": 28110 - }, - { - "epoch": 0.11360835821377925, - "grad_norm": 1879.217529296875, - "learning_rate": 4.997628160887361e-05, - "loss": 99.4693, - "step": 28120 - }, - { - "epoch": 0.11364875947914689, - "grad_norm": 617.5592651367188, - "learning_rate": 4.997612934848829e-05, - "loss": 98.8888, - "step": 28130 - }, - { - "epoch": 0.11368916074451452, - "grad_norm": 671.50341796875, - "learning_rate": 4.997597660118046e-05, - "loss": 96.949, - "step": 28140 - }, - { - "epoch": 0.11372956200988214, - "grad_norm": 800.4517211914062, - "learning_rate": 4.9975823366953124e-05, - "loss": 71.7921, - "step": 28150 - }, - { - "epoch": 0.11376996327524978, - "grad_norm": 889.1356201171875, - "learning_rate": 4.9975669645809244e-05, - "loss": 124.3523, - "step": 28160 - }, - { - "epoch": 0.11381036454061741, - "grad_norm": 1013.8363037109375, - "learning_rate": 4.997551543775182e-05, - "loss": 111.264, - "step": 28170 - }, - { - "epoch": 0.11385076580598505, - "grad_norm": 1395.8243408203125, - "learning_rate": 4.997536074278387e-05, - "loss": 112.4192, - "step": 28180 - }, - { - "epoch": 0.11389116707135268, - "grad_norm": 1430.334228515625, - "learning_rate": 4.997520556090841e-05, - "loss": 121.1184, - "step": 28190 - }, - { - "epoch": 0.1139315683367203, - "grad_norm": 1214.85400390625, - "learning_rate": 4.9975049892128455e-05, - "loss": 87.6032, - "step": 28200 - }, - { - "epoch": 0.11397196960208794, - "grad_norm": 867.681396484375, - "learning_rate": 4.9974893736447045e-05, - "loss": 90.5741, - "step": 28210 - }, - { - "epoch": 0.11401237086745557, - "grad_norm": 759.8204345703125, - "learning_rate": 4.997473709386722e-05, - "loss": 79.1436, - "step": 28220 - }, - { - "epoch": 0.1140527721328232, - "grad_norm": 1119.670654296875, - "learning_rate": 4.997457996439204e-05, - "loss": 89.3735, - "step": 28230 - }, - { - "epoch": 0.11409317339819083, - "grad_norm": 843.7247924804688, - "learning_rate": 4.997442234802456e-05, - "loss": 111.084, - "step": 28240 - }, - { - "epoch": 0.11413357466355846, - "grad_norm": 775.5316772460938, - "learning_rate": 4.997426424476787e-05, - "loss": 96.1073, - "step": 28250 - }, - { - "epoch": 0.1141739759289261, - "grad_norm": 1531.5697021484375, - "learning_rate": 4.9974105654625036e-05, - "loss": 82.1885, - "step": 28260 - }, - { - "epoch": 0.11421437719429373, - "grad_norm": 693.6162719726562, - "learning_rate": 4.997394657759915e-05, - "loss": 119.7303, - "step": 28270 - }, - { - "epoch": 0.11425477845966135, - "grad_norm": 4390.083984375, - "learning_rate": 4.997378701369332e-05, - "loss": 156.4863, - "step": 28280 - }, - { - "epoch": 0.11429517972502899, - "grad_norm": 781.9625854492188, - "learning_rate": 4.9973626962910656e-05, - "loss": 90.4073, - "step": 28290 - }, - { - "epoch": 0.11433558099039662, - "grad_norm": 1085.052978515625, - "learning_rate": 4.9973466425254286e-05, - "loss": 74.8492, - "step": 28300 - }, - { - "epoch": 0.11437598225576424, - "grad_norm": 975.8035278320312, - "learning_rate": 4.997330540072732e-05, - "loss": 108.0216, - "step": 28310 - }, - { - "epoch": 0.11441638352113188, - "grad_norm": 664.2997436523438, - "learning_rate": 4.997314388933291e-05, - "loss": 96.9389, - "step": 28320 - }, - { - "epoch": 0.11445678478649951, - "grad_norm": 395.10052490234375, - "learning_rate": 4.997298189107421e-05, - "loss": 114.4039, - "step": 28330 - }, - { - "epoch": 0.11449718605186715, - "grad_norm": 894.0714721679688, - "learning_rate": 4.9972819405954366e-05, - "loss": 66.7973, - "step": 28340 - }, - { - "epoch": 0.11453758731723478, - "grad_norm": 3636.150390625, - "learning_rate": 4.997265643397654e-05, - "loss": 137.2116, - "step": 28350 - }, - { - "epoch": 0.1145779885826024, - "grad_norm": 543.2451782226562, - "learning_rate": 4.997249297514394e-05, - "loss": 72.1923, - "step": 28360 - }, - { - "epoch": 0.11461838984797004, - "grad_norm": 1202.521484375, - "learning_rate": 4.997232902945971e-05, - "loss": 101.5618, - "step": 28370 - }, - { - "epoch": 0.11465879111333767, - "grad_norm": 1286.06494140625, - "learning_rate": 4.997216459692709e-05, - "loss": 99.6231, - "step": 28380 - }, - { - "epoch": 0.1146991923787053, - "grad_norm": 747.28271484375, - "learning_rate": 4.997199967754925e-05, - "loss": 101.1043, - "step": 28390 - }, - { - "epoch": 0.11473959364407293, - "grad_norm": 397.4785461425781, - "learning_rate": 4.997183427132943e-05, - "loss": 99.9764, - "step": 28400 - }, - { - "epoch": 0.11477999490944056, - "grad_norm": 1810.4814453125, - "learning_rate": 4.997166837827084e-05, - "loss": 107.5269, - "step": 28410 - }, - { - "epoch": 0.1148203961748082, - "grad_norm": 1796.510009765625, - "learning_rate": 4.997150199837671e-05, - "loss": 98.7808, - "step": 28420 - }, - { - "epoch": 0.11486079744017583, - "grad_norm": 997.3446044921875, - "learning_rate": 4.997133513165031e-05, - "loss": 101.5585, - "step": 28430 - }, - { - "epoch": 0.11490119870554345, - "grad_norm": 575.0126342773438, - "learning_rate": 4.9971167778094863e-05, - "loss": 115.6277, - "step": 28440 - }, - { - "epoch": 0.11494159997091109, - "grad_norm": 1271.26953125, - "learning_rate": 4.997099993771365e-05, - "loss": 60.656, - "step": 28450 - }, - { - "epoch": 0.11498200123627872, - "grad_norm": 1329.1561279296875, - "learning_rate": 4.997083161050994e-05, - "loss": 83.6766, - "step": 28460 - }, - { - "epoch": 0.11502240250164635, - "grad_norm": 527.1378784179688, - "learning_rate": 4.9970662796487e-05, - "loss": 87.6013, - "step": 28470 - }, - { - "epoch": 0.11506280376701399, - "grad_norm": 888.7230224609375, - "learning_rate": 4.997049349564814e-05, - "loss": 86.2665, - "step": 28480 - }, - { - "epoch": 0.11510320503238161, - "grad_norm": 338.1375732421875, - "learning_rate": 4.997032370799666e-05, - "loss": 130.7292, - "step": 28490 - }, - { - "epoch": 0.11514360629774925, - "grad_norm": 2081.712158203125, - "learning_rate": 4.997015343353585e-05, - "loss": 128.4502, - "step": 28500 - }, - { - "epoch": 0.11518400756311688, - "grad_norm": 1699.38623046875, - "learning_rate": 4.996998267226905e-05, - "loss": 115.8591, - "step": 28510 - }, - { - "epoch": 0.1152244088284845, - "grad_norm": 467.2063293457031, - "learning_rate": 4.996981142419959e-05, - "loss": 73.2754, - "step": 28520 - }, - { - "epoch": 0.11526481009385214, - "grad_norm": 804.551025390625, - "learning_rate": 4.996963968933079e-05, - "loss": 67.7496, - "step": 28530 - }, - { - "epoch": 0.11530521135921977, - "grad_norm": 1599.342529296875, - "learning_rate": 4.996946746766601e-05, - "loss": 91.9091, - "step": 28540 - }, - { - "epoch": 0.1153456126245874, - "grad_norm": 1292.8143310546875, - "learning_rate": 4.996929475920862e-05, - "loss": 113.546, - "step": 28550 - }, - { - "epoch": 0.11538601388995504, - "grad_norm": 1679.7252197265625, - "learning_rate": 4.9969121563961956e-05, - "loss": 83.8834, - "step": 28560 - }, - { - "epoch": 0.11542641515532266, - "grad_norm": 1275.5130615234375, - "learning_rate": 4.9968947881929414e-05, - "loss": 115.3006, - "step": 28570 - }, - { - "epoch": 0.1154668164206903, - "grad_norm": 959.7946166992188, - "learning_rate": 4.996877371311439e-05, - "loss": 87.6582, - "step": 28580 - }, - { - "epoch": 0.11550721768605793, - "grad_norm": 1284.703369140625, - "learning_rate": 4.996859905752026e-05, - "loss": 132.2889, - "step": 28590 - }, - { - "epoch": 0.11554761895142555, - "grad_norm": 0.0, - "learning_rate": 4.996842391515044e-05, - "loss": 91.88, - "step": 28600 - }, - { - "epoch": 0.1155880202167932, - "grad_norm": 1002.761962890625, - "learning_rate": 4.996824828600834e-05, - "loss": 125.265, - "step": 28610 - }, - { - "epoch": 0.11562842148216082, - "grad_norm": 665.4733276367188, - "learning_rate": 4.996807217009738e-05, - "loss": 99.0348, - "step": 28620 - }, - { - "epoch": 0.11566882274752845, - "grad_norm": 1002.7938232421875, - "learning_rate": 4.996789556742101e-05, - "loss": 75.1195, - "step": 28630 - }, - { - "epoch": 0.11570922401289609, - "grad_norm": 858.1141357421875, - "learning_rate": 4.996771847798265e-05, - "loss": 73.2454, - "step": 28640 - }, - { - "epoch": 0.11574962527826371, - "grad_norm": 1226.3555908203125, - "learning_rate": 4.996754090178577e-05, - "loss": 146.9862, - "step": 28650 - }, - { - "epoch": 0.11579002654363135, - "grad_norm": 1391.651123046875, - "learning_rate": 4.996736283883382e-05, - "loss": 142.1579, - "step": 28660 - }, - { - "epoch": 0.11583042780899898, - "grad_norm": 1489.2674560546875, - "learning_rate": 4.9967184289130286e-05, - "loss": 115.9638, - "step": 28670 - }, - { - "epoch": 0.1158708290743666, - "grad_norm": 1078.4761962890625, - "learning_rate": 4.9967005252678634e-05, - "loss": 118.9085, - "step": 28680 - }, - { - "epoch": 0.11591123033973424, - "grad_norm": 2236.838623046875, - "learning_rate": 4.9966825729482364e-05, - "loss": 130.9481, - "step": 28690 - }, - { - "epoch": 0.11595163160510187, - "grad_norm": 862.959716796875, - "learning_rate": 4.996664571954497e-05, - "loss": 111.0184, - "step": 28700 - }, - { - "epoch": 0.1159920328704695, - "grad_norm": 1136.712646484375, - "learning_rate": 4.996646522286997e-05, - "loss": 103.9295, - "step": 28710 - }, - { - "epoch": 0.11603243413583714, - "grad_norm": 1675.60791015625, - "learning_rate": 4.996628423946087e-05, - "loss": 109.6175, - "step": 28720 - }, - { - "epoch": 0.11607283540120476, - "grad_norm": 1874.5843505859375, - "learning_rate": 4.996610276932121e-05, - "loss": 85.4252, - "step": 28730 - }, - { - "epoch": 0.1161132366665724, - "grad_norm": 1308.438232421875, - "learning_rate": 4.996592081245451e-05, - "loss": 115.684, - "step": 28740 - }, - { - "epoch": 0.11615363793194003, - "grad_norm": 428.2576904296875, - "learning_rate": 4.996573836886435e-05, - "loss": 57.5053, - "step": 28750 - }, - { - "epoch": 0.11619403919730766, - "grad_norm": 1469.609619140625, - "learning_rate": 4.9965555438554254e-05, - "loss": 77.9164, - "step": 28760 - }, - { - "epoch": 0.1162344404626753, - "grad_norm": 2238.658447265625, - "learning_rate": 4.9965372021527814e-05, - "loss": 129.5649, - "step": 28770 - }, - { - "epoch": 0.11627484172804292, - "grad_norm": 1731.619140625, - "learning_rate": 4.996518811778858e-05, - "loss": 92.2746, - "step": 28780 - }, - { - "epoch": 0.11631524299341055, - "grad_norm": 2262.8583984375, - "learning_rate": 4.996500372734015e-05, - "loss": 126.2437, - "step": 28790 - }, - { - "epoch": 0.11635564425877819, - "grad_norm": 1330.55908203125, - "learning_rate": 4.9964818850186135e-05, - "loss": 118.3177, - "step": 28800 - }, - { - "epoch": 0.11639604552414581, - "grad_norm": 1293.2772216796875, - "learning_rate": 4.9964633486330116e-05, - "loss": 131.9247, - "step": 28810 - }, - { - "epoch": 0.11643644678951345, - "grad_norm": 626.7706298828125, - "learning_rate": 4.9964447635775714e-05, - "loss": 80.6821, - "step": 28820 - }, - { - "epoch": 0.11647684805488108, - "grad_norm": 680.4353637695312, - "learning_rate": 4.996426129852655e-05, - "loss": 79.4926, - "step": 28830 - }, - { - "epoch": 0.1165172493202487, - "grad_norm": 1769.197998046875, - "learning_rate": 4.996407447458626e-05, - "loss": 105.4986, - "step": 28840 - }, - { - "epoch": 0.11655765058561635, - "grad_norm": 1792.2196044921875, - "learning_rate": 4.996388716395848e-05, - "loss": 108.3764, - "step": 28850 - }, - { - "epoch": 0.11659805185098397, - "grad_norm": 582.6719970703125, - "learning_rate": 4.996369936664688e-05, - "loss": 79.2789, - "step": 28860 - }, - { - "epoch": 0.1166384531163516, - "grad_norm": 1305.8157958984375, - "learning_rate": 4.99635110826551e-05, - "loss": 112.6347, - "step": 28870 - }, - { - "epoch": 0.11667885438171924, - "grad_norm": 1331.4168701171875, - "learning_rate": 4.996332231198683e-05, - "loss": 76.5, - "step": 28880 - }, - { - "epoch": 0.11671925564708686, - "grad_norm": 1037.36767578125, - "learning_rate": 4.996313305464573e-05, - "loss": 159.2613, - "step": 28890 - }, - { - "epoch": 0.1167596569124545, - "grad_norm": 1378.345458984375, - "learning_rate": 4.99629433106355e-05, - "loss": 157.8604, - "step": 28900 - }, - { - "epoch": 0.11680005817782213, - "grad_norm": 846.559814453125, - "learning_rate": 4.9962753079959836e-05, - "loss": 112.2688, - "step": 28910 - }, - { - "epoch": 0.11684045944318976, - "grad_norm": 1050.1434326171875, - "learning_rate": 4.996256236262245e-05, - "loss": 88.0946, - "step": 28920 - }, - { - "epoch": 0.1168808607085574, - "grad_norm": 410.0029296875, - "learning_rate": 4.996237115862706e-05, - "loss": 59.4956, - "step": 28930 - }, - { - "epoch": 0.11692126197392502, - "grad_norm": 573.1680908203125, - "learning_rate": 4.99621794679774e-05, - "loss": 111.6136, - "step": 28940 - }, - { - "epoch": 0.11696166323929265, - "grad_norm": 869.5213012695312, - "learning_rate": 4.996198729067719e-05, - "loss": 146.8108, - "step": 28950 - }, - { - "epoch": 0.11700206450466029, - "grad_norm": 634.8200073242188, - "learning_rate": 4.99617946267302e-05, - "loss": 119.8773, - "step": 28960 - }, - { - "epoch": 0.11704246577002791, - "grad_norm": 1196.7987060546875, - "learning_rate": 4.996160147614016e-05, - "loss": 71.1213, - "step": 28970 - }, - { - "epoch": 0.11708286703539555, - "grad_norm": 1217.557861328125, - "learning_rate": 4.996140783891085e-05, - "loss": 121.3136, - "step": 28980 - }, - { - "epoch": 0.11712326830076318, - "grad_norm": 1255.81396484375, - "learning_rate": 4.9961213715046045e-05, - "loss": 116.6792, - "step": 28990 - }, - { - "epoch": 0.11716366956613081, - "grad_norm": 627.504150390625, - "learning_rate": 4.996101910454953e-05, - "loss": 75.7639, - "step": 29000 - }, - { - "epoch": 0.11720407083149845, - "grad_norm": 529.0521240234375, - "learning_rate": 4.996082400742509e-05, - "loss": 139.7255, - "step": 29010 - }, - { - "epoch": 0.11724447209686607, - "grad_norm": 1349.41357421875, - "learning_rate": 4.996062842367654e-05, - "loss": 138.5896, - "step": 29020 - }, - { - "epoch": 0.1172848733622337, - "grad_norm": 1360.37255859375, - "learning_rate": 4.9960432353307686e-05, - "loss": 80.0865, - "step": 29030 - }, - { - "epoch": 0.11732527462760134, - "grad_norm": 1388.444091796875, - "learning_rate": 4.996023579632236e-05, - "loss": 112.3245, - "step": 29040 - }, - { - "epoch": 0.11736567589296897, - "grad_norm": 1930.4471435546875, - "learning_rate": 4.996003875272438e-05, - "loss": 114.5048, - "step": 29050 - }, - { - "epoch": 0.1174060771583366, - "grad_norm": 569.8203125, - "learning_rate": 4.9959841222517596e-05, - "loss": 62.5382, - "step": 29060 - }, - { - "epoch": 0.11744647842370423, - "grad_norm": 455.6988830566406, - "learning_rate": 4.9959643205705854e-05, - "loss": 100.6463, - "step": 29070 - }, - { - "epoch": 0.11748687968907186, - "grad_norm": 908.3660888671875, - "learning_rate": 4.995944470229302e-05, - "loss": 114.3609, - "step": 29080 - }, - { - "epoch": 0.1175272809544395, - "grad_norm": 755.6206665039062, - "learning_rate": 4.9959245712282966e-05, - "loss": 95.5638, - "step": 29090 - }, - { - "epoch": 0.11756768221980712, - "grad_norm": 0.0, - "learning_rate": 4.9959046235679565e-05, - "loss": 64.4007, - "step": 29100 - }, - { - "epoch": 0.11760808348517475, - "grad_norm": 808.4530639648438, - "learning_rate": 4.9958846272486704e-05, - "loss": 112.1131, - "step": 29110 - }, - { - "epoch": 0.11764848475054239, - "grad_norm": 1141.5194091796875, - "learning_rate": 4.9958645822708285e-05, - "loss": 87.7617, - "step": 29120 - }, - { - "epoch": 0.11768888601591002, - "grad_norm": 653.6248779296875, - "learning_rate": 4.995844488634822e-05, - "loss": 70.7442, - "step": 29130 - }, - { - "epoch": 0.11772928728127766, - "grad_norm": 1141.1375732421875, - "learning_rate": 4.9958243463410414e-05, - "loss": 100.08, - "step": 29140 - }, - { - "epoch": 0.11776968854664528, - "grad_norm": 675.2451171875, - "learning_rate": 4.995804155389881e-05, - "loss": 113.5737, - "step": 29150 - }, - { - "epoch": 0.11781008981201291, - "grad_norm": 822.73486328125, - "learning_rate": 4.995783915781734e-05, - "loss": 78.6838, - "step": 29160 - }, - { - "epoch": 0.11785049107738055, - "grad_norm": 1152.279541015625, - "learning_rate": 4.995763627516994e-05, - "loss": 124.2494, - "step": 29170 - }, - { - "epoch": 0.11789089234274817, - "grad_norm": 696.2450561523438, - "learning_rate": 4.995743290596057e-05, - "loss": 85.5477, - "step": 29180 - }, - { - "epoch": 0.1179312936081158, - "grad_norm": 669.2838745117188, - "learning_rate": 4.9957229050193197e-05, - "loss": 158.3468, - "step": 29190 - }, - { - "epoch": 0.11797169487348344, - "grad_norm": 629.5454711914062, - "learning_rate": 4.9957024707871806e-05, - "loss": 72.3909, - "step": 29200 - }, - { - "epoch": 0.11801209613885107, - "grad_norm": 2218.03564453125, - "learning_rate": 4.995681987900036e-05, - "loss": 131.4396, - "step": 29210 - }, - { - "epoch": 0.1180524974042187, - "grad_norm": 1187.0064697265625, - "learning_rate": 4.995661456358286e-05, - "loss": 91.9417, - "step": 29220 - }, - { - "epoch": 0.11809289866958633, - "grad_norm": 539.1826171875, - "learning_rate": 4.995640876162332e-05, - "loss": 119.5583, - "step": 29230 - }, - { - "epoch": 0.11813329993495396, - "grad_norm": 1318.2589111328125, - "learning_rate": 4.9956202473125736e-05, - "loss": 115.7666, - "step": 29240 - }, - { - "epoch": 0.1181737012003216, - "grad_norm": 466.1606750488281, - "learning_rate": 4.995599569809414e-05, - "loss": 118.0604, - "step": 29250 - }, - { - "epoch": 0.11821410246568922, - "grad_norm": 797.7738647460938, - "learning_rate": 4.9955788436532545e-05, - "loss": 72.8938, - "step": 29260 - }, - { - "epoch": 0.11825450373105685, - "grad_norm": 1671.8890380859375, - "learning_rate": 4.995558068844503e-05, - "loss": 96.0493, - "step": 29270 - }, - { - "epoch": 0.11829490499642449, - "grad_norm": 934.203125, - "learning_rate": 4.99553724538356e-05, - "loss": 97.1457, - "step": 29280 - }, - { - "epoch": 0.11833530626179212, - "grad_norm": 1040.1793212890625, - "learning_rate": 4.9955163732708346e-05, - "loss": 138.6697, - "step": 29290 - }, - { - "epoch": 0.11837570752715976, - "grad_norm": 2738.3046875, - "learning_rate": 4.9954954525067334e-05, - "loss": 120.2477, - "step": 29300 - }, - { - "epoch": 0.11841610879252738, - "grad_norm": 543.4052124023438, - "learning_rate": 4.995474483091662e-05, - "loss": 129.9302, - "step": 29310 - }, - { - "epoch": 0.11845651005789501, - "grad_norm": 534.146240234375, - "learning_rate": 4.995453465026032e-05, - "loss": 67.6277, - "step": 29320 - }, - { - "epoch": 0.11849691132326265, - "grad_norm": 1281.582275390625, - "learning_rate": 4.995432398310252e-05, - "loss": 117.349, - "step": 29330 - }, - { - "epoch": 0.11853731258863028, - "grad_norm": 3242.07568359375, - "learning_rate": 4.995411282944732e-05, - "loss": 136.7699, - "step": 29340 - }, - { - "epoch": 0.1185777138539979, - "grad_norm": 1517.0467529296875, - "learning_rate": 4.9953901189298845e-05, - "loss": 127.2586, - "step": 29350 - }, - { - "epoch": 0.11861811511936554, - "grad_norm": 689.41015625, - "learning_rate": 4.9953689062661226e-05, - "loss": 108.8368, - "step": 29360 - }, - { - "epoch": 0.11865851638473317, - "grad_norm": 663.7586669921875, - "learning_rate": 4.995347644953858e-05, - "loss": 86.3188, - "step": 29370 - }, - { - "epoch": 0.11869891765010081, - "grad_norm": 1621.5384521484375, - "learning_rate": 4.9953263349935074e-05, - "loss": 100.3996, - "step": 29380 - }, - { - "epoch": 0.11873931891546843, - "grad_norm": 1026.15576171875, - "learning_rate": 4.995304976385484e-05, - "loss": 124.3401, - "step": 29390 - }, - { - "epoch": 0.11877972018083606, - "grad_norm": 840.7376098632812, - "learning_rate": 4.995283569130207e-05, - "loss": 99.3957, - "step": 29400 - }, - { - "epoch": 0.1188201214462037, - "grad_norm": 1305.7696533203125, - "learning_rate": 4.995262113228091e-05, - "loss": 87.9966, - "step": 29410 - }, - { - "epoch": 0.11886052271157133, - "grad_norm": 1312.4971923828125, - "learning_rate": 4.9952406086795564e-05, - "loss": 114.1831, - "step": 29420 - }, - { - "epoch": 0.11890092397693895, - "grad_norm": 1050.6826171875, - "learning_rate": 4.995219055485021e-05, - "loss": 117.2113, - "step": 29430 - }, - { - "epoch": 0.11894132524230659, - "grad_norm": 733.8092041015625, - "learning_rate": 4.995197453644905e-05, - "loss": 120.497, - "step": 29440 - }, - { - "epoch": 0.11898172650767422, - "grad_norm": 980.0513305664062, - "learning_rate": 4.9951758031596304e-05, - "loss": 97.3482, - "step": 29450 - }, - { - "epoch": 0.11902212777304186, - "grad_norm": 646.7477416992188, - "learning_rate": 4.995154104029619e-05, - "loss": 103.4561, - "step": 29460 - }, - { - "epoch": 0.11906252903840948, - "grad_norm": 2361.02978515625, - "learning_rate": 4.9951323562552934e-05, - "loss": 104.5779, - "step": 29470 - }, - { - "epoch": 0.11910293030377711, - "grad_norm": 844.0553588867188, - "learning_rate": 4.995110559837078e-05, - "loss": 91.3419, - "step": 29480 - }, - { - "epoch": 0.11914333156914475, - "grad_norm": 2193.67431640625, - "learning_rate": 4.995088714775398e-05, - "loss": 127.1109, - "step": 29490 - }, - { - "epoch": 0.11918373283451238, - "grad_norm": 732.5401611328125, - "learning_rate": 4.995066821070679e-05, - "loss": 120.7417, - "step": 29500 - }, - { - "epoch": 0.11922413409988, - "grad_norm": 1001.8763427734375, - "learning_rate": 4.995044878723348e-05, - "loss": 114.4594, - "step": 29510 - }, - { - "epoch": 0.11926453536524764, - "grad_norm": 928.3828735351562, - "learning_rate": 4.995022887733832e-05, - "loss": 83.6775, - "step": 29520 - }, - { - "epoch": 0.11930493663061527, - "grad_norm": 858.733154296875, - "learning_rate": 4.99500084810256e-05, - "loss": 144.456, - "step": 29530 - }, - { - "epoch": 0.11934533789598291, - "grad_norm": 2473.096435546875, - "learning_rate": 4.994978759829963e-05, - "loss": 117.7391, - "step": 29540 - }, - { - "epoch": 0.11938573916135053, - "grad_norm": 1232.745361328125, - "learning_rate": 4.9949566229164704e-05, - "loss": 87.8614, - "step": 29550 - }, - { - "epoch": 0.11942614042671816, - "grad_norm": 730.4893188476562, - "learning_rate": 4.994934437362513e-05, - "loss": 94.9959, - "step": 29560 - }, - { - "epoch": 0.1194665416920858, - "grad_norm": 1123.4429931640625, - "learning_rate": 4.9949122031685245e-05, - "loss": 116.839, - "step": 29570 - }, - { - "epoch": 0.11950694295745343, - "grad_norm": 723.6360473632812, - "learning_rate": 4.9948899203349384e-05, - "loss": 86.0155, - "step": 29580 - }, - { - "epoch": 0.11954734422282105, - "grad_norm": 480.5324401855469, - "learning_rate": 4.994867588862189e-05, - "loss": 94.0604, - "step": 29590 - }, - { - "epoch": 0.1195877454881887, - "grad_norm": 1117.021484375, - "learning_rate": 4.9948452087507116e-05, - "loss": 69.5577, - "step": 29600 - }, - { - "epoch": 0.11962814675355632, - "grad_norm": 1889.9290771484375, - "learning_rate": 4.9948227800009416e-05, - "loss": 130.7532, - "step": 29610 - }, - { - "epoch": 0.11966854801892396, - "grad_norm": 811.6279907226562, - "learning_rate": 4.994800302613318e-05, - "loss": 93.5169, - "step": 29620 - }, - { - "epoch": 0.11970894928429159, - "grad_norm": 339.779052734375, - "learning_rate": 4.994777776588278e-05, - "loss": 112.6328, - "step": 29630 - }, - { - "epoch": 0.11974935054965921, - "grad_norm": 794.0516967773438, - "learning_rate": 4.9947552019262605e-05, - "loss": 61.7838, - "step": 29640 - }, - { - "epoch": 0.11978975181502685, - "grad_norm": 1287.187255859375, - "learning_rate": 4.994732578627706e-05, - "loss": 156.2145, - "step": 29650 - }, - { - "epoch": 0.11983015308039448, - "grad_norm": 921.3040161132812, - "learning_rate": 4.994709906693056e-05, - "loss": 77.835, - "step": 29660 - }, - { - "epoch": 0.1198705543457621, - "grad_norm": 985.0899047851562, - "learning_rate": 4.9946871861227514e-05, - "loss": 101.6619, - "step": 29670 - }, - { - "epoch": 0.11991095561112974, - "grad_norm": 1570.173583984375, - "learning_rate": 4.9946644169172355e-05, - "loss": 132.0515, - "step": 29680 - }, - { - "epoch": 0.11995135687649737, - "grad_norm": 1566.288818359375, - "learning_rate": 4.9946415990769534e-05, - "loss": 109.4148, - "step": 29690 - }, - { - "epoch": 0.11999175814186501, - "grad_norm": 654.2642822265625, - "learning_rate": 4.994618732602349e-05, - "loss": 92.2775, - "step": 29700 - }, - { - "epoch": 0.12003215940723264, - "grad_norm": 5245.0458984375, - "learning_rate": 4.994595817493867e-05, - "loss": 144.156, - "step": 29710 - }, - { - "epoch": 0.12007256067260026, - "grad_norm": 4157.51708984375, - "learning_rate": 4.9945728537519555e-05, - "loss": 124.7138, - "step": 29720 - }, - { - "epoch": 0.1201129619379679, - "grad_norm": 1033.809814453125, - "learning_rate": 4.994549841377063e-05, - "loss": 108.5572, - "step": 29730 - }, - { - "epoch": 0.12015336320333553, - "grad_norm": 681.5181884765625, - "learning_rate": 4.9945267803696364e-05, - "loss": 99.638, - "step": 29740 - }, - { - "epoch": 0.12019376446870315, - "grad_norm": 2804.89501953125, - "learning_rate": 4.994503670730125e-05, - "loss": 99.5536, - "step": 29750 - }, - { - "epoch": 0.1202341657340708, - "grad_norm": 346.3814392089844, - "learning_rate": 4.994480512458981e-05, - "loss": 108.1357, - "step": 29760 - }, - { - "epoch": 0.12027456699943842, - "grad_norm": 1421.3365478515625, - "learning_rate": 4.9944573055566556e-05, - "loss": 103.8683, - "step": 29770 - }, - { - "epoch": 0.12031496826480606, - "grad_norm": 1197.5367431640625, - "learning_rate": 4.994434050023601e-05, - "loss": 107.7595, - "step": 29780 - }, - { - "epoch": 0.12035536953017369, - "grad_norm": 256.7560729980469, - "learning_rate": 4.9944107458602693e-05, - "loss": 81.2983, - "step": 29790 - }, - { - "epoch": 0.12039577079554131, - "grad_norm": 1710.7000732421875, - "learning_rate": 4.994387393067117e-05, - "loss": 98.1409, - "step": 29800 - }, - { - "epoch": 0.12043617206090895, - "grad_norm": 583.5015258789062, - "learning_rate": 4.994363991644597e-05, - "loss": 67.0093, - "step": 29810 - }, - { - "epoch": 0.12047657332627658, - "grad_norm": 1057.0147705078125, - "learning_rate": 4.9943405415931674e-05, - "loss": 116.4649, - "step": 29820 - }, - { - "epoch": 0.1205169745916442, - "grad_norm": 671.4649658203125, - "learning_rate": 4.9943170429132855e-05, - "loss": 95.8829, - "step": 29830 - }, - { - "epoch": 0.12055737585701184, - "grad_norm": 1301.7279052734375, - "learning_rate": 4.9942934956054076e-05, - "loss": 90.3948, - "step": 29840 - }, - { - "epoch": 0.12059777712237947, - "grad_norm": 1199.511474609375, - "learning_rate": 4.994269899669994e-05, - "loss": 85.8095, - "step": 29850 - }, - { - "epoch": 0.12063817838774711, - "grad_norm": 648.86376953125, - "learning_rate": 4.9942462551075056e-05, - "loss": 77.0479, - "step": 29860 - }, - { - "epoch": 0.12067857965311474, - "grad_norm": 788.9052124023438, - "learning_rate": 4.994222561918401e-05, - "loss": 81.1479, - "step": 29870 - }, - { - "epoch": 0.12071898091848236, - "grad_norm": 931.2744750976562, - "learning_rate": 4.994198820103145e-05, - "loss": 85.522, - "step": 29880 - }, - { - "epoch": 0.12075938218385, - "grad_norm": 1237.2655029296875, - "learning_rate": 4.994175029662198e-05, - "loss": 95.5039, - "step": 29890 - }, - { - "epoch": 0.12079978344921763, - "grad_norm": 704.9238891601562, - "learning_rate": 4.994151190596025e-05, - "loss": 85.7873, - "step": 29900 - }, - { - "epoch": 0.12084018471458526, - "grad_norm": 826.0241088867188, - "learning_rate": 4.9941273029050894e-05, - "loss": 129.6459, - "step": 29910 - }, - { - "epoch": 0.1208805859799529, - "grad_norm": 479.6162109375, - "learning_rate": 4.994103366589859e-05, - "loss": 117.3531, - "step": 29920 - }, - { - "epoch": 0.12092098724532052, - "grad_norm": 1103.497314453125, - "learning_rate": 4.994079381650799e-05, - "loss": 183.9052, - "step": 29930 - }, - { - "epoch": 0.12096138851068816, - "grad_norm": 710.986572265625, - "learning_rate": 4.994055348088378e-05, - "loss": 105.431, - "step": 29940 - }, - { - "epoch": 0.12100178977605579, - "grad_norm": 1297.952880859375, - "learning_rate": 4.994031265903063e-05, - "loss": 128.99, - "step": 29950 - }, - { - "epoch": 0.12104219104142341, - "grad_norm": 1707.3641357421875, - "learning_rate": 4.9940071350953255e-05, - "loss": 127.0656, - "step": 29960 - }, - { - "epoch": 0.12108259230679105, - "grad_norm": 787.633544921875, - "learning_rate": 4.993982955665634e-05, - "loss": 106.8927, - "step": 29970 - }, - { - "epoch": 0.12112299357215868, - "grad_norm": 1233.0062255859375, - "learning_rate": 4.9939587276144616e-05, - "loss": 104.3319, - "step": 29980 - }, - { - "epoch": 0.1211633948375263, - "grad_norm": 1296.74658203125, - "learning_rate": 4.993934450942279e-05, - "loss": 77.4372, - "step": 29990 - }, - { - "epoch": 0.12120379610289395, - "grad_norm": 735.8043212890625, - "learning_rate": 4.993910125649561e-05, - "loss": 147.1198, - "step": 30000 - }, - { - "epoch": 0.12124419736826157, - "grad_norm": 364.9338684082031, - "learning_rate": 4.993885751736781e-05, - "loss": 84.0477, - "step": 30010 - }, - { - "epoch": 0.12128459863362921, - "grad_norm": 719.6060180664062, - "learning_rate": 4.993861329204414e-05, - "loss": 115.0903, - "step": 30020 - }, - { - "epoch": 0.12132499989899684, - "grad_norm": 1294.3851318359375, - "learning_rate": 4.993836858052937e-05, - "loss": 151.7286, - "step": 30030 - }, - { - "epoch": 0.12136540116436446, - "grad_norm": 711.005126953125, - "learning_rate": 4.993812338282826e-05, - "loss": 86.3682, - "step": 30040 - }, - { - "epoch": 0.1214058024297321, - "grad_norm": 723.4884643554688, - "learning_rate": 4.993787769894559e-05, - "loss": 120.8179, - "step": 30050 - }, - { - "epoch": 0.12144620369509973, - "grad_norm": 1060.3338623046875, - "learning_rate": 4.993763152888617e-05, - "loss": 134.1443, - "step": 30060 - }, - { - "epoch": 0.12148660496046736, - "grad_norm": 829.1416625976562, - "learning_rate": 4.993738487265478e-05, - "loss": 119.1408, - "step": 30070 - }, - { - "epoch": 0.121527006225835, - "grad_norm": 267.49755859375, - "learning_rate": 4.993713773025623e-05, - "loss": 112.0577, - "step": 30080 - }, - { - "epoch": 0.12156740749120262, - "grad_norm": 2977.7763671875, - "learning_rate": 4.993689010169534e-05, - "loss": 115.676, - "step": 30090 - }, - { - "epoch": 0.12160780875657026, - "grad_norm": 927.7879638671875, - "learning_rate": 4.993664198697694e-05, - "loss": 80.4086, - "step": 30100 - }, - { - "epoch": 0.12164821002193789, - "grad_norm": 1482.4476318359375, - "learning_rate": 4.993639338610587e-05, - "loss": 90.1057, - "step": 30110 - }, - { - "epoch": 0.12168861128730551, - "grad_norm": 1078.0257568359375, - "learning_rate": 4.993614429908697e-05, - "loss": 112.5462, - "step": 30120 - }, - { - "epoch": 0.12172901255267315, - "grad_norm": 1431.45947265625, - "learning_rate": 4.99358947259251e-05, - "loss": 83.491, - "step": 30130 - }, - { - "epoch": 0.12176941381804078, - "grad_norm": 792.9324340820312, - "learning_rate": 4.9935644666625125e-05, - "loss": 82.8931, - "step": 30140 - }, - { - "epoch": 0.12180981508340841, - "grad_norm": 1509.551025390625, - "learning_rate": 4.9935394121191915e-05, - "loss": 103.0119, - "step": 30150 - }, - { - "epoch": 0.12185021634877605, - "grad_norm": 1039.8441162109375, - "learning_rate": 4.993514308963036e-05, - "loss": 119.8, - "step": 30160 - }, - { - "epoch": 0.12189061761414367, - "grad_norm": 399.6064758300781, - "learning_rate": 4.993489157194536e-05, - "loss": 115.0279, - "step": 30170 - }, - { - "epoch": 0.12193101887951131, - "grad_norm": 794.4744873046875, - "learning_rate": 4.993463956814181e-05, - "loss": 101.3931, - "step": 30180 - }, - { - "epoch": 0.12197142014487894, - "grad_norm": 2071.046142578125, - "learning_rate": 4.993438707822462e-05, - "loss": 114.3679, - "step": 30190 - }, - { - "epoch": 0.12201182141024657, - "grad_norm": 996.6055297851562, - "learning_rate": 4.993413410219871e-05, - "loss": 84.8786, - "step": 30200 - }, - { - "epoch": 0.1220522226756142, - "grad_norm": 1139.685302734375, - "learning_rate": 4.993388064006903e-05, - "loss": 78.988, - "step": 30210 - }, - { - "epoch": 0.12209262394098183, - "grad_norm": 622.7205200195312, - "learning_rate": 4.993362669184051e-05, - "loss": 88.1808, - "step": 30220 - }, - { - "epoch": 0.12213302520634946, - "grad_norm": 7026.68359375, - "learning_rate": 4.9933372257518096e-05, - "loss": 126.5632, - "step": 30230 - }, - { - "epoch": 0.1221734264717171, - "grad_norm": 945.2219848632812, - "learning_rate": 4.993311733710676e-05, - "loss": 78.7985, - "step": 30240 - }, - { - "epoch": 0.12221382773708472, - "grad_norm": 826.4041137695312, - "learning_rate": 4.9932861930611454e-05, - "loss": 72.6881, - "step": 30250 - }, - { - "epoch": 0.12225422900245236, - "grad_norm": 714.5296630859375, - "learning_rate": 4.993260603803718e-05, - "loss": 94.0831, - "step": 30260 - }, - { - "epoch": 0.12229463026781999, - "grad_norm": 692.105224609375, - "learning_rate": 4.99323496593889e-05, - "loss": 60.1789, - "step": 30270 - }, - { - "epoch": 0.12233503153318762, - "grad_norm": 534.4913330078125, - "learning_rate": 4.993209279467164e-05, - "loss": 69.5706, - "step": 30280 - }, - { - "epoch": 0.12237543279855526, - "grad_norm": 261.96044921875, - "learning_rate": 4.99318354438904e-05, - "loss": 77.032, - "step": 30290 - }, - { - "epoch": 0.12241583406392288, - "grad_norm": 1099.048095703125, - "learning_rate": 4.9931577607050175e-05, - "loss": 92.2519, - "step": 30300 - }, - { - "epoch": 0.12245623532929051, - "grad_norm": 1755.97705078125, - "learning_rate": 4.993131928415602e-05, - "loss": 159.9417, - "step": 30310 - }, - { - "epoch": 0.12249663659465815, - "grad_norm": 1634.8353271484375, - "learning_rate": 4.993106047521296e-05, - "loss": 91.0221, - "step": 30320 - }, - { - "epoch": 0.12253703786002577, - "grad_norm": 980.1278686523438, - "learning_rate": 4.993080118022604e-05, - "loss": 97.5692, - "step": 30330 - }, - { - "epoch": 0.12257743912539341, - "grad_norm": 2488.35009765625, - "learning_rate": 4.993054139920032e-05, - "loss": 114.5058, - "step": 30340 - }, - { - "epoch": 0.12261784039076104, - "grad_norm": 1487.991455078125, - "learning_rate": 4.9930281132140846e-05, - "loss": 82.5714, - "step": 30350 - }, - { - "epoch": 0.12265824165612867, - "grad_norm": 513.70654296875, - "learning_rate": 4.993002037905272e-05, - "loss": 127.0465, - "step": 30360 - }, - { - "epoch": 0.1226986429214963, - "grad_norm": 2360.14208984375, - "learning_rate": 4.9929759139941e-05, - "loss": 128.6292, - "step": 30370 - }, - { - "epoch": 0.12273904418686393, - "grad_norm": 537.433349609375, - "learning_rate": 4.99294974148108e-05, - "loss": 84.4188, - "step": 30380 - }, - { - "epoch": 0.12277944545223156, - "grad_norm": 339.3077697753906, - "learning_rate": 4.9929235203667214e-05, - "loss": 84.647, - "step": 30390 - }, - { - "epoch": 0.1228198467175992, - "grad_norm": 697.3407592773438, - "learning_rate": 4.992897250651535e-05, - "loss": 96.315, - "step": 30400 - }, - { - "epoch": 0.12286024798296682, - "grad_norm": 637.2626342773438, - "learning_rate": 4.9928709323360337e-05, - "loss": 87.7648, - "step": 30410 - }, - { - "epoch": 0.12290064924833445, - "grad_norm": 591.4133911132812, - "learning_rate": 4.99284456542073e-05, - "loss": 57.4863, - "step": 30420 - }, - { - "epoch": 0.12294105051370209, - "grad_norm": 1242.639404296875, - "learning_rate": 4.992818149906138e-05, - "loss": 105.4673, - "step": 30430 - }, - { - "epoch": 0.12298145177906972, - "grad_norm": 946.1543579101562, - "learning_rate": 4.992791685792772e-05, - "loss": 101.4292, - "step": 30440 - }, - { - "epoch": 0.12302185304443736, - "grad_norm": 554.2642211914062, - "learning_rate": 4.9927651730811495e-05, - "loss": 55.5201, - "step": 30450 - }, - { - "epoch": 0.12306225430980498, - "grad_norm": 1658.3990478515625, - "learning_rate": 4.992738611771787e-05, - "loss": 90.6332, - "step": 30460 - }, - { - "epoch": 0.12310265557517261, - "grad_norm": 1110.182373046875, - "learning_rate": 4.9927120018652004e-05, - "loss": 112.0799, - "step": 30470 - }, - { - "epoch": 0.12314305684054025, - "grad_norm": 2135.968505859375, - "learning_rate": 4.992685343361911e-05, - "loss": 124.2809, - "step": 30480 - }, - { - "epoch": 0.12318345810590788, - "grad_norm": 0.0, - "learning_rate": 4.992658636262438e-05, - "loss": 130.0038, - "step": 30490 - }, - { - "epoch": 0.1232238593712755, - "grad_norm": 897.2399291992188, - "learning_rate": 4.992631880567301e-05, - "loss": 110.8611, - "step": 30500 - }, - { - "epoch": 0.12326426063664314, - "grad_norm": 1721.460693359375, - "learning_rate": 4.9926050762770224e-05, - "loss": 110.5094, - "step": 30510 - }, - { - "epoch": 0.12330466190201077, - "grad_norm": 994.7217407226562, - "learning_rate": 4.992578223392124e-05, - "loss": 95.7169, - "step": 30520 - }, - { - "epoch": 0.12334506316737841, - "grad_norm": 1169.87939453125, - "learning_rate": 4.9925513219131303e-05, - "loss": 82.8276, - "step": 30530 - }, - { - "epoch": 0.12338546443274603, - "grad_norm": 677.4993286132812, - "learning_rate": 4.992524371840566e-05, - "loss": 105.6593, - "step": 30540 - }, - { - "epoch": 0.12342586569811366, - "grad_norm": 885.8995361328125, - "learning_rate": 4.992497373174955e-05, - "loss": 114.9273, - "step": 30550 - }, - { - "epoch": 0.1234662669634813, - "grad_norm": 1765.774658203125, - "learning_rate": 4.9924703259168244e-05, - "loss": 73.1132, - "step": 30560 - }, - { - "epoch": 0.12350666822884893, - "grad_norm": 1306.00244140625, - "learning_rate": 4.992443230066701e-05, - "loss": 96.6844, - "step": 30570 - }, - { - "epoch": 0.12354706949421655, - "grad_norm": 2164.46044921875, - "learning_rate": 4.992416085625115e-05, - "loss": 101.1072, - "step": 30580 - }, - { - "epoch": 0.12358747075958419, - "grad_norm": 1472.5419921875, - "learning_rate": 4.992388892592594e-05, - "loss": 93.9604, - "step": 30590 - }, - { - "epoch": 0.12362787202495182, - "grad_norm": 376.5589294433594, - "learning_rate": 4.9923616509696683e-05, - "loss": 89.4311, - "step": 30600 - }, - { - "epoch": 0.12366827329031946, - "grad_norm": 809.4692993164062, - "learning_rate": 4.9923343607568684e-05, - "loss": 90.6958, - "step": 30610 - }, - { - "epoch": 0.12370867455568708, - "grad_norm": 1123.7208251953125, - "learning_rate": 4.9923070219547275e-05, - "loss": 113.3677, - "step": 30620 - }, - { - "epoch": 0.12374907582105471, - "grad_norm": 1361.58056640625, - "learning_rate": 4.9922796345637776e-05, - "loss": 109.5225, - "step": 30630 - }, - { - "epoch": 0.12378947708642235, - "grad_norm": 1546.5401611328125, - "learning_rate": 4.992252198584554e-05, - "loss": 156.2372, - "step": 30640 - }, - { - "epoch": 0.12382987835178998, - "grad_norm": 1252.5113525390625, - "learning_rate": 4.99222471401759e-05, - "loss": 98.325, - "step": 30650 - }, - { - "epoch": 0.1238702796171576, - "grad_norm": 587.108642578125, - "learning_rate": 4.992197180863422e-05, - "loss": 83.3731, - "step": 30660 - }, - { - "epoch": 0.12391068088252524, - "grad_norm": 546.9779663085938, - "learning_rate": 4.992169599122587e-05, - "loss": 69.5896, - "step": 30670 - }, - { - "epoch": 0.12395108214789287, - "grad_norm": 533.7169189453125, - "learning_rate": 4.992141968795623e-05, - "loss": 59.3518, - "step": 30680 - }, - { - "epoch": 0.12399148341326051, - "grad_norm": 609.4190673828125, - "learning_rate": 4.992114289883068e-05, - "loss": 132.897, - "step": 30690 - }, - { - "epoch": 0.12403188467862813, - "grad_norm": 1489.8643798828125, - "learning_rate": 4.9920865623854615e-05, - "loss": 84.5661, - "step": 30700 - }, - { - "epoch": 0.12407228594399576, - "grad_norm": 1392.722900390625, - "learning_rate": 4.992058786303345e-05, - "loss": 80.1483, - "step": 30710 - }, - { - "epoch": 0.1241126872093634, - "grad_norm": 1709.162841796875, - "learning_rate": 4.9920309616372596e-05, - "loss": 78.6077, - "step": 30720 - }, - { - "epoch": 0.12415308847473103, - "grad_norm": 319.7249450683594, - "learning_rate": 4.9920030883877476e-05, - "loss": 49.0658, - "step": 30730 - }, - { - "epoch": 0.12419348974009865, - "grad_norm": 2077.73095703125, - "learning_rate": 4.9919751665553525e-05, - "loss": 119.2495, - "step": 30740 - }, - { - "epoch": 0.1242338910054663, - "grad_norm": 1575.6136474609375, - "learning_rate": 4.991947196140618e-05, - "loss": 65.2858, - "step": 30750 - }, - { - "epoch": 0.12427429227083392, - "grad_norm": 891.6358032226562, - "learning_rate": 4.9919191771440905e-05, - "loss": 76.9454, - "step": 30760 - }, - { - "epoch": 0.12431469353620156, - "grad_norm": 836.199951171875, - "learning_rate": 4.991891109566316e-05, - "loss": 86.9683, - "step": 30770 - }, - { - "epoch": 0.12435509480156919, - "grad_norm": 1238.918701171875, - "learning_rate": 4.99186299340784e-05, - "loss": 95.3734, - "step": 30780 - }, - { - "epoch": 0.12439549606693681, - "grad_norm": 1337.448974609375, - "learning_rate": 4.991834828669213e-05, - "loss": 103.7555, - "step": 30790 - }, - { - "epoch": 0.12443589733230445, - "grad_norm": 2682.815673828125, - "learning_rate": 4.9918066153509834e-05, - "loss": 157.3261, - "step": 30800 - }, - { - "epoch": 0.12447629859767208, - "grad_norm": 2427.59033203125, - "learning_rate": 4.9917783534537006e-05, - "loss": 79.6278, - "step": 30810 - }, - { - "epoch": 0.1245166998630397, - "grad_norm": 668.7774658203125, - "learning_rate": 4.991750042977916e-05, - "loss": 77.9551, - "step": 30820 - }, - { - "epoch": 0.12455710112840734, - "grad_norm": 1772.7213134765625, - "learning_rate": 4.991721683924182e-05, - "loss": 109.0242, - "step": 30830 - }, - { - "epoch": 0.12459750239377497, - "grad_norm": 896.2188110351562, - "learning_rate": 4.99169327629305e-05, - "loss": 103.7983, - "step": 30840 - }, - { - "epoch": 0.12463790365914261, - "grad_norm": 1777.919677734375, - "learning_rate": 4.991664820085074e-05, - "loss": 69.7507, - "step": 30850 - }, - { - "epoch": 0.12467830492451024, - "grad_norm": 414.7098693847656, - "learning_rate": 4.9916363153008114e-05, - "loss": 137.439, - "step": 30860 - }, - { - "epoch": 0.12471870618987786, - "grad_norm": 598.8345947265625, - "learning_rate": 4.9916077619408155e-05, - "loss": 120.2765, - "step": 30870 - }, - { - "epoch": 0.1247591074552455, - "grad_norm": 1064.3018798828125, - "learning_rate": 4.991579160005644e-05, - "loss": 84.4768, - "step": 30880 - }, - { - "epoch": 0.12479950872061313, - "grad_norm": 666.4903564453125, - "learning_rate": 4.9915505094958526e-05, - "loss": 136.433, - "step": 30890 - }, - { - "epoch": 0.12483990998598075, - "grad_norm": 1181.445556640625, - "learning_rate": 4.991521810412002e-05, - "loss": 63.556, - "step": 30900 - }, - { - "epoch": 0.1248803112513484, - "grad_norm": 605.0240478515625, - "learning_rate": 4.991493062754651e-05, - "loss": 94.3815, - "step": 30910 - }, - { - "epoch": 0.12492071251671602, - "grad_norm": 794.223388671875, - "learning_rate": 4.99146426652436e-05, - "loss": 76.5061, - "step": 30920 - }, - { - "epoch": 0.12496111378208366, - "grad_norm": 1127.827392578125, - "learning_rate": 4.991435421721691e-05, - "loss": 80.6668, - "step": 30930 - }, - { - "epoch": 0.12500151504745127, - "grad_norm": 494.918212890625, - "learning_rate": 4.991406528347206e-05, - "loss": 137.6075, - "step": 30940 - }, - { - "epoch": 0.1250419163128189, - "grad_norm": 688.9254760742188, - "learning_rate": 4.9913775864014665e-05, - "loss": 77.563, - "step": 30950 - }, - { - "epoch": 0.12508231757818655, - "grad_norm": 1798.6043701171875, - "learning_rate": 4.991348595885039e-05, - "loss": 172.5805, - "step": 30960 - }, - { - "epoch": 0.1251227188435542, - "grad_norm": 629.1405639648438, - "learning_rate": 4.991319556798488e-05, - "loss": 94.1099, - "step": 30970 - }, - { - "epoch": 0.1251631201089218, - "grad_norm": 831.2258911132812, - "learning_rate": 4.99129046914238e-05, - "loss": 82.6184, - "step": 30980 - }, - { - "epoch": 0.12520352137428944, - "grad_norm": 899.6831665039062, - "learning_rate": 4.991261332917282e-05, - "loss": 89.8317, - "step": 30990 - }, - { - "epoch": 0.12524392263965708, - "grad_norm": 1243.12255859375, - "learning_rate": 4.991232148123761e-05, - "loss": 106.8137, - "step": 31000 - }, - { - "epoch": 0.1252843239050247, - "grad_norm": 708.5358276367188, - "learning_rate": 4.9912029147623875e-05, - "loss": 71.765, - "step": 31010 - }, - { - "epoch": 0.12532472517039234, - "grad_norm": 1078.65185546875, - "learning_rate": 4.9911736328337296e-05, - "loss": 98.1934, - "step": 31020 - }, - { - "epoch": 0.12536512643575998, - "grad_norm": 863.23193359375, - "learning_rate": 4.99114430233836e-05, - "loss": 110.7916, - "step": 31030 - }, - { - "epoch": 0.1254055277011276, - "grad_norm": 948.326416015625, - "learning_rate": 4.991114923276849e-05, - "loss": 128.7791, - "step": 31040 - }, - { - "epoch": 0.12544592896649523, - "grad_norm": 1923.48681640625, - "learning_rate": 4.9910854956497696e-05, - "loss": 81.3787, - "step": 31050 - }, - { - "epoch": 0.12548633023186287, - "grad_norm": 1134.7197265625, - "learning_rate": 4.991056019457697e-05, - "loss": 97.8454, - "step": 31060 - }, - { - "epoch": 0.12552673149723048, - "grad_norm": 1076.4554443359375, - "learning_rate": 4.991026494701205e-05, - "loss": 93.0418, - "step": 31070 - }, - { - "epoch": 0.12556713276259812, - "grad_norm": 1976.7174072265625, - "learning_rate": 4.9909969213808683e-05, - "loss": 87.8301, - "step": 31080 - }, - { - "epoch": 0.12560753402796576, - "grad_norm": 1298.41162109375, - "learning_rate": 4.990967299497264e-05, - "loss": 125.1484, - "step": 31090 - }, - { - "epoch": 0.12564793529333337, - "grad_norm": 532.9257202148438, - "learning_rate": 4.990937629050971e-05, - "loss": 101.9732, - "step": 31100 - }, - { - "epoch": 0.125688336558701, - "grad_norm": 1158.4649658203125, - "learning_rate": 4.990907910042566e-05, - "loss": 88.4633, - "step": 31110 - }, - { - "epoch": 0.12572873782406865, - "grad_norm": 1732.9954833984375, - "learning_rate": 4.990878142472628e-05, - "loss": 80.0504, - "step": 31120 - }, - { - "epoch": 0.1257691390894363, - "grad_norm": 432.7400207519531, - "learning_rate": 4.990848326341739e-05, - "loss": 92.5247, - "step": 31130 - }, - { - "epoch": 0.1258095403548039, - "grad_norm": 985.176025390625, - "learning_rate": 4.990818461650479e-05, - "loss": 100.5391, - "step": 31140 - }, - { - "epoch": 0.12584994162017155, - "grad_norm": 789.0088500976562, - "learning_rate": 4.990788548399432e-05, - "loss": 88.8649, - "step": 31150 - }, - { - "epoch": 0.12589034288553919, - "grad_norm": 1790.52978515625, - "learning_rate": 4.990758586589178e-05, - "loss": 108.7384, - "step": 31160 - }, - { - "epoch": 0.1259307441509068, - "grad_norm": 1069.7830810546875, - "learning_rate": 4.9907285762203046e-05, - "loss": 104.5124, - "step": 31170 - }, - { - "epoch": 0.12597114541627444, - "grad_norm": 766.6657104492188, - "learning_rate": 4.990698517293395e-05, - "loss": 59.5578, - "step": 31180 - }, - { - "epoch": 0.12601154668164208, - "grad_norm": 1233.1490478515625, - "learning_rate": 4.990668409809034e-05, - "loss": 63.7087, - "step": 31190 - }, - { - "epoch": 0.1260519479470097, - "grad_norm": 1039.3233642578125, - "learning_rate": 4.990638253767812e-05, - "loss": 110.4396, - "step": 31200 - }, - { - "epoch": 0.12609234921237733, - "grad_norm": 1719.74267578125, - "learning_rate": 4.9906080491703146e-05, - "loss": 84.7901, - "step": 31210 - }, - { - "epoch": 0.12613275047774497, - "grad_norm": 2833.946533203125, - "learning_rate": 4.9905777960171304e-05, - "loss": 146.1511, - "step": 31220 - }, - { - "epoch": 0.12617315174311258, - "grad_norm": 820.8349609375, - "learning_rate": 4.99054749430885e-05, - "loss": 120.1305, - "step": 31230 - }, - { - "epoch": 0.12621355300848022, - "grad_norm": 782.0814208984375, - "learning_rate": 4.990517144046064e-05, - "loss": 68.8798, - "step": 31240 - }, - { - "epoch": 0.12625395427384786, - "grad_norm": 1526.1461181640625, - "learning_rate": 4.990486745229364e-05, - "loss": 109.5388, - "step": 31250 - }, - { - "epoch": 0.12629435553921547, - "grad_norm": 2700.19677734375, - "learning_rate": 4.9904562978593426e-05, - "loss": 141.1139, - "step": 31260 - }, - { - "epoch": 0.12633475680458311, - "grad_norm": 578.1785278320312, - "learning_rate": 4.990425801936594e-05, - "loss": 63.9474, - "step": 31270 - }, - { - "epoch": 0.12637515806995075, - "grad_norm": 1284.421630859375, - "learning_rate": 4.990395257461712e-05, - "loss": 64.6959, - "step": 31280 - }, - { - "epoch": 0.1264155593353184, - "grad_norm": 722.0364379882812, - "learning_rate": 4.9903646644352925e-05, - "loss": 136.6368, - "step": 31290 - }, - { - "epoch": 0.126455960600686, - "grad_norm": 639.962646484375, - "learning_rate": 4.990334022857932e-05, - "loss": 85.277, - "step": 31300 - }, - { - "epoch": 0.12649636186605365, - "grad_norm": 1369.47265625, - "learning_rate": 4.990303332730226e-05, - "loss": 95.2965, - "step": 31310 - }, - { - "epoch": 0.1265367631314213, - "grad_norm": 710.7479858398438, - "learning_rate": 4.990272594052776e-05, - "loss": 103.4021, - "step": 31320 - }, - { - "epoch": 0.1265771643967889, - "grad_norm": 967.9599609375, - "learning_rate": 4.990241806826179e-05, - "loss": 55.237, - "step": 31330 - }, - { - "epoch": 0.12661756566215654, - "grad_norm": 1026.2791748046875, - "learning_rate": 4.990210971051037e-05, - "loss": 76.5986, - "step": 31340 - }, - { - "epoch": 0.12665796692752418, - "grad_norm": 1351.9713134765625, - "learning_rate": 4.990180086727949e-05, - "loss": 78.9581, - "step": 31350 - }, - { - "epoch": 0.1266983681928918, - "grad_norm": 1424.3463134765625, - "learning_rate": 4.9901491538575185e-05, - "loss": 72.768, - "step": 31360 - }, - { - "epoch": 0.12673876945825943, - "grad_norm": 251.47360229492188, - "learning_rate": 4.990118172440348e-05, - "loss": 97.3149, - "step": 31370 - }, - { - "epoch": 0.12677917072362707, - "grad_norm": 1101.8797607421875, - "learning_rate": 4.9900871424770424e-05, - "loss": 113.3635, - "step": 31380 - }, - { - "epoch": 0.12681957198899468, - "grad_norm": 3568.4501953125, - "learning_rate": 4.9900560639682045e-05, - "loss": 123.9855, - "step": 31390 - }, - { - "epoch": 0.12685997325436232, - "grad_norm": 1042.5748291015625, - "learning_rate": 4.9900249369144434e-05, - "loss": 74.6795, - "step": 31400 - }, - { - "epoch": 0.12690037451972996, - "grad_norm": 2803.200439453125, - "learning_rate": 4.9899937613163635e-05, - "loss": 122.4246, - "step": 31410 - }, - { - "epoch": 0.12694077578509758, - "grad_norm": 999.986083984375, - "learning_rate": 4.9899625371745726e-05, - "loss": 103.829, - "step": 31420 - }, - { - "epoch": 0.12698117705046522, - "grad_norm": 420.03607177734375, - "learning_rate": 4.989931264489681e-05, - "loss": 109.0738, - "step": 31430 - }, - { - "epoch": 0.12702157831583286, - "grad_norm": 679.0191040039062, - "learning_rate": 4.9898999432622974e-05, - "loss": 63.1434, - "step": 31440 - }, - { - "epoch": 0.1270619795812005, - "grad_norm": 1536.908203125, - "learning_rate": 4.989868573493032e-05, - "loss": 114.7305, - "step": 31450 - }, - { - "epoch": 0.1271023808465681, - "grad_norm": 2571.055419921875, - "learning_rate": 4.9898371551824974e-05, - "loss": 91.3618, - "step": 31460 - }, - { - "epoch": 0.12714278211193575, - "grad_norm": 1286.792724609375, - "learning_rate": 4.989805688331306e-05, - "loss": 96.6507, - "step": 31470 - }, - { - "epoch": 0.1271831833773034, - "grad_norm": 1014.9740600585938, - "learning_rate": 4.9897741729400705e-05, - "loss": 82.7385, - "step": 31480 - }, - { - "epoch": 0.127223584642671, - "grad_norm": 3557.023681640625, - "learning_rate": 4.989742609009405e-05, - "loss": 105.2578, - "step": 31490 - }, - { - "epoch": 0.12726398590803864, - "grad_norm": 1261.7489013671875, - "learning_rate": 4.989710996539926e-05, - "loss": 110.2257, - "step": 31500 - }, - { - "epoch": 0.12730438717340628, - "grad_norm": 620.3264770507812, - "learning_rate": 4.9896793355322495e-05, - "loss": 130.9313, - "step": 31510 - }, - { - "epoch": 0.1273447884387739, - "grad_norm": 1063.13623046875, - "learning_rate": 4.989647625986993e-05, - "loss": 84.6815, - "step": 31520 - }, - { - "epoch": 0.12738518970414153, - "grad_norm": 1258.741943359375, - "learning_rate": 4.989615867904773e-05, - "loss": 101.0329, - "step": 31530 - }, - { - "epoch": 0.12742559096950917, - "grad_norm": 483.7554931640625, - "learning_rate": 4.989584061286211e-05, - "loss": 115.0386, - "step": 31540 - }, - { - "epoch": 0.12746599223487678, - "grad_norm": 1390.3809814453125, - "learning_rate": 4.989552206131925e-05, - "loss": 95.0824, - "step": 31550 - }, - { - "epoch": 0.12750639350024442, - "grad_norm": 474.9691162109375, - "learning_rate": 4.9895203024425385e-05, - "loss": 123.0268, - "step": 31560 - }, - { - "epoch": 0.12754679476561206, - "grad_norm": 953.542236328125, - "learning_rate": 4.989488350218671e-05, - "loss": 132.4043, - "step": 31570 - }, - { - "epoch": 0.12758719603097968, - "grad_norm": 602.45751953125, - "learning_rate": 4.989456349460947e-05, - "loss": 81.3055, - "step": 31580 - }, - { - "epoch": 0.12762759729634732, - "grad_norm": 1298.2872314453125, - "learning_rate": 4.989424300169989e-05, - "loss": 85.7651, - "step": 31590 - }, - { - "epoch": 0.12766799856171496, - "grad_norm": 539.2799072265625, - "learning_rate": 4.9893922023464236e-05, - "loss": 73.1652, - "step": 31600 - }, - { - "epoch": 0.1277083998270826, - "grad_norm": 506.7164001464844, - "learning_rate": 4.989360055990875e-05, - "loss": 85.522, - "step": 31610 - }, - { - "epoch": 0.1277488010924502, - "grad_norm": 1420.2366943359375, - "learning_rate": 4.98932786110397e-05, - "loss": 101.5808, - "step": 31620 - }, - { - "epoch": 0.12778920235781785, - "grad_norm": 931.4535522460938, - "learning_rate": 4.989295617686337e-05, - "loss": 104.2542, - "step": 31630 - }, - { - "epoch": 0.1278296036231855, - "grad_norm": 1208.561767578125, - "learning_rate": 4.989263325738605e-05, - "loss": 118.4887, - "step": 31640 - }, - { - "epoch": 0.1278700048885531, - "grad_norm": 1164.8492431640625, - "learning_rate": 4.989230985261403e-05, - "loss": 133.7977, - "step": 31650 - }, - { - "epoch": 0.12791040615392074, - "grad_norm": 2086.97900390625, - "learning_rate": 4.9891985962553606e-05, - "loss": 138.2947, - "step": 31660 - }, - { - "epoch": 0.12795080741928838, - "grad_norm": 896.4519653320312, - "learning_rate": 4.98916615872111e-05, - "loss": 108.6655, - "step": 31670 - }, - { - "epoch": 0.127991208684656, - "grad_norm": 921.4371337890625, - "learning_rate": 4.9891336726592844e-05, - "loss": 72.2636, - "step": 31680 - }, - { - "epoch": 0.12803160995002363, - "grad_norm": 2175.284423828125, - "learning_rate": 4.989101138070516e-05, - "loss": 108.8024, - "step": 31690 - }, - { - "epoch": 0.12807201121539127, - "grad_norm": 456.76068115234375, - "learning_rate": 4.989068554955439e-05, - "loss": 124.3368, - "step": 31700 - }, - { - "epoch": 0.12811241248075889, - "grad_norm": 2323.61962890625, - "learning_rate": 4.9890359233146897e-05, - "loss": 83.3551, - "step": 31710 - }, - { - "epoch": 0.12815281374612653, - "grad_norm": 630.8578491210938, - "learning_rate": 4.989003243148904e-05, - "loss": 112.8626, - "step": 31720 - }, - { - "epoch": 0.12819321501149417, - "grad_norm": 1055.7919921875, - "learning_rate": 4.988970514458718e-05, - "loss": 85.2178, - "step": 31730 - }, - { - "epoch": 0.12823361627686178, - "grad_norm": 4026.397705078125, - "learning_rate": 4.9889377372447706e-05, - "loss": 136.7049, - "step": 31740 - }, - { - "epoch": 0.12827401754222942, - "grad_norm": 1399.34130859375, - "learning_rate": 4.9889049115077005e-05, - "loss": 90.4039, - "step": 31750 - }, - { - "epoch": 0.12831441880759706, - "grad_norm": 718.3490600585938, - "learning_rate": 4.988872037248148e-05, - "loss": 122.6609, - "step": 31760 - }, - { - "epoch": 0.1283548200729647, - "grad_norm": 1984.33740234375, - "learning_rate": 4.988839114466753e-05, - "loss": 118.7678, - "step": 31770 - }, - { - "epoch": 0.1283952213383323, - "grad_norm": 732.53662109375, - "learning_rate": 4.988806143164159e-05, - "loss": 59.4425, - "step": 31780 - }, - { - "epoch": 0.12843562260369995, - "grad_norm": 559.675537109375, - "learning_rate": 4.988773123341007e-05, - "loss": 67.2548, - "step": 31790 - }, - { - "epoch": 0.1284760238690676, - "grad_norm": 1089.072998046875, - "learning_rate": 4.988740054997943e-05, - "loss": 96.6428, - "step": 31800 - }, - { - "epoch": 0.1285164251344352, - "grad_norm": 2197.211181640625, - "learning_rate": 4.9887069381356094e-05, - "loss": 116.156, - "step": 31810 - }, - { - "epoch": 0.12855682639980284, - "grad_norm": 459.337158203125, - "learning_rate": 4.988673772754653e-05, - "loss": 105.2984, - "step": 31820 - }, - { - "epoch": 0.12859722766517048, - "grad_norm": 1142.6385498046875, - "learning_rate": 4.98864055885572e-05, - "loss": 97.5586, - "step": 31830 - }, - { - "epoch": 0.1286376289305381, - "grad_norm": 952.4442749023438, - "learning_rate": 4.988607296439458e-05, - "loss": 112.1516, - "step": 31840 - }, - { - "epoch": 0.12867803019590573, - "grad_norm": 633.4367065429688, - "learning_rate": 4.988573985506516e-05, - "loss": 82.8759, - "step": 31850 - }, - { - "epoch": 0.12871843146127337, - "grad_norm": 341.5099792480469, - "learning_rate": 4.988540626057543e-05, - "loss": 79.2266, - "step": 31860 - }, - { - "epoch": 0.128758832726641, - "grad_norm": 521.6610717773438, - "learning_rate": 4.988507218093189e-05, - "loss": 74.551, - "step": 31870 - }, - { - "epoch": 0.12879923399200863, - "grad_norm": 848.8890991210938, - "learning_rate": 4.988473761614105e-05, - "loss": 68.6997, - "step": 31880 - }, - { - "epoch": 0.12883963525737627, - "grad_norm": 1142.2823486328125, - "learning_rate": 4.9884402566209445e-05, - "loss": 84.3612, - "step": 31890 - }, - { - "epoch": 0.12888003652274388, - "grad_norm": 1104.7357177734375, - "learning_rate": 4.98840670311436e-05, - "loss": 114.3409, - "step": 31900 - }, - { - "epoch": 0.12892043778811152, - "grad_norm": 1430.170654296875, - "learning_rate": 4.988373101095006e-05, - "loss": 66.6711, - "step": 31910 - }, - { - "epoch": 0.12896083905347916, - "grad_norm": 962.3909301757812, - "learning_rate": 4.9883394505635364e-05, - "loss": 103.3977, - "step": 31920 - }, - { - "epoch": 0.1290012403188468, - "grad_norm": 1590.525634765625, - "learning_rate": 4.988305751520609e-05, - "loss": 107.3967, - "step": 31930 - }, - { - "epoch": 0.1290416415842144, - "grad_norm": 373.5509033203125, - "learning_rate": 4.988272003966879e-05, - "loss": 61.0093, - "step": 31940 - }, - { - "epoch": 0.12908204284958205, - "grad_norm": 845.4537353515625, - "learning_rate": 4.9882382079030064e-05, - "loss": 87.1438, - "step": 31950 - }, - { - "epoch": 0.1291224441149497, - "grad_norm": 945.7670288085938, - "learning_rate": 4.988204363329648e-05, - "loss": 67.9447, - "step": 31960 - }, - { - "epoch": 0.1291628453803173, - "grad_norm": 398.878173828125, - "learning_rate": 4.988170470247465e-05, - "loss": 81.1569, - "step": 31970 - }, - { - "epoch": 0.12920324664568494, - "grad_norm": 329.99859619140625, - "learning_rate": 4.988136528657118e-05, - "loss": 59.9264, - "step": 31980 - }, - { - "epoch": 0.12924364791105258, - "grad_norm": 875.9232177734375, - "learning_rate": 4.988102538559268e-05, - "loss": 118.8401, - "step": 31990 - }, - { - "epoch": 0.1292840491764202, - "grad_norm": 1466.2398681640625, - "learning_rate": 4.988068499954578e-05, - "loss": 106.1032, - "step": 32000 - }, - { - "epoch": 0.12932445044178784, - "grad_norm": 1423.832763671875, - "learning_rate": 4.9880344128437115e-05, - "loss": 138.5585, - "step": 32010 - }, - { - "epoch": 0.12936485170715548, - "grad_norm": 2598.307373046875, - "learning_rate": 4.988000277227334e-05, - "loss": 115.3402, - "step": 32020 - }, - { - "epoch": 0.1294052529725231, - "grad_norm": 1134.399169921875, - "learning_rate": 4.987966093106109e-05, - "loss": 90.1487, - "step": 32030 - }, - { - "epoch": 0.12944565423789073, - "grad_norm": 765.7633056640625, - "learning_rate": 4.987931860480705e-05, - "loss": 100.8343, - "step": 32040 - }, - { - "epoch": 0.12948605550325837, - "grad_norm": 1252.3284912109375, - "learning_rate": 4.987897579351788e-05, - "loss": 78.8595, - "step": 32050 - }, - { - "epoch": 0.12952645676862598, - "grad_norm": 4336.3466796875, - "learning_rate": 4.987863249720027e-05, - "loss": 95.2753, - "step": 32060 - }, - { - "epoch": 0.12956685803399362, - "grad_norm": 1328.06396484375, - "learning_rate": 4.987828871586091e-05, - "loss": 101.3385, - "step": 32070 - }, - { - "epoch": 0.12960725929936126, - "grad_norm": 520.3338012695312, - "learning_rate": 4.987794444950651e-05, - "loss": 153.1254, - "step": 32080 - }, - { - "epoch": 0.1296476605647289, - "grad_norm": 694.16015625, - "learning_rate": 4.987759969814377e-05, - "loss": 93.5572, - "step": 32090 - }, - { - "epoch": 0.1296880618300965, - "grad_norm": 1401.1295166015625, - "learning_rate": 4.987725446177941e-05, - "loss": 93.8664, - "step": 32100 - }, - { - "epoch": 0.12972846309546415, - "grad_norm": 928.5399780273438, - "learning_rate": 4.9876908740420175e-05, - "loss": 74.3067, - "step": 32110 - }, - { - "epoch": 0.1297688643608318, - "grad_norm": 1556.6954345703125, - "learning_rate": 4.98765625340728e-05, - "loss": 85.9339, - "step": 32120 - }, - { - "epoch": 0.1298092656261994, - "grad_norm": 783.3317260742188, - "learning_rate": 4.987621584274402e-05, - "loss": 85.9349, - "step": 32130 - }, - { - "epoch": 0.12984966689156704, - "grad_norm": 423.84014892578125, - "learning_rate": 4.9875868666440604e-05, - "loss": 49.8796, - "step": 32140 - }, - { - "epoch": 0.12989006815693468, - "grad_norm": 806.3920288085938, - "learning_rate": 4.987552100516933e-05, - "loss": 66.9737, - "step": 32150 - }, - { - "epoch": 0.1299304694223023, - "grad_norm": 974.2014770507812, - "learning_rate": 4.987517285893697e-05, - "loss": 131.7362, - "step": 32160 - }, - { - "epoch": 0.12997087068766994, - "grad_norm": 3982.545654296875, - "learning_rate": 4.9874824227750305e-05, - "loss": 130.7934, - "step": 32170 - }, - { - "epoch": 0.13001127195303758, - "grad_norm": 936.3251953125, - "learning_rate": 4.987447511161612e-05, - "loss": 86.1245, - "step": 32180 - }, - { - "epoch": 0.1300516732184052, - "grad_norm": 1825.6453857421875, - "learning_rate": 4.987412551054126e-05, - "loss": 95.4473, - "step": 32190 - }, - { - "epoch": 0.13009207448377283, - "grad_norm": 1493.9964599609375, - "learning_rate": 4.987377542453251e-05, - "loss": 78.5434, - "step": 32200 - }, - { - "epoch": 0.13013247574914047, - "grad_norm": 748.4346923828125, - "learning_rate": 4.9873424853596695e-05, - "loss": 87.2075, - "step": 32210 - }, - { - "epoch": 0.13017287701450808, - "grad_norm": 1414.2127685546875, - "learning_rate": 4.987307379774066e-05, - "loss": 95.8589, - "step": 32220 - }, - { - "epoch": 0.13021327827987572, - "grad_norm": 799.97705078125, - "learning_rate": 4.987272225697125e-05, - "loss": 94.7114, - "step": 32230 - }, - { - "epoch": 0.13025367954524336, - "grad_norm": 2368.305419921875, - "learning_rate": 4.987237023129531e-05, - "loss": 93.2615, - "step": 32240 - }, - { - "epoch": 0.130294080810611, - "grad_norm": 525.53759765625, - "learning_rate": 4.987201772071971e-05, - "loss": 100.8188, - "step": 32250 - }, - { - "epoch": 0.1303344820759786, - "grad_norm": 1076.550048828125, - "learning_rate": 4.9871664725251314e-05, - "loss": 59.2903, - "step": 32260 - }, - { - "epoch": 0.13037488334134625, - "grad_norm": 1305.9718017578125, - "learning_rate": 4.987131124489701e-05, - "loss": 62.7737, - "step": 32270 - }, - { - "epoch": 0.1304152846067139, - "grad_norm": 1377.45458984375, - "learning_rate": 4.98709572796637e-05, - "loss": 98.2218, - "step": 32280 - }, - { - "epoch": 0.1304556858720815, - "grad_norm": 1250.3414306640625, - "learning_rate": 4.987060282955826e-05, - "loss": 141.3949, - "step": 32290 - }, - { - "epoch": 0.13049608713744915, - "grad_norm": 362.1710205078125, - "learning_rate": 4.987024789458762e-05, - "loss": 56.7362, - "step": 32300 - }, - { - "epoch": 0.13053648840281679, - "grad_norm": 1179.3179931640625, - "learning_rate": 4.9869892474758694e-05, - "loss": 110.4925, - "step": 32310 - }, - { - "epoch": 0.1305768896681844, - "grad_norm": 885.8596801757812, - "learning_rate": 4.986953657007841e-05, - "loss": 103.9362, - "step": 32320 - }, - { - "epoch": 0.13061729093355204, - "grad_norm": 2053.674072265625, - "learning_rate": 4.98691801805537e-05, - "loss": 108.4903, - "step": 32330 - }, - { - "epoch": 0.13065769219891968, - "grad_norm": 628.6939086914062, - "learning_rate": 4.986882330619152e-05, - "loss": 66.5057, - "step": 32340 - }, - { - "epoch": 0.1306980934642873, - "grad_norm": 9528.09765625, - "learning_rate": 4.986846594699883e-05, - "loss": 103.5715, - "step": 32350 - }, - { - "epoch": 0.13073849472965493, - "grad_norm": 0.0, - "learning_rate": 4.9868108102982604e-05, - "loss": 121.9924, - "step": 32360 - }, - { - "epoch": 0.13077889599502257, - "grad_norm": 710.1858520507812, - "learning_rate": 4.986774977414979e-05, - "loss": 65.1207, - "step": 32370 - }, - { - "epoch": 0.13081929726039018, - "grad_norm": 474.6985778808594, - "learning_rate": 4.98673909605074e-05, - "loss": 102.3303, - "step": 32380 - }, - { - "epoch": 0.13085969852575782, - "grad_norm": 0.0, - "learning_rate": 4.986703166206242e-05, - "loss": 75.2703, - "step": 32390 - }, - { - "epoch": 0.13090009979112546, - "grad_norm": 781.9612426757812, - "learning_rate": 4.986667187882186e-05, - "loss": 115.7585, - "step": 32400 - }, - { - "epoch": 0.1309405010564931, - "grad_norm": 1456.1153564453125, - "learning_rate": 4.986631161079272e-05, - "loss": 119.8207, - "step": 32410 - }, - { - "epoch": 0.13098090232186071, - "grad_norm": 767.140625, - "learning_rate": 4.986595085798204e-05, - "loss": 79.3004, - "step": 32420 - }, - { - "epoch": 0.13102130358722835, - "grad_norm": 1090.2476806640625, - "learning_rate": 4.9865589620396837e-05, - "loss": 104.849, - "step": 32430 - }, - { - "epoch": 0.131061704852596, - "grad_norm": 618.4625854492188, - "learning_rate": 4.986522789804417e-05, - "loss": 88.606, - "step": 32440 - }, - { - "epoch": 0.1311021061179636, - "grad_norm": 1400.4354248046875, - "learning_rate": 4.9864865690931086e-05, - "loss": 112.4108, - "step": 32450 - }, - { - "epoch": 0.13114250738333125, - "grad_norm": 1226.9744873046875, - "learning_rate": 4.986450299906464e-05, - "loss": 110.0555, - "step": 32460 - }, - { - "epoch": 0.1311829086486989, - "grad_norm": 1217.69970703125, - "learning_rate": 4.9864139822451905e-05, - "loss": 105.7761, - "step": 32470 - }, - { - "epoch": 0.1312233099140665, - "grad_norm": 944.0647583007812, - "learning_rate": 4.9863776161099964e-05, - "loss": 68.9442, - "step": 32480 - }, - { - "epoch": 0.13126371117943414, - "grad_norm": 1241.845458984375, - "learning_rate": 4.986341201501591e-05, - "loss": 95.3951, - "step": 32490 - }, - { - "epoch": 0.13130411244480178, - "grad_norm": 1810.86572265625, - "learning_rate": 4.9863047384206835e-05, - "loss": 132.1521, - "step": 32500 - }, - { - "epoch": 0.1313445137101694, - "grad_norm": 1479.045166015625, - "learning_rate": 4.986268226867985e-05, - "loss": 109.1159, - "step": 32510 - }, - { - "epoch": 0.13138491497553703, - "grad_norm": 958.012451171875, - "learning_rate": 4.986231666844208e-05, - "loss": 84.0408, - "step": 32520 - }, - { - "epoch": 0.13142531624090467, - "grad_norm": 1029.1376953125, - "learning_rate": 4.9861950583500636e-05, - "loss": 161.0161, - "step": 32530 - }, - { - "epoch": 0.13146571750627228, - "grad_norm": 2178.518798828125, - "learning_rate": 4.986158401386268e-05, - "loss": 125.5231, - "step": 32540 - }, - { - "epoch": 0.13150611877163992, - "grad_norm": 873.2677612304688, - "learning_rate": 4.9861216959535335e-05, - "loss": 82.9297, - "step": 32550 - }, - { - "epoch": 0.13154652003700756, - "grad_norm": 3118.9599609375, - "learning_rate": 4.9860849420525766e-05, - "loss": 133.3087, - "step": 32560 - }, - { - "epoch": 0.1315869213023752, - "grad_norm": 342.8658752441406, - "learning_rate": 4.986048139684114e-05, - "loss": 77.4298, - "step": 32570 - }, - { - "epoch": 0.13162732256774282, - "grad_norm": 1096.45458984375, - "learning_rate": 4.986011288848863e-05, - "loss": 64.3582, - "step": 32580 - }, - { - "epoch": 0.13166772383311046, - "grad_norm": 802.4067993164062, - "learning_rate": 4.9859743895475416e-05, - "loss": 106.3808, - "step": 32590 - }, - { - "epoch": 0.1317081250984781, - "grad_norm": 1518.53173828125, - "learning_rate": 4.98593744178087e-05, - "loss": 86.2783, - "step": 32600 - }, - { - "epoch": 0.1317485263638457, - "grad_norm": 662.360107421875, - "learning_rate": 4.985900445549568e-05, - "loss": 127.7222, - "step": 32610 - }, - { - "epoch": 0.13178892762921335, - "grad_norm": 1433.412109375, - "learning_rate": 4.985863400854358e-05, - "loss": 109.9828, - "step": 32620 - }, - { - "epoch": 0.131829328894581, - "grad_norm": 524.9939575195312, - "learning_rate": 4.98582630769596e-05, - "loss": 114.5297, - "step": 32630 - }, - { - "epoch": 0.1318697301599486, - "grad_norm": 432.99395751953125, - "learning_rate": 4.9857891660750986e-05, - "loss": 74.3833, - "step": 32640 - }, - { - "epoch": 0.13191013142531624, - "grad_norm": 1059.305908203125, - "learning_rate": 4.9857519759924974e-05, - "loss": 103.7896, - "step": 32650 - }, - { - "epoch": 0.13195053269068388, - "grad_norm": 1521.5408935546875, - "learning_rate": 4.985714737448882e-05, - "loss": 110.8204, - "step": 32660 - }, - { - "epoch": 0.1319909339560515, - "grad_norm": 712.7562255859375, - "learning_rate": 4.9856774504449776e-05, - "loss": 77.0656, - "step": 32670 - }, - { - "epoch": 0.13203133522141913, - "grad_norm": 738.4374389648438, - "learning_rate": 4.9856401149815126e-05, - "loss": 76.575, - "step": 32680 - }, - { - "epoch": 0.13207173648678677, - "grad_norm": 521.0355834960938, - "learning_rate": 4.9856027310592134e-05, - "loss": 79.7847, - "step": 32690 - }, - { - "epoch": 0.13211213775215438, - "grad_norm": 738.5291137695312, - "learning_rate": 4.985565298678809e-05, - "loss": 74.041, - "step": 32700 - }, - { - "epoch": 0.13215253901752202, - "grad_norm": 1271.053955078125, - "learning_rate": 4.985527817841029e-05, - "loss": 87.5718, - "step": 32710 - }, - { - "epoch": 0.13219294028288966, - "grad_norm": 1135.2099609375, - "learning_rate": 4.985490288546606e-05, - "loss": 121.2673, - "step": 32720 - }, - { - "epoch": 0.1322333415482573, - "grad_norm": 866.3362426757812, - "learning_rate": 4.9854527107962686e-05, - "loss": 107.6865, - "step": 32730 - }, - { - "epoch": 0.13227374281362492, - "grad_norm": 1969.1063232421875, - "learning_rate": 4.985415084590752e-05, - "loss": 79.8334, - "step": 32740 - }, - { - "epoch": 0.13231414407899256, - "grad_norm": 1149.1275634765625, - "learning_rate": 4.985377409930789e-05, - "loss": 109.8118, - "step": 32750 - }, - { - "epoch": 0.1323545453443602, - "grad_norm": 756.9971313476562, - "learning_rate": 4.985339686817113e-05, - "loss": 118.7679, - "step": 32760 - }, - { - "epoch": 0.1323949466097278, - "grad_norm": 795.5819702148438, - "learning_rate": 4.9853019152504607e-05, - "loss": 91.6352, - "step": 32770 - }, - { - "epoch": 0.13243534787509545, - "grad_norm": 1227.4793701171875, - "learning_rate": 4.9852640952315674e-05, - "loss": 60.6576, - "step": 32780 - }, - { - "epoch": 0.1324757491404631, - "grad_norm": 1278.319091796875, - "learning_rate": 4.985226226761172e-05, - "loss": 113.7857, - "step": 32790 - }, - { - "epoch": 0.1325161504058307, - "grad_norm": 610.24560546875, - "learning_rate": 4.985188309840012e-05, - "loss": 78.6529, - "step": 32800 - }, - { - "epoch": 0.13255655167119834, - "grad_norm": 1682.79736328125, - "learning_rate": 4.9851503444688255e-05, - "loss": 107.8541, - "step": 32810 - }, - { - "epoch": 0.13259695293656598, - "grad_norm": 493.7384033203125, - "learning_rate": 4.985112330648354e-05, - "loss": 79.7233, - "step": 32820 - }, - { - "epoch": 0.1326373542019336, - "grad_norm": 790.5695190429688, - "learning_rate": 4.985074268379338e-05, - "loss": 91.9399, - "step": 32830 - }, - { - "epoch": 0.13267775546730123, - "grad_norm": 1110.0252685546875, - "learning_rate": 4.985036157662521e-05, - "loss": 160.5353, - "step": 32840 - }, - { - "epoch": 0.13271815673266887, - "grad_norm": 1280.8482666015625, - "learning_rate": 4.9849979984986426e-05, - "loss": 139.881, - "step": 32850 - }, - { - "epoch": 0.13275855799803649, - "grad_norm": 973.560302734375, - "learning_rate": 4.98495979088845e-05, - "loss": 124.0516, - "step": 32860 - }, - { - "epoch": 0.13279895926340413, - "grad_norm": 496.84893798828125, - "learning_rate": 4.9849215348326875e-05, - "loss": 90.3565, - "step": 32870 - }, - { - "epoch": 0.13283936052877177, - "grad_norm": 1748.6497802734375, - "learning_rate": 4.984883230332099e-05, - "loss": 84.9742, - "step": 32880 - }, - { - "epoch": 0.1328797617941394, - "grad_norm": 1350.8636474609375, - "learning_rate": 4.984844877387433e-05, - "loss": 115.242, - "step": 32890 - }, - { - "epoch": 0.13292016305950702, - "grad_norm": 776.8285522460938, - "learning_rate": 4.984806475999437e-05, - "loss": 77.2372, - "step": 32900 - }, - { - "epoch": 0.13296056432487466, - "grad_norm": 863.73583984375, - "learning_rate": 4.98476802616886e-05, - "loss": 74.2184, - "step": 32910 - }, - { - "epoch": 0.1330009655902423, - "grad_norm": 793.2598876953125, - "learning_rate": 4.9847295278964514e-05, - "loss": 67.6055, - "step": 32920 - }, - { - "epoch": 0.1330413668556099, - "grad_norm": 1486.9317626953125, - "learning_rate": 4.9846909811829604e-05, - "loss": 127.2083, - "step": 32930 - }, - { - "epoch": 0.13308176812097755, - "grad_norm": 773.4334716796875, - "learning_rate": 4.984652386029139e-05, - "loss": 116.8202, - "step": 32940 - }, - { - "epoch": 0.1331221693863452, - "grad_norm": 1226.421630859375, - "learning_rate": 4.984613742435742e-05, - "loss": 112.299, - "step": 32950 - }, - { - "epoch": 0.1331625706517128, - "grad_norm": 445.5863342285156, - "learning_rate": 4.9845750504035195e-05, - "loss": 87.6496, - "step": 32960 - }, - { - "epoch": 0.13320297191708044, - "grad_norm": 894.6140747070312, - "learning_rate": 4.984536309933227e-05, - "loss": 95.4575, - "step": 32970 - }, - { - "epoch": 0.13324337318244808, - "grad_norm": 823.8974609375, - "learning_rate": 4.9844975210256217e-05, - "loss": 93.921, - "step": 32980 - }, - { - "epoch": 0.1332837744478157, - "grad_norm": 755.107177734375, - "learning_rate": 4.984458683681457e-05, - "loss": 84.3962, - "step": 32990 - }, - { - "epoch": 0.13332417571318333, - "grad_norm": 885.6873779296875, - "learning_rate": 4.984419797901491e-05, - "loss": 105.6254, - "step": 33000 - }, - { - "epoch": 0.13336457697855097, - "grad_norm": 411.86053466796875, - "learning_rate": 4.984380863686482e-05, - "loss": 97.5172, - "step": 33010 - }, - { - "epoch": 0.1334049782439186, - "grad_norm": 563.2962646484375, - "learning_rate": 4.98434188103719e-05, - "loss": 80.259, - "step": 33020 - }, - { - "epoch": 0.13344537950928623, - "grad_norm": 1379.732666015625, - "learning_rate": 4.984302849954373e-05, - "loss": 73.214, - "step": 33030 - }, - { - "epoch": 0.13348578077465387, - "grad_norm": 750.8110961914062, - "learning_rate": 4.984263770438793e-05, - "loss": 106.9937, - "step": 33040 - }, - { - "epoch": 0.1335261820400215, - "grad_norm": 1013.02392578125, - "learning_rate": 4.984224642491212e-05, - "loss": 75.5326, - "step": 33050 - }, - { - "epoch": 0.13356658330538912, - "grad_norm": 1022.1004028320312, - "learning_rate": 4.9841854661123936e-05, - "loss": 108.553, - "step": 33060 - }, - { - "epoch": 0.13360698457075676, - "grad_norm": 809.585205078125, - "learning_rate": 4.9841462413030995e-05, - "loss": 84.0129, - "step": 33070 - }, - { - "epoch": 0.1336473858361244, - "grad_norm": 842.181884765625, - "learning_rate": 4.984106968064095e-05, - "loss": 112.834, - "step": 33080 - }, - { - "epoch": 0.133687787101492, - "grad_norm": 467.4346618652344, - "learning_rate": 4.984067646396147e-05, - "loss": 100.528, - "step": 33090 - }, - { - "epoch": 0.13372818836685965, - "grad_norm": 714.903076171875, - "learning_rate": 4.984028276300021e-05, - "loss": 129.6565, - "step": 33100 - }, - { - "epoch": 0.1337685896322273, - "grad_norm": 709.3721923828125, - "learning_rate": 4.983988857776486e-05, - "loss": 101.3018, - "step": 33110 - }, - { - "epoch": 0.1338089908975949, - "grad_norm": 988.5148315429688, - "learning_rate": 4.983949390826308e-05, - "loss": 86.796, - "step": 33120 - }, - { - "epoch": 0.13384939216296254, - "grad_norm": 1472.410400390625, - "learning_rate": 4.983909875450258e-05, - "loss": 94.508, - "step": 33130 - }, - { - "epoch": 0.13388979342833018, - "grad_norm": 900.3340454101562, - "learning_rate": 4.983870311649107e-05, - "loss": 95.947, - "step": 33140 - }, - { - "epoch": 0.1339301946936978, - "grad_norm": 1466.88134765625, - "learning_rate": 4.983830699423625e-05, - "loss": 74.9536, - "step": 33150 - }, - { - "epoch": 0.13397059595906544, - "grad_norm": 576.2639770507812, - "learning_rate": 4.9837910387745845e-05, - "loss": 115.5023, - "step": 33160 - }, - { - "epoch": 0.13401099722443308, - "grad_norm": 1318.8853759765625, - "learning_rate": 4.9837513297027594e-05, - "loss": 84.5853, - "step": 33170 - }, - { - "epoch": 0.1340513984898007, - "grad_norm": 934.3829345703125, - "learning_rate": 4.983711572208924e-05, - "loss": 75.3381, - "step": 33180 - }, - { - "epoch": 0.13409179975516833, - "grad_norm": 1154.966796875, - "learning_rate": 4.983671766293851e-05, - "loss": 83.0238, - "step": 33190 - }, - { - "epoch": 0.13413220102053597, - "grad_norm": 1403.0806884765625, - "learning_rate": 4.983631911958319e-05, - "loss": 82.2895, - "step": 33200 - }, - { - "epoch": 0.1341726022859036, - "grad_norm": 1025.5306396484375, - "learning_rate": 4.983592009203105e-05, - "loss": 64.9099, - "step": 33210 - }, - { - "epoch": 0.13421300355127122, - "grad_norm": 570.6565551757812, - "learning_rate": 4.9835520580289854e-05, - "loss": 60.6472, - "step": 33220 - }, - { - "epoch": 0.13425340481663886, - "grad_norm": 241.88272094726562, - "learning_rate": 4.98351205843674e-05, - "loss": 101.3504, - "step": 33230 - }, - { - "epoch": 0.1342938060820065, - "grad_norm": 786.84912109375, - "learning_rate": 4.9834720104271484e-05, - "loss": 80.2996, - "step": 33240 - }, - { - "epoch": 0.1343342073473741, - "grad_norm": 1382.0052490234375, - "learning_rate": 4.983431914000991e-05, - "loss": 90.2512, - "step": 33250 - }, - { - "epoch": 0.13437460861274175, - "grad_norm": 1485.1060791015625, - "learning_rate": 4.9833917691590506e-05, - "loss": 80.0044, - "step": 33260 - }, - { - "epoch": 0.1344150098781094, - "grad_norm": 698.7577514648438, - "learning_rate": 4.9833515759021085e-05, - "loss": 102.9575, - "step": 33270 - }, - { - "epoch": 0.134455411143477, - "grad_norm": 693.4671020507812, - "learning_rate": 4.98331133423095e-05, - "loss": 94.7776, - "step": 33280 - }, - { - "epoch": 0.13449581240884464, - "grad_norm": 664.5126342773438, - "learning_rate": 4.983271044146357e-05, - "loss": 85.3929, - "step": 33290 - }, - { - "epoch": 0.13453621367421228, - "grad_norm": 841.61962890625, - "learning_rate": 4.983230705649118e-05, - "loss": 92.7831, - "step": 33300 - }, - { - "epoch": 0.1345766149395799, - "grad_norm": 1524.328125, - "learning_rate": 4.9831903187400166e-05, - "loss": 85.1477, - "step": 33310 - }, - { - "epoch": 0.13461701620494754, - "grad_norm": 551.5435180664062, - "learning_rate": 4.983149883419842e-05, - "loss": 128.3945, - "step": 33320 - }, - { - "epoch": 0.13465741747031518, - "grad_norm": 387.754638671875, - "learning_rate": 4.983109399689382e-05, - "loss": 75.982, - "step": 33330 - }, - { - "epoch": 0.1346978187356828, - "grad_norm": 2052.11669921875, - "learning_rate": 4.9830688675494265e-05, - "loss": 166.9485, - "step": 33340 - }, - { - "epoch": 0.13473822000105043, - "grad_norm": 656.3280639648438, - "learning_rate": 4.9830282870007646e-05, - "loss": 98.0935, - "step": 33350 - }, - { - "epoch": 0.13477862126641807, - "grad_norm": 960.6358032226562, - "learning_rate": 4.982987658044188e-05, - "loss": 88.8502, - "step": 33360 - }, - { - "epoch": 0.1348190225317857, - "grad_norm": 303.2107238769531, - "learning_rate": 4.982946980680488e-05, - "loss": 82.7864, - "step": 33370 - }, - { - "epoch": 0.13485942379715332, - "grad_norm": 786.0392456054688, - "learning_rate": 4.982906254910459e-05, - "loss": 75.8299, - "step": 33380 - }, - { - "epoch": 0.13489982506252096, - "grad_norm": 554.2486572265625, - "learning_rate": 4.982865480734894e-05, - "loss": 71.6516, - "step": 33390 - }, - { - "epoch": 0.1349402263278886, - "grad_norm": 901.5972900390625, - "learning_rate": 4.982824658154589e-05, - "loss": 106.4935, - "step": 33400 - }, - { - "epoch": 0.1349806275932562, - "grad_norm": 363.12115478515625, - "learning_rate": 4.982783787170338e-05, - "loss": 76.4278, - "step": 33410 - }, - { - "epoch": 0.13502102885862385, - "grad_norm": 3618.558349609375, - "learning_rate": 4.982742867782939e-05, - "loss": 130.8146, - "step": 33420 - }, - { - "epoch": 0.1350614301239915, - "grad_norm": 810.9340209960938, - "learning_rate": 4.982701899993189e-05, - "loss": 109.7431, - "step": 33430 - }, - { - "epoch": 0.1351018313893591, - "grad_norm": 1115.0909423828125, - "learning_rate": 4.982660883801889e-05, - "loss": 97.727, - "step": 33440 - }, - { - "epoch": 0.13514223265472675, - "grad_norm": 726.3258056640625, - "learning_rate": 4.982619819209836e-05, - "loss": 129.2327, - "step": 33450 - }, - { - "epoch": 0.13518263392009439, - "grad_norm": 0.0, - "learning_rate": 4.9825787062178315e-05, - "loss": 62.2287, - "step": 33460 - }, - { - "epoch": 0.135223035185462, - "grad_norm": 2895.0556640625, - "learning_rate": 4.982537544826677e-05, - "loss": 99.4745, - "step": 33470 - }, - { - "epoch": 0.13526343645082964, - "grad_norm": 496.2646789550781, - "learning_rate": 4.982496335037175e-05, - "loss": 76.2467, - "step": 33480 - }, - { - "epoch": 0.13530383771619728, - "grad_norm": 1723.678955078125, - "learning_rate": 4.982455076850129e-05, - "loss": 87.5003, - "step": 33490 - }, - { - "epoch": 0.1353442389815649, - "grad_norm": 622.0546875, - "learning_rate": 4.982413770266342e-05, - "loss": 60.5188, - "step": 33500 - }, - { - "epoch": 0.13538464024693253, - "grad_norm": 579.060302734375, - "learning_rate": 4.9823724152866226e-05, - "loss": 121.2273, - "step": 33510 - }, - { - "epoch": 0.13542504151230017, - "grad_norm": 638.8704833984375, - "learning_rate": 4.982331011911774e-05, - "loss": 139.4159, - "step": 33520 - }, - { - "epoch": 0.13546544277766778, - "grad_norm": 684.1176147460938, - "learning_rate": 4.9822895601426034e-05, - "loss": 107.1239, - "step": 33530 - }, - { - "epoch": 0.13550584404303542, - "grad_norm": 626.8060302734375, - "learning_rate": 4.982248059979921e-05, - "loss": 86.9168, - "step": 33540 - }, - { - "epoch": 0.13554624530840306, - "grad_norm": 539.0343017578125, - "learning_rate": 4.982206511424534e-05, - "loss": 69.0915, - "step": 33550 - }, - { - "epoch": 0.1355866465737707, - "grad_norm": 2398.862548828125, - "learning_rate": 4.9821649144772545e-05, - "loss": 70.2374, - "step": 33560 - }, - { - "epoch": 0.13562704783913831, - "grad_norm": 586.1361694335938, - "learning_rate": 4.9821232691388906e-05, - "loss": 70.2269, - "step": 33570 - }, - { - "epoch": 0.13566744910450595, - "grad_norm": 850.7914428710938, - "learning_rate": 4.982081575410256e-05, - "loss": 113.3011, - "step": 33580 - }, - { - "epoch": 0.1357078503698736, - "grad_norm": 642.4421997070312, - "learning_rate": 4.9820398332921634e-05, - "loss": 95.5486, - "step": 33590 - }, - { - "epoch": 0.1357482516352412, - "grad_norm": 706.2947998046875, - "learning_rate": 4.981998042785427e-05, - "loss": 97.3085, - "step": 33600 - }, - { - "epoch": 0.13578865290060885, - "grad_norm": 1023.1542358398438, - "learning_rate": 4.98195620389086e-05, - "loss": 117.8821, - "step": 33610 - }, - { - "epoch": 0.1358290541659765, - "grad_norm": 480.221923828125, - "learning_rate": 4.9819143166092796e-05, - "loss": 93.8683, - "step": 33620 - }, - { - "epoch": 0.1358694554313441, - "grad_norm": 520.1914672851562, - "learning_rate": 4.9818723809415016e-05, - "loss": 83.5975, - "step": 33630 - }, - { - "epoch": 0.13590985669671174, - "grad_norm": 2167.59619140625, - "learning_rate": 4.981830396888344e-05, - "loss": 103.9532, - "step": 33640 - }, - { - "epoch": 0.13595025796207938, - "grad_norm": 801.8353271484375, - "learning_rate": 4.981788364450625e-05, - "loss": 146.1724, - "step": 33650 - }, - { - "epoch": 0.135990659227447, - "grad_norm": 862.556884765625, - "learning_rate": 4.981746283629164e-05, - "loss": 92.8243, - "step": 33660 - }, - { - "epoch": 0.13603106049281463, - "grad_norm": 1226.8758544921875, - "learning_rate": 4.981704154424781e-05, - "loss": 107.6284, - "step": 33670 - }, - { - "epoch": 0.13607146175818227, - "grad_norm": 1550.146484375, - "learning_rate": 4.981661976838299e-05, - "loss": 109.7946, - "step": 33680 - }, - { - "epoch": 0.13611186302354988, - "grad_norm": 619.8020629882812, - "learning_rate": 4.981619750870537e-05, - "loss": 108.0941, - "step": 33690 - }, - { - "epoch": 0.13615226428891752, - "grad_norm": 551.2567749023438, - "learning_rate": 4.9815774765223226e-05, - "loss": 75.5076, - "step": 33700 - }, - { - "epoch": 0.13619266555428516, - "grad_norm": 1045.827880859375, - "learning_rate": 4.9815351537944774e-05, - "loss": 64.202, - "step": 33710 - }, - { - "epoch": 0.1362330668196528, - "grad_norm": 435.85296630859375, - "learning_rate": 4.9814927826878256e-05, - "loss": 123.6437, - "step": 33720 - }, - { - "epoch": 0.13627346808502042, - "grad_norm": 1007.7003784179688, - "learning_rate": 4.9814503632031954e-05, - "loss": 133.8719, - "step": 33730 - }, - { - "epoch": 0.13631386935038806, - "grad_norm": 1092.2225341796875, - "learning_rate": 4.981407895341412e-05, - "loss": 91.8184, - "step": 33740 - }, - { - "epoch": 0.1363542706157557, - "grad_norm": 656.14990234375, - "learning_rate": 4.9813653791033057e-05, - "loss": 68.1897, - "step": 33750 - }, - { - "epoch": 0.1363946718811233, - "grad_norm": 1275.6177978515625, - "learning_rate": 4.981322814489703e-05, - "loss": 91.9896, - "step": 33760 - }, - { - "epoch": 0.13643507314649095, - "grad_norm": 900.5950927734375, - "learning_rate": 4.9812802015014334e-05, - "loss": 90.7882, - "step": 33770 - }, - { - "epoch": 0.1364754744118586, - "grad_norm": 0.0, - "learning_rate": 4.981237540139331e-05, - "loss": 62.0134, - "step": 33780 - }, - { - "epoch": 0.1365158756772262, - "grad_norm": 1441.5318603515625, - "learning_rate": 4.9811948304042234e-05, - "loss": 75.5356, - "step": 33790 - }, - { - "epoch": 0.13655627694259384, - "grad_norm": 783.706787109375, - "learning_rate": 4.9811520722969465e-05, - "loss": 109.1127, - "step": 33800 - }, - { - "epoch": 0.13659667820796148, - "grad_norm": 1493.0281982421875, - "learning_rate": 4.981109265818332e-05, - "loss": 150.1899, - "step": 33810 - }, - { - "epoch": 0.1366370794733291, - "grad_norm": 728.7564086914062, - "learning_rate": 4.981066410969215e-05, - "loss": 64.3308, - "step": 33820 - }, - { - "epoch": 0.13667748073869673, - "grad_norm": 1312.0137939453125, - "learning_rate": 4.981023507750431e-05, - "loss": 119.9241, - "step": 33830 - }, - { - "epoch": 0.13671788200406437, - "grad_norm": 805.4586181640625, - "learning_rate": 4.980980556162816e-05, - "loss": 107.8721, - "step": 33840 - }, - { - "epoch": 0.13675828326943198, - "grad_norm": 609.840087890625, - "learning_rate": 4.980937556207208e-05, - "loss": 89.4216, - "step": 33850 - }, - { - "epoch": 0.13679868453479962, - "grad_norm": 973.7212524414062, - "learning_rate": 4.9808945078844456e-05, - "loss": 81.5434, - "step": 33860 - }, - { - "epoch": 0.13683908580016726, - "grad_norm": 982.0657348632812, - "learning_rate": 4.9808514111953674e-05, - "loss": 67.6982, - "step": 33870 - }, - { - "epoch": 0.1368794870655349, - "grad_norm": 524.6974487304688, - "learning_rate": 4.980808266140813e-05, - "loss": 150.7908, - "step": 33880 - }, - { - "epoch": 0.13691988833090252, - "grad_norm": 600.6156005859375, - "learning_rate": 4.980765072721625e-05, - "loss": 107.8641, - "step": 33890 - }, - { - "epoch": 0.13696028959627016, - "grad_norm": 464.95550537109375, - "learning_rate": 4.9807218309386444e-05, - "loss": 81.4719, - "step": 33900 - }, - { - "epoch": 0.1370006908616378, - "grad_norm": 810.7647094726562, - "learning_rate": 4.980678540792715e-05, - "loss": 91.2513, - "step": 33910 - }, - { - "epoch": 0.1370410921270054, - "grad_norm": 904.2017211914062, - "learning_rate": 4.980635202284679e-05, - "loss": 128.9432, - "step": 33920 - }, - { - "epoch": 0.13708149339237305, - "grad_norm": 751.0636596679688, - "learning_rate": 4.980591815415384e-05, - "loss": 67.3943, - "step": 33930 - }, - { - "epoch": 0.1371218946577407, - "grad_norm": 999.1458740234375, - "learning_rate": 4.980548380185674e-05, - "loss": 122.1365, - "step": 33940 - }, - { - "epoch": 0.1371622959231083, - "grad_norm": 1589.1033935546875, - "learning_rate": 4.980504896596396e-05, - "loss": 77.5396, - "step": 33950 - }, - { - "epoch": 0.13720269718847594, - "grad_norm": 1546.7073974609375, - "learning_rate": 4.980461364648398e-05, - "loss": 117.3774, - "step": 33960 - }, - { - "epoch": 0.13724309845384358, - "grad_norm": 920.711669921875, - "learning_rate": 4.9804177843425295e-05, - "loss": 112.9565, - "step": 33970 - }, - { - "epoch": 0.1372834997192112, - "grad_norm": 1206.650146484375, - "learning_rate": 4.980374155679639e-05, - "loss": 111.5966, - "step": 33980 - }, - { - "epoch": 0.13732390098457883, - "grad_norm": 1469.6834716796875, - "learning_rate": 4.980330478660576e-05, - "loss": 107.332, - "step": 33990 - }, - { - "epoch": 0.13736430224994647, - "grad_norm": 1957.3115234375, - "learning_rate": 4.980286753286195e-05, - "loss": 128.1132, - "step": 34000 - }, - { - "epoch": 0.13740470351531409, - "grad_norm": 337.71295166015625, - "learning_rate": 4.9802429795573455e-05, - "loss": 70.1459, - "step": 34010 - }, - { - "epoch": 0.13744510478068173, - "grad_norm": 714.4182739257812, - "learning_rate": 4.980199157474884e-05, - "loss": 116.3985, - "step": 34020 - }, - { - "epoch": 0.13748550604604937, - "grad_norm": 691.6640014648438, - "learning_rate": 4.980155287039662e-05, - "loss": 54.4459, - "step": 34030 - }, - { - "epoch": 0.137525907311417, - "grad_norm": 1381.4683837890625, - "learning_rate": 4.980111368252535e-05, - "loss": 59.3446, - "step": 34040 - }, - { - "epoch": 0.13756630857678462, - "grad_norm": 1003.1581420898438, - "learning_rate": 4.9800674011143614e-05, - "loss": 79.539, - "step": 34050 - }, - { - "epoch": 0.13760670984215226, - "grad_norm": 705.7440795898438, - "learning_rate": 4.980023385625996e-05, - "loss": 104.7091, - "step": 34060 - }, - { - "epoch": 0.1376471111075199, - "grad_norm": 655.6250610351562, - "learning_rate": 4.979979321788298e-05, - "loss": 65.6317, - "step": 34070 - }, - { - "epoch": 0.1376875123728875, - "grad_norm": 741.8604125976562, - "learning_rate": 4.9799352096021266e-05, - "loss": 70.4658, - "step": 34080 - }, - { - "epoch": 0.13772791363825515, - "grad_norm": 2940.106689453125, - "learning_rate": 4.979891049068342e-05, - "loss": 108.38, - "step": 34090 - }, - { - "epoch": 0.1377683149036228, - "grad_norm": 924.1039428710938, - "learning_rate": 4.979846840187804e-05, - "loss": 69.7543, - "step": 34100 - }, - { - "epoch": 0.1378087161689904, - "grad_norm": 937.28515625, - "learning_rate": 4.979802582961375e-05, - "loss": 97.4457, - "step": 34110 - }, - { - "epoch": 0.13784911743435804, - "grad_norm": 1431.635498046875, - "learning_rate": 4.979758277389919e-05, - "loss": 91.3253, - "step": 34120 - }, - { - "epoch": 0.13788951869972568, - "grad_norm": 953.5215454101562, - "learning_rate": 4.9797139234742975e-05, - "loss": 81.4589, - "step": 34130 - }, - { - "epoch": 0.1379299199650933, - "grad_norm": 4141.5361328125, - "learning_rate": 4.9796695212153764e-05, - "loss": 154.3838, - "step": 34140 - }, - { - "epoch": 0.13797032123046093, - "grad_norm": 779.7581176757812, - "learning_rate": 4.9796250706140224e-05, - "loss": 69.3488, - "step": 34150 - }, - { - "epoch": 0.13801072249582857, - "grad_norm": 778.5997314453125, - "learning_rate": 4.9795805716711e-05, - "loss": 135.2858, - "step": 34160 - }, - { - "epoch": 0.1380511237611962, - "grad_norm": 990.5801391601562, - "learning_rate": 4.979536024387479e-05, - "loss": 108.3737, - "step": 34170 - }, - { - "epoch": 0.13809152502656383, - "grad_norm": 715.4221801757812, - "learning_rate": 4.979491428764026e-05, - "loss": 95.1732, - "step": 34180 - }, - { - "epoch": 0.13813192629193147, - "grad_norm": 2391.443115234375, - "learning_rate": 4.979446784801611e-05, - "loss": 151.664, - "step": 34190 - }, - { - "epoch": 0.1381723275572991, - "grad_norm": 1992.6207275390625, - "learning_rate": 4.9794020925011044e-05, - "loss": 82.8294, - "step": 34200 - }, - { - "epoch": 0.13821272882266672, - "grad_norm": 1085.1295166015625, - "learning_rate": 4.979357351863377e-05, - "loss": 116.4222, - "step": 34210 - }, - { - "epoch": 0.13825313008803436, - "grad_norm": 607.43603515625, - "learning_rate": 4.979312562889302e-05, - "loss": 71.7245, - "step": 34220 - }, - { - "epoch": 0.138293531353402, - "grad_norm": 782.8214111328125, - "learning_rate": 4.9792677255797525e-05, - "loss": 93.9537, - "step": 34230 - }, - { - "epoch": 0.1383339326187696, - "grad_norm": 735.2789306640625, - "learning_rate": 4.979222839935602e-05, - "loss": 71.7033, - "step": 34240 - }, - { - "epoch": 0.13837433388413725, - "grad_norm": 1233.6981201171875, - "learning_rate": 4.979177905957726e-05, - "loss": 90.7622, - "step": 34250 - }, - { - "epoch": 0.1384147351495049, - "grad_norm": 567.0103149414062, - "learning_rate": 4.979132923647001e-05, - "loss": 135.1103, - "step": 34260 - }, - { - "epoch": 0.1384551364148725, - "grad_norm": 594.86669921875, - "learning_rate": 4.979087893004302e-05, - "loss": 82.0568, - "step": 34270 - }, - { - "epoch": 0.13849553768024014, - "grad_norm": 900.0689086914062, - "learning_rate": 4.979042814030509e-05, - "loss": 91.8436, - "step": 34280 - }, - { - "epoch": 0.13853593894560778, - "grad_norm": 0.0, - "learning_rate": 4.9789976867265e-05, - "loss": 67.3411, - "step": 34290 - }, - { - "epoch": 0.1385763402109754, - "grad_norm": 1077.3468017578125, - "learning_rate": 4.9789525110931545e-05, - "loss": 119.6317, - "step": 34300 - }, - { - "epoch": 0.13861674147634304, - "grad_norm": 2667.85400390625, - "learning_rate": 4.978907287131354e-05, - "loss": 95.113, - "step": 34310 - }, - { - "epoch": 0.13865714274171068, - "grad_norm": 1062.1884765625, - "learning_rate": 4.978862014841979e-05, - "loss": 77.8131, - "step": 34320 - }, - { - "epoch": 0.1386975440070783, - "grad_norm": 3490.418701171875, - "learning_rate": 4.9788166942259135e-05, - "loss": 124.19, - "step": 34330 - }, - { - "epoch": 0.13873794527244593, - "grad_norm": 351.10784912109375, - "learning_rate": 4.97877132528404e-05, - "loss": 100.0667, - "step": 34340 - }, - { - "epoch": 0.13877834653781357, - "grad_norm": 901.481201171875, - "learning_rate": 4.978725908017243e-05, - "loss": 80.7147, - "step": 34350 - }, - { - "epoch": 0.1388187478031812, - "grad_norm": 575.747802734375, - "learning_rate": 4.9786804424264085e-05, - "loss": 107.6961, - "step": 34360 - }, - { - "epoch": 0.13885914906854882, - "grad_norm": 816.9763793945312, - "learning_rate": 4.9786349285124235e-05, - "loss": 104.7212, - "step": 34370 - }, - { - "epoch": 0.13889955033391646, - "grad_norm": 672.7227783203125, - "learning_rate": 4.978589366276174e-05, - "loss": 95.8187, - "step": 34380 - }, - { - "epoch": 0.1389399515992841, - "grad_norm": 1123.2491455078125, - "learning_rate": 4.978543755718549e-05, - "loss": 97.5948, - "step": 34390 - }, - { - "epoch": 0.1389803528646517, - "grad_norm": 741.401611328125, - "learning_rate": 4.978498096840436e-05, - "loss": 77.3318, - "step": 34400 - }, - { - "epoch": 0.13902075413001935, - "grad_norm": 2102.46923828125, - "learning_rate": 4.978452389642728e-05, - "loss": 80.4057, - "step": 34410 - }, - { - "epoch": 0.139061155395387, - "grad_norm": 1185.681396484375, - "learning_rate": 4.978406634126315e-05, - "loss": 105.0425, - "step": 34420 - }, - { - "epoch": 0.1391015566607546, - "grad_norm": 1234.7724609375, - "learning_rate": 4.9783608302920873e-05, - "loss": 94.0603, - "step": 34430 - }, - { - "epoch": 0.13914195792612225, - "grad_norm": 1568.1796875, - "learning_rate": 4.9783149781409404e-05, - "loss": 123.3275, - "step": 34440 - }, - { - "epoch": 0.13918235919148988, - "grad_norm": 1667.6737060546875, - "learning_rate": 4.978269077673767e-05, - "loss": 62.5816, - "step": 34450 - }, - { - "epoch": 0.1392227604568575, - "grad_norm": 880.3914794921875, - "learning_rate": 4.9782231288914614e-05, - "loss": 80.6274, - "step": 34460 - }, - { - "epoch": 0.13926316172222514, - "grad_norm": 473.5460510253906, - "learning_rate": 4.97817713179492e-05, - "loss": 136.4202, - "step": 34470 - }, - { - "epoch": 0.13930356298759278, - "grad_norm": 1051.316162109375, - "learning_rate": 4.9781310863850405e-05, - "loss": 79.2705, - "step": 34480 - }, - { - "epoch": 0.1393439642529604, - "grad_norm": 942.3121948242188, - "learning_rate": 4.978084992662719e-05, - "loss": 87.1078, - "step": 34490 - }, - { - "epoch": 0.13938436551832803, - "grad_norm": 663.4690551757812, - "learning_rate": 4.978038850628854e-05, - "loss": 87.4095, - "step": 34500 - }, - { - "epoch": 0.13942476678369567, - "grad_norm": 469.06475830078125, - "learning_rate": 4.977992660284347e-05, - "loss": 92.2786, - "step": 34510 - }, - { - "epoch": 0.1394651680490633, - "grad_norm": 452.9181213378906, - "learning_rate": 4.977946421630098e-05, - "loss": 109.0747, - "step": 34520 - }, - { - "epoch": 0.13950556931443092, - "grad_norm": 3418.622314453125, - "learning_rate": 4.977900134667006e-05, - "loss": 92.9968, - "step": 34530 - }, - { - "epoch": 0.13954597057979856, - "grad_norm": 934.139892578125, - "learning_rate": 4.977853799395976e-05, - "loss": 86.7001, - "step": 34540 - }, - { - "epoch": 0.1395863718451662, - "grad_norm": 541.4644165039062, - "learning_rate": 4.97780741581791e-05, - "loss": 125.5777, - "step": 34550 - }, - { - "epoch": 0.13962677311053381, - "grad_norm": 1703.493896484375, - "learning_rate": 4.977760983933714e-05, - "loss": 113.32, - "step": 34560 - }, - { - "epoch": 0.13966717437590145, - "grad_norm": 469.1068115234375, - "learning_rate": 4.9777145037442906e-05, - "loss": 73.509, - "step": 34570 - }, - { - "epoch": 0.1397075756412691, - "grad_norm": 445.8856201171875, - "learning_rate": 4.9776679752505476e-05, - "loss": 72.2943, - "step": 34580 - }, - { - "epoch": 0.1397479769066367, - "grad_norm": 636.2007446289062, - "learning_rate": 4.977621398453393e-05, - "loss": 91.6618, - "step": 34590 - }, - { - "epoch": 0.13978837817200435, - "grad_norm": 727.9812622070312, - "learning_rate": 4.977574773353732e-05, - "loss": 101.3671, - "step": 34600 - }, - { - "epoch": 0.13982877943737199, - "grad_norm": 1291.48974609375, - "learning_rate": 4.9775280999524766e-05, - "loss": 75.3459, - "step": 34610 - }, - { - "epoch": 0.1398691807027396, - "grad_norm": 975.12109375, - "learning_rate": 4.9774813782505346e-05, - "loss": 108.9374, - "step": 34620 - }, - { - "epoch": 0.13990958196810724, - "grad_norm": 1339.5286865234375, - "learning_rate": 4.9774346082488176e-05, - "loss": 62.3226, - "step": 34630 - }, - { - "epoch": 0.13994998323347488, - "grad_norm": 1278.2696533203125, - "learning_rate": 4.977387789948238e-05, - "loss": 111.346, - "step": 34640 - }, - { - "epoch": 0.1399903844988425, - "grad_norm": 582.800537109375, - "learning_rate": 4.977340923349707e-05, - "loss": 59.1344, - "step": 34650 - }, - { - "epoch": 0.14003078576421013, - "grad_norm": 980.9688110351562, - "learning_rate": 4.9772940084541405e-05, - "loss": 116.2872, - "step": 34660 - }, - { - "epoch": 0.14007118702957777, - "grad_norm": 600.323486328125, - "learning_rate": 4.9772470452624506e-05, - "loss": 98.4954, - "step": 34670 - }, - { - "epoch": 0.1401115882949454, - "grad_norm": 1239.6783447265625, - "learning_rate": 4.977200033775555e-05, - "loss": 108.7115, - "step": 34680 - }, - { - "epoch": 0.14015198956031302, - "grad_norm": 1736.9547119140625, - "learning_rate": 4.97715297399437e-05, - "loss": 134.5554, - "step": 34690 - }, - { - "epoch": 0.14019239082568066, - "grad_norm": 2366.77490234375, - "learning_rate": 4.977105865919812e-05, - "loss": 127.1646, - "step": 34700 - }, - { - "epoch": 0.1402327920910483, - "grad_norm": 1988.361083984375, - "learning_rate": 4.9770587095527995e-05, - "loss": 107.3173, - "step": 34710 - }, - { - "epoch": 0.14027319335641592, - "grad_norm": 632.1370239257812, - "learning_rate": 4.977011504894252e-05, - "loss": 84.2821, - "step": 34720 - }, - { - "epoch": 0.14031359462178356, - "grad_norm": 1778.7666015625, - "learning_rate": 4.9769642519450904e-05, - "loss": 83.9897, - "step": 34730 - }, - { - "epoch": 0.1403539958871512, - "grad_norm": 652.8768310546875, - "learning_rate": 4.9769169507062355e-05, - "loss": 102.6689, - "step": 34740 - }, - { - "epoch": 0.1403943971525188, - "grad_norm": 1475.0391845703125, - "learning_rate": 4.976869601178609e-05, - "loss": 117.3591, - "step": 34750 - }, - { - "epoch": 0.14043479841788645, - "grad_norm": 1975.065673828125, - "learning_rate": 4.976822203363135e-05, - "loss": 98.2487, - "step": 34760 - }, - { - "epoch": 0.1404751996832541, - "grad_norm": 1050.9573974609375, - "learning_rate": 4.976774757260737e-05, - "loss": 94.4713, - "step": 34770 - }, - { - "epoch": 0.1405156009486217, - "grad_norm": 1176.8922119140625, - "learning_rate": 4.9767272628723396e-05, - "loss": 99.8589, - "step": 34780 - }, - { - "epoch": 0.14055600221398934, - "grad_norm": 1319.9659423828125, - "learning_rate": 4.976679720198869e-05, - "loss": 100.7659, - "step": 34790 - }, - { - "epoch": 0.14059640347935698, - "grad_norm": 515.001220703125, - "learning_rate": 4.976632129241252e-05, - "loss": 77.551, - "step": 34800 - }, - { - "epoch": 0.1406368047447246, - "grad_norm": 822.0165405273438, - "learning_rate": 4.9765844900004176e-05, - "loss": 82.4817, - "step": 34810 - }, - { - "epoch": 0.14067720601009223, - "grad_norm": 1176.460693359375, - "learning_rate": 4.976536802477293e-05, - "loss": 116.3207, - "step": 34820 - }, - { - "epoch": 0.14071760727545987, - "grad_norm": 1214.6893310546875, - "learning_rate": 4.976489066672808e-05, - "loss": 90.3449, - "step": 34830 - }, - { - "epoch": 0.1407580085408275, - "grad_norm": 610.4821166992188, - "learning_rate": 4.9764412825878943e-05, - "loss": 104.2971, - "step": 34840 - }, - { - "epoch": 0.14079840980619512, - "grad_norm": 1629.2425537109375, - "learning_rate": 4.976393450223482e-05, - "loss": 100.4276, - "step": 34850 - }, - { - "epoch": 0.14083881107156276, - "grad_norm": 1354.5982666015625, - "learning_rate": 4.9763455695805056e-05, - "loss": 87.7344, - "step": 34860 - }, - { - "epoch": 0.1408792123369304, - "grad_norm": 569.3548583984375, - "learning_rate": 4.976297640659897e-05, - "loss": 72.0556, - "step": 34870 - }, - { - "epoch": 0.14091961360229802, - "grad_norm": 1153.3983154296875, - "learning_rate": 4.97624966346259e-05, - "loss": 76.03, - "step": 34880 - }, - { - "epoch": 0.14096001486766566, - "grad_norm": 550.9409790039062, - "learning_rate": 4.9762016379895225e-05, - "loss": 106.2172, - "step": 34890 - }, - { - "epoch": 0.1410004161330333, - "grad_norm": 621.2599487304688, - "learning_rate": 4.976153564241628e-05, - "loss": 85.7488, - "step": 34900 - }, - { - "epoch": 0.1410408173984009, - "grad_norm": 1340.081787109375, - "learning_rate": 4.976105442219846e-05, - "loss": 108.7937, - "step": 34910 - }, - { - "epoch": 0.14108121866376855, - "grad_norm": 2728.06201171875, - "learning_rate": 4.976057271925113e-05, - "loss": 80.4239, - "step": 34920 - }, - { - "epoch": 0.1411216199291362, - "grad_norm": 571.938720703125, - "learning_rate": 4.9760090533583686e-05, - "loss": 61.1496, - "step": 34930 - }, - { - "epoch": 0.1411620211945038, - "grad_norm": 421.5497131347656, - "learning_rate": 4.9759607865205534e-05, - "loss": 151.0666, - "step": 34940 - }, - { - "epoch": 0.14120242245987144, - "grad_norm": 1543.9168701171875, - "learning_rate": 4.975912471412607e-05, - "loss": 121.0911, - "step": 34950 - }, - { - "epoch": 0.14124282372523908, - "grad_norm": 1396.1658935546875, - "learning_rate": 4.975864108035474e-05, - "loss": 94.3398, - "step": 34960 - }, - { - "epoch": 0.1412832249906067, - "grad_norm": 785.8390502929688, - "learning_rate": 4.975815696390094e-05, - "loss": 89.429, - "step": 34970 - }, - { - "epoch": 0.14132362625597433, - "grad_norm": 719.430908203125, - "learning_rate": 4.975767236477413e-05, - "loss": 116.3824, - "step": 34980 - }, - { - "epoch": 0.14136402752134197, - "grad_norm": 662.4757080078125, - "learning_rate": 4.975718728298375e-05, - "loss": 87.5541, - "step": 34990 - }, - { - "epoch": 0.1414044287867096, - "grad_norm": 1017.3776245117188, - "learning_rate": 4.975670171853926e-05, - "loss": 230.2885, - "step": 35000 - }, - { - "epoch": 0.14144483005207723, - "grad_norm": 391.976806640625, - "learning_rate": 4.975621567145012e-05, - "loss": 81.4888, - "step": 35010 - }, - { - "epoch": 0.14148523131744487, - "grad_norm": 1794.4031982421875, - "learning_rate": 4.975572914172582e-05, - "loss": 102.6515, - "step": 35020 - }, - { - "epoch": 0.1415256325828125, - "grad_norm": 937.76123046875, - "learning_rate": 4.975524212937582e-05, - "loss": 108.7728, - "step": 35030 - }, - { - "epoch": 0.14156603384818012, - "grad_norm": 1814.7794189453125, - "learning_rate": 4.975475463440964e-05, - "loss": 93.3038, - "step": 35040 - }, - { - "epoch": 0.14160643511354776, - "grad_norm": 1418.080322265625, - "learning_rate": 4.975426665683678e-05, - "loss": 92.7187, - "step": 35050 - }, - { - "epoch": 0.1416468363789154, - "grad_norm": 1647.2652587890625, - "learning_rate": 4.9753778196666737e-05, - "loss": 99.5951, - "step": 35060 - }, - { - "epoch": 0.141687237644283, - "grad_norm": 728.3031005859375, - "learning_rate": 4.975328925390904e-05, - "loss": 78.1284, - "step": 35070 - }, - { - "epoch": 0.14172763890965065, - "grad_norm": 1265.145263671875, - "learning_rate": 4.975279982857324e-05, - "loss": 141.2944, - "step": 35080 - }, - { - "epoch": 0.1417680401750183, - "grad_norm": 1588.732666015625, - "learning_rate": 4.975230992066885e-05, - "loss": 80.3437, - "step": 35090 - }, - { - "epoch": 0.1418084414403859, - "grad_norm": 0.0, - "learning_rate": 4.975181953020544e-05, - "loss": 101.6614, - "step": 35100 - }, - { - "epoch": 0.14184884270575354, - "grad_norm": 2776.19140625, - "learning_rate": 4.9751328657192565e-05, - "loss": 95.6715, - "step": 35110 - }, - { - "epoch": 0.14188924397112118, - "grad_norm": 2080.83837890625, - "learning_rate": 4.9750837301639796e-05, - "loss": 153.1414, - "step": 35120 - }, - { - "epoch": 0.1419296452364888, - "grad_norm": 912.7608642578125, - "learning_rate": 4.975034546355671e-05, - "loss": 106.0554, - "step": 35130 - }, - { - "epoch": 0.14197004650185643, - "grad_norm": 358.8157043457031, - "learning_rate": 4.97498531429529e-05, - "loss": 95.7549, - "step": 35140 - }, - { - "epoch": 0.14201044776722407, - "grad_norm": 387.9512634277344, - "learning_rate": 4.974936033983795e-05, - "loss": 88.8157, - "step": 35150 - }, - { - "epoch": 0.14205084903259171, - "grad_norm": 1014.5821533203125, - "learning_rate": 4.974886705422149e-05, - "loss": 103.6284, - "step": 35160 - }, - { - "epoch": 0.14209125029795933, - "grad_norm": 1092.1754150390625, - "learning_rate": 4.974837328611312e-05, - "loss": 107.7396, - "step": 35170 - }, - { - "epoch": 0.14213165156332697, - "grad_norm": 926.5281372070312, - "learning_rate": 4.974787903552247e-05, - "loss": 78.8734, - "step": 35180 - }, - { - "epoch": 0.1421720528286946, - "grad_norm": 546.8348999023438, - "learning_rate": 4.974738430245918e-05, - "loss": 189.5167, - "step": 35190 - }, - { - "epoch": 0.14221245409406222, - "grad_norm": 581.217041015625, - "learning_rate": 4.9746889086932895e-05, - "loss": 82.9471, - "step": 35200 - }, - { - "epoch": 0.14225285535942986, - "grad_norm": 414.46807861328125, - "learning_rate": 4.974639338895326e-05, - "loss": 61.2459, - "step": 35210 - }, - { - "epoch": 0.1422932566247975, - "grad_norm": 1268.71826171875, - "learning_rate": 4.9745897208529956e-05, - "loss": 102.3453, - "step": 35220 - }, - { - "epoch": 0.1423336578901651, - "grad_norm": 1139.93115234375, - "learning_rate": 4.974540054567264e-05, - "loss": 82.09, - "step": 35230 - }, - { - "epoch": 0.14237405915553275, - "grad_norm": 1343.3614501953125, - "learning_rate": 4.9744903400391e-05, - "loss": 98.7603, - "step": 35240 - }, - { - "epoch": 0.1424144604209004, - "grad_norm": 1799.45263671875, - "learning_rate": 4.9744405772694725e-05, - "loss": 90.476, - "step": 35250 - }, - { - "epoch": 0.142454861686268, - "grad_norm": 740.581787109375, - "learning_rate": 4.9743907662593524e-05, - "loss": 80.5426, - "step": 35260 - }, - { - "epoch": 0.14249526295163564, - "grad_norm": 689.4889526367188, - "learning_rate": 4.97434090700971e-05, - "loss": 109.3305, - "step": 35270 - }, - { - "epoch": 0.14253566421700328, - "grad_norm": 1570.7303466796875, - "learning_rate": 4.974290999521519e-05, - "loss": 115.1295, - "step": 35280 - }, - { - "epoch": 0.1425760654823709, - "grad_norm": 1887.6724853515625, - "learning_rate": 4.97424104379575e-05, - "loss": 78.6015, - "step": 35290 - }, - { - "epoch": 0.14261646674773854, - "grad_norm": 906.8424072265625, - "learning_rate": 4.974191039833378e-05, - "loss": 62.9002, - "step": 35300 - }, - { - "epoch": 0.14265686801310618, - "grad_norm": 1138.534423828125, - "learning_rate": 4.974140987635378e-05, - "loss": 114.5066, - "step": 35310 - }, - { - "epoch": 0.14269726927847382, - "grad_norm": 5530.13232421875, - "learning_rate": 4.974090887202726e-05, - "loss": 101.5712, - "step": 35320 - }, - { - "epoch": 0.14273767054384143, - "grad_norm": 571.6856079101562, - "learning_rate": 4.9740407385363983e-05, - "loss": 87.484, - "step": 35330 - }, - { - "epoch": 0.14277807180920907, - "grad_norm": 742.740478515625, - "learning_rate": 4.973990541637373e-05, - "loss": 116.8295, - "step": 35340 - }, - { - "epoch": 0.1428184730745767, - "grad_norm": 1002.74072265625, - "learning_rate": 4.9739402965066276e-05, - "loss": 61.5279, - "step": 35350 - }, - { - "epoch": 0.14285887433994432, - "grad_norm": 782.5518188476562, - "learning_rate": 4.973890003145143e-05, - "loss": 104.612, - "step": 35360 - }, - { - "epoch": 0.14289927560531196, - "grad_norm": 3795.69970703125, - "learning_rate": 4.973839661553899e-05, - "loss": 90.4234, - "step": 35370 - }, - { - "epoch": 0.1429396768706796, - "grad_norm": 679.3167724609375, - "learning_rate": 4.9737892717338774e-05, - "loss": 101.094, - "step": 35380 - }, - { - "epoch": 0.1429800781360472, - "grad_norm": 1362.4091796875, - "learning_rate": 4.97373883368606e-05, - "loss": 121.7002, - "step": 35390 - }, - { - "epoch": 0.14302047940141485, - "grad_norm": 692.154296875, - "learning_rate": 4.973688347411431e-05, - "loss": 99.3686, - "step": 35400 - }, - { - "epoch": 0.1430608806667825, - "grad_norm": 1434.7564697265625, - "learning_rate": 4.973637812910973e-05, - "loss": 98.6788, - "step": 35410 - }, - { - "epoch": 0.1431012819321501, - "grad_norm": 993.9842529296875, - "learning_rate": 4.9735872301856734e-05, - "loss": 98.5837, - "step": 35420 - }, - { - "epoch": 0.14314168319751774, - "grad_norm": 772.5789794921875, - "learning_rate": 4.973536599236517e-05, - "loss": 114.3075, - "step": 35430 - }, - { - "epoch": 0.14318208446288538, - "grad_norm": 1945.123046875, - "learning_rate": 4.9734859200644905e-05, - "loss": 80.5528, - "step": 35440 - }, - { - "epoch": 0.143222485728253, - "grad_norm": 1043.6229248046875, - "learning_rate": 4.973435192670584e-05, - "loss": 104.7841, - "step": 35450 - }, - { - "epoch": 0.14326288699362064, - "grad_norm": 0.0, - "learning_rate": 4.973384417055784e-05, - "loss": 83.0118, - "step": 35460 - }, - { - "epoch": 0.14330328825898828, - "grad_norm": 1075.596435546875, - "learning_rate": 4.9733335932210814e-05, - "loss": 87.0017, - "step": 35470 - }, - { - "epoch": 0.14334368952435592, - "grad_norm": 1106.34765625, - "learning_rate": 4.973282721167467e-05, - "loss": 84.2105, - "step": 35480 - }, - { - "epoch": 0.14338409078972353, - "grad_norm": 680.8936157226562, - "learning_rate": 4.973231800895932e-05, - "loss": 102.21, - "step": 35490 - }, - { - "epoch": 0.14342449205509117, - "grad_norm": 971.0728759765625, - "learning_rate": 4.9731808324074717e-05, - "loss": 89.0123, - "step": 35500 - }, - { - "epoch": 0.1434648933204588, - "grad_norm": 1773.6971435546875, - "learning_rate": 4.973129815703076e-05, - "loss": 60.7568, - "step": 35510 - }, - { - "epoch": 0.14350529458582642, - "grad_norm": 1183.9600830078125, - "learning_rate": 4.973078750783742e-05, - "loss": 91.8321, - "step": 35520 - }, - { - "epoch": 0.14354569585119406, - "grad_norm": 540.8861694335938, - "learning_rate": 4.973027637650464e-05, - "loss": 82.3345, - "step": 35530 - }, - { - "epoch": 0.1435860971165617, - "grad_norm": 2457.4091796875, - "learning_rate": 4.9729764763042394e-05, - "loss": 98.1432, - "step": 35540 - }, - { - "epoch": 0.1436264983819293, - "grad_norm": 778.1422729492188, - "learning_rate": 4.9729252667460655e-05, - "loss": 85.1171, - "step": 35550 - }, - { - "epoch": 0.14366689964729695, - "grad_norm": 501.1910095214844, - "learning_rate": 4.97287400897694e-05, - "loss": 138.8153, - "step": 35560 - }, - { - "epoch": 0.1437073009126646, - "grad_norm": 1607.3275146484375, - "learning_rate": 4.972822702997863e-05, - "loss": 80.1325, - "step": 35570 - }, - { - "epoch": 0.1437477021780322, - "grad_norm": 796.763427734375, - "learning_rate": 4.9727713488098335e-05, - "loss": 79.0268, - "step": 35580 - }, - { - "epoch": 0.14378810344339985, - "grad_norm": 661.5986938476562, - "learning_rate": 4.972719946413854e-05, - "loss": 88.3111, - "step": 35590 - }, - { - "epoch": 0.14382850470876749, - "grad_norm": 1435.8564453125, - "learning_rate": 4.9726684958109266e-05, - "loss": 107.5516, - "step": 35600 - }, - { - "epoch": 0.1438689059741351, - "grad_norm": 679.6939697265625, - "learning_rate": 4.972616997002053e-05, - "loss": 75.3328, - "step": 35610 - }, - { - "epoch": 0.14390930723950274, - "grad_norm": 1368.540283203125, - "learning_rate": 4.972565449988239e-05, - "loss": 83.6638, - "step": 35620 - }, - { - "epoch": 0.14394970850487038, - "grad_norm": 503.572998046875, - "learning_rate": 4.972513854770487e-05, - "loss": 82.9908, - "step": 35630 - }, - { - "epoch": 0.14399010977023802, - "grad_norm": 1151.1427001953125, - "learning_rate": 4.972462211349806e-05, - "loss": 94.219, - "step": 35640 - }, - { - "epoch": 0.14403051103560563, - "grad_norm": 2658.9482421875, - "learning_rate": 4.972410519727201e-05, - "loss": 91.6265, - "step": 35650 - }, - { - "epoch": 0.14407091230097327, - "grad_norm": 953.5581665039062, - "learning_rate": 4.97235877990368e-05, - "loss": 96.4948, - "step": 35660 - }, - { - "epoch": 0.1441113135663409, - "grad_norm": 898.052490234375, - "learning_rate": 4.972306991880251e-05, - "loss": 144.139, - "step": 35670 - }, - { - "epoch": 0.14415171483170852, - "grad_norm": 338.9049072265625, - "learning_rate": 4.972255155657925e-05, - "loss": 81.2305, - "step": 35680 - }, - { - "epoch": 0.14419211609707616, - "grad_norm": 662.849365234375, - "learning_rate": 4.972203271237712e-05, - "loss": 82.9598, - "step": 35690 - }, - { - "epoch": 0.1442325173624438, - "grad_norm": 488.6361999511719, - "learning_rate": 4.972151338620623e-05, - "loss": 64.3388, - "step": 35700 - }, - { - "epoch": 0.14427291862781141, - "grad_norm": 1745.017578125, - "learning_rate": 4.972099357807671e-05, - "loss": 146.5352, - "step": 35710 - }, - { - "epoch": 0.14431331989317905, - "grad_norm": 717.8941650390625, - "learning_rate": 4.9720473287998695e-05, - "loss": 82.1567, - "step": 35720 - }, - { - "epoch": 0.1443537211585467, - "grad_norm": 3314.039794921875, - "learning_rate": 4.9719952515982324e-05, - "loss": 81.9564, - "step": 35730 - }, - { - "epoch": 0.1443941224239143, - "grad_norm": 716.9427490234375, - "learning_rate": 4.9719431262037755e-05, - "loss": 94.6004, - "step": 35740 - }, - { - "epoch": 0.14443452368928195, - "grad_norm": 591.388427734375, - "learning_rate": 4.971890952617515e-05, - "loss": 107.062, - "step": 35750 - }, - { - "epoch": 0.1444749249546496, - "grad_norm": 1554.075927734375, - "learning_rate": 4.9718387308404675e-05, - "loss": 119.4833, - "step": 35760 - }, - { - "epoch": 0.1445153262200172, - "grad_norm": 440.0912780761719, - "learning_rate": 4.9717864608736506e-05, - "loss": 104.854, - "step": 35770 - }, - { - "epoch": 0.14455572748538484, - "grad_norm": 692.7317504882812, - "learning_rate": 4.971734142718085e-05, - "loss": 116.6411, - "step": 35780 - }, - { - "epoch": 0.14459612875075248, - "grad_norm": 520.4461059570312, - "learning_rate": 4.971681776374789e-05, - "loss": 111.4582, - "step": 35790 - }, - { - "epoch": 0.14463653001612012, - "grad_norm": 762.310546875, - "learning_rate": 4.971629361844785e-05, - "loss": 102.7609, - "step": 35800 - }, - { - "epoch": 0.14467693128148773, - "grad_norm": 852.2344970703125, - "learning_rate": 4.971576899129094e-05, - "loss": 121.1159, - "step": 35810 - }, - { - "epoch": 0.14471733254685537, - "grad_norm": 445.6109924316406, - "learning_rate": 4.9715243882287386e-05, - "loss": 89.8152, - "step": 35820 - }, - { - "epoch": 0.144757733812223, - "grad_norm": 950.6737060546875, - "learning_rate": 4.971471829144743e-05, - "loss": 75.9464, - "step": 35830 - }, - { - "epoch": 0.14479813507759062, - "grad_norm": 726.3805541992188, - "learning_rate": 4.9714192218781316e-05, - "loss": 81.0347, - "step": 35840 - }, - { - "epoch": 0.14483853634295826, - "grad_norm": 1368.5889892578125, - "learning_rate": 4.97136656642993e-05, - "loss": 94.3249, - "step": 35850 - }, - { - "epoch": 0.1448789376083259, - "grad_norm": 2891.8837890625, - "learning_rate": 4.9713138628011654e-05, - "loss": 88.5745, - "step": 35860 - }, - { - "epoch": 0.14491933887369352, - "grad_norm": 883.2448120117188, - "learning_rate": 4.971261110992864e-05, - "loss": 86.1088, - "step": 35870 - }, - { - "epoch": 0.14495974013906116, - "grad_norm": 750.9356689453125, - "learning_rate": 4.9712083110060556e-05, - "loss": 157.1846, - "step": 35880 - }, - { - "epoch": 0.1450001414044288, - "grad_norm": 1293.0548095703125, - "learning_rate": 4.971155462841769e-05, - "loss": 97.4993, - "step": 35890 - }, - { - "epoch": 0.1450405426697964, - "grad_norm": 1317.3045654296875, - "learning_rate": 4.971102566501034e-05, - "loss": 77.7684, - "step": 35900 - }, - { - "epoch": 0.14508094393516405, - "grad_norm": 698.4030151367188, - "learning_rate": 4.971049621984882e-05, - "loss": 68.1086, - "step": 35910 - }, - { - "epoch": 0.1451213452005317, - "grad_norm": 491.6965637207031, - "learning_rate": 4.9709966292943455e-05, - "loss": 85.3335, - "step": 35920 - }, - { - "epoch": 0.1451617464658993, - "grad_norm": 3422.90380859375, - "learning_rate": 4.970943588430458e-05, - "loss": 122.6019, - "step": 35930 - }, - { - "epoch": 0.14520214773126694, - "grad_norm": 933.2683715820312, - "learning_rate": 4.970890499394253e-05, - "loss": 116.233, - "step": 35940 - }, - { - "epoch": 0.14524254899663458, - "grad_norm": 1370.2513427734375, - "learning_rate": 4.9708373621867656e-05, - "loss": 68.2386, - "step": 35950 - }, - { - "epoch": 0.14528295026200222, - "grad_norm": 2012.014892578125, - "learning_rate": 4.9707841768090314e-05, - "loss": 73.6452, - "step": 35960 - }, - { - "epoch": 0.14532335152736983, - "grad_norm": 1204.0938720703125, - "learning_rate": 4.9707309432620874e-05, - "loss": 82.8698, - "step": 35970 - }, - { - "epoch": 0.14536375279273747, - "grad_norm": 1037.1630859375, - "learning_rate": 4.9706776615469716e-05, - "loss": 80.2174, - "step": 35980 - }, - { - "epoch": 0.1454041540581051, - "grad_norm": 566.7116088867188, - "learning_rate": 4.970624331664724e-05, - "loss": 80.2169, - "step": 35990 - }, - { - "epoch": 0.14544455532347272, - "grad_norm": 2502.911865234375, - "learning_rate": 4.9705709536163824e-05, - "loss": 148.008, - "step": 36000 - }, - { - "epoch": 0.14548495658884036, - "grad_norm": 569.8108520507812, - "learning_rate": 4.970517527402988e-05, - "loss": 70.3217, - "step": 36010 - }, - { - "epoch": 0.145525357854208, - "grad_norm": 635.7393188476562, - "learning_rate": 4.9704640530255826e-05, - "loss": 84.3072, - "step": 36020 - }, - { - "epoch": 0.14556575911957562, - "grad_norm": 365.2387390136719, - "learning_rate": 4.970410530485209e-05, - "loss": 60.3312, - "step": 36030 - }, - { - "epoch": 0.14560616038494326, - "grad_norm": 529.880126953125, - "learning_rate": 4.970356959782909e-05, - "loss": 125.5716, - "step": 36040 - }, - { - "epoch": 0.1456465616503109, - "grad_norm": 928.6548461914062, - "learning_rate": 4.97030334091973e-05, - "loss": 98.0715, - "step": 36050 - }, - { - "epoch": 0.1456869629156785, - "grad_norm": 937.4443359375, - "learning_rate": 4.970249673896714e-05, - "loss": 123.7811, - "step": 36060 - }, - { - "epoch": 0.14572736418104615, - "grad_norm": 664.5932006835938, - "learning_rate": 4.970195958714909e-05, - "loss": 103.4508, - "step": 36070 - }, - { - "epoch": 0.1457677654464138, - "grad_norm": 600.3163452148438, - "learning_rate": 4.970142195375363e-05, - "loss": 72.4148, - "step": 36080 - }, - { - "epoch": 0.1458081667117814, - "grad_norm": 602.3806762695312, - "learning_rate": 4.970088383879123e-05, - "loss": 103.1301, - "step": 36090 - }, - { - "epoch": 0.14584856797714904, - "grad_norm": 737.54443359375, - "learning_rate": 4.970034524227238e-05, - "loss": 80.4645, - "step": 36100 - }, - { - "epoch": 0.14588896924251668, - "grad_norm": 1445.3460693359375, - "learning_rate": 4.969980616420759e-05, - "loss": 75.5717, - "step": 36110 - }, - { - "epoch": 0.14592937050788432, - "grad_norm": 385.9198913574219, - "learning_rate": 4.9699266604607355e-05, - "loss": 137.0707, - "step": 36120 - }, - { - "epoch": 0.14596977177325193, - "grad_norm": 0.0, - "learning_rate": 4.96987265634822e-05, - "loss": 88.1988, - "step": 36130 - }, - { - "epoch": 0.14601017303861957, - "grad_norm": 793.6890869140625, - "learning_rate": 4.9698186040842654e-05, - "loss": 93.8744, - "step": 36140 - }, - { - "epoch": 0.1460505743039872, - "grad_norm": 1040.031494140625, - "learning_rate": 4.969764503669926e-05, - "loss": 67.5155, - "step": 36150 - }, - { - "epoch": 0.14609097556935483, - "grad_norm": 1021.9496459960938, - "learning_rate": 4.9697103551062556e-05, - "loss": 69.8323, - "step": 36160 - }, - { - "epoch": 0.14613137683472247, - "grad_norm": 1474.1693115234375, - "learning_rate": 4.9696561583943106e-05, - "loss": 100.1957, - "step": 36170 - }, - { - "epoch": 0.1461717781000901, - "grad_norm": 1472.6767578125, - "learning_rate": 4.969601913535148e-05, - "loss": 91.6, - "step": 36180 - }, - { - "epoch": 0.14621217936545772, - "grad_norm": 689.4755249023438, - "learning_rate": 4.9695476205298235e-05, - "loss": 146.9316, - "step": 36190 - }, - { - "epoch": 0.14625258063082536, - "grad_norm": 1301.0394287109375, - "learning_rate": 4.969493279379398e-05, - "loss": 75.4633, - "step": 36200 - }, - { - "epoch": 0.146292981896193, - "grad_norm": 708.2933349609375, - "learning_rate": 4.9694388900849284e-05, - "loss": 73.6771, - "step": 36210 - }, - { - "epoch": 0.1463333831615606, - "grad_norm": 0.0, - "learning_rate": 4.969384452647477e-05, - "loss": 81.5898, - "step": 36220 - }, - { - "epoch": 0.14637378442692825, - "grad_norm": 1018.5614013671875, - "learning_rate": 4.969329967068104e-05, - "loss": 98.2319, - "step": 36230 - }, - { - "epoch": 0.1464141856922959, - "grad_norm": 1839.9981689453125, - "learning_rate": 4.969275433347872e-05, - "loss": 88.1999, - "step": 36240 - }, - { - "epoch": 0.1464545869576635, - "grad_norm": 549.9813842773438, - "learning_rate": 4.9692208514878444e-05, - "loss": 79.6468, - "step": 36250 - }, - { - "epoch": 0.14649498822303114, - "grad_norm": 615.52099609375, - "learning_rate": 4.9691662214890856e-05, - "loss": 93.557, - "step": 36260 - }, - { - "epoch": 0.14653538948839878, - "grad_norm": 0.0, - "learning_rate": 4.969111543352659e-05, - "loss": 159.444, - "step": 36270 - }, - { - "epoch": 0.14657579075376642, - "grad_norm": 1021.6466674804688, - "learning_rate": 4.969056817079633e-05, - "loss": 93.9579, - "step": 36280 - }, - { - "epoch": 0.14661619201913403, - "grad_norm": 1050.8099365234375, - "learning_rate": 4.969002042671072e-05, - "loss": 92.0323, - "step": 36290 - }, - { - "epoch": 0.14665659328450167, - "grad_norm": 933.8421020507812, - "learning_rate": 4.968947220128045e-05, - "loss": 74.9989, - "step": 36300 - }, - { - "epoch": 0.14669699454986931, - "grad_norm": 775.214111328125, - "learning_rate": 4.968892349451621e-05, - "loss": 109.3062, - "step": 36310 - }, - { - "epoch": 0.14673739581523693, - "grad_norm": 275.0602111816406, - "learning_rate": 4.9688374306428696e-05, - "loss": 79.9519, - "step": 36320 - }, - { - "epoch": 0.14677779708060457, - "grad_norm": 1294.7794189453125, - "learning_rate": 4.9687824637028625e-05, - "loss": 113.0867, - "step": 36330 - }, - { - "epoch": 0.1468181983459722, - "grad_norm": 745.861572265625, - "learning_rate": 4.968727448632669e-05, - "loss": 76.9137, - "step": 36340 - }, - { - "epoch": 0.14685859961133982, - "grad_norm": 776.559326171875, - "learning_rate": 4.968672385433364e-05, - "loss": 86.1525, - "step": 36350 - }, - { - "epoch": 0.14689900087670746, - "grad_norm": 1309.9783935546875, - "learning_rate": 4.968617274106019e-05, - "loss": 79.5693, - "step": 36360 - }, - { - "epoch": 0.1469394021420751, - "grad_norm": 1547.9395751953125, - "learning_rate": 4.968562114651709e-05, - "loss": 116.9439, - "step": 36370 - }, - { - "epoch": 0.1469798034074427, - "grad_norm": 453.9913024902344, - "learning_rate": 4.9685069070715106e-05, - "loss": 74.6536, - "step": 36380 - }, - { - "epoch": 0.14702020467281035, - "grad_norm": 346.8396911621094, - "learning_rate": 4.968451651366498e-05, - "loss": 95.8461, - "step": 36390 - }, - { - "epoch": 0.147060605938178, - "grad_norm": 537.4244995117188, - "learning_rate": 4.968396347537751e-05, - "loss": 74.5145, - "step": 36400 - }, - { - "epoch": 0.1471010072035456, - "grad_norm": 897.6705932617188, - "learning_rate": 4.968340995586346e-05, - "loss": 109.4366, - "step": 36410 - }, - { - "epoch": 0.14714140846891324, - "grad_norm": 712.1412963867188, - "learning_rate": 4.9682855955133625e-05, - "loss": 67.6998, - "step": 36420 - }, - { - "epoch": 0.14718180973428088, - "grad_norm": 0.0, - "learning_rate": 4.96823014731988e-05, - "loss": 102.4061, - "step": 36430 - }, - { - "epoch": 0.14722221099964852, - "grad_norm": 1074.514404296875, - "learning_rate": 4.9681746510069805e-05, - "loss": 79.5955, - "step": 36440 - }, - { - "epoch": 0.14726261226501614, - "grad_norm": 1605.676025390625, - "learning_rate": 4.9681191065757455e-05, - "loss": 89.505, - "step": 36450 - }, - { - "epoch": 0.14730301353038378, - "grad_norm": 981.2547607421875, - "learning_rate": 4.9680635140272575e-05, - "loss": 99.4992, - "step": 36460 - }, - { - "epoch": 0.14734341479575142, - "grad_norm": 937.9696655273438, - "learning_rate": 4.9680078733626015e-05, - "loss": 75.9712, - "step": 36470 - }, - { - "epoch": 0.14738381606111903, - "grad_norm": 501.3292236328125, - "learning_rate": 4.9679521845828604e-05, - "loss": 75.7125, - "step": 36480 - }, - { - "epoch": 0.14742421732648667, - "grad_norm": 1447.997802734375, - "learning_rate": 4.967896447689121e-05, - "loss": 83.492, - "step": 36490 - }, - { - "epoch": 0.1474646185918543, - "grad_norm": 555.2802734375, - "learning_rate": 4.96784066268247e-05, - "loss": 63.0523, - "step": 36500 - }, - { - "epoch": 0.14750501985722192, - "grad_norm": 784.3593139648438, - "learning_rate": 4.967784829563995e-05, - "loss": 93.0994, - "step": 36510 - }, - { - "epoch": 0.14754542112258956, - "grad_norm": 1216.3717041015625, - "learning_rate": 4.967728948334784e-05, - "loss": 99.9734, - "step": 36520 - }, - { - "epoch": 0.1475858223879572, - "grad_norm": 717.1402587890625, - "learning_rate": 4.967673018995926e-05, - "loss": 80.4648, - "step": 36530 - }, - { - "epoch": 0.1476262236533248, - "grad_norm": 468.6872253417969, - "learning_rate": 4.967617041548513e-05, - "loss": 96.3743, - "step": 36540 - }, - { - "epoch": 0.14766662491869245, - "grad_norm": 709.4956665039062, - "learning_rate": 4.967561015993635e-05, - "loss": 96.8744, - "step": 36550 - }, - { - "epoch": 0.1477070261840601, - "grad_norm": 691.0542602539062, - "learning_rate": 4.967504942332385e-05, - "loss": 86.1126, - "step": 36560 - }, - { - "epoch": 0.1477474274494277, - "grad_norm": 1028.8739013671875, - "learning_rate": 4.967448820565856e-05, - "loss": 125.4938, - "step": 36570 - }, - { - "epoch": 0.14778782871479534, - "grad_norm": 599.9513549804688, - "learning_rate": 4.9673926506951404e-05, - "loss": 117.7221, - "step": 36580 - }, - { - "epoch": 0.14782822998016298, - "grad_norm": 609.7288818359375, - "learning_rate": 4.967336432721337e-05, - "loss": 80.1608, - "step": 36590 - }, - { - "epoch": 0.1478686312455306, - "grad_norm": 1172.2431640625, - "learning_rate": 4.967280166645538e-05, - "loss": 97.0124, - "step": 36600 - }, - { - "epoch": 0.14790903251089824, - "grad_norm": 774.5439453125, - "learning_rate": 4.967223852468842e-05, - "loss": 106.1079, - "step": 36610 - }, - { - "epoch": 0.14794943377626588, - "grad_norm": 414.7299499511719, - "learning_rate": 4.967167490192347e-05, - "loss": 95.8307, - "step": 36620 - }, - { - "epoch": 0.14798983504163352, - "grad_norm": 1053.8829345703125, - "learning_rate": 4.967111079817151e-05, - "loss": 102.4389, - "step": 36630 - }, - { - "epoch": 0.14803023630700113, - "grad_norm": 730.1495971679688, - "learning_rate": 4.967054621344356e-05, - "loss": 87.2563, - "step": 36640 - }, - { - "epoch": 0.14807063757236877, - "grad_norm": 2739.23583984375, - "learning_rate": 4.96699811477506e-05, - "loss": 126.1713, - "step": 36650 - }, - { - "epoch": 0.1481110388377364, - "grad_norm": 3278.61767578125, - "learning_rate": 4.966941560110366e-05, - "loss": 115.4281, - "step": 36660 - }, - { - "epoch": 0.14815144010310402, - "grad_norm": 1437.7108154296875, - "learning_rate": 4.966884957351375e-05, - "loss": 98.0438, - "step": 36670 - }, - { - "epoch": 0.14819184136847166, - "grad_norm": 582.1204833984375, - "learning_rate": 4.966828306499193e-05, - "loss": 85.8503, - "step": 36680 - }, - { - "epoch": 0.1482322426338393, - "grad_norm": 790.8889770507812, - "learning_rate": 4.966771607554923e-05, - "loss": 75.0409, - "step": 36690 - }, - { - "epoch": 0.1482726438992069, - "grad_norm": 370.1599426269531, - "learning_rate": 4.96671486051967e-05, - "loss": 94.7309, - "step": 36700 - }, - { - "epoch": 0.14831304516457455, - "grad_norm": 2078.58837890625, - "learning_rate": 4.966658065394542e-05, - "loss": 133.5383, - "step": 36710 - }, - { - "epoch": 0.1483534464299422, - "grad_norm": 374.68475341796875, - "learning_rate": 4.9666012221806434e-05, - "loss": 102.0996, - "step": 36720 - }, - { - "epoch": 0.1483938476953098, - "grad_norm": 1200.5596923828125, - "learning_rate": 4.966544330879085e-05, - "loss": 89.5257, - "step": 36730 - }, - { - "epoch": 0.14843424896067745, - "grad_norm": 430.6545104980469, - "learning_rate": 4.9664873914909755e-05, - "loss": 73.1253, - "step": 36740 - }, - { - "epoch": 0.14847465022604509, - "grad_norm": 1570.5799560546875, - "learning_rate": 4.966430404017424e-05, - "loss": 72.1421, - "step": 36750 - }, - { - "epoch": 0.1485150514914127, - "grad_norm": 422.2970886230469, - "learning_rate": 4.966373368459541e-05, - "loss": 147.6407, - "step": 36760 - }, - { - "epoch": 0.14855545275678034, - "grad_norm": 567.98046875, - "learning_rate": 4.966316284818441e-05, - "loss": 78.6361, - "step": 36770 - }, - { - "epoch": 0.14859585402214798, - "grad_norm": 0.0, - "learning_rate": 4.966259153095235e-05, - "loss": 72.8352, - "step": 36780 - }, - { - "epoch": 0.14863625528751562, - "grad_norm": 871.2714233398438, - "learning_rate": 4.966201973291036e-05, - "loss": 104.7993, - "step": 36790 - }, - { - "epoch": 0.14867665655288323, - "grad_norm": 762.0390625, - "learning_rate": 4.966144745406961e-05, - "loss": 63.3974, - "step": 36800 - }, - { - "epoch": 0.14871705781825087, - "grad_norm": 1256.638427734375, - "learning_rate": 4.966087469444124e-05, - "loss": 82.0591, - "step": 36810 - }, - { - "epoch": 0.1487574590836185, - "grad_norm": 1347.9171142578125, - "learning_rate": 4.966030145403642e-05, - "loss": 97.7492, - "step": 36820 - }, - { - "epoch": 0.14879786034898612, - "grad_norm": 2305.42236328125, - "learning_rate": 4.965972773286633e-05, - "loss": 98.8773, - "step": 36830 - }, - { - "epoch": 0.14883826161435376, - "grad_norm": 759.54931640625, - "learning_rate": 4.965915353094215e-05, - "loss": 104.5514, - "step": 36840 - }, - { - "epoch": 0.1488786628797214, - "grad_norm": 409.0998840332031, - "learning_rate": 4.9658578848275076e-05, - "loss": 72.8223, - "step": 36850 - }, - { - "epoch": 0.14891906414508901, - "grad_norm": 464.468017578125, - "learning_rate": 4.965800368487632e-05, - "loss": 112.4646, - "step": 36860 - }, - { - "epoch": 0.14895946541045665, - "grad_norm": 923.630859375, - "learning_rate": 4.9657428040757084e-05, - "loss": 107.7895, - "step": 36870 - }, - { - "epoch": 0.1489998666758243, - "grad_norm": 1179.0242919921875, - "learning_rate": 4.965685191592859e-05, - "loss": 86.0383, - "step": 36880 - }, - { - "epoch": 0.1490402679411919, - "grad_norm": 636.7333374023438, - "learning_rate": 4.9656275310402074e-05, - "loss": 114.6134, - "step": 36890 - }, - { - "epoch": 0.14908066920655955, - "grad_norm": 2465.29296875, - "learning_rate": 4.965569822418877e-05, - "loss": 58.8899, - "step": 36900 - }, - { - "epoch": 0.1491210704719272, - "grad_norm": 757.9246215820312, - "learning_rate": 4.9655120657299945e-05, - "loss": 66.7647, - "step": 36910 - }, - { - "epoch": 0.1491614717372948, - "grad_norm": 671.7369995117188, - "learning_rate": 4.965454260974685e-05, - "loss": 87.831, - "step": 36920 - }, - { - "epoch": 0.14920187300266244, - "grad_norm": 409.745849609375, - "learning_rate": 4.9653964081540756e-05, - "loss": 107.4229, - "step": 36930 - }, - { - "epoch": 0.14924227426803008, - "grad_norm": 1466.3577880859375, - "learning_rate": 4.965338507269294e-05, - "loss": 93.4886, - "step": 36940 - }, - { - "epoch": 0.14928267553339772, - "grad_norm": 554.5855712890625, - "learning_rate": 4.965280558321468e-05, - "loss": 97.7594, - "step": 36950 - }, - { - "epoch": 0.14932307679876533, - "grad_norm": 1820.7589111328125, - "learning_rate": 4.9652225613117284e-05, - "loss": 122.0696, - "step": 36960 - }, - { - "epoch": 0.14936347806413297, - "grad_norm": 1617.1182861328125, - "learning_rate": 4.965164516241206e-05, - "loss": 123.7657, - "step": 36970 - }, - { - "epoch": 0.1494038793295006, - "grad_norm": 669.3226928710938, - "learning_rate": 4.965106423111033e-05, - "loss": 103.5812, - "step": 36980 - }, - { - "epoch": 0.14944428059486822, - "grad_norm": 2139.23876953125, - "learning_rate": 4.9650482819223405e-05, - "loss": 119.0735, - "step": 36990 - }, - { - "epoch": 0.14948468186023586, - "grad_norm": 532.9093017578125, - "learning_rate": 4.964990092676263e-05, - "loss": 82.2448, - "step": 37000 - }, - { - "epoch": 0.1495250831256035, - "grad_norm": 804.912109375, - "learning_rate": 4.964931855373934e-05, - "loss": 80.909, - "step": 37010 - }, - { - "epoch": 0.14956548439097112, - "grad_norm": 847.0060424804688, - "learning_rate": 4.9648735700164895e-05, - "loss": 92.2945, - "step": 37020 - }, - { - "epoch": 0.14960588565633876, - "grad_norm": 1183.10302734375, - "learning_rate": 4.964815236605066e-05, - "loss": 94.6774, - "step": 37030 - }, - { - "epoch": 0.1496462869217064, - "grad_norm": 885.68896484375, - "learning_rate": 4.964756855140801e-05, - "loss": 75.244, - "step": 37040 - }, - { - "epoch": 0.149686688187074, - "grad_norm": 843.5673828125, - "learning_rate": 4.964698425624831e-05, - "loss": 82.0223, - "step": 37050 - }, - { - "epoch": 0.14972708945244165, - "grad_norm": 817.964599609375, - "learning_rate": 4.964639948058297e-05, - "loss": 131.6297, - "step": 37060 - }, - { - "epoch": 0.1497674907178093, - "grad_norm": 413.62335205078125, - "learning_rate": 4.964581422442338e-05, - "loss": 93.1363, - "step": 37070 - }, - { - "epoch": 0.1498078919831769, - "grad_norm": 569.75146484375, - "learning_rate": 4.964522848778096e-05, - "loss": 70.2936, - "step": 37080 - }, - { - "epoch": 0.14984829324854454, - "grad_norm": 507.4297180175781, - "learning_rate": 4.964464227066712e-05, - "loss": 53.4872, - "step": 37090 - }, - { - "epoch": 0.14988869451391218, - "grad_norm": 1582.9383544921875, - "learning_rate": 4.964405557309328e-05, - "loss": 128.3349, - "step": 37100 - }, - { - "epoch": 0.14992909577927982, - "grad_norm": 871.0317993164062, - "learning_rate": 4.9643468395070904e-05, - "loss": 106.9411, - "step": 37110 - }, - { - "epoch": 0.14996949704464743, - "grad_norm": 4390.7822265625, - "learning_rate": 4.964288073661142e-05, - "loss": 98.8651, - "step": 37120 - }, - { - "epoch": 0.15000989831001507, - "grad_norm": 1010.7752075195312, - "learning_rate": 4.9642292597726284e-05, - "loss": 134.204, - "step": 37130 - }, - { - "epoch": 0.1500502995753827, - "grad_norm": 3052.423583984375, - "learning_rate": 4.964170397842697e-05, - "loss": 129.5846, - "step": 37140 - }, - { - "epoch": 0.15009070084075032, - "grad_norm": 363.93865966796875, - "learning_rate": 4.9641114878724956e-05, - "loss": 101.122, - "step": 37150 - }, - { - "epoch": 0.15013110210611796, - "grad_norm": 963.4545288085938, - "learning_rate": 4.964052529863171e-05, - "loss": 98.0533, - "step": 37160 - }, - { - "epoch": 0.1501715033714856, - "grad_norm": 939.9786987304688, - "learning_rate": 4.9639935238158744e-05, - "loss": 89.1619, - "step": 37170 - }, - { - "epoch": 0.15021190463685322, - "grad_norm": 1083.3187255859375, - "learning_rate": 4.963934469731756e-05, - "loss": 77.3688, - "step": 37180 - }, - { - "epoch": 0.15025230590222086, - "grad_norm": 818.65478515625, - "learning_rate": 4.963875367611966e-05, - "loss": 96.2921, - "step": 37190 - }, - { - "epoch": 0.1502927071675885, - "grad_norm": 723.4608154296875, - "learning_rate": 4.963816217457657e-05, - "loss": 128.7513, - "step": 37200 - }, - { - "epoch": 0.1503331084329561, - "grad_norm": 737.654541015625, - "learning_rate": 4.963757019269983e-05, - "loss": 107.9872, - "step": 37210 - }, - { - "epoch": 0.15037350969832375, - "grad_norm": 771.156005859375, - "learning_rate": 4.963697773050097e-05, - "loss": 109.9101, - "step": 37220 - }, - { - "epoch": 0.1504139109636914, - "grad_norm": 748.536376953125, - "learning_rate": 4.9636384787991547e-05, - "loss": 65.0804, - "step": 37230 - }, - { - "epoch": 0.150454312229059, - "grad_norm": 430.6962890625, - "learning_rate": 4.963579136518312e-05, - "loss": 59.6868, - "step": 37240 - }, - { - "epoch": 0.15049471349442664, - "grad_norm": 703.9127807617188, - "learning_rate": 4.963519746208726e-05, - "loss": 111.1007, - "step": 37250 - }, - { - "epoch": 0.15053511475979428, - "grad_norm": 813.0443115234375, - "learning_rate": 4.963460307871553e-05, - "loss": 102.864, - "step": 37260 - }, - { - "epoch": 0.15057551602516192, - "grad_norm": 1400.834716796875, - "learning_rate": 4.963400821507954e-05, - "loss": 116.5066, - "step": 37270 - }, - { - "epoch": 0.15061591729052953, - "grad_norm": 941.7633666992188, - "learning_rate": 4.9633412871190873e-05, - "loss": 97.3467, - "step": 37280 - }, - { - "epoch": 0.15065631855589717, - "grad_norm": 895.1846923828125, - "learning_rate": 4.963281704706115e-05, - "loss": 88.1651, - "step": 37290 - }, - { - "epoch": 0.1506967198212648, - "grad_norm": 1722.0013427734375, - "learning_rate": 4.9632220742701965e-05, - "loss": 64.2686, - "step": 37300 - }, - { - "epoch": 0.15073712108663243, - "grad_norm": 499.2967224121094, - "learning_rate": 4.963162395812496e-05, - "loss": 57.7127, - "step": 37310 - }, - { - "epoch": 0.15077752235200007, - "grad_norm": 642.2814331054688, - "learning_rate": 4.9631026693341764e-05, - "loss": 47.7118, - "step": 37320 - }, - { - "epoch": 0.1508179236173677, - "grad_norm": 985.0557250976562, - "learning_rate": 4.963042894836403e-05, - "loss": 113.7733, - "step": 37330 - }, - { - "epoch": 0.15085832488273532, - "grad_norm": 558.4752197265625, - "learning_rate": 4.9629830723203384e-05, - "loss": 55.6255, - "step": 37340 - }, - { - "epoch": 0.15089872614810296, - "grad_norm": 366.1856689453125, - "learning_rate": 4.9629232017871524e-05, - "loss": 98.4935, - "step": 37350 - }, - { - "epoch": 0.1509391274134706, - "grad_norm": 817.6262817382812, - "learning_rate": 4.96286328323801e-05, - "loss": 69.1073, - "step": 37360 - }, - { - "epoch": 0.1509795286788382, - "grad_norm": 724.3482055664062, - "learning_rate": 4.96280331667408e-05, - "loss": 101.978, - "step": 37370 - }, - { - "epoch": 0.15101992994420585, - "grad_norm": 2478.86962890625, - "learning_rate": 4.9627433020965314e-05, - "loss": 104.1645, - "step": 37380 - }, - { - "epoch": 0.1510603312095735, - "grad_norm": 663.1130981445312, - "learning_rate": 4.962683239506534e-05, - "loss": 80.2843, - "step": 37390 - }, - { - "epoch": 0.1511007324749411, - "grad_norm": 757.1906127929688, - "learning_rate": 4.9626231289052596e-05, - "loss": 87.6918, - "step": 37400 - }, - { - "epoch": 0.15114113374030874, - "grad_norm": 427.3059997558594, - "learning_rate": 4.962562970293879e-05, - "loss": 79.3416, - "step": 37410 - }, - { - "epoch": 0.15118153500567638, - "grad_norm": 821.1558837890625, - "learning_rate": 4.962502763673565e-05, - "loss": 116.5157, - "step": 37420 - }, - { - "epoch": 0.15122193627104402, - "grad_norm": 1046.214111328125, - "learning_rate": 4.962442509045493e-05, - "loss": 53.0469, - "step": 37430 - }, - { - "epoch": 0.15126233753641163, - "grad_norm": 1114.373291015625, - "learning_rate": 4.9623822064108364e-05, - "loss": 75.2076, - "step": 37440 - }, - { - "epoch": 0.15130273880177927, - "grad_norm": 1147.0633544921875, - "learning_rate": 4.9623218557707694e-05, - "loss": 80.5338, - "step": 37450 - }, - { - "epoch": 0.15134314006714691, - "grad_norm": 1153.3177490234375, - "learning_rate": 4.9622614571264715e-05, - "loss": 115.0096, - "step": 37460 - }, - { - "epoch": 0.15138354133251453, - "grad_norm": 560.9694213867188, - "learning_rate": 4.962201010479119e-05, - "loss": 86.4352, - "step": 37470 - }, - { - "epoch": 0.15142394259788217, - "grad_norm": 584.9893188476562, - "learning_rate": 4.96214051582989e-05, - "loss": 63.2892, - "step": 37480 - }, - { - "epoch": 0.1514643438632498, - "grad_norm": 573.4566650390625, - "learning_rate": 4.962079973179963e-05, - "loss": 71.3577, - "step": 37490 - }, - { - "epoch": 0.15150474512861742, - "grad_norm": 696.969482421875, - "learning_rate": 4.962019382530521e-05, - "loss": 111.7284, - "step": 37500 - }, - { - "epoch": 0.15154514639398506, - "grad_norm": 674.3920288085938, - "learning_rate": 4.961958743882742e-05, - "loss": 82.3515, - "step": 37510 - }, - { - "epoch": 0.1515855476593527, - "grad_norm": 835.2359008789062, - "learning_rate": 4.96189805723781e-05, - "loss": 81.7163, - "step": 37520 - }, - { - "epoch": 0.1516259489247203, - "grad_norm": 1307.296142578125, - "learning_rate": 4.96183732259691e-05, - "loss": 92.0704, - "step": 37530 - }, - { - "epoch": 0.15166635019008795, - "grad_norm": 808.1261596679688, - "learning_rate": 4.961776539961222e-05, - "loss": 82.1407, - "step": 37540 - }, - { - "epoch": 0.1517067514554556, - "grad_norm": 1993.1292724609375, - "learning_rate": 4.9617157093319326e-05, - "loss": 85.8768, - "step": 37550 - }, - { - "epoch": 0.1517471527208232, - "grad_norm": 784.2838134765625, - "learning_rate": 4.961654830710229e-05, - "loss": 92.2036, - "step": 37560 - }, - { - "epoch": 0.15178755398619084, - "grad_norm": 412.1575622558594, - "learning_rate": 4.961593904097297e-05, - "loss": 82.7349, - "step": 37570 - }, - { - "epoch": 0.15182795525155848, - "grad_norm": 726.8958129882812, - "learning_rate": 4.961532929494325e-05, - "loss": 71.485, - "step": 37580 - }, - { - "epoch": 0.15186835651692612, - "grad_norm": 1616.0050048828125, - "learning_rate": 4.9614719069025e-05, - "loss": 65.669, - "step": 37590 - }, - { - "epoch": 0.15190875778229374, - "grad_norm": 1195.6494140625, - "learning_rate": 4.9614108363230135e-05, - "loss": 120.262, - "step": 37600 - }, - { - "epoch": 0.15194915904766138, - "grad_norm": 785.0847778320312, - "learning_rate": 4.961349717757056e-05, - "loss": 55.0058, - "step": 37610 - }, - { - "epoch": 0.15198956031302902, - "grad_norm": 367.303466796875, - "learning_rate": 4.961288551205818e-05, - "loss": 85.8025, - "step": 37620 - }, - { - "epoch": 0.15202996157839663, - "grad_norm": 1064.3392333984375, - "learning_rate": 4.961227336670493e-05, - "loss": 120.8695, - "step": 37630 - }, - { - "epoch": 0.15207036284376427, - "grad_norm": 963.6973876953125, - "learning_rate": 4.961166074152274e-05, - "loss": 119.0628, - "step": 37640 - }, - { - "epoch": 0.1521107641091319, - "grad_norm": 1672.767333984375, - "learning_rate": 4.961104763652355e-05, - "loss": 100.2525, - "step": 37650 - }, - { - "epoch": 0.15215116537449952, - "grad_norm": 850.4930419921875, - "learning_rate": 4.961043405171931e-05, - "loss": 63.3998, - "step": 37660 - }, - { - "epoch": 0.15219156663986716, - "grad_norm": 1047.08935546875, - "learning_rate": 4.9609819987122e-05, - "loss": 78.7605, - "step": 37670 - }, - { - "epoch": 0.1522319679052348, - "grad_norm": 689.37744140625, - "learning_rate": 4.9609205442743566e-05, - "loss": 100.5812, - "step": 37680 - }, - { - "epoch": 0.1522723691706024, - "grad_norm": 686.3281860351562, - "learning_rate": 4.9608590418596016e-05, - "loss": 58.7595, - "step": 37690 - }, - { - "epoch": 0.15231277043597005, - "grad_norm": 992.6712036132812, - "learning_rate": 4.9607974914691316e-05, - "loss": 117.9191, - "step": 37700 - }, - { - "epoch": 0.1523531717013377, - "grad_norm": 518.0152587890625, - "learning_rate": 4.960735893104148e-05, - "loss": 74.092, - "step": 37710 - }, - { - "epoch": 0.1523935729667053, - "grad_norm": 0.0, - "learning_rate": 4.960674246765851e-05, - "loss": 74.5977, - "step": 37720 - }, - { - "epoch": 0.15243397423207294, - "grad_norm": 524.2943115234375, - "learning_rate": 4.9606125524554434e-05, - "loss": 47.5876, - "step": 37730 - }, - { - "epoch": 0.15247437549744058, - "grad_norm": 1206.4256591796875, - "learning_rate": 4.960550810174126e-05, - "loss": 84.7391, - "step": 37740 - }, - { - "epoch": 0.15251477676280822, - "grad_norm": 762.7168579101562, - "learning_rate": 4.960489019923105e-05, - "loss": 67.8448, - "step": 37750 - }, - { - "epoch": 0.15255517802817584, - "grad_norm": 1341.15869140625, - "learning_rate": 4.9604271817035834e-05, - "loss": 112.8694, - "step": 37760 - }, - { - "epoch": 0.15259557929354348, - "grad_norm": 603.6417236328125, - "learning_rate": 4.960365295516767e-05, - "loss": 123.1844, - "step": 37770 - }, - { - "epoch": 0.15263598055891112, - "grad_norm": 868.97998046875, - "learning_rate": 4.9603033613638626e-05, - "loss": 75.7884, - "step": 37780 - }, - { - "epoch": 0.15267638182427873, - "grad_norm": 696.1629638671875, - "learning_rate": 4.9602413792460776e-05, - "loss": 82.7375, - "step": 37790 - }, - { - "epoch": 0.15271678308964637, - "grad_norm": 713.6837768554688, - "learning_rate": 4.960179349164621e-05, - "loss": 60.8608, - "step": 37800 - }, - { - "epoch": 0.152757184355014, - "grad_norm": 903.1224365234375, - "learning_rate": 4.9601172711207005e-05, - "loss": 111.0102, - "step": 37810 - }, - { - "epoch": 0.15279758562038162, - "grad_norm": 1082.6353759765625, - "learning_rate": 4.9600551451155274e-05, - "loss": 102.1816, - "step": 37820 - }, - { - "epoch": 0.15283798688574926, - "grad_norm": 1227.940673828125, - "learning_rate": 4.959992971150313e-05, - "loss": 98.8093, - "step": 37830 - }, - { - "epoch": 0.1528783881511169, - "grad_norm": 1349.7529296875, - "learning_rate": 4.959930749226269e-05, - "loss": 99.6163, - "step": 37840 - }, - { - "epoch": 0.1529187894164845, - "grad_norm": 1902.737548828125, - "learning_rate": 4.9598684793446085e-05, - "loss": 108.2246, - "step": 37850 - }, - { - "epoch": 0.15295919068185215, - "grad_norm": 888.9658203125, - "learning_rate": 4.959806161506545e-05, - "loss": 79.167, - "step": 37860 - }, - { - "epoch": 0.1529995919472198, - "grad_norm": 984.05322265625, - "learning_rate": 4.9597437957132955e-05, - "loss": 76.7845, - "step": 37870 - }, - { - "epoch": 0.1530399932125874, - "grad_norm": 849.2679443359375, - "learning_rate": 4.959681381966073e-05, - "loss": 122.3138, - "step": 37880 - }, - { - "epoch": 0.15308039447795505, - "grad_norm": 725.888916015625, - "learning_rate": 4.959618920266096e-05, - "loss": 73.6324, - "step": 37890 - }, - { - "epoch": 0.15312079574332269, - "grad_norm": 792.6710205078125, - "learning_rate": 4.959556410614582e-05, - "loss": 54.1202, - "step": 37900 - }, - { - "epoch": 0.15316119700869033, - "grad_norm": 583.5054931640625, - "learning_rate": 4.959493853012749e-05, - "loss": 81.214, - "step": 37910 - }, - { - "epoch": 0.15320159827405794, - "grad_norm": 793.6455688476562, - "learning_rate": 4.9594312474618175e-05, - "loss": 70.9016, - "step": 37920 - }, - { - "epoch": 0.15324199953942558, - "grad_norm": 1003.0465087890625, - "learning_rate": 4.959368593963007e-05, - "loss": 117.324, - "step": 37930 - }, - { - "epoch": 0.15328240080479322, - "grad_norm": 450.6336364746094, - "learning_rate": 4.9593058925175406e-05, - "loss": 95.6128, - "step": 37940 - }, - { - "epoch": 0.15332280207016083, - "grad_norm": 674.2539672851562, - "learning_rate": 4.959243143126639e-05, - "loss": 86.3896, - "step": 37950 - }, - { - "epoch": 0.15336320333552847, - "grad_norm": 396.0630798339844, - "learning_rate": 4.959180345791528e-05, - "loss": 80.5643, - "step": 37960 - }, - { - "epoch": 0.1534036046008961, - "grad_norm": 601.1489868164062, - "learning_rate": 4.9591175005134286e-05, - "loss": 105.7729, - "step": 37970 - }, - { - "epoch": 0.15344400586626372, - "grad_norm": 884.35693359375, - "learning_rate": 4.959054607293567e-05, - "loss": 100.5772, - "step": 37980 - }, - { - "epoch": 0.15348440713163136, - "grad_norm": 670.9600830078125, - "learning_rate": 4.95899166613317e-05, - "loss": 75.9151, - "step": 37990 - }, - { - "epoch": 0.153524808396999, - "grad_norm": 1574.9869384765625, - "learning_rate": 4.9589286770334654e-05, - "loss": 74.2453, - "step": 38000 - }, - { - "epoch": 0.15356520966236661, - "grad_norm": 1199.295654296875, - "learning_rate": 4.958865639995679e-05, - "loss": 89.8758, - "step": 38010 - }, - { - "epoch": 0.15360561092773425, - "grad_norm": 605.9471435546875, - "learning_rate": 4.958802555021042e-05, - "loss": 96.6403, - "step": 38020 - }, - { - "epoch": 0.1536460121931019, - "grad_norm": 1894.856201171875, - "learning_rate": 4.958739422110783e-05, - "loss": 87.7068, - "step": 38030 - }, - { - "epoch": 0.1536864134584695, - "grad_norm": 1081.8231201171875, - "learning_rate": 4.9586762412661333e-05, - "loss": 88.5522, - "step": 38040 - }, - { - "epoch": 0.15372681472383715, - "grad_norm": 452.8377685546875, - "learning_rate": 4.958613012488324e-05, - "loss": 75.0825, - "step": 38050 - }, - { - "epoch": 0.1537672159892048, - "grad_norm": 855.8710327148438, - "learning_rate": 4.958549735778589e-05, - "loss": 106.2082, - "step": 38060 - }, - { - "epoch": 0.15380761725457243, - "grad_norm": 0.0, - "learning_rate": 4.958486411138161e-05, - "loss": 50.9362, - "step": 38070 - }, - { - "epoch": 0.15384801851994004, - "grad_norm": 677.1991577148438, - "learning_rate": 4.958423038568274e-05, - "loss": 95.3129, - "step": 38080 - }, - { - "epoch": 0.15388841978530768, - "grad_norm": 470.16778564453125, - "learning_rate": 4.958359618070165e-05, - "loss": 92.6209, - "step": 38090 - }, - { - "epoch": 0.15392882105067532, - "grad_norm": 839.3685302734375, - "learning_rate": 4.958296149645069e-05, - "loss": 96.7531, - "step": 38100 - }, - { - "epoch": 0.15396922231604293, - "grad_norm": 1347.764892578125, - "learning_rate": 4.9582326332942244e-05, - "loss": 85.9113, - "step": 38110 - }, - { - "epoch": 0.15400962358141057, - "grad_norm": 452.386962890625, - "learning_rate": 4.958169069018869e-05, - "loss": 104.578, - "step": 38120 - }, - { - "epoch": 0.1540500248467782, - "grad_norm": 1746.528076171875, - "learning_rate": 4.958105456820242e-05, - "loss": 81.6607, - "step": 38130 - }, - { - "epoch": 0.15409042611214582, - "grad_norm": 862.51708984375, - "learning_rate": 4.958041796699583e-05, - "loss": 92.2541, - "step": 38140 - }, - { - "epoch": 0.15413082737751346, - "grad_norm": 1134.7611083984375, - "learning_rate": 4.957978088658134e-05, - "loss": 85.8542, - "step": 38150 - }, - { - "epoch": 0.1541712286428811, - "grad_norm": 737.6195068359375, - "learning_rate": 4.957914332697137e-05, - "loss": 84.3832, - "step": 38160 - }, - { - "epoch": 0.15421162990824872, - "grad_norm": 614.6725463867188, - "learning_rate": 4.957850528817834e-05, - "loss": 83.8153, - "step": 38170 - }, - { - "epoch": 0.15425203117361636, - "grad_norm": 848.0565795898438, - "learning_rate": 4.957786677021471e-05, - "loss": 72.8551, - "step": 38180 - }, - { - "epoch": 0.154292432438984, - "grad_norm": 1637.4676513671875, - "learning_rate": 4.9577227773092904e-05, - "loss": 69.5406, - "step": 38190 - }, - { - "epoch": 0.1543328337043516, - "grad_norm": 1447.67431640625, - "learning_rate": 4.9576588296825386e-05, - "loss": 60.8636, - "step": 38200 - }, - { - "epoch": 0.15437323496971925, - "grad_norm": 815.7072143554688, - "learning_rate": 4.9575948341424634e-05, - "loss": 86.5249, - "step": 38210 - }, - { - "epoch": 0.1544136362350869, - "grad_norm": 455.055419921875, - "learning_rate": 4.957530790690311e-05, - "loss": 87.8417, - "step": 38220 - }, - { - "epoch": 0.15445403750045453, - "grad_norm": 1008.4957275390625, - "learning_rate": 4.957466699327331e-05, - "loss": 111.3395, - "step": 38230 - }, - { - "epoch": 0.15449443876582214, - "grad_norm": 0.0, - "learning_rate": 4.957402560054773e-05, - "loss": 127.6305, - "step": 38240 - }, - { - "epoch": 0.15453484003118978, - "grad_norm": 2538.266357421875, - "learning_rate": 4.957338372873886e-05, - "loss": 126.8811, - "step": 38250 - }, - { - "epoch": 0.15457524129655742, - "grad_norm": 765.6140747070312, - "learning_rate": 4.957274137785922e-05, - "loss": 98.6367, - "step": 38260 - }, - { - "epoch": 0.15461564256192503, - "grad_norm": 1128.1064453125, - "learning_rate": 4.957209854792135e-05, - "loss": 115.5567, - "step": 38270 - }, - { - "epoch": 0.15465604382729267, - "grad_norm": 1656.940673828125, - "learning_rate": 4.957145523893776e-05, - "loss": 96.0133, - "step": 38280 - }, - { - "epoch": 0.1546964450926603, - "grad_norm": 776.5398559570312, - "learning_rate": 4.9570811450921e-05, - "loss": 96.1438, - "step": 38290 - }, - { - "epoch": 0.15473684635802792, - "grad_norm": 541.0315551757812, - "learning_rate": 4.957016718388362e-05, - "loss": 94.2679, - "step": 38300 - }, - { - "epoch": 0.15477724762339556, - "grad_norm": 1425.260498046875, - "learning_rate": 4.956952243783818e-05, - "loss": 85.9477, - "step": 38310 - }, - { - "epoch": 0.1548176488887632, - "grad_norm": 495.0509948730469, - "learning_rate": 4.956887721279726e-05, - "loss": 128.2712, - "step": 38320 - }, - { - "epoch": 0.15485805015413082, - "grad_norm": 1667.0662841796875, - "learning_rate": 4.956823150877342e-05, - "loss": 81.26, - "step": 38330 - }, - { - "epoch": 0.15489845141949846, - "grad_norm": 483.2498474121094, - "learning_rate": 4.956758532577926e-05, - "loss": 76.7887, - "step": 38340 - }, - { - "epoch": 0.1549388526848661, - "grad_norm": 940.1998901367188, - "learning_rate": 4.9566938663827377e-05, - "loss": 62.8369, - "step": 38350 - }, - { - "epoch": 0.1549792539502337, - "grad_norm": 826.6288452148438, - "learning_rate": 4.9566291522930375e-05, - "loss": 82.23, - "step": 38360 - }, - { - "epoch": 0.15501965521560135, - "grad_norm": 596.3945922851562, - "learning_rate": 4.956564390310088e-05, - "loss": 68.8034, - "step": 38370 - }, - { - "epoch": 0.155060056480969, - "grad_norm": 843.1058349609375, - "learning_rate": 4.95649958043515e-05, - "loss": 92.2614, - "step": 38380 - }, - { - "epoch": 0.15510045774633663, - "grad_norm": 368.0083923339844, - "learning_rate": 4.956434722669489e-05, - "loss": 80.4238, - "step": 38390 - }, - { - "epoch": 0.15514085901170424, - "grad_norm": 527.663330078125, - "learning_rate": 4.9563698170143666e-05, - "loss": 98.6083, - "step": 38400 - }, - { - "epoch": 0.15518126027707188, - "grad_norm": 847.8034057617188, - "learning_rate": 4.9563048634710516e-05, - "loss": 154.9312, - "step": 38410 - }, - { - "epoch": 0.15522166154243952, - "grad_norm": 872.8273315429688, - "learning_rate": 4.956239862040808e-05, - "loss": 89.4222, - "step": 38420 - }, - { - "epoch": 0.15526206280780713, - "grad_norm": 2756.822509765625, - "learning_rate": 4.956174812724904e-05, - "loss": 77.4832, - "step": 38430 - }, - { - "epoch": 0.15530246407317477, - "grad_norm": 405.55841064453125, - "learning_rate": 4.956109715524608e-05, - "loss": 104.6995, - "step": 38440 - }, - { - "epoch": 0.1553428653385424, - "grad_norm": 514.676513671875, - "learning_rate": 4.956044570441188e-05, - "loss": 112.402, - "step": 38450 - }, - { - "epoch": 0.15538326660391003, - "grad_norm": 867.9771118164062, - "learning_rate": 4.955979377475915e-05, - "loss": 72.6036, - "step": 38460 - }, - { - "epoch": 0.15542366786927767, - "grad_norm": 407.26898193359375, - "learning_rate": 4.9559141366300594e-05, - "loss": 89.936, - "step": 38470 - }, - { - "epoch": 0.1554640691346453, - "grad_norm": 906.7838745117188, - "learning_rate": 4.955848847904894e-05, - "loss": 111.0247, - "step": 38480 - }, - { - "epoch": 0.15550447040001292, - "grad_norm": 907.695068359375, - "learning_rate": 4.955783511301689e-05, - "loss": 87.2485, - "step": 38490 - }, - { - "epoch": 0.15554487166538056, - "grad_norm": 1016.2330932617188, - "learning_rate": 4.9557181268217227e-05, - "loss": 130.9673, - "step": 38500 - }, - { - "epoch": 0.1555852729307482, - "grad_norm": 1297.313232421875, - "learning_rate": 4.955652694466265e-05, - "loss": 104.8052, - "step": 38510 - }, - { - "epoch": 0.1556256741961158, - "grad_norm": 704.80126953125, - "learning_rate": 4.9555872142365945e-05, - "loss": 97.9365, - "step": 38520 - }, - { - "epoch": 0.15566607546148345, - "grad_norm": 1771.6796875, - "learning_rate": 4.9555216861339876e-05, - "loss": 74.8663, - "step": 38530 - }, - { - "epoch": 0.1557064767268511, - "grad_norm": 527.6610717773438, - "learning_rate": 4.9554561101597206e-05, - "loss": 99.9333, - "step": 38540 - }, - { - "epoch": 0.15574687799221873, - "grad_norm": 380.7573547363281, - "learning_rate": 4.955390486315073e-05, - "loss": 72.2091, - "step": 38550 - }, - { - "epoch": 0.15578727925758634, - "grad_norm": 1313.071044921875, - "learning_rate": 4.955324814601324e-05, - "loss": 84.0544, - "step": 38560 - }, - { - "epoch": 0.15582768052295398, - "grad_norm": 879.2240600585938, - "learning_rate": 4.955259095019753e-05, - "loss": 100.0556, - "step": 38570 - }, - { - "epoch": 0.15586808178832162, - "grad_norm": 1069.8026123046875, - "learning_rate": 4.955193327571642e-05, - "loss": 60.2459, - "step": 38580 - }, - { - "epoch": 0.15590848305368923, - "grad_norm": 1302.9310302734375, - "learning_rate": 4.955127512258273e-05, - "loss": 92.8039, - "step": 38590 - }, - { - "epoch": 0.15594888431905687, - "grad_norm": 1001.041259765625, - "learning_rate": 4.95506164908093e-05, - "loss": 110.6155, - "step": 38600 - }, - { - "epoch": 0.15598928558442451, - "grad_norm": 895.0310668945312, - "learning_rate": 4.954995738040895e-05, - "loss": 107.8179, - "step": 38610 - }, - { - "epoch": 0.15602968684979213, - "grad_norm": 823.3916625976562, - "learning_rate": 4.954929779139455e-05, - "loss": 78.2662, - "step": 38620 - }, - { - "epoch": 0.15607008811515977, - "grad_norm": 409.09527587890625, - "learning_rate": 4.954863772377894e-05, - "loss": 123.7662, - "step": 38630 - }, - { - "epoch": 0.1561104893805274, - "grad_norm": 569.7661743164062, - "learning_rate": 4.9547977177575014e-05, - "loss": 89.6015, - "step": 38640 - }, - { - "epoch": 0.15615089064589502, - "grad_norm": 616.7923583984375, - "learning_rate": 4.954731615279563e-05, - "loss": 94.4896, - "step": 38650 - }, - { - "epoch": 0.15619129191126266, - "grad_norm": 479.69647216796875, - "learning_rate": 4.9546654649453675e-05, - "loss": 66.053, - "step": 38660 - }, - { - "epoch": 0.1562316931766303, - "grad_norm": 2162.659912109375, - "learning_rate": 4.954599266756205e-05, - "loss": 138.2849, - "step": 38670 - }, - { - "epoch": 0.1562720944419979, - "grad_norm": 729.6651000976562, - "learning_rate": 4.9545330207133664e-05, - "loss": 107.9969, - "step": 38680 - }, - { - "epoch": 0.15631249570736555, - "grad_norm": 1174.257568359375, - "learning_rate": 4.9544667268181436e-05, - "loss": 94.6718, - "step": 38690 - }, - { - "epoch": 0.1563528969727332, - "grad_norm": 638.7692260742188, - "learning_rate": 4.9544003850718266e-05, - "loss": 100.2124, - "step": 38700 - }, - { - "epoch": 0.15639329823810083, - "grad_norm": 991.5917358398438, - "learning_rate": 4.954333995475712e-05, - "loss": 58.1896, - "step": 38710 - }, - { - "epoch": 0.15643369950346844, - "grad_norm": 2954.35302734375, - "learning_rate": 4.954267558031092e-05, - "loss": 65.0267, - "step": 38720 - }, - { - "epoch": 0.15647410076883608, - "grad_norm": 1842.9337158203125, - "learning_rate": 4.954201072739262e-05, - "loss": 92.5574, - "step": 38730 - }, - { - "epoch": 0.15651450203420372, - "grad_norm": 1125.166748046875, - "learning_rate": 4.9541345396015193e-05, - "loss": 89.0116, - "step": 38740 - }, - { - "epoch": 0.15655490329957134, - "grad_norm": 420.5506896972656, - "learning_rate": 4.9540679586191605e-05, - "loss": 91.9055, - "step": 38750 - }, - { - "epoch": 0.15659530456493898, - "grad_norm": 1417.18115234375, - "learning_rate": 4.9540013297934826e-05, - "loss": 72.1004, - "step": 38760 - }, - { - "epoch": 0.15663570583030662, - "grad_norm": 711.8256225585938, - "learning_rate": 4.953934653125786e-05, - "loss": 110.1404, - "step": 38770 - }, - { - "epoch": 0.15667610709567423, - "grad_norm": 936.8480834960938, - "learning_rate": 4.9538679286173696e-05, - "loss": 63.1972, - "step": 38780 - }, - { - "epoch": 0.15671650836104187, - "grad_norm": 965.4445190429688, - "learning_rate": 4.953801156269534e-05, - "loss": 96.635, - "step": 38790 - }, - { - "epoch": 0.1567569096264095, - "grad_norm": 388.2055969238281, - "learning_rate": 4.953734336083583e-05, - "loss": 69.8237, - "step": 38800 - }, - { - "epoch": 0.15679731089177712, - "grad_norm": 1032.505126953125, - "learning_rate": 4.953667468060816e-05, - "loss": 80.3915, - "step": 38810 - }, - { - "epoch": 0.15683771215714476, - "grad_norm": 2164.00341796875, - "learning_rate": 4.95360055220254e-05, - "loss": 117.7873, - "step": 38820 - }, - { - "epoch": 0.1568781134225124, - "grad_norm": 820.93310546875, - "learning_rate": 4.9535335885100575e-05, - "loss": 106.5048, - "step": 38830 - }, - { - "epoch": 0.15691851468788, - "grad_norm": 513.0703735351562, - "learning_rate": 4.953466576984675e-05, - "loss": 67.942, - "step": 38840 - }, - { - "epoch": 0.15695891595324765, - "grad_norm": 479.7278137207031, - "learning_rate": 4.953399517627698e-05, - "loss": 92.5827, - "step": 38850 - }, - { - "epoch": 0.1569993172186153, - "grad_norm": 3824.49951171875, - "learning_rate": 4.953332410440435e-05, - "loss": 76.3334, - "step": 38860 - }, - { - "epoch": 0.15703971848398293, - "grad_norm": 492.0223693847656, - "learning_rate": 4.953265255424192e-05, - "loss": 90.9314, - "step": 38870 - }, - { - "epoch": 0.15708011974935054, - "grad_norm": 487.1667175292969, - "learning_rate": 4.953198052580281e-05, - "loss": 83.6485, - "step": 38880 - }, - { - "epoch": 0.15712052101471818, - "grad_norm": 821.9052734375, - "learning_rate": 4.953130801910011e-05, - "loss": 66.1471, - "step": 38890 - }, - { - "epoch": 0.15716092228008582, - "grad_norm": 501.9080810546875, - "learning_rate": 4.953063503414692e-05, - "loss": 59.9543, - "step": 38900 - }, - { - "epoch": 0.15720132354545344, - "grad_norm": 1479.7249755859375, - "learning_rate": 4.9529961570956383e-05, - "loss": 83.3378, - "step": 38910 - }, - { - "epoch": 0.15724172481082108, - "grad_norm": 636.5172119140625, - "learning_rate": 4.952928762954161e-05, - "loss": 94.2658, - "step": 38920 - }, - { - "epoch": 0.15728212607618872, - "grad_norm": 1404.88134765625, - "learning_rate": 4.952861320991575e-05, - "loss": 86.6847, - "step": 38930 - }, - { - "epoch": 0.15732252734155633, - "grad_norm": 1033.673828125, - "learning_rate": 4.952793831209195e-05, - "loss": 109.079, - "step": 38940 - }, - { - "epoch": 0.15736292860692397, - "grad_norm": 990.5618896484375, - "learning_rate": 4.952726293608335e-05, - "loss": 80.7069, - "step": 38950 - }, - { - "epoch": 0.1574033298722916, - "grad_norm": 675.2631225585938, - "learning_rate": 4.9526587081903145e-05, - "loss": 81.1813, - "step": 38960 - }, - { - "epoch": 0.15744373113765922, - "grad_norm": 911.697509765625, - "learning_rate": 4.9525910749564494e-05, - "loss": 107.4708, - "step": 38970 - }, - { - "epoch": 0.15748413240302686, - "grad_norm": 506.453369140625, - "learning_rate": 4.952523393908059e-05, - "loss": 94.5604, - "step": 38980 - }, - { - "epoch": 0.1575245336683945, - "grad_norm": 1586.1180419921875, - "learning_rate": 4.9524556650464616e-05, - "loss": 97.9683, - "step": 38990 - }, - { - "epoch": 0.1575649349337621, - "grad_norm": 525.472900390625, - "learning_rate": 4.952387888372979e-05, - "loss": 126.3809, - "step": 39000 - }, - { - "epoch": 0.15760533619912975, - "grad_norm": 2915.437255859375, - "learning_rate": 4.952320063888932e-05, - "loss": 100.4383, - "step": 39010 - }, - { - "epoch": 0.1576457374644974, - "grad_norm": 656.8854370117188, - "learning_rate": 4.952252191595643e-05, - "loss": 41.7866, - "step": 39020 - }, - { - "epoch": 0.15768613872986503, - "grad_norm": 1504.2880859375, - "learning_rate": 4.9521842714944345e-05, - "loss": 95.3066, - "step": 39030 - }, - { - "epoch": 0.15772653999523265, - "grad_norm": 425.61871337890625, - "learning_rate": 4.952116303586631e-05, - "loss": 89.983, - "step": 39040 - }, - { - "epoch": 0.15776694126060029, - "grad_norm": 1021.040771484375, - "learning_rate": 4.952048287873558e-05, - "loss": 103.6552, - "step": 39050 - }, - { - "epoch": 0.15780734252596793, - "grad_norm": 770.2539672851562, - "learning_rate": 4.9519802243565414e-05, - "loss": 88.245, - "step": 39060 - }, - { - "epoch": 0.15784774379133554, - "grad_norm": 509.4902038574219, - "learning_rate": 4.951912113036908e-05, - "loss": 125.9709, - "step": 39070 - }, - { - "epoch": 0.15788814505670318, - "grad_norm": 577.9376831054688, - "learning_rate": 4.951843953915985e-05, - "loss": 56.5803, - "step": 39080 - }, - { - "epoch": 0.15792854632207082, - "grad_norm": 1800.318115234375, - "learning_rate": 4.951775746995102e-05, - "loss": 126.1482, - "step": 39090 - }, - { - "epoch": 0.15796894758743843, - "grad_norm": 644.34521484375, - "learning_rate": 4.951707492275589e-05, - "loss": 62.9948, - "step": 39100 - }, - { - "epoch": 0.15800934885280607, - "grad_norm": 2003.1749267578125, - "learning_rate": 4.9516391897587764e-05, - "loss": 83.1591, - "step": 39110 - }, - { - "epoch": 0.1580497501181737, - "grad_norm": 663.0642700195312, - "learning_rate": 4.951570839445995e-05, - "loss": 86.2755, - "step": 39120 - }, - { - "epoch": 0.15809015138354132, - "grad_norm": 2192.492919921875, - "learning_rate": 4.951502441338578e-05, - "loss": 69.6609, - "step": 39130 - }, - { - "epoch": 0.15813055264890896, - "grad_norm": 437.15411376953125, - "learning_rate": 4.951433995437859e-05, - "loss": 98.7007, - "step": 39140 - }, - { - "epoch": 0.1581709539142766, - "grad_norm": 643.024658203125, - "learning_rate": 4.951365501745172e-05, - "loss": 69.265, - "step": 39150 - }, - { - "epoch": 0.15821135517964421, - "grad_norm": 818.115234375, - "learning_rate": 4.951296960261853e-05, - "loss": 82.4836, - "step": 39160 - }, - { - "epoch": 0.15825175644501185, - "grad_norm": 0.0, - "learning_rate": 4.9512283709892374e-05, - "loss": 64.5836, - "step": 39170 - }, - { - "epoch": 0.1582921577103795, - "grad_norm": 476.3262634277344, - "learning_rate": 4.951159733928663e-05, - "loss": 113.6856, - "step": 39180 - }, - { - "epoch": 0.15833255897574713, - "grad_norm": 617.4471435546875, - "learning_rate": 4.9510910490814666e-05, - "loss": 53.7248, - "step": 39190 - }, - { - "epoch": 0.15837296024111475, - "grad_norm": 1832.762939453125, - "learning_rate": 4.95102231644899e-05, - "loss": 80.7921, - "step": 39200 - }, - { - "epoch": 0.1584133615064824, - "grad_norm": 736.6915283203125, - "learning_rate": 4.95095353603257e-05, - "loss": 81.6178, - "step": 39210 - }, - { - "epoch": 0.15845376277185003, - "grad_norm": 881.178466796875, - "learning_rate": 4.9508847078335495e-05, - "loss": 78.0822, - "step": 39220 - }, - { - "epoch": 0.15849416403721764, - "grad_norm": 482.23944091796875, - "learning_rate": 4.9508158318532696e-05, - "loss": 83.8018, - "step": 39230 - }, - { - "epoch": 0.15853456530258528, - "grad_norm": 717.8798217773438, - "learning_rate": 4.9507469080930734e-05, - "loss": 69.0619, - "step": 39240 - }, - { - "epoch": 0.15857496656795292, - "grad_norm": 502.1875305175781, - "learning_rate": 4.9506779365543046e-05, - "loss": 53.7278, - "step": 39250 - }, - { - "epoch": 0.15861536783332053, - "grad_norm": 437.0827331542969, - "learning_rate": 4.950608917238308e-05, - "loss": 79.8096, - "step": 39260 - }, - { - "epoch": 0.15865576909868817, - "grad_norm": 1342.6405029296875, - "learning_rate": 4.9505398501464284e-05, - "loss": 78.4081, - "step": 39270 - }, - { - "epoch": 0.1586961703640558, - "grad_norm": 849.6737670898438, - "learning_rate": 4.9504707352800125e-05, - "loss": 79.9384, - "step": 39280 - }, - { - "epoch": 0.15873657162942342, - "grad_norm": 925.02392578125, - "learning_rate": 4.95040157264041e-05, - "loss": 59.0614, - "step": 39290 - }, - { - "epoch": 0.15877697289479106, - "grad_norm": 1974.9837646484375, - "learning_rate": 4.9503323622289655e-05, - "loss": 91.3175, - "step": 39300 - }, - { - "epoch": 0.1588173741601587, - "grad_norm": 1192.585693359375, - "learning_rate": 4.950263104047031e-05, - "loss": 62.4365, - "step": 39310 - }, - { - "epoch": 0.15885777542552632, - "grad_norm": 1465.087158203125, - "learning_rate": 4.9501937980959545e-05, - "loss": 75.6416, - "step": 39320 - }, - { - "epoch": 0.15889817669089396, - "grad_norm": 669.2102661132812, - "learning_rate": 4.950124444377089e-05, - "loss": 101.0025, - "step": 39330 - }, - { - "epoch": 0.1589385779562616, - "grad_norm": 1937.0089111328125, - "learning_rate": 4.950055042891786e-05, - "loss": 82.6265, - "step": 39340 - }, - { - "epoch": 0.15897897922162924, - "grad_norm": 1510.9599609375, - "learning_rate": 4.949985593641399e-05, - "loss": 117.1577, - "step": 39350 - }, - { - "epoch": 0.15901938048699685, - "grad_norm": 1171.20703125, - "learning_rate": 4.949916096627282e-05, - "loss": 106.9967, - "step": 39360 - }, - { - "epoch": 0.1590597817523645, - "grad_norm": 1075.4632568359375, - "learning_rate": 4.949846551850788e-05, - "loss": 110.2698, - "step": 39370 - }, - { - "epoch": 0.15910018301773213, - "grad_norm": 1426.9444580078125, - "learning_rate": 4.949776959313275e-05, - "loss": 68.2341, - "step": 39380 - }, - { - "epoch": 0.15914058428309974, - "grad_norm": 734.63525390625, - "learning_rate": 4.9497073190160994e-05, - "loss": 97.0607, - "step": 39390 - }, - { - "epoch": 0.15918098554846738, - "grad_norm": 682.6970825195312, - "learning_rate": 4.949637630960617e-05, - "loss": 83.067, - "step": 39400 - }, - { - "epoch": 0.15922138681383502, - "grad_norm": 239.862060546875, - "learning_rate": 4.9495678951481896e-05, - "loss": 93.8866, - "step": 39410 - }, - { - "epoch": 0.15926178807920263, - "grad_norm": 1805.31591796875, - "learning_rate": 4.949498111580174e-05, - "loss": 81.097, - "step": 39420 - }, - { - "epoch": 0.15930218934457027, - "grad_norm": 1094.3526611328125, - "learning_rate": 4.949428280257932e-05, - "loss": 122.2046, - "step": 39430 - }, - { - "epoch": 0.1593425906099379, - "grad_norm": 395.03936767578125, - "learning_rate": 4.949358401182824e-05, - "loss": 80.3976, - "step": 39440 - }, - { - "epoch": 0.15938299187530552, - "grad_norm": 896.0783081054688, - "learning_rate": 4.949288474356213e-05, - "loss": 100.0894, - "step": 39450 - }, - { - "epoch": 0.15942339314067316, - "grad_norm": 1049.211669921875, - "learning_rate": 4.9492184997794624e-05, - "loss": 115.2995, - "step": 39460 - }, - { - "epoch": 0.1594637944060408, - "grad_norm": 1161.3402099609375, - "learning_rate": 4.949148477453936e-05, - "loss": 81.019, - "step": 39470 - }, - { - "epoch": 0.15950419567140842, - "grad_norm": 2834.7724609375, - "learning_rate": 4.949078407381e-05, - "loss": 96.1574, - "step": 39480 - }, - { - "epoch": 0.15954459693677606, - "grad_norm": 808.9170532226562, - "learning_rate": 4.949008289562019e-05, - "loss": 79.7376, - "step": 39490 - }, - { - "epoch": 0.1595849982021437, - "grad_norm": 530.290771484375, - "learning_rate": 4.94893812399836e-05, - "loss": 69.7185, - "step": 39500 - }, - { - "epoch": 0.1596253994675113, - "grad_norm": 1201.916015625, - "learning_rate": 4.9488679106913924e-05, - "loss": 76.6368, - "step": 39510 - }, - { - "epoch": 0.15966580073287895, - "grad_norm": 1412.6275634765625, - "learning_rate": 4.948797649642484e-05, - "loss": 64.3421, - "step": 39520 - }, - { - "epoch": 0.1597062019982466, - "grad_norm": 927.9067993164062, - "learning_rate": 4.9487273408530044e-05, - "loss": 103.6948, - "step": 39530 - }, - { - "epoch": 0.15974660326361423, - "grad_norm": 722.8312377929688, - "learning_rate": 4.9486569843243244e-05, - "loss": 76.603, - "step": 39540 - }, - { - "epoch": 0.15978700452898184, - "grad_norm": 606.1805419921875, - "learning_rate": 4.948586580057816e-05, - "loss": 96.2999, - "step": 39550 - }, - { - "epoch": 0.15982740579434948, - "grad_norm": 787.3512573242188, - "learning_rate": 4.948516128054852e-05, - "loss": 91.6384, - "step": 39560 - }, - { - "epoch": 0.15986780705971712, - "grad_norm": 477.5809631347656, - "learning_rate": 4.948445628316805e-05, - "loss": 84.977, - "step": 39570 - }, - { - "epoch": 0.15990820832508473, - "grad_norm": 2748.46630859375, - "learning_rate": 4.94837508084505e-05, - "loss": 113.5827, - "step": 39580 - }, - { - "epoch": 0.15994860959045237, - "grad_norm": 1089.883544921875, - "learning_rate": 4.948304485640963e-05, - "loss": 110.2332, - "step": 39590 - }, - { - "epoch": 0.15998901085582, - "grad_norm": 2496.34326171875, - "learning_rate": 4.948233842705919e-05, - "loss": 62.8154, - "step": 39600 - }, - { - "epoch": 0.16002941212118763, - "grad_norm": 1067.39501953125, - "learning_rate": 4.948163152041295e-05, - "loss": 92.6578, - "step": 39610 - }, - { - "epoch": 0.16006981338655527, - "grad_norm": 401.0006408691406, - "learning_rate": 4.948092413648471e-05, - "loss": 94.2235, - "step": 39620 - }, - { - "epoch": 0.1601102146519229, - "grad_norm": 599.7701416015625, - "learning_rate": 4.948021627528825e-05, - "loss": 119.0192, - "step": 39630 - }, - { - "epoch": 0.16015061591729052, - "grad_norm": 5756.70068359375, - "learning_rate": 4.9479507936837364e-05, - "loss": 117.7867, - "step": 39640 - }, - { - "epoch": 0.16019101718265816, - "grad_norm": 1125.0499267578125, - "learning_rate": 4.947879912114588e-05, - "loss": 90.0595, - "step": 39650 - }, - { - "epoch": 0.1602314184480258, - "grad_norm": 5096.72216796875, - "learning_rate": 4.947808982822759e-05, - "loss": 109.1604, - "step": 39660 - }, - { - "epoch": 0.1602718197133934, - "grad_norm": 750.5372314453125, - "learning_rate": 4.9477380058096343e-05, - "loss": 75.6724, - "step": 39670 - }, - { - "epoch": 0.16031222097876105, - "grad_norm": 721.302734375, - "learning_rate": 4.947666981076597e-05, - "loss": 76.214, - "step": 39680 - }, - { - "epoch": 0.1603526222441287, - "grad_norm": 430.94671630859375, - "learning_rate": 4.947595908625032e-05, - "loss": 78.1926, - "step": 39690 - }, - { - "epoch": 0.16039302350949633, - "grad_norm": 871.6290893554688, - "learning_rate": 4.947524788456325e-05, - "loss": 56.1409, - "step": 39700 - }, - { - "epoch": 0.16043342477486394, - "grad_norm": 1097.3226318359375, - "learning_rate": 4.9474536205718615e-05, - "loss": 70.7656, - "step": 39710 - }, - { - "epoch": 0.16047382604023158, - "grad_norm": 821.6055908203125, - "learning_rate": 4.94738240497303e-05, - "loss": 116.6263, - "step": 39720 - }, - { - "epoch": 0.16051422730559922, - "grad_norm": 988.2919311523438, - "learning_rate": 4.947311141661218e-05, - "loss": 75.0355, - "step": 39730 - }, - { - "epoch": 0.16055462857096683, - "grad_norm": 1874.0450439453125, - "learning_rate": 4.947239830637815e-05, - "loss": 93.3599, - "step": 39740 - }, - { - "epoch": 0.16059502983633447, - "grad_norm": 1665.5904541015625, - "learning_rate": 4.947168471904213e-05, - "loss": 110.6867, - "step": 39750 - }, - { - "epoch": 0.16063543110170211, - "grad_norm": 768.0989990234375, - "learning_rate": 4.947097065461801e-05, - "loss": 74.4268, - "step": 39760 - }, - { - "epoch": 0.16067583236706973, - "grad_norm": 1051.240234375, - "learning_rate": 4.947025611311972e-05, - "loss": 114.7893, - "step": 39770 - }, - { - "epoch": 0.16071623363243737, - "grad_norm": 4345.51953125, - "learning_rate": 4.946954109456118e-05, - "loss": 108.7796, - "step": 39780 - }, - { - "epoch": 0.160756634897805, - "grad_norm": 1246.1304931640625, - "learning_rate": 4.946882559895635e-05, - "loss": 58.0561, - "step": 39790 - }, - { - "epoch": 0.16079703616317262, - "grad_norm": 3426.0712890625, - "learning_rate": 4.946810962631916e-05, - "loss": 86.2152, - "step": 39800 - }, - { - "epoch": 0.16083743742854026, - "grad_norm": 971.04052734375, - "learning_rate": 4.9467393176663576e-05, - "loss": 80.0207, - "step": 39810 - }, - { - "epoch": 0.1608778386939079, - "grad_norm": 791.5706787109375, - "learning_rate": 4.9466676250003576e-05, - "loss": 83.3073, - "step": 39820 - }, - { - "epoch": 0.1609182399592755, - "grad_norm": 1009.4277954101562, - "learning_rate": 4.9465958846353114e-05, - "loss": 81.5609, - "step": 39830 - }, - { - "epoch": 0.16095864122464315, - "grad_norm": 645.2051391601562, - "learning_rate": 4.9465240965726195e-05, - "loss": 110.7899, - "step": 39840 - }, - { - "epoch": 0.1609990424900108, - "grad_norm": 2333.77294921875, - "learning_rate": 4.9464522608136805e-05, - "loss": 67.2439, - "step": 39850 - }, - { - "epoch": 0.16103944375537843, - "grad_norm": 657.8132934570312, - "learning_rate": 4.946380377359895e-05, - "loss": 60.2396, - "step": 39860 - }, - { - "epoch": 0.16107984502074604, - "grad_norm": 1080.5504150390625, - "learning_rate": 4.9463084462126655e-05, - "loss": 101.5745, - "step": 39870 - }, - { - "epoch": 0.16112024628611368, - "grad_norm": 550.1509399414062, - "learning_rate": 4.946236467373392e-05, - "loss": 76.9606, - "step": 39880 - }, - { - "epoch": 0.16116064755148132, - "grad_norm": 686.2342529296875, - "learning_rate": 4.94616444084348e-05, - "loss": 87.1771, - "step": 39890 - }, - { - "epoch": 0.16120104881684894, - "grad_norm": 563.68017578125, - "learning_rate": 4.946092366624333e-05, - "loss": 90.5833, - "step": 39900 - }, - { - "epoch": 0.16124145008221658, - "grad_norm": 508.9067687988281, - "learning_rate": 4.946020244717355e-05, - "loss": 110.8761, - "step": 39910 - }, - { - "epoch": 0.16128185134758422, - "grad_norm": 447.7995910644531, - "learning_rate": 4.945948075123954e-05, - "loss": 84.4401, - "step": 39920 - }, - { - "epoch": 0.16132225261295183, - "grad_norm": 4074.44482421875, - "learning_rate": 4.9458758578455354e-05, - "loss": 130.2105, - "step": 39930 - }, - { - "epoch": 0.16136265387831947, - "grad_norm": 434.82470703125, - "learning_rate": 4.945803592883509e-05, - "loss": 76.4253, - "step": 39940 - }, - { - "epoch": 0.1614030551436871, - "grad_norm": 761.01123046875, - "learning_rate": 4.945731280239281e-05, - "loss": 106.6753, - "step": 39950 - }, - { - "epoch": 0.16144345640905472, - "grad_norm": 1182.587646484375, - "learning_rate": 4.9456589199142637e-05, - "loss": 101.2267, - "step": 39960 - }, - { - "epoch": 0.16148385767442236, - "grad_norm": 1377.45947265625, - "learning_rate": 4.945586511909865e-05, - "loss": 135.843, - "step": 39970 - }, - { - "epoch": 0.16152425893979, - "grad_norm": 1164.2666015625, - "learning_rate": 4.9455140562274995e-05, - "loss": 104.891, - "step": 39980 - }, - { - "epoch": 0.1615646602051576, - "grad_norm": 602.760498046875, - "learning_rate": 4.9454415528685785e-05, - "loss": 117.2372, - "step": 39990 - }, - { - "epoch": 0.16160506147052525, - "grad_norm": 1094.737548828125, - "learning_rate": 4.9453690018345144e-05, - "loss": 85.9165, - "step": 40000 - }, - { - "epoch": 0.1616454627358929, - "grad_norm": 599.16015625, - "learning_rate": 4.9452964031267236e-05, - "loss": 69.1269, - "step": 40010 - }, - { - "epoch": 0.16168586400126053, - "grad_norm": 613.2879028320312, - "learning_rate": 4.9452237567466194e-05, - "loss": 88.8299, - "step": 40020 - }, - { - "epoch": 0.16172626526662814, - "grad_norm": 735.5689086914062, - "learning_rate": 4.9451510626956196e-05, - "loss": 76.922, - "step": 40030 - }, - { - "epoch": 0.16176666653199578, - "grad_norm": 696.985595703125, - "learning_rate": 4.945078320975142e-05, - "loss": 79.6425, - "step": 40040 - }, - { - "epoch": 0.16180706779736342, - "grad_norm": 1030.8795166015625, - "learning_rate": 4.9450055315866026e-05, - "loss": 104.0844, - "step": 40050 - }, - { - "epoch": 0.16184746906273104, - "grad_norm": 3068.77294921875, - "learning_rate": 4.944932694531422e-05, - "loss": 85.7902, - "step": 40060 - }, - { - "epoch": 0.16188787032809868, - "grad_norm": 483.2486267089844, - "learning_rate": 4.94485980981102e-05, - "loss": 63.3755, - "step": 40070 - }, - { - "epoch": 0.16192827159346632, - "grad_norm": 2090.39208984375, - "learning_rate": 4.9447868774268166e-05, - "loss": 83.095, - "step": 40080 - }, - { - "epoch": 0.16196867285883393, - "grad_norm": 837.720947265625, - "learning_rate": 4.944713897380235e-05, - "loss": 68.0856, - "step": 40090 - }, - { - "epoch": 0.16200907412420157, - "grad_norm": 3449.74267578125, - "learning_rate": 4.9446408696726974e-05, - "loss": 80.644, - "step": 40100 - }, - { - "epoch": 0.1620494753895692, - "grad_norm": 532.7826538085938, - "learning_rate": 4.944567794305627e-05, - "loss": 72.2116, - "step": 40110 - }, - { - "epoch": 0.16208987665493682, - "grad_norm": 518.95556640625, - "learning_rate": 4.9444946712804494e-05, - "loss": 95.1311, - "step": 40120 - }, - { - "epoch": 0.16213027792030446, - "grad_norm": 2341.940673828125, - "learning_rate": 4.944421500598589e-05, - "loss": 101.21, - "step": 40130 - }, - { - "epoch": 0.1621706791856721, - "grad_norm": 694.1212158203125, - "learning_rate": 4.944348282261474e-05, - "loss": 124.4218, - "step": 40140 - }, - { - "epoch": 0.1622110804510397, - "grad_norm": 900.7708129882812, - "learning_rate": 4.9442750162705295e-05, - "loss": 89.3225, - "step": 40150 - }, - { - "epoch": 0.16225148171640735, - "grad_norm": 1264.3707275390625, - "learning_rate": 4.9442017026271864e-05, - "loss": 116.5199, - "step": 40160 - }, - { - "epoch": 0.162291882981775, - "grad_norm": 853.1660766601562, - "learning_rate": 4.944128341332872e-05, - "loss": 101.1801, - "step": 40170 - }, - { - "epoch": 0.16233228424714263, - "grad_norm": 696.7931518554688, - "learning_rate": 4.9440549323890176e-05, - "loss": 81.5634, - "step": 40180 - }, - { - "epoch": 0.16237268551251025, - "grad_norm": 1033.1129150390625, - "learning_rate": 4.9439814757970535e-05, - "loss": 79.6033, - "step": 40190 - }, - { - "epoch": 0.16241308677787789, - "grad_norm": 605.3699951171875, - "learning_rate": 4.9439079715584135e-05, - "loss": 107.7314, - "step": 40200 - }, - { - "epoch": 0.16245348804324553, - "grad_norm": 3663.65380859375, - "learning_rate": 4.943834419674529e-05, - "loss": 97.9108, - "step": 40210 - }, - { - "epoch": 0.16249388930861314, - "grad_norm": 504.4712829589844, - "learning_rate": 4.9437608201468336e-05, - "loss": 79.6529, - "step": 40220 - }, - { - "epoch": 0.16253429057398078, - "grad_norm": 759.597412109375, - "learning_rate": 4.9436871729767634e-05, - "loss": 65.1314, - "step": 40230 - }, - { - "epoch": 0.16257469183934842, - "grad_norm": 810.0010986328125, - "learning_rate": 4.943613478165753e-05, - "loss": 76.6986, - "step": 40240 - }, - { - "epoch": 0.16261509310471603, - "grad_norm": 701.2088623046875, - "learning_rate": 4.94353973571524e-05, - "loss": 59.842, - "step": 40250 - }, - { - "epoch": 0.16265549437008367, - "grad_norm": 460.05841064453125, - "learning_rate": 4.943465945626662e-05, - "loss": 74.4412, - "step": 40260 - }, - { - "epoch": 0.1626958956354513, - "grad_norm": 1123.1805419921875, - "learning_rate": 4.943392107901458e-05, - "loss": 141.1575, - "step": 40270 - }, - { - "epoch": 0.16273629690081892, - "grad_norm": 524.522705078125, - "learning_rate": 4.943318222541066e-05, - "loss": 117.3683, - "step": 40280 - }, - { - "epoch": 0.16277669816618656, - "grad_norm": 406.4377136230469, - "learning_rate": 4.943244289546928e-05, - "loss": 68.4648, - "step": 40290 - }, - { - "epoch": 0.1628170994315542, - "grad_norm": 752.1702880859375, - "learning_rate": 4.943170308920484e-05, - "loss": 74.2303, - "step": 40300 - }, - { - "epoch": 0.16285750069692181, - "grad_norm": 1674.6246337890625, - "learning_rate": 4.943096280663178e-05, - "loss": 123.305, - "step": 40310 - }, - { - "epoch": 0.16289790196228945, - "grad_norm": 1532.0059814453125, - "learning_rate": 4.9430222047764506e-05, - "loss": 82.7036, - "step": 40320 - }, - { - "epoch": 0.1629383032276571, - "grad_norm": 723.3096923828125, - "learning_rate": 4.942948081261749e-05, - "loss": 131.268, - "step": 40330 - }, - { - "epoch": 0.16297870449302473, - "grad_norm": 1614.462646484375, - "learning_rate": 4.942873910120516e-05, - "loss": 109.0338, - "step": 40340 - }, - { - "epoch": 0.16301910575839235, - "grad_norm": 720.302734375, - "learning_rate": 4.9427996913542e-05, - "loss": 63.5732, - "step": 40350 - }, - { - "epoch": 0.16305950702376, - "grad_norm": 1045.8563232421875, - "learning_rate": 4.9427254249642444e-05, - "loss": 96.6388, - "step": 40360 - }, - { - "epoch": 0.16309990828912763, - "grad_norm": 983.675048828125, - "learning_rate": 4.9426511109521e-05, - "loss": 66.8883, - "step": 40370 - }, - { - "epoch": 0.16314030955449524, - "grad_norm": 668.4130249023438, - "learning_rate": 4.9425767493192144e-05, - "loss": 107.8524, - "step": 40380 - }, - { - "epoch": 0.16318071081986288, - "grad_norm": 542.26318359375, - "learning_rate": 4.942502340067038e-05, - "loss": 64.179, - "step": 40390 - }, - { - "epoch": 0.16322111208523052, - "grad_norm": 931.4650268554688, - "learning_rate": 4.942427883197021e-05, - "loss": 120.4847, - "step": 40400 - }, - { - "epoch": 0.16326151335059813, - "grad_norm": 606.0546264648438, - "learning_rate": 4.942353378710614e-05, - "loss": 54.7097, - "step": 40410 - }, - { - "epoch": 0.16330191461596577, - "grad_norm": 1016.0202026367188, - "learning_rate": 4.9422788266092715e-05, - "loss": 108.11, - "step": 40420 - }, - { - "epoch": 0.1633423158813334, - "grad_norm": 1137.9029541015625, - "learning_rate": 4.942204226894445e-05, - "loss": 105.8649, - "step": 40430 - }, - { - "epoch": 0.16338271714670102, - "grad_norm": 1075.5977783203125, - "learning_rate": 4.94212957956759e-05, - "loss": 101.3763, - "step": 40440 - }, - { - "epoch": 0.16342311841206866, - "grad_norm": 674.2771606445312, - "learning_rate": 4.942054884630162e-05, - "loss": 81.1654, - "step": 40450 - }, - { - "epoch": 0.1634635196774363, - "grad_norm": 582.1860961914062, - "learning_rate": 4.941980142083617e-05, - "loss": 78.6935, - "step": 40460 - }, - { - "epoch": 0.16350392094280392, - "grad_norm": 688.1861572265625, - "learning_rate": 4.9419053519294115e-05, - "loss": 79.6108, - "step": 40470 - }, - { - "epoch": 0.16354432220817156, - "grad_norm": 1063.1949462890625, - "learning_rate": 4.941830514169004e-05, - "loss": 85.3794, - "step": 40480 - }, - { - "epoch": 0.1635847234735392, - "grad_norm": 802.294189453125, - "learning_rate": 4.941755628803853e-05, - "loss": 86.5771, - "step": 40490 - }, - { - "epoch": 0.16362512473890684, - "grad_norm": 2401.399169921875, - "learning_rate": 4.94168069583542e-05, - "loss": 123.1953, - "step": 40500 - }, - { - "epoch": 0.16366552600427445, - "grad_norm": 472.39874267578125, - "learning_rate": 4.941605715265164e-05, - "loss": 40.4517, - "step": 40510 - }, - { - "epoch": 0.1637059272696421, - "grad_norm": 502.11492919921875, - "learning_rate": 4.941530687094548e-05, - "loss": 93.8861, - "step": 40520 - }, - { - "epoch": 0.16374632853500973, - "grad_norm": 481.69085693359375, - "learning_rate": 4.9414556113250344e-05, - "loss": 100.2681, - "step": 40530 - }, - { - "epoch": 0.16378672980037734, - "grad_norm": 718.2989501953125, - "learning_rate": 4.941380487958086e-05, - "loss": 67.0155, - "step": 40540 - }, - { - "epoch": 0.16382713106574498, - "grad_norm": 707.5899658203125, - "learning_rate": 4.941305316995169e-05, - "loss": 80.2658, - "step": 40550 - }, - { - "epoch": 0.16386753233111262, - "grad_norm": 411.375244140625, - "learning_rate": 4.941230098437747e-05, - "loss": 86.6765, - "step": 40560 - }, - { - "epoch": 0.16390793359648023, - "grad_norm": 1048.686279296875, - "learning_rate": 4.941154832287288e-05, - "loss": 90.3216, - "step": 40570 - }, - { - "epoch": 0.16394833486184787, - "grad_norm": 951.8493041992188, - "learning_rate": 4.941079518545258e-05, - "loss": 64.0513, - "step": 40580 - }, - { - "epoch": 0.1639887361272155, - "grad_norm": 5584.05908203125, - "learning_rate": 4.9410041572131266e-05, - "loss": 109.0281, - "step": 40590 - }, - { - "epoch": 0.16402913739258312, - "grad_norm": 1825.2513427734375, - "learning_rate": 4.940928748292363e-05, - "loss": 83.6246, - "step": 40600 - }, - { - "epoch": 0.16406953865795076, - "grad_norm": 562.28076171875, - "learning_rate": 4.940853291784435e-05, - "loss": 72.4572, - "step": 40610 - }, - { - "epoch": 0.1641099399233184, - "grad_norm": 2325.668212890625, - "learning_rate": 4.9407777876908174e-05, - "loss": 104.7029, - "step": 40620 - }, - { - "epoch": 0.16415034118868602, - "grad_norm": 1462.803955078125, - "learning_rate": 4.9407022360129796e-05, - "loss": 113.1334, - "step": 40630 - }, - { - "epoch": 0.16419074245405366, - "grad_norm": 539.4075927734375, - "learning_rate": 4.9406266367523945e-05, - "loss": 87.7285, - "step": 40640 - }, - { - "epoch": 0.1642311437194213, - "grad_norm": 1116.6656494140625, - "learning_rate": 4.940550989910537e-05, - "loss": 127.9007, - "step": 40650 - }, - { - "epoch": 0.16427154498478894, - "grad_norm": 745.6444702148438, - "learning_rate": 4.9404752954888824e-05, - "loss": 95.8032, - "step": 40660 - }, - { - "epoch": 0.16431194625015655, - "grad_norm": 1984.2926025390625, - "learning_rate": 4.9403995534889044e-05, - "loss": 61.1674, - "step": 40670 - }, - { - "epoch": 0.1643523475155242, - "grad_norm": 553.41748046875, - "learning_rate": 4.9403237639120805e-05, - "loss": 87.0418, - "step": 40680 - }, - { - "epoch": 0.16439274878089183, - "grad_norm": 786.6685791015625, - "learning_rate": 4.9402479267598887e-05, - "loss": 93.9468, - "step": 40690 - }, - { - "epoch": 0.16443315004625944, - "grad_norm": 826.0059814453125, - "learning_rate": 4.940172042033808e-05, - "loss": 102.5501, - "step": 40700 - }, - { - "epoch": 0.16447355131162708, - "grad_norm": 1732.5660400390625, - "learning_rate": 4.9400961097353166e-05, - "loss": 110.0081, - "step": 40710 - }, - { - "epoch": 0.16451395257699472, - "grad_norm": 989.8864135742188, - "learning_rate": 4.940020129865895e-05, - "loss": 63.4497, - "step": 40720 - }, - { - "epoch": 0.16455435384236233, - "grad_norm": 484.3291931152344, - "learning_rate": 4.939944102427025e-05, - "loss": 111.996, - "step": 40730 - }, - { - "epoch": 0.16459475510772997, - "grad_norm": 218.7427520751953, - "learning_rate": 4.939868027420189e-05, - "loss": 83.2038, - "step": 40740 - }, - { - "epoch": 0.1646351563730976, - "grad_norm": 3197.0927734375, - "learning_rate": 4.939791904846869e-05, - "loss": 99.5802, - "step": 40750 - }, - { - "epoch": 0.16467555763846523, - "grad_norm": 805.2986450195312, - "learning_rate": 4.93971573470855e-05, - "loss": 85.1965, - "step": 40760 - }, - { - "epoch": 0.16471595890383287, - "grad_norm": 846.134765625, - "learning_rate": 4.939639517006717e-05, - "loss": 67.5336, - "step": 40770 - }, - { - "epoch": 0.1647563601692005, - "grad_norm": 780.6873779296875, - "learning_rate": 4.939563251742855e-05, - "loss": 85.52, - "step": 40780 - }, - { - "epoch": 0.16479676143456812, - "grad_norm": 1174.27685546875, - "learning_rate": 4.939486938918451e-05, - "loss": 97.5447, - "step": 40790 - }, - { - "epoch": 0.16483716269993576, - "grad_norm": 763.7103271484375, - "learning_rate": 4.9394105785349944e-05, - "loss": 50.4769, - "step": 40800 - }, - { - "epoch": 0.1648775639653034, - "grad_norm": 704.8574829101562, - "learning_rate": 4.939334170593972e-05, - "loss": 107.5855, - "step": 40810 - }, - { - "epoch": 0.16491796523067104, - "grad_norm": 695.6289672851562, - "learning_rate": 4.9392577150968745e-05, - "loss": 119.9477, - "step": 40820 - }, - { - "epoch": 0.16495836649603865, - "grad_norm": 960.3919677734375, - "learning_rate": 4.939181212045192e-05, - "loss": 75.6716, - "step": 40830 - }, - { - "epoch": 0.1649987677614063, - "grad_norm": 930.9579467773438, - "learning_rate": 4.939104661440415e-05, - "loss": 75.375, - "step": 40840 - }, - { - "epoch": 0.16503916902677393, - "grad_norm": 517.5274658203125, - "learning_rate": 4.939028063284038e-05, - "loss": 48.7727, - "step": 40850 - }, - { - "epoch": 0.16507957029214154, - "grad_norm": 559.23974609375, - "learning_rate": 4.938951417577552e-05, - "loss": 88.6447, - "step": 40860 - }, - { - "epoch": 0.16511997155750918, - "grad_norm": 1341.4295654296875, - "learning_rate": 4.938874724322454e-05, - "loss": 58.3951, - "step": 40870 - }, - { - "epoch": 0.16516037282287682, - "grad_norm": 818.9078369140625, - "learning_rate": 4.938797983520237e-05, - "loss": 90.6307, - "step": 40880 - }, - { - "epoch": 0.16520077408824443, - "grad_norm": 719.2857055664062, - "learning_rate": 4.938721195172398e-05, - "loss": 89.5419, - "step": 40890 - }, - { - "epoch": 0.16524117535361207, - "grad_norm": 1062.7843017578125, - "learning_rate": 4.938644359280433e-05, - "loss": 57.1088, - "step": 40900 - }, - { - "epoch": 0.16528157661897971, - "grad_norm": 713.6543579101562, - "learning_rate": 4.938567475845841e-05, - "loss": 98.1421, - "step": 40910 - }, - { - "epoch": 0.16532197788434733, - "grad_norm": 1030.1905517578125, - "learning_rate": 4.938490544870121e-05, - "loss": 115.5957, - "step": 40920 - }, - { - "epoch": 0.16536237914971497, - "grad_norm": 4530.09912109375, - "learning_rate": 4.938413566354772e-05, - "loss": 105.8919, - "step": 40930 - }, - { - "epoch": 0.1654027804150826, - "grad_norm": 1015.1611328125, - "learning_rate": 4.938336540301295e-05, - "loss": 130.0979, - "step": 40940 - }, - { - "epoch": 0.16544318168045022, - "grad_norm": 974.16064453125, - "learning_rate": 4.938259466711193e-05, - "loss": 61.6297, - "step": 40950 - }, - { - "epoch": 0.16548358294581786, - "grad_norm": 874.3550415039062, - "learning_rate": 4.938182345585966e-05, - "loss": 76.4001, - "step": 40960 - }, - { - "epoch": 0.1655239842111855, - "grad_norm": 2546.218017578125, - "learning_rate": 4.938105176927119e-05, - "loss": 78.0685, - "step": 40970 - }, - { - "epoch": 0.16556438547655314, - "grad_norm": 1073.3936767578125, - "learning_rate": 4.9380279607361575e-05, - "loss": 72.0243, - "step": 40980 - }, - { - "epoch": 0.16560478674192075, - "grad_norm": 1150.05029296875, - "learning_rate": 4.937950697014585e-05, - "loss": 76.5943, - "step": 40990 - }, - { - "epoch": 0.1656451880072884, - "grad_norm": 482.436279296875, - "learning_rate": 4.937873385763908e-05, - "loss": 75.9616, - "step": 41000 - }, - { - "epoch": 0.16568558927265603, - "grad_norm": 650.821044921875, - "learning_rate": 4.9377960269856346e-05, - "loss": 80.1524, - "step": 41010 - }, - { - "epoch": 0.16572599053802364, - "grad_norm": 486.8204040527344, - "learning_rate": 4.937718620681273e-05, - "loss": 73.0701, - "step": 41020 - }, - { - "epoch": 0.16576639180339128, - "grad_norm": 1333.5208740234375, - "learning_rate": 4.937641166852332e-05, - "loss": 73.1544, - "step": 41030 - }, - { - "epoch": 0.16580679306875892, - "grad_norm": 528.667236328125, - "learning_rate": 4.937563665500321e-05, - "loss": 89.639, - "step": 41040 - }, - { - "epoch": 0.16584719433412654, - "grad_norm": 924.5604858398438, - "learning_rate": 4.937486116626752e-05, - "loss": 122.8974, - "step": 41050 - }, - { - "epoch": 0.16588759559949418, - "grad_norm": 1802.9498291015625, - "learning_rate": 4.9374085202331354e-05, - "loss": 69.5255, - "step": 41060 - }, - { - "epoch": 0.16592799686486182, - "grad_norm": 519.0206909179688, - "learning_rate": 4.937330876320985e-05, - "loss": 89.9981, - "step": 41070 - }, - { - "epoch": 0.16596839813022943, - "grad_norm": 561.7305908203125, - "learning_rate": 4.9372531848918145e-05, - "loss": 76.0487, - "step": 41080 - }, - { - "epoch": 0.16600879939559707, - "grad_norm": 740.5826416015625, - "learning_rate": 4.9371754459471384e-05, - "loss": 115.638, - "step": 41090 - }, - { - "epoch": 0.1660492006609647, - "grad_norm": 674.3680419921875, - "learning_rate": 4.9370976594884723e-05, - "loss": 73.8446, - "step": 41100 - }, - { - "epoch": 0.16608960192633232, - "grad_norm": 539.9810180664062, - "learning_rate": 4.937019825517333e-05, - "loss": 57.1198, - "step": 41110 - }, - { - "epoch": 0.16613000319169996, - "grad_norm": 1040.4820556640625, - "learning_rate": 4.936941944035237e-05, - "loss": 144.8092, - "step": 41120 - }, - { - "epoch": 0.1661704044570676, - "grad_norm": 849.093994140625, - "learning_rate": 4.936864015043703e-05, - "loss": 52.7319, - "step": 41130 - }, - { - "epoch": 0.16621080572243524, - "grad_norm": 641.831787109375, - "learning_rate": 4.936786038544251e-05, - "loss": 59.0339, - "step": 41140 - }, - { - "epoch": 0.16625120698780285, - "grad_norm": 554.9229736328125, - "learning_rate": 4.9367080145384006e-05, - "loss": 95.6639, - "step": 41150 - }, - { - "epoch": 0.1662916082531705, - "grad_norm": 392.3255920410156, - "learning_rate": 4.936629943027672e-05, - "loss": 96.7075, - "step": 41160 - }, - { - "epoch": 0.16633200951853813, - "grad_norm": 448.32647705078125, - "learning_rate": 4.936551824013589e-05, - "loss": 65.1692, - "step": 41170 - }, - { - "epoch": 0.16637241078390574, - "grad_norm": 992.157470703125, - "learning_rate": 4.9364736574976736e-05, - "loss": 116.2756, - "step": 41180 - }, - { - "epoch": 0.16641281204927338, - "grad_norm": 383.3032531738281, - "learning_rate": 4.93639544348145e-05, - "loss": 82.3815, - "step": 41190 - }, - { - "epoch": 0.16645321331464102, - "grad_norm": 3468.877685546875, - "learning_rate": 4.9363171819664434e-05, - "loss": 124.7114, - "step": 41200 - }, - { - "epoch": 0.16649361458000864, - "grad_norm": 959.5894165039062, - "learning_rate": 4.936238872954178e-05, - "loss": 115.3659, - "step": 41210 - }, - { - "epoch": 0.16653401584537628, - "grad_norm": 654.4769287109375, - "learning_rate": 4.936160516446182e-05, - "loss": 84.5516, - "step": 41220 - }, - { - "epoch": 0.16657441711074392, - "grad_norm": 513.7091064453125, - "learning_rate": 4.936082112443983e-05, - "loss": 72.7509, - "step": 41230 - }, - { - "epoch": 0.16661481837611153, - "grad_norm": 1236.0504150390625, - "learning_rate": 4.936003660949108e-05, - "loss": 82.1154, - "step": 41240 - }, - { - "epoch": 0.16665521964147917, - "grad_norm": 1588.6417236328125, - "learning_rate": 4.9359251619630886e-05, - "loss": 76.5787, - "step": 41250 - }, - { - "epoch": 0.1666956209068468, - "grad_norm": 851.9100341796875, - "learning_rate": 4.935846615487453e-05, - "loss": 73.6734, - "step": 41260 - }, - { - "epoch": 0.16673602217221442, - "grad_norm": 439.6374816894531, - "learning_rate": 4.935768021523734e-05, - "loss": 75.6803, - "step": 41270 - }, - { - "epoch": 0.16677642343758206, - "grad_norm": 2391.953125, - "learning_rate": 4.935689380073464e-05, - "loss": 167.1796, - "step": 41280 - }, - { - "epoch": 0.1668168247029497, - "grad_norm": 591.0051879882812, - "learning_rate": 4.935610691138175e-05, - "loss": 89.2333, - "step": 41290 - }, - { - "epoch": 0.16685722596831734, - "grad_norm": 895.1334228515625, - "learning_rate": 4.9355319547194014e-05, - "loss": 110.6889, - "step": 41300 - }, - { - "epoch": 0.16689762723368495, - "grad_norm": 873.7748413085938, - "learning_rate": 4.935453170818679e-05, - "loss": 82.6634, - "step": 41310 - }, - { - "epoch": 0.1669380284990526, - "grad_norm": 582.4258422851562, - "learning_rate": 4.935374339437543e-05, - "loss": 82.0953, - "step": 41320 - }, - { - "epoch": 0.16697842976442023, - "grad_norm": 1372.591064453125, - "learning_rate": 4.9352954605775305e-05, - "loss": 110.3943, - "step": 41330 - }, - { - "epoch": 0.16701883102978785, - "grad_norm": 1086.363525390625, - "learning_rate": 4.935216534240179e-05, - "loss": 103.8, - "step": 41340 - }, - { - "epoch": 0.16705923229515549, - "grad_norm": 1426.1312255859375, - "learning_rate": 4.935137560427027e-05, - "loss": 99.1618, - "step": 41350 - }, - { - "epoch": 0.16709963356052313, - "grad_norm": 1669.37744140625, - "learning_rate": 4.935058539139615e-05, - "loss": 122.2307, - "step": 41360 - }, - { - "epoch": 0.16714003482589074, - "grad_norm": 766.4739379882812, - "learning_rate": 4.934979470379484e-05, - "loss": 77.6077, - "step": 41370 - }, - { - "epoch": 0.16718043609125838, - "grad_norm": 846.854248046875, - "learning_rate": 4.934900354148173e-05, - "loss": 87.1017, - "step": 41380 - }, - { - "epoch": 0.16722083735662602, - "grad_norm": 1178.765869140625, - "learning_rate": 4.934821190447228e-05, - "loss": 118.6451, - "step": 41390 - }, - { - "epoch": 0.16726123862199363, - "grad_norm": 622.7171020507812, - "learning_rate": 4.9347419792781876e-05, - "loss": 92.5647, - "step": 41400 - }, - { - "epoch": 0.16730163988736127, - "grad_norm": 871.9891967773438, - "learning_rate": 4.934662720642601e-05, - "loss": 78.3886, - "step": 41410 - }, - { - "epoch": 0.1673420411527289, - "grad_norm": 601.3154296875, - "learning_rate": 4.934583414542011e-05, - "loss": 69.2431, - "step": 41420 - }, - { - "epoch": 0.16738244241809652, - "grad_norm": 620.5543823242188, - "learning_rate": 4.9345040609779634e-05, - "loss": 74.3392, - "step": 41430 - }, - { - "epoch": 0.16742284368346416, - "grad_norm": 803.2666625976562, - "learning_rate": 4.934424659952006e-05, - "loss": 90.4268, - "step": 41440 - }, - { - "epoch": 0.1674632449488318, - "grad_norm": 1084.6322021484375, - "learning_rate": 4.934345211465686e-05, - "loss": 91.4339, - "step": 41450 - }, - { - "epoch": 0.16750364621419944, - "grad_norm": 570.4534912109375, - "learning_rate": 4.934265715520553e-05, - "loss": 67.0539, - "step": 41460 - }, - { - "epoch": 0.16754404747956705, - "grad_norm": 5001.0390625, - "learning_rate": 4.934186172118157e-05, - "loss": 115.2946, - "step": 41470 - }, - { - "epoch": 0.1675844487449347, - "grad_norm": 0.0, - "learning_rate": 4.934106581260049e-05, - "loss": 71.3725, - "step": 41480 - }, - { - "epoch": 0.16762485001030233, - "grad_norm": 674.1163940429688, - "learning_rate": 4.934026942947779e-05, - "loss": 67.2382, - "step": 41490 - }, - { - "epoch": 0.16766525127566995, - "grad_norm": 1031.8499755859375, - "learning_rate": 4.933947257182901e-05, - "loss": 81.7795, - "step": 41500 - }, - { - "epoch": 0.1677056525410376, - "grad_norm": 1388.8055419921875, - "learning_rate": 4.933867523966968e-05, - "loss": 67.2926, - "step": 41510 - }, - { - "epoch": 0.16774605380640523, - "grad_norm": 1057.6483154296875, - "learning_rate": 4.933787743301534e-05, - "loss": 78.5172, - "step": 41520 - }, - { - "epoch": 0.16778645507177284, - "grad_norm": 1191.138427734375, - "learning_rate": 4.933707915188156e-05, - "loss": 115.7021, - "step": 41530 - }, - { - "epoch": 0.16782685633714048, - "grad_norm": 803.36376953125, - "learning_rate": 4.933628039628389e-05, - "loss": 80.2134, - "step": 41540 - }, - { - "epoch": 0.16786725760250812, - "grad_norm": 434.1454772949219, - "learning_rate": 4.9335481166237904e-05, - "loss": 93.0307, - "step": 41550 - }, - { - "epoch": 0.16790765886787573, - "grad_norm": 1040.9764404296875, - "learning_rate": 4.933468146175918e-05, - "loss": 108.4676, - "step": 41560 - }, - { - "epoch": 0.16794806013324337, - "grad_norm": 2650.668701171875, - "learning_rate": 4.933388128286331e-05, - "loss": 124.0231, - "step": 41570 - }, - { - "epoch": 0.167988461398611, - "grad_norm": 850.1209716796875, - "learning_rate": 4.933308062956591e-05, - "loss": 98.8213, - "step": 41580 - }, - { - "epoch": 0.16802886266397862, - "grad_norm": 1043.2742919921875, - "learning_rate": 4.9332279501882564e-05, - "loss": 86.2264, - "step": 41590 - }, - { - "epoch": 0.16806926392934626, - "grad_norm": 1196.578369140625, - "learning_rate": 4.93314778998289e-05, - "loss": 80.5056, - "step": 41600 - }, - { - "epoch": 0.1681096651947139, - "grad_norm": 738.3115234375, - "learning_rate": 4.933067582342056e-05, - "loss": 111.9835, - "step": 41610 - }, - { - "epoch": 0.16815006646008154, - "grad_norm": 828.2302856445312, - "learning_rate": 4.932987327267316e-05, - "loss": 87.0005, - "step": 41620 - }, - { - "epoch": 0.16819046772544916, - "grad_norm": 474.343017578125, - "learning_rate": 4.932907024760236e-05, - "loss": 93.0332, - "step": 41630 - }, - { - "epoch": 0.1682308689908168, - "grad_norm": 718.5823364257812, - "learning_rate": 4.93282667482238e-05, - "loss": 66.0602, - "step": 41640 - }, - { - "epoch": 0.16827127025618444, - "grad_norm": 491.5641784667969, - "learning_rate": 4.9327462774553166e-05, - "loss": 93.1942, - "step": 41650 - }, - { - "epoch": 0.16831167152155205, - "grad_norm": 244.37391662597656, - "learning_rate": 4.9326658326606114e-05, - "loss": 80.5441, - "step": 41660 - }, - { - "epoch": 0.1683520727869197, - "grad_norm": 621.1357421875, - "learning_rate": 4.9325853404398337e-05, - "loss": 107.9278, - "step": 41670 - }, - { - "epoch": 0.16839247405228733, - "grad_norm": 1189.0419921875, - "learning_rate": 4.9325048007945526e-05, - "loss": 98.2764, - "step": 41680 - }, - { - "epoch": 0.16843287531765494, - "grad_norm": 1160.5760498046875, - "learning_rate": 4.9324242137263376e-05, - "loss": 85.1483, - "step": 41690 - }, - { - "epoch": 0.16847327658302258, - "grad_norm": 717.5256958007812, - "learning_rate": 4.93234357923676e-05, - "loss": 68.967, - "step": 41700 - }, - { - "epoch": 0.16851367784839022, - "grad_norm": 738.8279418945312, - "learning_rate": 4.932262897327393e-05, - "loss": 94.0461, - "step": 41710 - }, - { - "epoch": 0.16855407911375783, - "grad_norm": 372.3353576660156, - "learning_rate": 4.9321821679998074e-05, - "loss": 107.4272, - "step": 41720 - }, - { - "epoch": 0.16859448037912547, - "grad_norm": 778.2406616210938, - "learning_rate": 4.932101391255579e-05, - "loss": 127.5508, - "step": 41730 - }, - { - "epoch": 0.1686348816444931, - "grad_norm": 296.44610595703125, - "learning_rate": 4.9320205670962814e-05, - "loss": 68.8699, - "step": 41740 - }, - { - "epoch": 0.16867528290986072, - "grad_norm": 1934.7763671875, - "learning_rate": 4.931939695523492e-05, - "loss": 96.5082, - "step": 41750 - }, - { - "epoch": 0.16871568417522836, - "grad_norm": 920.8310546875, - "learning_rate": 4.9318587765387845e-05, - "loss": 84.8141, - "step": 41760 - }, - { - "epoch": 0.168756085440596, - "grad_norm": 785.7876586914062, - "learning_rate": 4.93177781014374e-05, - "loss": 69.8683, - "step": 41770 - }, - { - "epoch": 0.16879648670596364, - "grad_norm": 747.9173583984375, - "learning_rate": 4.9316967963399335e-05, - "loss": 87.638, - "step": 41780 - }, - { - "epoch": 0.16883688797133126, - "grad_norm": 1194.38037109375, - "learning_rate": 4.931615735128947e-05, - "loss": 66.9209, - "step": 41790 - }, - { - "epoch": 0.1688772892366989, - "grad_norm": 944.1591796875, - "learning_rate": 4.9315346265123594e-05, - "loss": 80.2107, - "step": 41800 - }, - { - "epoch": 0.16891769050206654, - "grad_norm": 1037.114013671875, - "learning_rate": 4.9314534704917525e-05, - "loss": 82.2505, - "step": 41810 - }, - { - "epoch": 0.16895809176743415, - "grad_norm": 723.7459716796875, - "learning_rate": 4.931372267068708e-05, - "loss": 65.5946, - "step": 41820 - }, - { - "epoch": 0.1689984930328018, - "grad_norm": 800.6956176757812, - "learning_rate": 4.93129101624481e-05, - "loss": 88.3285, - "step": 41830 - }, - { - "epoch": 0.16903889429816943, - "grad_norm": 634.937744140625, - "learning_rate": 4.9312097180216414e-05, - "loss": 85.8466, - "step": 41840 - }, - { - "epoch": 0.16907929556353704, - "grad_norm": 438.7951965332031, - "learning_rate": 4.9311283724007887e-05, - "loss": 59.9148, - "step": 41850 - }, - { - "epoch": 0.16911969682890468, - "grad_norm": 6580.62451171875, - "learning_rate": 4.931046979383835e-05, - "loss": 127.95, - "step": 41860 - }, - { - "epoch": 0.16916009809427232, - "grad_norm": 1237.6083984375, - "learning_rate": 4.9309655389723705e-05, - "loss": 81.8312, - "step": 41870 - }, - { - "epoch": 0.16920049935963993, - "grad_norm": 645.1431274414062, - "learning_rate": 4.9308840511679804e-05, - "loss": 83.625, - "step": 41880 - }, - { - "epoch": 0.16924090062500757, - "grad_norm": 808.107177734375, - "learning_rate": 4.930802515972255e-05, - "loss": 82.5783, - "step": 41890 - }, - { - "epoch": 0.1692813018903752, - "grad_norm": 542.672119140625, - "learning_rate": 4.930720933386782e-05, - "loss": 77.4106, - "step": 41900 - }, - { - "epoch": 0.16932170315574283, - "grad_norm": 1336.5858154296875, - "learning_rate": 4.930639303413154e-05, - "loss": 124.7732, - "step": 41910 - }, - { - "epoch": 0.16936210442111047, - "grad_norm": 591.5770263671875, - "learning_rate": 4.9305576260529607e-05, - "loss": 65.4329, - "step": 41920 - }, - { - "epoch": 0.1694025056864781, - "grad_norm": 804.681396484375, - "learning_rate": 4.930475901307795e-05, - "loss": 76.3269, - "step": 41930 - }, - { - "epoch": 0.16944290695184575, - "grad_norm": 987.9590454101562, - "learning_rate": 4.930394129179251e-05, - "loss": 71.606, - "step": 41940 - }, - { - "epoch": 0.16948330821721336, - "grad_norm": 766.5450439453125, - "learning_rate": 4.930312309668922e-05, - "loss": 109.5266, - "step": 41950 - }, - { - "epoch": 0.169523709482581, - "grad_norm": 337.4928894042969, - "learning_rate": 4.930230442778403e-05, - "loss": 74.3608, - "step": 41960 - }, - { - "epoch": 0.16956411074794864, - "grad_norm": 3450.510009765625, - "learning_rate": 4.930148528509291e-05, - "loss": 109.2802, - "step": 41970 - }, - { - "epoch": 0.16960451201331625, - "grad_norm": 864.0614624023438, - "learning_rate": 4.930066566863182e-05, - "loss": 78.3614, - "step": 41980 - }, - { - "epoch": 0.1696449132786839, - "grad_norm": 930.443115234375, - "learning_rate": 4.929984557841674e-05, - "loss": 126.5678, - "step": 41990 - }, - { - "epoch": 0.16968531454405153, - "grad_norm": 416.3900146484375, - "learning_rate": 4.929902501446366e-05, - "loss": 62.9766, - "step": 42000 - }, - { - "epoch": 0.16972571580941914, - "grad_norm": 533.2973022460938, - "learning_rate": 4.929820397678858e-05, - "loss": 64.3053, - "step": 42010 - }, - { - "epoch": 0.16976611707478678, - "grad_norm": 357.4167175292969, - "learning_rate": 4.92973824654075e-05, - "loss": 43.4001, - "step": 42020 - }, - { - "epoch": 0.16980651834015442, - "grad_norm": 1577.94384765625, - "learning_rate": 4.929656048033644e-05, - "loss": 72.9057, - "step": 42030 - }, - { - "epoch": 0.16984691960552203, - "grad_norm": 4723.44775390625, - "learning_rate": 4.929573802159143e-05, - "loss": 78.4315, - "step": 42040 - }, - { - "epoch": 0.16988732087088967, - "grad_norm": 607.1101684570312, - "learning_rate": 4.929491508918849e-05, - "loss": 83.3789, - "step": 42050 - }, - { - "epoch": 0.16992772213625731, - "grad_norm": 527.4968872070312, - "learning_rate": 4.929409168314368e-05, - "loss": 48.889, - "step": 42060 - }, - { - "epoch": 0.16996812340162493, - "grad_norm": 1297.30029296875, - "learning_rate": 4.9293267803473046e-05, - "loss": 85.3707, - "step": 42070 - }, - { - "epoch": 0.17000852466699257, - "grad_norm": 993.5496215820312, - "learning_rate": 4.9292443450192645e-05, - "loss": 103.5595, - "step": 42080 - }, - { - "epoch": 0.1700489259323602, - "grad_norm": 948.1973876953125, - "learning_rate": 4.929161862331855e-05, - "loss": 91.8254, - "step": 42090 - }, - { - "epoch": 0.17008932719772785, - "grad_norm": 740.2516479492188, - "learning_rate": 4.929079332286685e-05, - "loss": 65.2068, - "step": 42100 - }, - { - "epoch": 0.17012972846309546, - "grad_norm": 745.3865356445312, - "learning_rate": 4.9289967548853627e-05, - "loss": 49.6714, - "step": 42110 - }, - { - "epoch": 0.1701701297284631, - "grad_norm": 1320.7667236328125, - "learning_rate": 4.928914130129498e-05, - "loss": 86.5712, - "step": 42120 - }, - { - "epoch": 0.17021053099383074, - "grad_norm": 1176.5162353515625, - "learning_rate": 4.928831458020702e-05, - "loss": 113.8014, - "step": 42130 - }, - { - "epoch": 0.17025093225919835, - "grad_norm": 566.5439453125, - "learning_rate": 4.928748738560586e-05, - "loss": 72.2683, - "step": 42140 - }, - { - "epoch": 0.170291333524566, - "grad_norm": 1156.8316650390625, - "learning_rate": 4.9286659717507635e-05, - "loss": 76.2927, - "step": 42150 - }, - { - "epoch": 0.17033173478993363, - "grad_norm": 719.6638793945312, - "learning_rate": 4.9285831575928465e-05, - "loss": 69.5816, - "step": 42160 - }, - { - "epoch": 0.17037213605530124, - "grad_norm": 1295.44384765625, - "learning_rate": 4.9285002960884515e-05, - "loss": 84.5863, - "step": 42170 - }, - { - "epoch": 0.17041253732066888, - "grad_norm": 1051.867431640625, - "learning_rate": 4.9284173872391925e-05, - "loss": 82.4616, - "step": 42180 - }, - { - "epoch": 0.17045293858603652, - "grad_norm": 859.74609375, - "learning_rate": 4.928334431046686e-05, - "loss": 111.8195, - "step": 42190 - }, - { - "epoch": 0.17049333985140414, - "grad_norm": 1075.9024658203125, - "learning_rate": 4.92825142751255e-05, - "loss": 85.3877, - "step": 42200 - }, - { - "epoch": 0.17053374111677178, - "grad_norm": 1071.4013671875, - "learning_rate": 4.9281683766384026e-05, - "loss": 77.8994, - "step": 42210 - }, - { - "epoch": 0.17057414238213942, - "grad_norm": 841.3255615234375, - "learning_rate": 4.9280852784258624e-05, - "loss": 104.7014, - "step": 42220 - }, - { - "epoch": 0.17061454364750703, - "grad_norm": 1585.5380859375, - "learning_rate": 4.928002132876549e-05, - "loss": 73.6184, - "step": 42230 - }, - { - "epoch": 0.17065494491287467, - "grad_norm": 1064.724609375, - "learning_rate": 4.9279189399920844e-05, - "loss": 80.9862, - "step": 42240 - }, - { - "epoch": 0.1706953461782423, - "grad_norm": 1124.08642578125, - "learning_rate": 4.9278356997740904e-05, - "loss": 81.2137, - "step": 42250 - }, - { - "epoch": 0.17073574744360995, - "grad_norm": 1296.80078125, - "learning_rate": 4.9277524122241894e-05, - "loss": 76.6494, - "step": 42260 - }, - { - "epoch": 0.17077614870897756, - "grad_norm": 705.2342529296875, - "learning_rate": 4.927669077344005e-05, - "loss": 124.0301, - "step": 42270 - }, - { - "epoch": 0.1708165499743452, - "grad_norm": 699.51123046875, - "learning_rate": 4.927585695135162e-05, - "loss": 90.7621, - "step": 42280 - }, - { - "epoch": 0.17085695123971284, - "grad_norm": 1193.2637939453125, - "learning_rate": 4.9275022655992864e-05, - "loss": 77.3909, - "step": 42290 - }, - { - "epoch": 0.17089735250508045, - "grad_norm": 1224.87744140625, - "learning_rate": 4.927418788738004e-05, - "loss": 90.4889, - "step": 42300 - }, - { - "epoch": 0.1709377537704481, - "grad_norm": 566.4915771484375, - "learning_rate": 4.927335264552943e-05, - "loss": 76.954, - "step": 42310 - }, - { - "epoch": 0.17097815503581573, - "grad_norm": 485.48248291015625, - "learning_rate": 4.9272516930457314e-05, - "loss": 85.5353, - "step": 42320 - }, - { - "epoch": 0.17101855630118334, - "grad_norm": 510.9328918457031, - "learning_rate": 4.927168074217998e-05, - "loss": 78.9659, - "step": 42330 - }, - { - "epoch": 0.17105895756655098, - "grad_norm": 542.828125, - "learning_rate": 4.927084408071373e-05, - "loss": 77.3036, - "step": 42340 - }, - { - "epoch": 0.17109935883191862, - "grad_norm": 1816.97265625, - "learning_rate": 4.927000694607489e-05, - "loss": 89.3204, - "step": 42350 - }, - { - "epoch": 0.17113976009728624, - "grad_norm": 558.980712890625, - "learning_rate": 4.9269169338279766e-05, - "loss": 97.5531, - "step": 42360 - }, - { - "epoch": 0.17118016136265388, - "grad_norm": 2137.310791015625, - "learning_rate": 4.9268331257344685e-05, - "loss": 111.0955, - "step": 42370 - }, - { - "epoch": 0.17122056262802152, - "grad_norm": 349.211181640625, - "learning_rate": 4.9267492703286e-05, - "loss": 116.3871, - "step": 42380 - }, - { - "epoch": 0.17126096389338913, - "grad_norm": 1252.0584716796875, - "learning_rate": 4.926665367612005e-05, - "loss": 78.8891, - "step": 42390 - }, - { - "epoch": 0.17130136515875677, - "grad_norm": 587.7830200195312, - "learning_rate": 4.9265814175863186e-05, - "loss": 90.0841, - "step": 42400 - }, - { - "epoch": 0.1713417664241244, - "grad_norm": 1336.141845703125, - "learning_rate": 4.926497420253179e-05, - "loss": 99.6293, - "step": 42410 - }, - { - "epoch": 0.17138216768949205, - "grad_norm": 1955.26171875, - "learning_rate": 4.9264133756142224e-05, - "loss": 74.8807, - "step": 42420 - }, - { - "epoch": 0.17142256895485966, - "grad_norm": 1671.5946044921875, - "learning_rate": 4.926329283671088e-05, - "loss": 100.2369, - "step": 42430 - }, - { - "epoch": 0.1714629702202273, - "grad_norm": 489.6711120605469, - "learning_rate": 4.926245144425415e-05, - "loss": 92.0346, - "step": 42440 - }, - { - "epoch": 0.17150337148559494, - "grad_norm": 1527.400634765625, - "learning_rate": 4.9261609578788435e-05, - "loss": 104.6484, - "step": 42450 - }, - { - "epoch": 0.17154377275096255, - "grad_norm": 1441.6905517578125, - "learning_rate": 4.926076724033016e-05, - "loss": 118.5627, - "step": 42460 - }, - { - "epoch": 0.1715841740163302, - "grad_norm": 1650.8604736328125, - "learning_rate": 4.9259924428895734e-05, - "loss": 95.2558, - "step": 42470 - }, - { - "epoch": 0.17162457528169783, - "grad_norm": 1053.914794921875, - "learning_rate": 4.925908114450158e-05, - "loss": 92.1593, - "step": 42480 - }, - { - "epoch": 0.17166497654706545, - "grad_norm": 485.0592956542969, - "learning_rate": 4.925823738716416e-05, - "loss": 48.5927, - "step": 42490 - }, - { - "epoch": 0.17170537781243309, - "grad_norm": 551.2293701171875, - "learning_rate": 4.925739315689991e-05, - "loss": 111.4056, - "step": 42500 - }, - { - "epoch": 0.17174577907780073, - "grad_norm": 462.1259460449219, - "learning_rate": 4.92565484537253e-05, - "loss": 81.3713, - "step": 42510 - }, - { - "epoch": 0.17178618034316834, - "grad_norm": 976.673095703125, - "learning_rate": 4.925570327765678e-05, - "loss": 97.2899, - "step": 42520 - }, - { - "epoch": 0.17182658160853598, - "grad_norm": 1159.7283935546875, - "learning_rate": 4.9254857628710846e-05, - "loss": 72.4565, - "step": 42530 - }, - { - "epoch": 0.17186698287390362, - "grad_norm": 886.2018432617188, - "learning_rate": 4.9254011506903963e-05, - "loss": 75.7661, - "step": 42540 - }, - { - "epoch": 0.17190738413927123, - "grad_norm": 716.5491333007812, - "learning_rate": 4.925316491225265e-05, - "loss": 88.0013, - "step": 42550 - }, - { - "epoch": 0.17194778540463887, - "grad_norm": 741.2266235351562, - "learning_rate": 4.925231784477339e-05, - "loss": 106.4336, - "step": 42560 - }, - { - "epoch": 0.1719881866700065, - "grad_norm": 1328.2393798828125, - "learning_rate": 4.9251470304482716e-05, - "loss": 76.8043, - "step": 42570 - }, - { - "epoch": 0.17202858793537412, - "grad_norm": 3505.359375, - "learning_rate": 4.925062229139714e-05, - "loss": 113.7022, - "step": 42580 - }, - { - "epoch": 0.17206898920074176, - "grad_norm": 560.7538452148438, - "learning_rate": 4.924977380553321e-05, - "loss": 78.8128, - "step": 42590 - }, - { - "epoch": 0.1721093904661094, - "grad_norm": 936.1580810546875, - "learning_rate": 4.924892484690743e-05, - "loss": 86.6639, - "step": 42600 - }, - { - "epoch": 0.17214979173147704, - "grad_norm": 484.0994873046875, - "learning_rate": 4.924807541553639e-05, - "loss": 91.6706, - "step": 42610 - }, - { - "epoch": 0.17219019299684465, - "grad_norm": 490.91448974609375, - "learning_rate": 4.924722551143664e-05, - "loss": 52.3336, - "step": 42620 - }, - { - "epoch": 0.1722305942622123, - "grad_norm": 1924.5557861328125, - "learning_rate": 4.924637513462474e-05, - "loss": 98.4392, - "step": 42630 - }, - { - "epoch": 0.17227099552757993, - "grad_norm": 738.7843017578125, - "learning_rate": 4.9245524285117274e-05, - "loss": 90.864, - "step": 42640 - }, - { - "epoch": 0.17231139679294755, - "grad_norm": 805.4695434570312, - "learning_rate": 4.924467296293083e-05, - "loss": 102.7175, - "step": 42650 - }, - { - "epoch": 0.1723517980583152, - "grad_norm": 710.0062866210938, - "learning_rate": 4.924382116808201e-05, - "loss": 107.4096, - "step": 42660 - }, - { - "epoch": 0.17239219932368283, - "grad_norm": 703.4191284179688, - "learning_rate": 4.924296890058741e-05, - "loss": 82.6557, - "step": 42670 - }, - { - "epoch": 0.17243260058905044, - "grad_norm": 1078.1011962890625, - "learning_rate": 4.924211616046365e-05, - "loss": 86.0516, - "step": 42680 - }, - { - "epoch": 0.17247300185441808, - "grad_norm": 970.2047729492188, - "learning_rate": 4.924126294772735e-05, - "loss": 69.9036, - "step": 42690 - }, - { - "epoch": 0.17251340311978572, - "grad_norm": 1400.3450927734375, - "learning_rate": 4.924040926239515e-05, - "loss": 103.4677, - "step": 42700 - }, - { - "epoch": 0.17255380438515333, - "grad_norm": 1445.791259765625, - "learning_rate": 4.9239555104483695e-05, - "loss": 115.1777, - "step": 42710 - }, - { - "epoch": 0.17259420565052097, - "grad_norm": 570.3566284179688, - "learning_rate": 4.923870047400964e-05, - "loss": 83.1902, - "step": 42720 - }, - { - "epoch": 0.1726346069158886, - "grad_norm": 880.5896606445312, - "learning_rate": 4.923784537098963e-05, - "loss": 106.7852, - "step": 42730 - }, - { - "epoch": 0.17267500818125622, - "grad_norm": 13018.2724609375, - "learning_rate": 4.9236989795440346e-05, - "loss": 108.5354, - "step": 42740 - }, - { - "epoch": 0.17271540944662386, - "grad_norm": 1054.298583984375, - "learning_rate": 4.9236133747378475e-05, - "loss": 81.4467, - "step": 42750 - }, - { - "epoch": 0.1727558107119915, - "grad_norm": 1258.8382568359375, - "learning_rate": 4.9235277226820695e-05, - "loss": 115.5568, - "step": 42760 - }, - { - "epoch": 0.17279621197735914, - "grad_norm": 955.5006103515625, - "learning_rate": 4.923442023378371e-05, - "loss": 104.4606, - "step": 42770 - }, - { - "epoch": 0.17283661324272676, - "grad_norm": 797.49951171875, - "learning_rate": 4.9233562768284225e-05, - "loss": 75.7812, - "step": 42780 - }, - { - "epoch": 0.1728770145080944, - "grad_norm": 550.285400390625, - "learning_rate": 4.923270483033896e-05, - "loss": 90.7212, - "step": 42790 - }, - { - "epoch": 0.17291741577346204, - "grad_norm": 828.7791137695312, - "learning_rate": 4.923184641996463e-05, - "loss": 66.7497, - "step": 42800 - }, - { - "epoch": 0.17295781703882965, - "grad_norm": 862.7196044921875, - "learning_rate": 4.923098753717798e-05, - "loss": 74.5591, - "step": 42810 - }, - { - "epoch": 0.1729982183041973, - "grad_norm": 1932.47802734375, - "learning_rate": 4.923012818199576e-05, - "loss": 113.689, - "step": 42820 - }, - { - "epoch": 0.17303861956956493, - "grad_norm": 467.0488586425781, - "learning_rate": 4.922926835443472e-05, - "loss": 80.5391, - "step": 42830 - }, - { - "epoch": 0.17307902083493254, - "grad_norm": 1409.27734375, - "learning_rate": 4.922840805451161e-05, - "loss": 141.5631, - "step": 42840 - }, - { - "epoch": 0.17311942210030018, - "grad_norm": 470.03057861328125, - "learning_rate": 4.9227547282243214e-05, - "loss": 55.4408, - "step": 42850 - }, - { - "epoch": 0.17315982336566782, - "grad_norm": 399.5194091796875, - "learning_rate": 4.9226686037646314e-05, - "loss": 95.3332, - "step": 42860 - }, - { - "epoch": 0.17320022463103543, - "grad_norm": 1028.5482177734375, - "learning_rate": 4.92258243207377e-05, - "loss": 84.2015, - "step": 42870 - }, - { - "epoch": 0.17324062589640307, - "grad_norm": 936.9313354492188, - "learning_rate": 4.922496213153416e-05, - "loss": 85.3661, - "step": 42880 - }, - { - "epoch": 0.1732810271617707, - "grad_norm": 614.690673828125, - "learning_rate": 4.922409947005251e-05, - "loss": 49.2994, - "step": 42890 - }, - { - "epoch": 0.17332142842713832, - "grad_norm": 845.6972045898438, - "learning_rate": 4.922323633630958e-05, - "loss": 44.1013, - "step": 42900 - }, - { - "epoch": 0.17336182969250596, - "grad_norm": 1084.47900390625, - "learning_rate": 4.9222372730322176e-05, - "loss": 131.938, - "step": 42910 - }, - { - "epoch": 0.1734022309578736, - "grad_norm": 1015.8482666015625, - "learning_rate": 4.922150865210715e-05, - "loss": 90.8332, - "step": 42920 - }, - { - "epoch": 0.17344263222324124, - "grad_norm": 548.1792602539062, - "learning_rate": 4.922064410168134e-05, - "loss": 94.3187, - "step": 42930 - }, - { - "epoch": 0.17348303348860886, - "grad_norm": 625.8770751953125, - "learning_rate": 4.92197790790616e-05, - "loss": 86.6768, - "step": 42940 - }, - { - "epoch": 0.1735234347539765, - "grad_norm": 0.0, - "learning_rate": 4.9218913584264814e-05, - "loss": 86.3633, - "step": 42950 - }, - { - "epoch": 0.17356383601934414, - "grad_norm": 646.9606323242188, - "learning_rate": 4.9218047617307824e-05, - "loss": 88.7674, - "step": 42960 - }, - { - "epoch": 0.17360423728471175, - "grad_norm": 249.1079864501953, - "learning_rate": 4.9217181178207535e-05, - "loss": 61.6128, - "step": 42970 - }, - { - "epoch": 0.1736446385500794, - "grad_norm": 543.1034545898438, - "learning_rate": 4.9216314266980824e-05, - "loss": 79.8014, - "step": 42980 - }, - { - "epoch": 0.17368503981544703, - "grad_norm": 662.4139404296875, - "learning_rate": 4.921544688364461e-05, - "loss": 95.9196, - "step": 42990 - }, - { - "epoch": 0.17372544108081464, - "grad_norm": 422.02825927734375, - "learning_rate": 4.9214579028215776e-05, - "loss": 91.8231, - "step": 43000 - }, - { - "epoch": 0.17376584234618228, - "grad_norm": 891.07958984375, - "learning_rate": 4.921371070071127e-05, - "loss": 74.39, - "step": 43010 - }, - { - "epoch": 0.17380624361154992, - "grad_norm": 1176.2237548828125, - "learning_rate": 4.9212841901148e-05, - "loss": 90.9235, - "step": 43020 - }, - { - "epoch": 0.17384664487691753, - "grad_norm": 4489.8310546875, - "learning_rate": 4.9211972629542926e-05, - "loss": 124.854, - "step": 43030 - }, - { - "epoch": 0.17388704614228517, - "grad_norm": 639.3281860351562, - "learning_rate": 4.9211102885912965e-05, - "loss": 76.7055, - "step": 43040 - }, - { - "epoch": 0.1739274474076528, - "grad_norm": 1555.4083251953125, - "learning_rate": 4.9210232670275094e-05, - "loss": 110.0548, - "step": 43050 - }, - { - "epoch": 0.17396784867302043, - "grad_norm": 727.7996826171875, - "learning_rate": 4.920936198264627e-05, - "loss": 95.9471, - "step": 43060 - }, - { - "epoch": 0.17400824993838807, - "grad_norm": 1188.0120849609375, - "learning_rate": 4.920849082304347e-05, - "loss": 113.6858, - "step": 43070 - }, - { - "epoch": 0.1740486512037557, - "grad_norm": 1449.6580810546875, - "learning_rate": 4.920761919148369e-05, - "loss": 94.3256, - "step": 43080 - }, - { - "epoch": 0.17408905246912335, - "grad_norm": 454.728759765625, - "learning_rate": 4.9206747087983894e-05, - "loss": 78.3794, - "step": 43090 - }, - { - "epoch": 0.17412945373449096, - "grad_norm": 815.9066162109375, - "learning_rate": 4.9205874512561115e-05, - "loss": 89.8247, - "step": 43100 - }, - { - "epoch": 0.1741698549998586, - "grad_norm": 759.9569702148438, - "learning_rate": 4.920500146523234e-05, - "loss": 63.8019, - "step": 43110 - }, - { - "epoch": 0.17421025626522624, - "grad_norm": 2164.500732421875, - "learning_rate": 4.920412794601461e-05, - "loss": 100.4632, - "step": 43120 - }, - { - "epoch": 0.17425065753059385, - "grad_norm": 662.5512084960938, - "learning_rate": 4.920325395492493e-05, - "loss": 107.611, - "step": 43130 - }, - { - "epoch": 0.1742910587959615, - "grad_norm": 422.92645263671875, - "learning_rate": 4.920237949198037e-05, - "loss": 87.6762, - "step": 43140 - }, - { - "epoch": 0.17433146006132913, - "grad_norm": 915.8494873046875, - "learning_rate": 4.9201504557197955e-05, - "loss": 66.672, - "step": 43150 - }, - { - "epoch": 0.17437186132669674, - "grad_norm": 988.4778442382812, - "learning_rate": 4.9200629150594744e-05, - "loss": 91.4257, - "step": 43160 - }, - { - "epoch": 0.17441226259206438, - "grad_norm": 794.6514282226562, - "learning_rate": 4.919975327218781e-05, - "loss": 87.8521, - "step": 43170 - }, - { - "epoch": 0.17445266385743202, - "grad_norm": 884.67578125, - "learning_rate": 4.919887692199423e-05, - "loss": 74.1221, - "step": 43180 - }, - { - "epoch": 0.17449306512279963, - "grad_norm": 764.0117797851562, - "learning_rate": 4.919800010003108e-05, - "loss": 84.2991, - "step": 43190 - }, - { - "epoch": 0.17453346638816727, - "grad_norm": 1031.7420654296875, - "learning_rate": 4.919712280631547e-05, - "loss": 116.3033, - "step": 43200 - }, - { - "epoch": 0.17457386765353491, - "grad_norm": 593.7903442382812, - "learning_rate": 4.9196245040864486e-05, - "loss": 81.53, - "step": 43210 - }, - { - "epoch": 0.17461426891890253, - "grad_norm": 929.322021484375, - "learning_rate": 4.919536680369525e-05, - "loss": 87.4219, - "step": 43220 - }, - { - "epoch": 0.17465467018427017, - "grad_norm": 3907.416015625, - "learning_rate": 4.919448809482489e-05, - "loss": 81.7991, - "step": 43230 - }, - { - "epoch": 0.1746950714496378, - "grad_norm": 893.3704223632812, - "learning_rate": 4.9193608914270515e-05, - "loss": 96.2677, - "step": 43240 - }, - { - "epoch": 0.17473547271500545, - "grad_norm": 659.1729125976562, - "learning_rate": 4.919272926204929e-05, - "loss": 98.551, - "step": 43250 - }, - { - "epoch": 0.17477587398037306, - "grad_norm": 995.1451416015625, - "learning_rate": 4.9191849138178334e-05, - "loss": 71.9214, - "step": 43260 - }, - { - "epoch": 0.1748162752457407, - "grad_norm": 1777.868896484375, - "learning_rate": 4.919096854267484e-05, - "loss": 95.7281, - "step": 43270 - }, - { - "epoch": 0.17485667651110834, - "grad_norm": 779.3260498046875, - "learning_rate": 4.9190087475555955e-05, - "loss": 104.3814, - "step": 43280 - }, - { - "epoch": 0.17489707777647595, - "grad_norm": 939.8629760742188, - "learning_rate": 4.9189205936838864e-05, - "loss": 71.219, - "step": 43290 - }, - { - "epoch": 0.1749374790418436, - "grad_norm": 808.994140625, - "learning_rate": 4.9188323926540746e-05, - "loss": 63.1995, - "step": 43300 - }, - { - "epoch": 0.17497788030721123, - "grad_norm": 1255.6544189453125, - "learning_rate": 4.918744144467881e-05, - "loss": 73.3233, - "step": 43310 - }, - { - "epoch": 0.17501828157257884, - "grad_norm": 1129.1463623046875, - "learning_rate": 4.918655849127024e-05, - "loss": 80.2645, - "step": 43320 - }, - { - "epoch": 0.17505868283794648, - "grad_norm": 0.0, - "learning_rate": 4.918567506633226e-05, - "loss": 107.1662, - "step": 43330 - }, - { - "epoch": 0.17509908410331412, - "grad_norm": 219.4386444091797, - "learning_rate": 4.91847911698821e-05, - "loss": 66.5381, - "step": 43340 - }, - { - "epoch": 0.17513948536868174, - "grad_norm": 432.4940185546875, - "learning_rate": 4.918390680193698e-05, - "loss": 73.7235, - "step": 43350 - }, - { - "epoch": 0.17517988663404938, - "grad_norm": 645.31494140625, - "learning_rate": 4.918302196251415e-05, - "loss": 110.384, - "step": 43360 - }, - { - "epoch": 0.17522028789941702, - "grad_norm": 3361.419677734375, - "learning_rate": 4.918213665163085e-05, - "loss": 122.9304, - "step": 43370 - }, - { - "epoch": 0.17526068916478463, - "grad_norm": 898.6963500976562, - "learning_rate": 4.918125086930435e-05, - "loss": 79.2204, - "step": 43380 - }, - { - "epoch": 0.17530109043015227, - "grad_norm": 982.2764892578125, - "learning_rate": 4.918036461555192e-05, - "loss": 87.4724, - "step": 43390 - }, - { - "epoch": 0.1753414916955199, - "grad_norm": 423.7979431152344, - "learning_rate": 4.9179477890390825e-05, - "loss": 96.8271, - "step": 43400 - }, - { - "epoch": 0.17538189296088755, - "grad_norm": 994.8875732421875, - "learning_rate": 4.917859069383836e-05, - "loss": 65.6866, - "step": 43410 - }, - { - "epoch": 0.17542229422625516, - "grad_norm": 488.28973388671875, - "learning_rate": 4.9177703025911825e-05, - "loss": 67.0737, - "step": 43420 - }, - { - "epoch": 0.1754626954916228, - "grad_norm": 927.5771484375, - "learning_rate": 4.917681488662852e-05, - "loss": 96.7948, - "step": 43430 - }, - { - "epoch": 0.17550309675699044, - "grad_norm": 1078.08837890625, - "learning_rate": 4.917592627600577e-05, - "loss": 77.3857, - "step": 43440 - }, - { - "epoch": 0.17554349802235805, - "grad_norm": 1068.2432861328125, - "learning_rate": 4.917503719406088e-05, - "loss": 82.1481, - "step": 43450 - }, - { - "epoch": 0.1755838992877257, - "grad_norm": 622.7982177734375, - "learning_rate": 4.91741476408112e-05, - "loss": 88.3467, - "step": 43460 - }, - { - "epoch": 0.17562430055309333, - "grad_norm": 529.6117553710938, - "learning_rate": 4.917325761627406e-05, - "loss": 94.7772, - "step": 43470 - }, - { - "epoch": 0.17566470181846094, - "grad_norm": 527.8485107421875, - "learning_rate": 4.917236712046682e-05, - "loss": 49.5305, - "step": 43480 - }, - { - "epoch": 0.17570510308382858, - "grad_norm": 714.2109375, - "learning_rate": 4.917147615340684e-05, - "loss": 84.2201, - "step": 43490 - }, - { - "epoch": 0.17574550434919622, - "grad_norm": 836.039794921875, - "learning_rate": 4.917058471511149e-05, - "loss": 73.1889, - "step": 43500 - }, - { - "epoch": 0.17578590561456384, - "grad_norm": 1239.9703369140625, - "learning_rate": 4.9169692805598145e-05, - "loss": 94.0238, - "step": 43510 - }, - { - "epoch": 0.17582630687993148, - "grad_norm": 447.8238830566406, - "learning_rate": 4.916880042488419e-05, - "loss": 56.3747, - "step": 43520 - }, - { - "epoch": 0.17586670814529912, - "grad_norm": 646.1732788085938, - "learning_rate": 4.916790757298704e-05, - "loss": 73.1238, - "step": 43530 - }, - { - "epoch": 0.17590710941066673, - "grad_norm": 2080.33984375, - "learning_rate": 4.9167014249924075e-05, - "loss": 111.906, - "step": 43540 - }, - { - "epoch": 0.17594751067603437, - "grad_norm": 500.1817932128906, - "learning_rate": 4.9166120455712736e-05, - "loss": 55.3391, - "step": 43550 - }, - { - "epoch": 0.175987911941402, - "grad_norm": 495.2890319824219, - "learning_rate": 4.916522619037043e-05, - "loss": 111.2715, - "step": 43560 - }, - { - "epoch": 0.17602831320676965, - "grad_norm": 539.0352783203125, - "learning_rate": 4.91643314539146e-05, - "loss": 72.2024, - "step": 43570 - }, - { - "epoch": 0.17606871447213726, - "grad_norm": 1020.1885375976562, - "learning_rate": 4.916343624636269e-05, - "loss": 133.4901, - "step": 43580 - }, - { - "epoch": 0.1761091157375049, - "grad_norm": 463.427978515625, - "learning_rate": 4.916254056773215e-05, - "loss": 79.8422, - "step": 43590 - }, - { - "epoch": 0.17614951700287254, - "grad_norm": 1095.344970703125, - "learning_rate": 4.916164441804044e-05, - "loss": 79.1968, - "step": 43600 - }, - { - "epoch": 0.17618991826824015, - "grad_norm": 1455.2132568359375, - "learning_rate": 4.916074779730504e-05, - "loss": 79.4097, - "step": 43610 - }, - { - "epoch": 0.1762303195336078, - "grad_norm": 854.4801025390625, - "learning_rate": 4.915985070554341e-05, - "loss": 108.5478, - "step": 43620 - }, - { - "epoch": 0.17627072079897543, - "grad_norm": 1308.75341796875, - "learning_rate": 4.915895314277306e-05, - "loss": 64.448, - "step": 43630 - }, - { - "epoch": 0.17631112206434305, - "grad_norm": 755.5027465820312, - "learning_rate": 4.915805510901148e-05, - "loss": 99.9466, - "step": 43640 - }, - { - "epoch": 0.17635152332971069, - "grad_norm": 3085.270751953125, - "learning_rate": 4.9157156604276175e-05, - "loss": 99.849, - "step": 43650 - }, - { - "epoch": 0.17639192459507833, - "grad_norm": 1174.489013671875, - "learning_rate": 4.915625762858467e-05, - "loss": 71.8641, - "step": 43660 - }, - { - "epoch": 0.17643232586044594, - "grad_norm": 1123.778076171875, - "learning_rate": 4.9155358181954494e-05, - "loss": 92.842, - "step": 43670 - }, - { - "epoch": 0.17647272712581358, - "grad_norm": 424.877197265625, - "learning_rate": 4.915445826440316e-05, - "loss": 82.3663, - "step": 43680 - }, - { - "epoch": 0.17651312839118122, - "grad_norm": 2889.92041015625, - "learning_rate": 4.915355787594823e-05, - "loss": 81.4271, - "step": 43690 - }, - { - "epoch": 0.17655352965654883, - "grad_norm": 5192.76611328125, - "learning_rate": 4.915265701660726e-05, - "loss": 80.7691, - "step": 43700 - }, - { - "epoch": 0.17659393092191647, - "grad_norm": 995.813720703125, - "learning_rate": 4.9151755686397793e-05, - "loss": 75.4785, - "step": 43710 - }, - { - "epoch": 0.1766343321872841, - "grad_norm": 648.2084350585938, - "learning_rate": 4.9150853885337426e-05, - "loss": 93.2939, - "step": 43720 - }, - { - "epoch": 0.17667473345265175, - "grad_norm": 601.667724609375, - "learning_rate": 4.914995161344373e-05, - "loss": 68.4272, - "step": 43730 - }, - { - "epoch": 0.17671513471801936, - "grad_norm": 553.5189819335938, - "learning_rate": 4.9149048870734296e-05, - "loss": 75.1263, - "step": 43740 - }, - { - "epoch": 0.176755535983387, - "grad_norm": 816.7628784179688, - "learning_rate": 4.914814565722671e-05, - "loss": 100.5337, - "step": 43750 - }, - { - "epoch": 0.17679593724875464, - "grad_norm": 1510.35302734375, - "learning_rate": 4.9147241972938596e-05, - "loss": 94.5766, - "step": 43760 - }, - { - "epoch": 0.17683633851412225, - "grad_norm": 834.1815795898438, - "learning_rate": 4.9146337817887575e-05, - "loss": 70.5322, - "step": 43770 - }, - { - "epoch": 0.1768767397794899, - "grad_norm": 1132.84619140625, - "learning_rate": 4.914543319209126e-05, - "loss": 74.2719, - "step": 43780 - }, - { - "epoch": 0.17691714104485753, - "grad_norm": 0.0, - "learning_rate": 4.91445280955673e-05, - "loss": 91.3708, - "step": 43790 - }, - { - "epoch": 0.17695754231022515, - "grad_norm": 347.47705078125, - "learning_rate": 4.914362252833332e-05, - "loss": 70.5821, - "step": 43800 - }, - { - "epoch": 0.1769979435755928, - "grad_norm": 820.30419921875, - "learning_rate": 4.9142716490407e-05, - "loss": 122.0043, - "step": 43810 - }, - { - "epoch": 0.17703834484096043, - "grad_norm": 910.3120727539062, - "learning_rate": 4.9141809981805995e-05, - "loss": 86.2672, - "step": 43820 - }, - { - "epoch": 0.17707874610632804, - "grad_norm": 536.81982421875, - "learning_rate": 4.914090300254798e-05, - "loss": 67.7232, - "step": 43830 - }, - { - "epoch": 0.17711914737169568, - "grad_norm": 799.4282836914062, - "learning_rate": 4.913999555265062e-05, - "loss": 66.7534, - "step": 43840 - }, - { - "epoch": 0.17715954863706332, - "grad_norm": 978.2288208007812, - "learning_rate": 4.913908763213162e-05, - "loss": 97.7384, - "step": 43850 - }, - { - "epoch": 0.17719994990243093, - "grad_norm": 1155.240478515625, - "learning_rate": 4.913817924100869e-05, - "loss": 83.567, - "step": 43860 - }, - { - "epoch": 0.17724035116779857, - "grad_norm": 4742.7177734375, - "learning_rate": 4.913727037929952e-05, - "loss": 106.2576, - "step": 43870 - }, - { - "epoch": 0.1772807524331662, - "grad_norm": 725.6898193359375, - "learning_rate": 4.913636104702183e-05, - "loss": 95.1339, - "step": 43880 - }, - { - "epoch": 0.17732115369853385, - "grad_norm": 933.677978515625, - "learning_rate": 4.913545124419336e-05, - "loss": 77.6824, - "step": 43890 - }, - { - "epoch": 0.17736155496390146, - "grad_norm": 484.01092529296875, - "learning_rate": 4.913454097083185e-05, - "loss": 95.2952, - "step": 43900 - }, - { - "epoch": 0.1774019562292691, - "grad_norm": 4791.2919921875, - "learning_rate": 4.9133630226955026e-05, - "loss": 66.6884, - "step": 43910 - }, - { - "epoch": 0.17744235749463674, - "grad_norm": 750.3236694335938, - "learning_rate": 4.913271901258067e-05, - "loss": 71.9357, - "step": 43920 - }, - { - "epoch": 0.17748275876000436, - "grad_norm": 738.4391479492188, - "learning_rate": 4.913180732772652e-05, - "loss": 73.5072, - "step": 43930 - }, - { - "epoch": 0.177523160025372, - "grad_norm": 3437.857666015625, - "learning_rate": 4.913089517241037e-05, - "loss": 102.427, - "step": 43940 - }, - { - "epoch": 0.17756356129073964, - "grad_norm": 790.71044921875, - "learning_rate": 4.912998254665e-05, - "loss": 75.0368, - "step": 43950 - }, - { - "epoch": 0.17760396255610725, - "grad_norm": 417.4804382324219, - "learning_rate": 4.9129069450463186e-05, - "loss": 97.0161, - "step": 43960 - }, - { - "epoch": 0.1776443638214749, - "grad_norm": 988.0493774414062, - "learning_rate": 4.912815588386775e-05, - "loss": 67.218, - "step": 43970 - }, - { - "epoch": 0.17768476508684253, - "grad_norm": 867.89404296875, - "learning_rate": 4.912724184688149e-05, - "loss": 70.8273, - "step": 43980 - }, - { - "epoch": 0.17772516635221014, - "grad_norm": 678.370849609375, - "learning_rate": 4.9126327339522225e-05, - "loss": 79.2882, - "step": 43990 - }, - { - "epoch": 0.17776556761757778, - "grad_norm": 1945.7412109375, - "learning_rate": 4.912541236180779e-05, - "loss": 102.0449, - "step": 44000 - }, - { - "epoch": 0.17780596888294542, - "grad_norm": 874.1896362304688, - "learning_rate": 4.912449691375602e-05, - "loss": 73.3128, - "step": 44010 - }, - { - "epoch": 0.17784637014831303, - "grad_norm": 922.020751953125, - "learning_rate": 4.912358099538476e-05, - "loss": 105.76, - "step": 44020 - }, - { - "epoch": 0.17788677141368067, - "grad_norm": 295.5516662597656, - "learning_rate": 4.912266460671187e-05, - "loss": 71.2611, - "step": 44030 - }, - { - "epoch": 0.1779271726790483, - "grad_norm": 524.7007446289062, - "learning_rate": 4.912174774775522e-05, - "loss": 132.5543, - "step": 44040 - }, - { - "epoch": 0.17796757394441595, - "grad_norm": 1069.502685546875, - "learning_rate": 4.912083041853267e-05, - "loss": 49.895, - "step": 44050 - }, - { - "epoch": 0.17800797520978356, - "grad_norm": 1046.591796875, - "learning_rate": 4.911991261906212e-05, - "loss": 121.5496, - "step": 44060 - }, - { - "epoch": 0.1780483764751512, - "grad_norm": 1215.2974853515625, - "learning_rate": 4.9118994349361455e-05, - "loss": 67.8808, - "step": 44070 - }, - { - "epoch": 0.17808877774051884, - "grad_norm": 419.9411315917969, - "learning_rate": 4.911807560944858e-05, - "loss": 94.9882, - "step": 44080 - }, - { - "epoch": 0.17812917900588646, - "grad_norm": 555.5469360351562, - "learning_rate": 4.911715639934139e-05, - "loss": 75.9314, - "step": 44090 - }, - { - "epoch": 0.1781695802712541, - "grad_norm": 961.8922729492188, - "learning_rate": 4.911623671905784e-05, - "loss": 281.5219, - "step": 44100 - }, - { - "epoch": 0.17820998153662174, - "grad_norm": 443.88385009765625, - "learning_rate": 4.9115316568615824e-05, - "loss": 72.1933, - "step": 44110 - }, - { - "epoch": 0.17825038280198935, - "grad_norm": 670.9072265625, - "learning_rate": 4.9114395948033296e-05, - "loss": 59.2318, - "step": 44120 - }, - { - "epoch": 0.178290784067357, - "grad_norm": 951.0693359375, - "learning_rate": 4.911347485732821e-05, - "loss": 92.5108, - "step": 44130 - }, - { - "epoch": 0.17833118533272463, - "grad_norm": 742.0170288085938, - "learning_rate": 4.911255329651851e-05, - "loss": 81.8882, - "step": 44140 - }, - { - "epoch": 0.17837158659809224, - "grad_norm": 1449.71826171875, - "learning_rate": 4.9111631265622184e-05, - "loss": 99.7724, - "step": 44150 - }, - { - "epoch": 0.17841198786345988, - "grad_norm": 884.3943481445312, - "learning_rate": 4.911070876465719e-05, - "loss": 77.8581, - "step": 44160 - }, - { - "epoch": 0.17845238912882752, - "grad_norm": 930.3131103515625, - "learning_rate": 4.910978579364151e-05, - "loss": 75.9937, - "step": 44170 - }, - { - "epoch": 0.17849279039419513, - "grad_norm": 741.734130859375, - "learning_rate": 4.910886235259314e-05, - "loss": 49.2752, - "step": 44180 - }, - { - "epoch": 0.17853319165956277, - "grad_norm": 1187.31982421875, - "learning_rate": 4.910793844153009e-05, - "loss": 72.5356, - "step": 44190 - }, - { - "epoch": 0.1785735929249304, - "grad_norm": 1084.1396484375, - "learning_rate": 4.910701406047037e-05, - "loss": 54.3127, - "step": 44200 - }, - { - "epoch": 0.17861399419029805, - "grad_norm": 1090.561767578125, - "learning_rate": 4.910608920943199e-05, - "loss": 89.8081, - "step": 44210 - }, - { - "epoch": 0.17865439545566567, - "grad_norm": 926.6570434570312, - "learning_rate": 4.9105163888433e-05, - "loss": 102.4193, - "step": 44220 - }, - { - "epoch": 0.1786947967210333, - "grad_norm": 823.6111450195312, - "learning_rate": 4.910423809749143e-05, - "loss": 71.3021, - "step": 44230 - }, - { - "epoch": 0.17873519798640095, - "grad_norm": 1753.1083984375, - "learning_rate": 4.910331183662533e-05, - "loss": 73.3729, - "step": 44240 - }, - { - "epoch": 0.17877559925176856, - "grad_norm": 1302.8763427734375, - "learning_rate": 4.910238510585276e-05, - "loss": 73.3519, - "step": 44250 - }, - { - "epoch": 0.1788160005171362, - "grad_norm": 1096.9420166015625, - "learning_rate": 4.9101457905191774e-05, - "loss": 97.6709, - "step": 44260 - }, - { - "epoch": 0.17885640178250384, - "grad_norm": 1942.8685302734375, - "learning_rate": 4.910053023466046e-05, - "loss": 144.2541, - "step": 44270 - }, - { - "epoch": 0.17889680304787145, - "grad_norm": 784.9008178710938, - "learning_rate": 4.90996020942769e-05, - "loss": 80.2861, - "step": 44280 - }, - { - "epoch": 0.1789372043132391, - "grad_norm": 1027.322509765625, - "learning_rate": 4.9098673484059195e-05, - "loss": 69.3643, - "step": 44290 - }, - { - "epoch": 0.17897760557860673, - "grad_norm": 1583.6845703125, - "learning_rate": 4.9097744404025435e-05, - "loss": 92.1036, - "step": 44300 - }, - { - "epoch": 0.17901800684397434, - "grad_norm": 735.26806640625, - "learning_rate": 4.909681485419375e-05, - "loss": 60.5722, - "step": 44310 - }, - { - "epoch": 0.17905840810934198, - "grad_norm": 0.0, - "learning_rate": 4.909588483458225e-05, - "loss": 65.9718, - "step": 44320 - }, - { - "epoch": 0.17909880937470962, - "grad_norm": 676.1497802734375, - "learning_rate": 4.9094954345209075e-05, - "loss": 85.0213, - "step": 44330 - }, - { - "epoch": 0.17913921064007723, - "grad_norm": 873.891357421875, - "learning_rate": 4.909402338609236e-05, - "loss": 77.8588, - "step": 44340 - }, - { - "epoch": 0.17917961190544487, - "grad_norm": 693.5972900390625, - "learning_rate": 4.909309195725025e-05, - "loss": 124.6474, - "step": 44350 - }, - { - "epoch": 0.17922001317081251, - "grad_norm": 460.6761779785156, - "learning_rate": 4.90921600587009e-05, - "loss": 87.2825, - "step": 44360 - }, - { - "epoch": 0.17926041443618015, - "grad_norm": 1261.747314453125, - "learning_rate": 4.90912276904625e-05, - "loss": 70.8723, - "step": 44370 - }, - { - "epoch": 0.17930081570154777, - "grad_norm": 538.2672729492188, - "learning_rate": 4.909029485255321e-05, - "loss": 67.4372, - "step": 44380 - }, - { - "epoch": 0.1793412169669154, - "grad_norm": 1068.8714599609375, - "learning_rate": 4.9089361544991215e-05, - "loss": 68.0589, - "step": 44390 - }, - { - "epoch": 0.17938161823228305, - "grad_norm": 1119.72314453125, - "learning_rate": 4.908842776779472e-05, - "loss": 68.3467, - "step": 44400 - }, - { - "epoch": 0.17942201949765066, - "grad_norm": 568.3760986328125, - "learning_rate": 4.908749352098192e-05, - "loss": 73.3346, - "step": 44410 - }, - { - "epoch": 0.1794624207630183, - "grad_norm": 1218.2125244140625, - "learning_rate": 4.9086558804571034e-05, - "loss": 109.4271, - "step": 44420 - }, - { - "epoch": 0.17950282202838594, - "grad_norm": 582.5836181640625, - "learning_rate": 4.908562361858028e-05, - "loss": 72.2208, - "step": 44430 - }, - { - "epoch": 0.17954322329375355, - "grad_norm": 1486.9588623046875, - "learning_rate": 4.9084687963027894e-05, - "loss": 87.6006, - "step": 44440 - }, - { - "epoch": 0.1795836245591212, - "grad_norm": 646.9586791992188, - "learning_rate": 4.9083751837932126e-05, - "loss": 84.1968, - "step": 44450 - }, - { - "epoch": 0.17962402582448883, - "grad_norm": 665.9592895507812, - "learning_rate": 4.908281524331121e-05, - "loss": 61.7497, - "step": 44460 - }, - { - "epoch": 0.17966442708985644, - "grad_norm": 514.8817749023438, - "learning_rate": 4.908187817918341e-05, - "loss": 71.9887, - "step": 44470 - }, - { - "epoch": 0.17970482835522408, - "grad_norm": 883.0592651367188, - "learning_rate": 4.9080940645567e-05, - "loss": 72.1492, - "step": 44480 - }, - { - "epoch": 0.17974522962059172, - "grad_norm": 1375.27001953125, - "learning_rate": 4.908000264248025e-05, - "loss": 96.8598, - "step": 44490 - }, - { - "epoch": 0.17978563088595934, - "grad_norm": 645.9873046875, - "learning_rate": 4.907906416994146e-05, - "loss": 64.9413, - "step": 44500 - }, - { - "epoch": 0.17982603215132698, - "grad_norm": 1111.731201171875, - "learning_rate": 4.9078125227968904e-05, - "loss": 86.5026, - "step": 44510 - }, - { - "epoch": 0.17986643341669462, - "grad_norm": 934.5158081054688, - "learning_rate": 4.907718581658091e-05, - "loss": 110.799, - "step": 44520 - }, - { - "epoch": 0.17990683468206226, - "grad_norm": 920.4935913085938, - "learning_rate": 4.9076245935795786e-05, - "loss": 62.2967, - "step": 44530 - }, - { - "epoch": 0.17994723594742987, - "grad_norm": 892.095947265625, - "learning_rate": 4.9075305585631845e-05, - "loss": 82.56, - "step": 44540 - }, - { - "epoch": 0.1799876372127975, - "grad_norm": 1195.0810546875, - "learning_rate": 4.907436476610743e-05, - "loss": 48.5051, - "step": 44550 - }, - { - "epoch": 0.18002803847816515, - "grad_norm": 768.6405029296875, - "learning_rate": 4.907342347724087e-05, - "loss": 67.0793, - "step": 44560 - }, - { - "epoch": 0.18006843974353276, - "grad_norm": 1605.87060546875, - "learning_rate": 4.907248171905055e-05, - "loss": 114.835, - "step": 44570 - }, - { - "epoch": 0.1801088410089004, - "grad_norm": 718.5313720703125, - "learning_rate": 4.907153949155479e-05, - "loss": 100.6462, - "step": 44580 - }, - { - "epoch": 0.18014924227426804, - "grad_norm": 430.45037841796875, - "learning_rate": 4.907059679477197e-05, - "loss": 67.9887, - "step": 44590 - }, - { - "epoch": 0.18018964353963565, - "grad_norm": 234.9417266845703, - "learning_rate": 4.906965362872047e-05, - "loss": 77.6404, - "step": 44600 - }, - { - "epoch": 0.1802300448050033, - "grad_norm": 911.5045776367188, - "learning_rate": 4.906870999341869e-05, - "loss": 88.723, - "step": 44610 - }, - { - "epoch": 0.18027044607037093, - "grad_norm": 772.7266845703125, - "learning_rate": 4.906776588888502e-05, - "loss": 93.7803, - "step": 44620 - }, - { - "epoch": 0.18031084733573854, - "grad_norm": 1137.0054931640625, - "learning_rate": 4.9066821315137856e-05, - "loss": 110.4208, - "step": 44630 - }, - { - "epoch": 0.18035124860110618, - "grad_norm": 898.4769287109375, - "learning_rate": 4.906587627219562e-05, - "loss": 73.2961, - "step": 44640 - }, - { - "epoch": 0.18039164986647382, - "grad_norm": 958.9808959960938, - "learning_rate": 4.906493076007674e-05, - "loss": 57.1335, - "step": 44650 - }, - { - "epoch": 0.18043205113184144, - "grad_norm": 938.7362670898438, - "learning_rate": 4.9063984778799645e-05, - "loss": 101.8361, - "step": 44660 - }, - { - "epoch": 0.18047245239720908, - "grad_norm": 515.5078735351562, - "learning_rate": 4.906303832838278e-05, - "loss": 54.5177, - "step": 44670 - }, - { - "epoch": 0.18051285366257672, - "grad_norm": 531.8738403320312, - "learning_rate": 4.906209140884459e-05, - "loss": 82.9638, - "step": 44680 - }, - { - "epoch": 0.18055325492794436, - "grad_norm": 1305.5316162109375, - "learning_rate": 4.906114402020354e-05, - "loss": 95.9825, - "step": 44690 - }, - { - "epoch": 0.18059365619331197, - "grad_norm": 777.7756958007812, - "learning_rate": 4.90601961624781e-05, - "loss": 81.2786, - "step": 44700 - }, - { - "epoch": 0.1806340574586796, - "grad_norm": 1133.5772705078125, - "learning_rate": 4.905924783568675e-05, - "loss": 85.8932, - "step": 44710 - }, - { - "epoch": 0.18067445872404725, - "grad_norm": 599.99951171875, - "learning_rate": 4.9058299039847975e-05, - "loss": 101.0232, - "step": 44720 - }, - { - "epoch": 0.18071485998941486, - "grad_norm": 675.3793334960938, - "learning_rate": 4.9057349774980275e-05, - "loss": 56.9, - "step": 44730 - }, - { - "epoch": 0.1807552612547825, - "grad_norm": 961.1941528320312, - "learning_rate": 4.905640004110216e-05, - "loss": 84.8394, - "step": 44740 - }, - { - "epoch": 0.18079566252015014, - "grad_norm": 1229.0406494140625, - "learning_rate": 4.905544983823214e-05, - "loss": 65.7035, - "step": 44750 - }, - { - "epoch": 0.18083606378551775, - "grad_norm": 1367.959228515625, - "learning_rate": 4.905449916638873e-05, - "loss": 79.8659, - "step": 44760 - }, - { - "epoch": 0.1808764650508854, - "grad_norm": 705.6975708007812, - "learning_rate": 4.905354802559049e-05, - "loss": 109.2609, - "step": 44770 - }, - { - "epoch": 0.18091686631625303, - "grad_norm": 738.3800048828125, - "learning_rate": 4.905259641585594e-05, - "loss": 45.9301, - "step": 44780 - }, - { - "epoch": 0.18095726758162065, - "grad_norm": 746.9801025390625, - "learning_rate": 4.905164433720364e-05, - "loss": 65.5784, - "step": 44790 - }, - { - "epoch": 0.18099766884698829, - "grad_norm": 546.0762329101562, - "learning_rate": 4.905069178965215e-05, - "loss": 97.4426, - "step": 44800 - }, - { - "epoch": 0.18103807011235593, - "grad_norm": 714.2156372070312, - "learning_rate": 4.9049738773220046e-05, - "loss": 83.1105, - "step": 44810 - }, - { - "epoch": 0.18107847137772354, - "grad_norm": 862.12939453125, - "learning_rate": 4.9048785287925895e-05, - "loss": 56.6679, - "step": 44820 - }, - { - "epoch": 0.18111887264309118, - "grad_norm": 233.91058349609375, - "learning_rate": 4.9047831333788295e-05, - "loss": 82.0974, - "step": 44830 - }, - { - "epoch": 0.18115927390845882, - "grad_norm": 820.8073120117188, - "learning_rate": 4.904687691082585e-05, - "loss": 72.0476, - "step": 44840 - }, - { - "epoch": 0.18119967517382646, - "grad_norm": 1259.7579345703125, - "learning_rate": 4.9045922019057155e-05, - "loss": 83.561, - "step": 44850 - }, - { - "epoch": 0.18124007643919407, - "grad_norm": 879.6514282226562, - "learning_rate": 4.904496665850084e-05, - "loss": 82.4675, - "step": 44860 - }, - { - "epoch": 0.1812804777045617, - "grad_norm": 684.139892578125, - "learning_rate": 4.90440108291755e-05, - "loss": 90.4461, - "step": 44870 - }, - { - "epoch": 0.18132087896992935, - "grad_norm": 2262.56591796875, - "learning_rate": 4.904305453109981e-05, - "loss": 53.0787, - "step": 44880 - }, - { - "epoch": 0.18136128023529696, - "grad_norm": 1157.9019775390625, - "learning_rate": 4.9042097764292385e-05, - "loss": 83.7821, - "step": 44890 - }, - { - "epoch": 0.1814016815006646, - "grad_norm": 945.2867431640625, - "learning_rate": 4.904114052877188e-05, - "loss": 81.2998, - "step": 44900 - }, - { - "epoch": 0.18144208276603224, - "grad_norm": 1119.50048828125, - "learning_rate": 4.904018282455697e-05, - "loss": 49.5603, - "step": 44910 - }, - { - "epoch": 0.18148248403139985, - "grad_norm": 1104.3919677734375, - "learning_rate": 4.9039224651666325e-05, - "loss": 89.8467, - "step": 44920 - }, - { - "epoch": 0.1815228852967675, - "grad_norm": 852.224365234375, - "learning_rate": 4.903826601011861e-05, - "loss": 69.1575, - "step": 44930 - }, - { - "epoch": 0.18156328656213513, - "grad_norm": 473.6451110839844, - "learning_rate": 4.903730689993253e-05, - "loss": 92.4134, - "step": 44940 - }, - { - "epoch": 0.18160368782750275, - "grad_norm": 406.23199462890625, - "learning_rate": 4.903634732112678e-05, - "loss": 42.3387, - "step": 44950 - }, - { - "epoch": 0.1816440890928704, - "grad_norm": 1381.426025390625, - "learning_rate": 4.903538727372005e-05, - "loss": 81.0296, - "step": 44960 - }, - { - "epoch": 0.18168449035823803, - "grad_norm": 1150.0416259765625, - "learning_rate": 4.903442675773108e-05, - "loss": 67.7141, - "step": 44970 - }, - { - "epoch": 0.18172489162360564, - "grad_norm": 816.09619140625, - "learning_rate": 4.903346577317859e-05, - "loss": 85.8159, - "step": 44980 - }, - { - "epoch": 0.18176529288897328, - "grad_norm": 602.0270385742188, - "learning_rate": 4.90325043200813e-05, - "loss": 63.4132, - "step": 44990 - }, - { - "epoch": 0.18180569415434092, - "grad_norm": 815.766357421875, - "learning_rate": 4.9031542398457974e-05, - "loss": 88.5122, - "step": 45000 - }, - { - "epoch": 0.18184609541970856, - "grad_norm": 863.7614135742188, - "learning_rate": 4.9030580008327353e-05, - "loss": 91.4247, - "step": 45010 - }, - { - "epoch": 0.18188649668507617, - "grad_norm": 2180.32373046875, - "learning_rate": 4.902961714970821e-05, - "loss": 104.5198, - "step": 45020 - }, - { - "epoch": 0.1819268979504438, - "grad_norm": 527.2975463867188, - "learning_rate": 4.90286538226193e-05, - "loss": 89.9132, - "step": 45030 - }, - { - "epoch": 0.18196729921581145, - "grad_norm": 797.1927490234375, - "learning_rate": 4.902769002707942e-05, - "loss": 60.7632, - "step": 45040 - }, - { - "epoch": 0.18200770048117906, - "grad_norm": 1439.7625732421875, - "learning_rate": 4.902672576310735e-05, - "loss": 94.8452, - "step": 45050 - }, - { - "epoch": 0.1820481017465467, - "grad_norm": 962.4320678710938, - "learning_rate": 4.902576103072189e-05, - "loss": 93.4051, - "step": 45060 - }, - { - "epoch": 0.18208850301191434, - "grad_norm": 509.36468505859375, - "learning_rate": 4.902479582994185e-05, - "loss": 101.6452, - "step": 45070 - }, - { - "epoch": 0.18212890427728196, - "grad_norm": 674.908447265625, - "learning_rate": 4.902383016078605e-05, - "loss": 82.7371, - "step": 45080 - }, - { - "epoch": 0.1821693055426496, - "grad_norm": 581.795166015625, - "learning_rate": 4.902286402327331e-05, - "loss": 78.2122, - "step": 45090 - }, - { - "epoch": 0.18220970680801724, - "grad_norm": 1014.2080078125, - "learning_rate": 4.902189741742247e-05, - "loss": 93.4292, - "step": 45100 - }, - { - "epoch": 0.18225010807338485, - "grad_norm": 1033.72412109375, - "learning_rate": 4.902093034325237e-05, - "loss": 86.2924, - "step": 45110 - }, - { - "epoch": 0.1822905093387525, - "grad_norm": 553.49755859375, - "learning_rate": 4.901996280078186e-05, - "loss": 115.3292, - "step": 45120 - }, - { - "epoch": 0.18233091060412013, - "grad_norm": 867.1639404296875, - "learning_rate": 4.901899479002982e-05, - "loss": 64.9478, - "step": 45130 - }, - { - "epoch": 0.18237131186948774, - "grad_norm": 314.60528564453125, - "learning_rate": 4.901802631101511e-05, - "loss": 65.43, - "step": 45140 - }, - { - "epoch": 0.18241171313485538, - "grad_norm": 676.8156127929688, - "learning_rate": 4.90170573637566e-05, - "loss": 71.8611, - "step": 45150 - }, - { - "epoch": 0.18245211440022302, - "grad_norm": 947.8834228515625, - "learning_rate": 4.90160879482732e-05, - "loss": 94.2625, - "step": 45160 - }, - { - "epoch": 0.18249251566559066, - "grad_norm": 628.1900634765625, - "learning_rate": 4.901511806458381e-05, - "loss": 59.9568, - "step": 45170 - }, - { - "epoch": 0.18253291693095827, - "grad_norm": 1697.5426025390625, - "learning_rate": 4.9014147712707316e-05, - "loss": 78.6836, - "step": 45180 - }, - { - "epoch": 0.1825733181963259, - "grad_norm": 1006.1732177734375, - "learning_rate": 4.9013176892662654e-05, - "loss": 82.129, - "step": 45190 - }, - { - "epoch": 0.18261371946169355, - "grad_norm": 1150.0892333984375, - "learning_rate": 4.9012205604468744e-05, - "loss": 117.7069, - "step": 45200 - }, - { - "epoch": 0.18265412072706116, - "grad_norm": 707.0164184570312, - "learning_rate": 4.9011233848144525e-05, - "loss": 82.716, - "step": 45210 - }, - { - "epoch": 0.1826945219924288, - "grad_norm": 355.14959716796875, - "learning_rate": 4.9010261623708944e-05, - "loss": 47.5373, - "step": 45220 - }, - { - "epoch": 0.18273492325779644, - "grad_norm": 784.4992065429688, - "learning_rate": 4.9009288931180947e-05, - "loss": 82.2301, - "step": 45230 - }, - { - "epoch": 0.18277532452316406, - "grad_norm": 1023.0245361328125, - "learning_rate": 4.90083157705795e-05, - "loss": 67.702, - "step": 45240 - }, - { - "epoch": 0.1828157257885317, - "grad_norm": 801.0816040039062, - "learning_rate": 4.900734214192358e-05, - "loss": 76.0119, - "step": 45250 - }, - { - "epoch": 0.18285612705389934, - "grad_norm": 1161.0206298828125, - "learning_rate": 4.900636804523217e-05, - "loss": 75.7235, - "step": 45260 - }, - { - "epoch": 0.18289652831926695, - "grad_norm": 1342.0335693359375, - "learning_rate": 4.900539348052424e-05, - "loss": 82.4319, - "step": 45270 - }, - { - "epoch": 0.1829369295846346, - "grad_norm": 1176.3917236328125, - "learning_rate": 4.9004418447818815e-05, - "loss": 97.9844, - "step": 45280 - }, - { - "epoch": 0.18297733085000223, - "grad_norm": 1022.2837524414062, - "learning_rate": 4.9003442947134895e-05, - "loss": 78.1378, - "step": 45290 - }, - { - "epoch": 0.18301773211536984, - "grad_norm": 816.5732421875, - "learning_rate": 4.90024669784915e-05, - "loss": 70.9764, - "step": 45300 - }, - { - "epoch": 0.18305813338073748, - "grad_norm": 830.2484741210938, - "learning_rate": 4.9001490541907645e-05, - "loss": 62.789, - "step": 45310 - }, - { - "epoch": 0.18309853464610512, - "grad_norm": 1164.3995361328125, - "learning_rate": 4.900051363740238e-05, - "loss": 63.8086, - "step": 45320 - }, - { - "epoch": 0.18313893591147276, - "grad_norm": 1487.6907958984375, - "learning_rate": 4.899953626499475e-05, - "loss": 78.6914, - "step": 45330 - }, - { - "epoch": 0.18317933717684037, - "grad_norm": 570.93212890625, - "learning_rate": 4.89985584247038e-05, - "loss": 80.8218, - "step": 45340 - }, - { - "epoch": 0.183219738442208, - "grad_norm": 822.6448974609375, - "learning_rate": 4.8997580116548595e-05, - "loss": 105.5186, - "step": 45350 - }, - { - "epoch": 0.18326013970757565, - "grad_norm": 1853.758544921875, - "learning_rate": 4.8996601340548215e-05, - "loss": 116.5778, - "step": 45360 - }, - { - "epoch": 0.18330054097294327, - "grad_norm": 712.744140625, - "learning_rate": 4.899562209672174e-05, - "loss": 67.3311, - "step": 45370 - }, - { - "epoch": 0.1833409422383109, - "grad_norm": 752.5093994140625, - "learning_rate": 4.899464238508825e-05, - "loss": 75.8195, - "step": 45380 - }, - { - "epoch": 0.18338134350367855, - "grad_norm": 1256.2913818359375, - "learning_rate": 4.899366220566686e-05, - "loss": 112.7811, - "step": 45390 - }, - { - "epoch": 0.18342174476904616, - "grad_norm": 1340.5006103515625, - "learning_rate": 4.899268155847667e-05, - "loss": 103.8136, - "step": 45400 - }, - { - "epoch": 0.1834621460344138, - "grad_norm": 698.527587890625, - "learning_rate": 4.89917004435368e-05, - "loss": 85.698, - "step": 45410 - }, - { - "epoch": 0.18350254729978144, - "grad_norm": 697.1134033203125, - "learning_rate": 4.899071886086638e-05, - "loss": 95.746, - "step": 45420 - }, - { - "epoch": 0.18354294856514905, - "grad_norm": 860.381591796875, - "learning_rate": 4.898973681048454e-05, - "loss": 71.6687, - "step": 45430 - }, - { - "epoch": 0.1835833498305167, - "grad_norm": 300.5081481933594, - "learning_rate": 4.898875429241044e-05, - "loss": 57.1445, - "step": 45440 - }, - { - "epoch": 0.18362375109588433, - "grad_norm": 325.8232727050781, - "learning_rate": 4.898777130666322e-05, - "loss": 102.5872, - "step": 45450 - }, - { - "epoch": 0.18366415236125194, - "grad_norm": 714.4207153320312, - "learning_rate": 4.898678785326205e-05, - "loss": 96.0007, - "step": 45460 - }, - { - "epoch": 0.18370455362661958, - "grad_norm": 879.5983276367188, - "learning_rate": 4.8985803932226094e-05, - "loss": 91.5776, - "step": 45470 - }, - { - "epoch": 0.18374495489198722, - "grad_norm": 1637.9261474609375, - "learning_rate": 4.898481954357455e-05, - "loss": 108.3861, - "step": 45480 - }, - { - "epoch": 0.18378535615735486, - "grad_norm": 839.2368774414062, - "learning_rate": 4.8983834687326596e-05, - "loss": 96.1661, - "step": 45490 - }, - { - "epoch": 0.18382575742272247, - "grad_norm": 1169.771240234375, - "learning_rate": 4.898284936350144e-05, - "loss": 102.8537, - "step": 45500 - }, - { - "epoch": 0.18386615868809011, - "grad_norm": 760.4490966796875, - "learning_rate": 4.898186357211829e-05, - "loss": 74.1794, - "step": 45510 - }, - { - "epoch": 0.18390655995345775, - "grad_norm": 784.6276245117188, - "learning_rate": 4.898087731319636e-05, - "loss": 67.2023, - "step": 45520 - }, - { - "epoch": 0.18394696121882537, - "grad_norm": 1203.34228515625, - "learning_rate": 4.8979890586754875e-05, - "loss": 104.9335, - "step": 45530 - }, - { - "epoch": 0.183987362484193, - "grad_norm": 1361.6192626953125, - "learning_rate": 4.897890339281309e-05, - "loss": 83.1708, - "step": 45540 - }, - { - "epoch": 0.18402776374956065, - "grad_norm": 1109.7901611328125, - "learning_rate": 4.897791573139023e-05, - "loss": 89.0058, - "step": 45550 - }, - { - "epoch": 0.18406816501492826, - "grad_norm": 945.8731689453125, - "learning_rate": 4.897692760250556e-05, - "loss": 124.9025, - "step": 45560 - }, - { - "epoch": 0.1841085662802959, - "grad_norm": 931.4639282226562, - "learning_rate": 4.897593900617834e-05, - "loss": 87.5932, - "step": 45570 - }, - { - "epoch": 0.18414896754566354, - "grad_norm": 928.37646484375, - "learning_rate": 4.897494994242785e-05, - "loss": 121.7126, - "step": 45580 - }, - { - "epoch": 0.18418936881103115, - "grad_norm": 1160.150634765625, - "learning_rate": 4.8973960411273364e-05, - "loss": 88.2009, - "step": 45590 - }, - { - "epoch": 0.1842297700763988, - "grad_norm": 1830.1046142578125, - "learning_rate": 4.8972970412734176e-05, - "loss": 94.9112, - "step": 45600 - }, - { - "epoch": 0.18427017134176643, - "grad_norm": 760.2313232421875, - "learning_rate": 4.897197994682959e-05, - "loss": 66.2683, - "step": 45610 - }, - { - "epoch": 0.18431057260713404, - "grad_norm": 1265.816650390625, - "learning_rate": 4.897098901357891e-05, - "loss": 145.3422, - "step": 45620 - }, - { - "epoch": 0.18435097387250168, - "grad_norm": 1088.4708251953125, - "learning_rate": 4.896999761300146e-05, - "loss": 124.1203, - "step": 45630 - }, - { - "epoch": 0.18439137513786932, - "grad_norm": 2023.6080322265625, - "learning_rate": 4.896900574511657e-05, - "loss": 89.7067, - "step": 45640 - }, - { - "epoch": 0.18443177640323694, - "grad_norm": 467.109130859375, - "learning_rate": 4.896801340994357e-05, - "loss": 67.6761, - "step": 45650 - }, - { - "epoch": 0.18447217766860458, - "grad_norm": 1232.957763671875, - "learning_rate": 4.896702060750181e-05, - "loss": 87.1448, - "step": 45660 - }, - { - "epoch": 0.18451257893397222, - "grad_norm": 1274.385009765625, - "learning_rate": 4.896602733781065e-05, - "loss": 90.0393, - "step": 45670 - }, - { - "epoch": 0.18455298019933986, - "grad_norm": 913.9437255859375, - "learning_rate": 4.8965033600889435e-05, - "loss": 58.4704, - "step": 45680 - }, - { - "epoch": 0.18459338146470747, - "grad_norm": 1077.12744140625, - "learning_rate": 4.896403939675756e-05, - "loss": 101.7226, - "step": 45690 - }, - { - "epoch": 0.1846337827300751, - "grad_norm": 648.5958862304688, - "learning_rate": 4.89630447254344e-05, - "loss": 59.2482, - "step": 45700 - }, - { - "epoch": 0.18467418399544275, - "grad_norm": 1022.220703125, - "learning_rate": 4.896204958693934e-05, - "loss": 62.0977, - "step": 45710 - }, - { - "epoch": 0.18471458526081036, - "grad_norm": 902.6193237304688, - "learning_rate": 4.8961053981291795e-05, - "loss": 57.4871, - "step": 45720 - }, - { - "epoch": 0.184754986526178, - "grad_norm": 971.0794067382812, - "learning_rate": 4.896005790851116e-05, - "loss": 67.6352, - "step": 45730 - }, - { - "epoch": 0.18479538779154564, - "grad_norm": 696.0235595703125, - "learning_rate": 4.8959061368616863e-05, - "loss": 60.1176, - "step": 45740 - }, - { - "epoch": 0.18483578905691325, - "grad_norm": 403.1430969238281, - "learning_rate": 4.895806436162833e-05, - "loss": 92.4626, - "step": 45750 - }, - { - "epoch": 0.1848761903222809, - "grad_norm": 524.8573608398438, - "learning_rate": 4.8957066887565e-05, - "loss": 82.3987, - "step": 45760 - }, - { - "epoch": 0.18491659158764853, - "grad_norm": 671.1500244140625, - "learning_rate": 4.8956068946446314e-05, - "loss": 63.0742, - "step": 45770 - }, - { - "epoch": 0.18495699285301614, - "grad_norm": 1303.492431640625, - "learning_rate": 4.8955070538291735e-05, - "loss": 80.9564, - "step": 45780 - }, - { - "epoch": 0.18499739411838378, - "grad_norm": 433.896240234375, - "learning_rate": 4.8954071663120715e-05, - "loss": 58.4002, - "step": 45790 - }, - { - "epoch": 0.18503779538375142, - "grad_norm": 504.2688903808594, - "learning_rate": 4.8953072320952745e-05, - "loss": 73.8806, - "step": 45800 - }, - { - "epoch": 0.18507819664911904, - "grad_norm": 974.8590087890625, - "learning_rate": 4.895207251180729e-05, - "loss": 112.7232, - "step": 45810 - }, - { - "epoch": 0.18511859791448668, - "grad_norm": 1069.8795166015625, - "learning_rate": 4.8951072235703855e-05, - "loss": 108.1218, - "step": 45820 - }, - { - "epoch": 0.18515899917985432, - "grad_norm": 385.00994873046875, - "learning_rate": 4.895007149266193e-05, - "loss": 55.9683, - "step": 45830 - }, - { - "epoch": 0.18519940044522196, - "grad_norm": 680.8944091796875, - "learning_rate": 4.8949070282701034e-05, - "loss": 81.2045, - "step": 45840 - }, - { - "epoch": 0.18523980171058957, - "grad_norm": 885.607177734375, - "learning_rate": 4.8948068605840694e-05, - "loss": 83.6066, - "step": 45850 - }, - { - "epoch": 0.1852802029759572, - "grad_norm": 794.8469848632812, - "learning_rate": 4.894706646210041e-05, - "loss": 97.3249, - "step": 45860 - }, - { - "epoch": 0.18532060424132485, - "grad_norm": 865.634033203125, - "learning_rate": 4.8946063851499746e-05, - "loss": 51.3998, - "step": 45870 - }, - { - "epoch": 0.18536100550669246, - "grad_norm": 882.5504760742188, - "learning_rate": 4.894506077405824e-05, - "loss": 65.3006, - "step": 45880 - }, - { - "epoch": 0.1854014067720601, - "grad_norm": 712.5584106445312, - "learning_rate": 4.894405722979544e-05, - "loss": 60.6922, - "step": 45890 - }, - { - "epoch": 0.18544180803742774, - "grad_norm": 1048.72216796875, - "learning_rate": 4.894305321873092e-05, - "loss": 79.0462, - "step": 45900 - }, - { - "epoch": 0.18548220930279535, - "grad_norm": 757.8132934570312, - "learning_rate": 4.894204874088425e-05, - "loss": 82.1683, - "step": 45910 - }, - { - "epoch": 0.185522610568163, - "grad_norm": 946.23828125, - "learning_rate": 4.8941043796275015e-05, - "loss": 80.1746, - "step": 45920 - }, - { - "epoch": 0.18556301183353063, - "grad_norm": 1357.65380859375, - "learning_rate": 4.8940038384922806e-05, - "loss": 77.1498, - "step": 45930 - }, - { - "epoch": 0.18560341309889825, - "grad_norm": 1069.114990234375, - "learning_rate": 4.8939032506847224e-05, - "loss": 97.8804, - "step": 45940 - }, - { - "epoch": 0.18564381436426589, - "grad_norm": 1382.1502685546875, - "learning_rate": 4.893802616206787e-05, - "loss": 108.694, - "step": 45950 - }, - { - "epoch": 0.18568421562963353, - "grad_norm": 1493.73486328125, - "learning_rate": 4.893701935060439e-05, - "loss": 81.302, - "step": 45960 - }, - { - "epoch": 0.18572461689500114, - "grad_norm": 873.5044555664062, - "learning_rate": 4.893601207247638e-05, - "loss": 76.8872, - "step": 45970 - }, - { - "epoch": 0.18576501816036878, - "grad_norm": 2981.951416015625, - "learning_rate": 4.893500432770349e-05, - "loss": 68.5081, - "step": 45980 - }, - { - "epoch": 0.18580541942573642, - "grad_norm": 396.176513671875, - "learning_rate": 4.893399611630538e-05, - "loss": 65.5169, - "step": 45990 - }, - { - "epoch": 0.18584582069110406, - "grad_norm": 1273.2371826171875, - "learning_rate": 4.893298743830168e-05, - "loss": 69.0687, - "step": 46000 - }, - { - "epoch": 0.18588622195647167, - "grad_norm": 2934.45361328125, - "learning_rate": 4.8931978293712074e-05, - "loss": 67.4254, - "step": 46010 - }, - { - "epoch": 0.1859266232218393, - "grad_norm": 665.5234985351562, - "learning_rate": 4.8930968682556234e-05, - "loss": 49.5943, - "step": 46020 - }, - { - "epoch": 0.18596702448720695, - "grad_norm": 688.6306762695312, - "learning_rate": 4.892995860485384e-05, - "loss": 50.1611, - "step": 46030 - }, - { - "epoch": 0.18600742575257456, - "grad_norm": 544.0740356445312, - "learning_rate": 4.892894806062458e-05, - "loss": 90.3554, - "step": 46040 - }, - { - "epoch": 0.1860478270179422, - "grad_norm": 817.1072998046875, - "learning_rate": 4.892793704988816e-05, - "loss": 100.2068, - "step": 46050 - }, - { - "epoch": 0.18608822828330984, - "grad_norm": 901.2867431640625, - "learning_rate": 4.892692557266429e-05, - "loss": 82.2532, - "step": 46060 - }, - { - "epoch": 0.18612862954867745, - "grad_norm": 670.2236328125, - "learning_rate": 4.892591362897268e-05, - "loss": 113.8737, - "step": 46070 - }, - { - "epoch": 0.1861690308140451, - "grad_norm": 500.85870361328125, - "learning_rate": 4.892490121883306e-05, - "loss": 81.973, - "step": 46080 - }, - { - "epoch": 0.18620943207941273, - "grad_norm": 0.0, - "learning_rate": 4.892388834226519e-05, - "loss": 63.0598, - "step": 46090 - }, - { - "epoch": 0.18624983334478035, - "grad_norm": 1111.806396484375, - "learning_rate": 4.892287499928879e-05, - "loss": 70.0805, - "step": 46100 - }, - { - "epoch": 0.186290234610148, - "grad_norm": 1321.56884765625, - "learning_rate": 4.892186118992362e-05, - "loss": 118.3066, - "step": 46110 - }, - { - "epoch": 0.18633063587551563, - "grad_norm": 916.1724243164062, - "learning_rate": 4.892084691418947e-05, - "loss": 99.126, - "step": 46120 - }, - { - "epoch": 0.18637103714088324, - "grad_norm": 1164.2841796875, - "learning_rate": 4.891983217210607e-05, - "loss": 66.5533, - "step": 46130 - }, - { - "epoch": 0.18641143840625088, - "grad_norm": 589.622314453125, - "learning_rate": 4.891881696369325e-05, - "loss": 76.5307, - "step": 46140 - }, - { - "epoch": 0.18645183967161852, - "grad_norm": 1253.39306640625, - "learning_rate": 4.891780128897077e-05, - "loss": 72.5351, - "step": 46150 - }, - { - "epoch": 0.18649224093698616, - "grad_norm": 1563.49658203125, - "learning_rate": 4.891678514795843e-05, - "loss": 93.3726, - "step": 46160 - }, - { - "epoch": 0.18653264220235377, - "grad_norm": 3200.355712890625, - "learning_rate": 4.891576854067607e-05, - "loss": 124.7522, - "step": 46170 - }, - { - "epoch": 0.1865730434677214, - "grad_norm": 897.306640625, - "learning_rate": 4.891475146714347e-05, - "loss": 68.6371, - "step": 46180 - }, - { - "epoch": 0.18661344473308905, - "grad_norm": 992.8071899414062, - "learning_rate": 4.891373392738049e-05, - "loss": 80.4052, - "step": 46190 - }, - { - "epoch": 0.18665384599845666, - "grad_norm": 947.232421875, - "learning_rate": 4.891271592140695e-05, - "loss": 92.7935, - "step": 46200 - }, - { - "epoch": 0.1866942472638243, - "grad_norm": 1065.53759765625, - "learning_rate": 4.891169744924271e-05, - "loss": 180.0239, - "step": 46210 - }, - { - "epoch": 0.18673464852919194, - "grad_norm": 960.1724243164062, - "learning_rate": 4.8910678510907606e-05, - "loss": 61.9903, - "step": 46220 - }, - { - "epoch": 0.18677504979455956, - "grad_norm": 1468.2706298828125, - "learning_rate": 4.890965910642152e-05, - "loss": 79.8836, - "step": 46230 - }, - { - "epoch": 0.1868154510599272, - "grad_norm": 0.0, - "learning_rate": 4.8908639235804324e-05, - "loss": 87.0793, - "step": 46240 - }, - { - "epoch": 0.18685585232529484, - "grad_norm": 0.0, - "learning_rate": 4.890761889907589e-05, - "loss": 78.9375, - "step": 46250 - }, - { - "epoch": 0.18689625359066245, - "grad_norm": 1007.9310302734375, - "learning_rate": 4.890659809625612e-05, - "loss": 102.4231, - "step": 46260 - }, - { - "epoch": 0.1869366548560301, - "grad_norm": 794.476318359375, - "learning_rate": 4.890557682736491e-05, - "loss": 84.0354, - "step": 46270 - }, - { - "epoch": 0.18697705612139773, - "grad_norm": 530.2271118164062, - "learning_rate": 4.890455509242218e-05, - "loss": 68.9714, - "step": 46280 - }, - { - "epoch": 0.18701745738676534, - "grad_norm": 1666.1671142578125, - "learning_rate": 4.8903532891447836e-05, - "loss": 84.9628, - "step": 46290 - }, - { - "epoch": 0.18705785865213298, - "grad_norm": 787.04833984375, - "learning_rate": 4.890251022446181e-05, - "loss": 75.6712, - "step": 46300 - }, - { - "epoch": 0.18709825991750062, - "grad_norm": 2045.2965087890625, - "learning_rate": 4.890148709148404e-05, - "loss": 98.4277, - "step": 46310 - }, - { - "epoch": 0.18713866118286826, - "grad_norm": 949.1669311523438, - "learning_rate": 4.890046349253448e-05, - "loss": 63.0205, - "step": 46320 - }, - { - "epoch": 0.18717906244823587, - "grad_norm": 578.8233642578125, - "learning_rate": 4.8899439427633076e-05, - "loss": 47.0746, - "step": 46330 - }, - { - "epoch": 0.1872194637136035, - "grad_norm": 406.12615966796875, - "learning_rate": 4.88984148967998e-05, - "loss": 53.2635, - "step": 46340 - }, - { - "epoch": 0.18725986497897115, - "grad_norm": 1593.3697509765625, - "learning_rate": 4.889738990005462e-05, - "loss": 93.6755, - "step": 46350 - }, - { - "epoch": 0.18730026624433876, - "grad_norm": 722.7239990234375, - "learning_rate": 4.889636443741752e-05, - "loss": 77.3317, - "step": 46360 - }, - { - "epoch": 0.1873406675097064, - "grad_norm": 2375.264892578125, - "learning_rate": 4.88953385089085e-05, - "loss": 88.7548, - "step": 46370 - }, - { - "epoch": 0.18738106877507404, - "grad_norm": 641.643798828125, - "learning_rate": 4.8894312114547535e-05, - "loss": 95.3137, - "step": 46380 - }, - { - "epoch": 0.18742147004044166, - "grad_norm": 1006.8427734375, - "learning_rate": 4.889328525435467e-05, - "loss": 74.9918, - "step": 46390 - }, - { - "epoch": 0.1874618713058093, - "grad_norm": 638.9917602539062, - "learning_rate": 4.889225792834991e-05, - "loss": 73.9908, - "step": 46400 - }, - { - "epoch": 0.18750227257117694, - "grad_norm": 2413.532470703125, - "learning_rate": 4.889123013655327e-05, - "loss": 129.9344, - "step": 46410 - }, - { - "epoch": 0.18754267383654455, - "grad_norm": 447.9295349121094, - "learning_rate": 4.8890201878984796e-05, - "loss": 79.6906, - "step": 46420 - }, - { - "epoch": 0.1875830751019122, - "grad_norm": 1658.515380859375, - "learning_rate": 4.888917315566455e-05, - "loss": 74.6684, - "step": 46430 - }, - { - "epoch": 0.18762347636727983, - "grad_norm": 814.1004028320312, - "learning_rate": 4.888814396661256e-05, - "loss": 64.3334, - "step": 46440 - }, - { - "epoch": 0.18766387763264744, - "grad_norm": 723.90673828125, - "learning_rate": 4.8887114311848915e-05, - "loss": 98.6432, - "step": 46450 - }, - { - "epoch": 0.18770427889801508, - "grad_norm": 535.9703369140625, - "learning_rate": 4.8886084191393677e-05, - "loss": 50.3761, - "step": 46460 - }, - { - "epoch": 0.18774468016338272, - "grad_norm": 452.6556701660156, - "learning_rate": 4.888505360526693e-05, - "loss": 109.0327, - "step": 46470 - }, - { - "epoch": 0.18778508142875036, - "grad_norm": 691.0087280273438, - "learning_rate": 4.888402255348876e-05, - "loss": 101.6102, - "step": 46480 - }, - { - "epoch": 0.18782548269411797, - "grad_norm": 391.9655456542969, - "learning_rate": 4.888299103607928e-05, - "loss": 82.483, - "step": 46490 - }, - { - "epoch": 0.1878658839594856, - "grad_norm": 898.39892578125, - "learning_rate": 4.888195905305859e-05, - "loss": 58.2394, - "step": 46500 - }, - { - "epoch": 0.18790628522485325, - "grad_norm": 902.1316528320312, - "learning_rate": 4.888092660444682e-05, - "loss": 48.3732, - "step": 46510 - }, - { - "epoch": 0.18794668649022087, - "grad_norm": 873.6351928710938, - "learning_rate": 4.887989369026409e-05, - "loss": 55.4155, - "step": 46520 - }, - { - "epoch": 0.1879870877555885, - "grad_norm": 684.0804443359375, - "learning_rate": 4.887886031053053e-05, - "loss": 80.0627, - "step": 46530 - }, - { - "epoch": 0.18802748902095615, - "grad_norm": 932.9844360351562, - "learning_rate": 4.887782646526631e-05, - "loss": 62.5363, - "step": 46540 - }, - { - "epoch": 0.18806789028632376, - "grad_norm": 676.0230102539062, - "learning_rate": 4.8876792154491556e-05, - "loss": 67.6069, - "step": 46550 - }, - { - "epoch": 0.1881082915516914, - "grad_norm": 673.8368530273438, - "learning_rate": 4.887575737822645e-05, - "loss": 62.4173, - "step": 46560 - }, - { - "epoch": 0.18814869281705904, - "grad_norm": 1231.91943359375, - "learning_rate": 4.8874722136491155e-05, - "loss": 58.3462, - "step": 46570 - }, - { - "epoch": 0.18818909408242665, - "grad_norm": 669.4678344726562, - "learning_rate": 4.887368642930588e-05, - "loss": 140.696, - "step": 46580 - }, - { - "epoch": 0.1882294953477943, - "grad_norm": 797.5696411132812, - "learning_rate": 4.887265025669078e-05, - "loss": 68.3853, - "step": 46590 - }, - { - "epoch": 0.18826989661316193, - "grad_norm": 722.3099975585938, - "learning_rate": 4.887161361866608e-05, - "loss": 101.0911, - "step": 46600 - }, - { - "epoch": 0.18831029787852954, - "grad_norm": 1002.6024169921875, - "learning_rate": 4.887057651525198e-05, - "loss": 109.1899, - "step": 46610 - }, - { - "epoch": 0.18835069914389718, - "grad_norm": 950.6636352539062, - "learning_rate": 4.8869538946468694e-05, - "loss": 59.6831, - "step": 46620 - }, - { - "epoch": 0.18839110040926482, - "grad_norm": 1075.5361328125, - "learning_rate": 4.8868500912336465e-05, - "loss": 73.4534, - "step": 46630 - }, - { - "epoch": 0.18843150167463246, - "grad_norm": 371.59368896484375, - "learning_rate": 4.8867462412875526e-05, - "loss": 94.6471, - "step": 46640 - }, - { - "epoch": 0.18847190294000007, - "grad_norm": 1247.0185546875, - "learning_rate": 4.886642344810611e-05, - "loss": 86.5103, - "step": 46650 - }, - { - "epoch": 0.18851230420536771, - "grad_norm": 86.08003234863281, - "learning_rate": 4.8865384018048494e-05, - "loss": 77.3341, - "step": 46660 - }, - { - "epoch": 0.18855270547073535, - "grad_norm": 517.982421875, - "learning_rate": 4.886434412272293e-05, - "loss": 58.2183, - "step": 46670 - }, - { - "epoch": 0.18859310673610297, - "grad_norm": 772.2702026367188, - "learning_rate": 4.886330376214968e-05, - "loss": 90.4057, - "step": 46680 - }, - { - "epoch": 0.1886335080014706, - "grad_norm": 827.40234375, - "learning_rate": 4.886226293634904e-05, - "loss": 62.3181, - "step": 46690 - }, - { - "epoch": 0.18867390926683825, - "grad_norm": 662.444580078125, - "learning_rate": 4.886122164534131e-05, - "loss": 72.7424, - "step": 46700 - }, - { - "epoch": 0.18871431053220586, - "grad_norm": 423.5435791015625, - "learning_rate": 4.886017988914676e-05, - "loss": 132.9958, - "step": 46710 - }, - { - "epoch": 0.1887547117975735, - "grad_norm": 773.7642822265625, - "learning_rate": 4.8859137667785735e-05, - "loss": 112.7814, - "step": 46720 - }, - { - "epoch": 0.18879511306294114, - "grad_norm": 942.0858154296875, - "learning_rate": 4.8858094981278524e-05, - "loss": 74.1822, - "step": 46730 - }, - { - "epoch": 0.18883551432830875, - "grad_norm": 623.146484375, - "learning_rate": 4.8857051829645485e-05, - "loss": 67.047, - "step": 46740 - }, - { - "epoch": 0.1888759155936764, - "grad_norm": 2191.332275390625, - "learning_rate": 4.8856008212906925e-05, - "loss": 74.4607, - "step": 46750 - }, - { - "epoch": 0.18891631685904403, - "grad_norm": 796.4672241210938, - "learning_rate": 4.88549641310832e-05, - "loss": 65.4805, - "step": 46760 - }, - { - "epoch": 0.18895671812441164, - "grad_norm": 897.36181640625, - "learning_rate": 4.885391958419468e-05, - "loss": 104.9449, - "step": 46770 - }, - { - "epoch": 0.18899711938977928, - "grad_norm": 4131.201171875, - "learning_rate": 4.885287457226172e-05, - "loss": 109.5929, - "step": 46780 - }, - { - "epoch": 0.18903752065514692, - "grad_norm": 1081.7864990234375, - "learning_rate": 4.885182909530468e-05, - "loss": 101.8282, - "step": 46790 - }, - { - "epoch": 0.18907792192051456, - "grad_norm": 761.68408203125, - "learning_rate": 4.885078315334395e-05, - "loss": 59.1006, - "step": 46800 - }, - { - "epoch": 0.18911832318588218, - "grad_norm": 1586.106689453125, - "learning_rate": 4.884973674639993e-05, - "loss": 71.1058, - "step": 46810 - }, - { - "epoch": 0.18915872445124982, - "grad_norm": 1164.787841796875, - "learning_rate": 4.884868987449301e-05, - "loss": 64.9034, - "step": 46820 - }, - { - "epoch": 0.18919912571661746, - "grad_norm": 629.8527221679688, - "learning_rate": 4.8847642537643604e-05, - "loss": 71.8605, - "step": 46830 - }, - { - "epoch": 0.18923952698198507, - "grad_norm": 824.6557006835938, - "learning_rate": 4.884659473587213e-05, - "loss": 91.3435, - "step": 46840 - }, - { - "epoch": 0.1892799282473527, - "grad_norm": 213.20458984375, - "learning_rate": 4.884554646919901e-05, - "loss": 64.7744, - "step": 46850 - }, - { - "epoch": 0.18932032951272035, - "grad_norm": 957.3148803710938, - "learning_rate": 4.884449773764469e-05, - "loss": 68.4271, - "step": 46860 - }, - { - "epoch": 0.18936073077808796, - "grad_norm": 1541.1634521484375, - "learning_rate": 4.884344854122961e-05, - "loss": 93.5629, - "step": 46870 - }, - { - "epoch": 0.1894011320434556, - "grad_norm": 1493.8668212890625, - "learning_rate": 4.884239887997423e-05, - "loss": 80.8613, - "step": 46880 - }, - { - "epoch": 0.18944153330882324, - "grad_norm": 678.560546875, - "learning_rate": 4.8841348753899e-05, - "loss": 87.8491, - "step": 46890 - }, - { - "epoch": 0.18948193457419085, - "grad_norm": 711.7306518554688, - "learning_rate": 4.88402981630244e-05, - "loss": 93.6465, - "step": 46900 - }, - { - "epoch": 0.1895223358395585, - "grad_norm": 554.8831176757812, - "learning_rate": 4.883924710737092e-05, - "loss": 73.9221, - "step": 46910 - }, - { - "epoch": 0.18956273710492613, - "grad_norm": 1330.556884765625, - "learning_rate": 4.8838195586959046e-05, - "loss": 79.4741, - "step": 46920 - }, - { - "epoch": 0.18960313837029374, - "grad_norm": 598.888427734375, - "learning_rate": 4.883714360180927e-05, - "loss": 66.7534, - "step": 46930 - }, - { - "epoch": 0.18964353963566138, - "grad_norm": 614.5370483398438, - "learning_rate": 4.883609115194211e-05, - "loss": 90.6562, - "step": 46940 - }, - { - "epoch": 0.18968394090102902, - "grad_norm": 1026.0899658203125, - "learning_rate": 4.883503823737808e-05, - "loss": 86.146, - "step": 46950 - }, - { - "epoch": 0.18972434216639666, - "grad_norm": 1076.3065185546875, - "learning_rate": 4.8833984858137715e-05, - "loss": 74.9594, - "step": 46960 - }, - { - "epoch": 0.18976474343176428, - "grad_norm": 555.6848754882812, - "learning_rate": 4.8832931014241534e-05, - "loss": 69.4619, - "step": 46970 - }, - { - "epoch": 0.18980514469713192, - "grad_norm": 1051.6075439453125, - "learning_rate": 4.88318767057101e-05, - "loss": 67.6875, - "step": 46980 - }, - { - "epoch": 0.18984554596249956, - "grad_norm": 1332.5283203125, - "learning_rate": 4.883082193256397e-05, - "loss": 73.1094, - "step": 46990 - }, - { - "epoch": 0.18988594722786717, - "grad_norm": 1304.3714599609375, - "learning_rate": 4.882976669482367e-05, - "loss": 91.2835, - "step": 47000 - }, - { - "epoch": 0.1899263484932348, - "grad_norm": 1050.1197509765625, - "learning_rate": 4.882871099250982e-05, - "loss": 96.9666, - "step": 47010 - }, - { - "epoch": 0.18996674975860245, - "grad_norm": 2204.67626953125, - "learning_rate": 4.882765482564298e-05, - "loss": 79.6932, - "step": 47020 - }, - { - "epoch": 0.19000715102397006, - "grad_norm": 308.5193786621094, - "learning_rate": 4.882659819424374e-05, - "loss": 60.4664, - "step": 47030 - }, - { - "epoch": 0.1900475522893377, - "grad_norm": 760.51953125, - "learning_rate": 4.8825541098332706e-05, - "loss": 93.623, - "step": 47040 - }, - { - "epoch": 0.19008795355470534, - "grad_norm": 1053.7843017578125, - "learning_rate": 4.882448353793048e-05, - "loss": 72.0362, - "step": 47050 - }, - { - "epoch": 0.19012835482007295, - "grad_norm": 510.9100341796875, - "learning_rate": 4.8823425513057674e-05, - "loss": 76.0767, - "step": 47060 - }, - { - "epoch": 0.1901687560854406, - "grad_norm": 1290.3902587890625, - "learning_rate": 4.8822367023734925e-05, - "loss": 76.7432, - "step": 47070 - }, - { - "epoch": 0.19020915735080823, - "grad_norm": 669.0762939453125, - "learning_rate": 4.8821308069982867e-05, - "loss": 72.9376, - "step": 47080 - }, - { - "epoch": 0.19024955861617585, - "grad_norm": 708.4724731445312, - "learning_rate": 4.8820248651822145e-05, - "loss": 74.2905, - "step": 47090 - }, - { - "epoch": 0.19028995988154349, - "grad_norm": 810.4485473632812, - "learning_rate": 4.8819188769273414e-05, - "loss": 85.8345, - "step": 47100 - }, - { - "epoch": 0.19033036114691113, - "grad_norm": 1206.7943115234375, - "learning_rate": 4.8818128422357335e-05, - "loss": 62.0077, - "step": 47110 - }, - { - "epoch": 0.19037076241227877, - "grad_norm": 763.1806030273438, - "learning_rate": 4.881706761109458e-05, - "loss": 80.4839, - "step": 47120 - }, - { - "epoch": 0.19041116367764638, - "grad_norm": 1197.156005859375, - "learning_rate": 4.8816006335505825e-05, - "loss": 130.0592, - "step": 47130 - }, - { - "epoch": 0.19045156494301402, - "grad_norm": 522.46484375, - "learning_rate": 4.8814944595611776e-05, - "loss": 71.9689, - "step": 47140 - }, - { - "epoch": 0.19049196620838166, - "grad_norm": 653.2218627929688, - "learning_rate": 4.881388239143311e-05, - "loss": 88.4613, - "step": 47150 - }, - { - "epoch": 0.19053236747374927, - "grad_norm": 696.4855346679688, - "learning_rate": 4.881281972299055e-05, - "loss": 99.2132, - "step": 47160 - }, - { - "epoch": 0.1905727687391169, - "grad_norm": 779.6743774414062, - "learning_rate": 4.8811756590304815e-05, - "loss": 54.9467, - "step": 47170 - }, - { - "epoch": 0.19061317000448455, - "grad_norm": 489.7877197265625, - "learning_rate": 4.881069299339662e-05, - "loss": 98.4642, - "step": 47180 - }, - { - "epoch": 0.19065357126985216, - "grad_norm": 1307.5743408203125, - "learning_rate": 4.880962893228671e-05, - "loss": 77.5308, - "step": 47190 - }, - { - "epoch": 0.1906939725352198, - "grad_norm": 1113.280517578125, - "learning_rate": 4.880856440699582e-05, - "loss": 65.3198, - "step": 47200 - }, - { - "epoch": 0.19073437380058744, - "grad_norm": 859.1810913085938, - "learning_rate": 4.880749941754471e-05, - "loss": 119.8937, - "step": 47210 - }, - { - "epoch": 0.19077477506595505, - "grad_norm": 676.0623779296875, - "learning_rate": 4.8806433963954154e-05, - "loss": 88.6286, - "step": 47220 - }, - { - "epoch": 0.1908151763313227, - "grad_norm": 922.7902221679688, - "learning_rate": 4.880536804624491e-05, - "loss": 102.1759, - "step": 47230 - }, - { - "epoch": 0.19085557759669033, - "grad_norm": 1272.20068359375, - "learning_rate": 4.880430166443775e-05, - "loss": 107.2287, - "step": 47240 - }, - { - "epoch": 0.19089597886205795, - "grad_norm": 1099.0933837890625, - "learning_rate": 4.880323481855347e-05, - "loss": 71.5676, - "step": 47250 - }, - { - "epoch": 0.1909363801274256, - "grad_norm": 958.9481201171875, - "learning_rate": 4.880216750861288e-05, - "loss": 51.0081, - "step": 47260 - }, - { - "epoch": 0.19097678139279323, - "grad_norm": 850.7756958007812, - "learning_rate": 4.880109973463678e-05, - "loss": 102.9255, - "step": 47270 - }, - { - "epoch": 0.19101718265816087, - "grad_norm": 522.798095703125, - "learning_rate": 4.880003149664599e-05, - "loss": 73.2628, - "step": 47280 - }, - { - "epoch": 0.19105758392352848, - "grad_norm": 1138.0860595703125, - "learning_rate": 4.879896279466133e-05, - "loss": 93.8309, - "step": 47290 - }, - { - "epoch": 0.19109798518889612, - "grad_norm": 877.7420654296875, - "learning_rate": 4.8797893628703635e-05, - "loss": 68.0124, - "step": 47300 - }, - { - "epoch": 0.19113838645426376, - "grad_norm": 372.2701416015625, - "learning_rate": 4.879682399879375e-05, - "loss": 83.2249, - "step": 47310 - }, - { - "epoch": 0.19117878771963137, - "grad_norm": 600.6828002929688, - "learning_rate": 4.8795753904952534e-05, - "loss": 69.8246, - "step": 47320 - }, - { - "epoch": 0.191219188984999, - "grad_norm": 1143.0029296875, - "learning_rate": 4.879468334720085e-05, - "loss": 61.1958, - "step": 47330 - }, - { - "epoch": 0.19125959025036665, - "grad_norm": 1139.1126708984375, - "learning_rate": 4.879361232555956e-05, - "loss": 79.5726, - "step": 47340 - }, - { - "epoch": 0.19129999151573426, - "grad_norm": 1177.59033203125, - "learning_rate": 4.879254084004955e-05, - "loss": 84.1079, - "step": 47350 - }, - { - "epoch": 0.1913403927811019, - "grad_norm": 643.5814819335938, - "learning_rate": 4.8791468890691696e-05, - "loss": 89.2118, - "step": 47360 - }, - { - "epoch": 0.19138079404646954, - "grad_norm": 273.9512939453125, - "learning_rate": 4.879039647750692e-05, - "loss": 72.0723, - "step": 47370 - }, - { - "epoch": 0.19142119531183716, - "grad_norm": 670.9833984375, - "learning_rate": 4.8789323600516104e-05, - "loss": 79.4604, - "step": 47380 - }, - { - "epoch": 0.1914615965772048, - "grad_norm": 573.1339721679688, - "learning_rate": 4.8788250259740185e-05, - "loss": 57.4144, - "step": 47390 - }, - { - "epoch": 0.19150199784257244, - "grad_norm": 746.1387939453125, - "learning_rate": 4.878717645520008e-05, - "loss": 71.1703, - "step": 47400 - }, - { - "epoch": 0.19154239910794005, - "grad_norm": 770.6351318359375, - "learning_rate": 4.878610218691673e-05, - "loss": 71.4196, - "step": 47410 - }, - { - "epoch": 0.1915828003733077, - "grad_norm": 419.6976318359375, - "learning_rate": 4.878502745491106e-05, - "loss": 55.445, - "step": 47420 - }, - { - "epoch": 0.19162320163867533, - "grad_norm": 4431.18115234375, - "learning_rate": 4.8783952259204036e-05, - "loss": 75.606, - "step": 47430 - }, - { - "epoch": 0.19166360290404297, - "grad_norm": 534.2245483398438, - "learning_rate": 4.878287659981662e-05, - "loss": 74.2982, - "step": 47440 - }, - { - "epoch": 0.19170400416941058, - "grad_norm": 1988.7900390625, - "learning_rate": 4.878180047676978e-05, - "loss": 100.465, - "step": 47450 - }, - { - "epoch": 0.19174440543477822, - "grad_norm": 605.2179565429688, - "learning_rate": 4.87807238900845e-05, - "loss": 54.1781, - "step": 47460 - }, - { - "epoch": 0.19178480670014586, - "grad_norm": 1887.808837890625, - "learning_rate": 4.8779646839781765e-05, - "loss": 103.9908, - "step": 47470 - }, - { - "epoch": 0.19182520796551347, - "grad_norm": 1397.8228759765625, - "learning_rate": 4.877856932588257e-05, - "loss": 112.4378, - "step": 47480 - }, - { - "epoch": 0.1918656092308811, - "grad_norm": 1550.006103515625, - "learning_rate": 4.877749134840792e-05, - "loss": 104.1645, - "step": 47490 - }, - { - "epoch": 0.19190601049624875, - "grad_norm": 555.1295166015625, - "learning_rate": 4.877641290737884e-05, - "loss": 61.3664, - "step": 47500 - }, - { - "epoch": 0.19194641176161636, - "grad_norm": 1102.827392578125, - "learning_rate": 4.877533400281635e-05, - "loss": 119.7094, - "step": 47510 - }, - { - "epoch": 0.191986813026984, - "grad_norm": 890.612548828125, - "learning_rate": 4.877425463474148e-05, - "loss": 75.4402, - "step": 47520 - }, - { - "epoch": 0.19202721429235164, - "grad_norm": 586.08056640625, - "learning_rate": 4.877317480317528e-05, - "loss": 87.8783, - "step": 47530 - }, - { - "epoch": 0.19206761555771926, - "grad_norm": 574.8908081054688, - "learning_rate": 4.8772094508138796e-05, - "loss": 88.9533, - "step": 47540 - }, - { - "epoch": 0.1921080168230869, - "grad_norm": 761.5361938476562, - "learning_rate": 4.877101374965308e-05, - "loss": 62.7146, - "step": 47550 - }, - { - "epoch": 0.19214841808845454, - "grad_norm": 709.9255981445312, - "learning_rate": 4.8769932527739225e-05, - "loss": 78.0876, - "step": 47560 - }, - { - "epoch": 0.19218881935382215, - "grad_norm": 1276.592529296875, - "learning_rate": 4.87688508424183e-05, - "loss": 74.2328, - "step": 47570 - }, - { - "epoch": 0.1922292206191898, - "grad_norm": 1980.7711181640625, - "learning_rate": 4.876776869371139e-05, - "loss": 84.7106, - "step": 47580 - }, - { - "epoch": 0.19226962188455743, - "grad_norm": 1390.198486328125, - "learning_rate": 4.876668608163959e-05, - "loss": 89.8972, - "step": 47590 - }, - { - "epoch": 0.19231002314992507, - "grad_norm": 413.9287109375, - "learning_rate": 4.8765603006224006e-05, - "loss": 95.8701, - "step": 47600 - }, - { - "epoch": 0.19235042441529268, - "grad_norm": 1712.273193359375, - "learning_rate": 4.876451946748576e-05, - "loss": 123.683, - "step": 47610 - }, - { - "epoch": 0.19239082568066032, - "grad_norm": 1209.84423828125, - "learning_rate": 4.8763435465445964e-05, - "loss": 102.8881, - "step": 47620 - }, - { - "epoch": 0.19243122694602796, - "grad_norm": 919.6061401367188, - "learning_rate": 4.8762351000125766e-05, - "loss": 83.7829, - "step": 47630 - }, - { - "epoch": 0.19247162821139557, - "grad_norm": 938.0872192382812, - "learning_rate": 4.87612660715463e-05, - "loss": 43.0572, - "step": 47640 - }, - { - "epoch": 0.1925120294767632, - "grad_norm": 1763.4127197265625, - "learning_rate": 4.876018067972872e-05, - "loss": 91.6115, - "step": 47650 - }, - { - "epoch": 0.19255243074213085, - "grad_norm": 2353.800537109375, - "learning_rate": 4.8759094824694184e-05, - "loss": 79.1209, - "step": 47660 - }, - { - "epoch": 0.19259283200749847, - "grad_norm": 1034.469482421875, - "learning_rate": 4.875800850646387e-05, - "loss": 63.04, - "step": 47670 - }, - { - "epoch": 0.1926332332728661, - "grad_norm": 497.1041564941406, - "learning_rate": 4.8756921725058934e-05, - "loss": 78.9805, - "step": 47680 - }, - { - "epoch": 0.19267363453823375, - "grad_norm": 410.94970703125, - "learning_rate": 4.875583448050059e-05, - "loss": 77.002, - "step": 47690 - }, - { - "epoch": 0.19271403580360136, - "grad_norm": 1801.6767578125, - "learning_rate": 4.875474677281002e-05, - "loss": 80.726, - "step": 47700 - }, - { - "epoch": 0.192754437068969, - "grad_norm": 946.7315673828125, - "learning_rate": 4.8753658602008425e-05, - "loss": 64.131, - "step": 47710 - }, - { - "epoch": 0.19279483833433664, - "grad_norm": 1104.990478515625, - "learning_rate": 4.875256996811703e-05, - "loss": 63.1742, - "step": 47720 - }, - { - "epoch": 0.19283523959970425, - "grad_norm": 889.265869140625, - "learning_rate": 4.875148087115706e-05, - "loss": 121.2112, - "step": 47730 - }, - { - "epoch": 0.1928756408650719, - "grad_norm": 502.1475830078125, - "learning_rate": 4.875039131114975e-05, - "loss": 81.0771, - "step": 47740 - }, - { - "epoch": 0.19291604213043953, - "grad_norm": 2143.898193359375, - "learning_rate": 4.874930128811631e-05, - "loss": 87.0424, - "step": 47750 - }, - { - "epoch": 0.19295644339580717, - "grad_norm": 792.10693359375, - "learning_rate": 4.874821080207803e-05, - "loss": 73.0605, - "step": 47760 - }, - { - "epoch": 0.19299684466117478, - "grad_norm": 483.91546630859375, - "learning_rate": 4.8747119853056156e-05, - "loss": 88.888, - "step": 47770 - }, - { - "epoch": 0.19303724592654242, - "grad_norm": 1531.7535400390625, - "learning_rate": 4.8746028441071943e-05, - "loss": 59.7669, - "step": 47780 - }, - { - "epoch": 0.19307764719191006, - "grad_norm": 677.5689086914062, - "learning_rate": 4.874493656614669e-05, - "loss": 74.3335, - "step": 47790 - }, - { - "epoch": 0.19311804845727767, - "grad_norm": 515.4765625, - "learning_rate": 4.874384422830167e-05, - "loss": 56.0753, - "step": 47800 - }, - { - "epoch": 0.19315844972264531, - "grad_norm": 763.7691040039062, - "learning_rate": 4.8742751427558186e-05, - "loss": 77.0294, - "step": 47810 - }, - { - "epoch": 0.19319885098801295, - "grad_norm": 1666.2574462890625, - "learning_rate": 4.874165816393754e-05, - "loss": 46.0998, - "step": 47820 - }, - { - "epoch": 0.19323925225338057, - "grad_norm": 753.572021484375, - "learning_rate": 4.874056443746104e-05, - "loss": 55.663, - "step": 47830 - }, - { - "epoch": 0.1932796535187482, - "grad_norm": 664.1289672851562, - "learning_rate": 4.873947024815002e-05, - "loss": 145.2848, - "step": 47840 - }, - { - "epoch": 0.19332005478411585, - "grad_norm": 910.7630615234375, - "learning_rate": 4.87383755960258e-05, - "loss": 100.3147, - "step": 47850 - }, - { - "epoch": 0.19336045604948346, - "grad_norm": 1756.9093017578125, - "learning_rate": 4.8737280481109724e-05, - "loss": 103.6948, - "step": 47860 - }, - { - "epoch": 0.1934008573148511, - "grad_norm": 587.9384155273438, - "learning_rate": 4.8736184903423155e-05, - "loss": 90.1571, - "step": 47870 - }, - { - "epoch": 0.19344125858021874, - "grad_norm": 852.3531494140625, - "learning_rate": 4.873508886298743e-05, - "loss": 72.187, - "step": 47880 - }, - { - "epoch": 0.19348165984558635, - "grad_norm": 844.1437377929688, - "learning_rate": 4.8733992359823936e-05, - "loss": 76.1646, - "step": 47890 - }, - { - "epoch": 0.193522061110954, - "grad_norm": 1602.298095703125, - "learning_rate": 4.8732895393954036e-05, - "loss": 84.8224, - "step": 47900 - }, - { - "epoch": 0.19356246237632163, - "grad_norm": 870.3892211914062, - "learning_rate": 4.8731797965399125e-05, - "loss": 115.4123, - "step": 47910 - }, - { - "epoch": 0.19360286364168927, - "grad_norm": 1025.730224609375, - "learning_rate": 4.873070007418059e-05, - "loss": 74.2097, - "step": 47920 - }, - { - "epoch": 0.19364326490705688, - "grad_norm": 1289.6861572265625, - "learning_rate": 4.8729601720319845e-05, - "loss": 112.8718, - "step": 47930 - }, - { - "epoch": 0.19368366617242452, - "grad_norm": 592.2373657226562, - "learning_rate": 4.8728502903838295e-05, - "loss": 63.9621, - "step": 47940 - }, - { - "epoch": 0.19372406743779216, - "grad_norm": 5277.1982421875, - "learning_rate": 4.8727403624757365e-05, - "loss": 100.8859, - "step": 47950 - }, - { - "epoch": 0.19376446870315978, - "grad_norm": 816.9944458007812, - "learning_rate": 4.872630388309849e-05, - "loss": 82.8986, - "step": 47960 - }, - { - "epoch": 0.19380486996852742, - "grad_norm": 181.56996154785156, - "learning_rate": 4.8725203678883104e-05, - "loss": 65.7631, - "step": 47970 - }, - { - "epoch": 0.19384527123389506, - "grad_norm": 1124.836181640625, - "learning_rate": 4.872410301213265e-05, - "loss": 81.7168, - "step": 47980 - }, - { - "epoch": 0.19388567249926267, - "grad_norm": 1639.8275146484375, - "learning_rate": 4.8723001882868604e-05, - "loss": 103.2546, - "step": 47990 - }, - { - "epoch": 0.1939260737646303, - "grad_norm": 633.078857421875, - "learning_rate": 4.8721900291112415e-05, - "loss": 98.4941, - "step": 48000 - }, - { - "epoch": 0.19396647502999795, - "grad_norm": 766.0423583984375, - "learning_rate": 4.872079823688557e-05, - "loss": 51.5216, - "step": 48010 - }, - { - "epoch": 0.19400687629536556, - "grad_norm": 755.6162719726562, - "learning_rate": 4.871969572020955e-05, - "loss": 76.6551, - "step": 48020 - }, - { - "epoch": 0.1940472775607332, - "grad_norm": 8129.7333984375, - "learning_rate": 4.871859274110585e-05, - "loss": 132.5075, - "step": 48030 - }, - { - "epoch": 0.19408767882610084, - "grad_norm": 1120.7578125, - "learning_rate": 4.871748929959598e-05, - "loss": 87.0844, - "step": 48040 - }, - { - "epoch": 0.19412808009146845, - "grad_norm": 651.3961181640625, - "learning_rate": 4.8716385395701435e-05, - "loss": 61.7545, - "step": 48050 - }, - { - "epoch": 0.1941684813568361, - "grad_norm": 646.9969482421875, - "learning_rate": 4.871528102944376e-05, - "loss": 100.9905, - "step": 48060 - }, - { - "epoch": 0.19420888262220373, - "grad_norm": 478.8792419433594, - "learning_rate": 4.8714176200844464e-05, - "loss": 70.1561, - "step": 48070 - }, - { - "epoch": 0.19424928388757137, - "grad_norm": 617.067626953125, - "learning_rate": 4.8713070909925094e-05, - "loss": 87.2428, - "step": 48080 - }, - { - "epoch": 0.19428968515293898, - "grad_norm": 773.7765502929688, - "learning_rate": 4.8711965156707195e-05, - "loss": 78.5379, - "step": 48090 - }, - { - "epoch": 0.19433008641830662, - "grad_norm": 1457.695068359375, - "learning_rate": 4.871085894121233e-05, - "loss": 106.1761, - "step": 48100 - }, - { - "epoch": 0.19437048768367426, - "grad_norm": 1464.89013671875, - "learning_rate": 4.8709752263462064e-05, - "loss": 59.2071, - "step": 48110 - }, - { - "epoch": 0.19441088894904188, - "grad_norm": 1820.7398681640625, - "learning_rate": 4.870864512347797e-05, - "loss": 71.2924, - "step": 48120 - }, - { - "epoch": 0.19445129021440952, - "grad_norm": 596.8287353515625, - "learning_rate": 4.8707537521281635e-05, - "loss": 60.9779, - "step": 48130 - }, - { - "epoch": 0.19449169147977716, - "grad_norm": 1118.587890625, - "learning_rate": 4.870642945689465e-05, - "loss": 97.0064, - "step": 48140 - }, - { - "epoch": 0.19453209274514477, - "grad_norm": 1756.2308349609375, - "learning_rate": 4.8705320930338615e-05, - "loss": 63.2261, - "step": 48150 - }, - { - "epoch": 0.1945724940105124, - "grad_norm": 846.0660400390625, - "learning_rate": 4.870421194163515e-05, - "loss": 118.8603, - "step": 48160 - }, - { - "epoch": 0.19461289527588005, - "grad_norm": 1581.7791748046875, - "learning_rate": 4.8703102490805865e-05, - "loss": 122.0613, - "step": 48170 - }, - { - "epoch": 0.19465329654124766, - "grad_norm": 1016.5093383789062, - "learning_rate": 4.87019925778724e-05, - "loss": 60.2842, - "step": 48180 - }, - { - "epoch": 0.1946936978066153, - "grad_norm": 717.7647094726562, - "learning_rate": 4.870088220285638e-05, - "loss": 79.8649, - "step": 48190 - }, - { - "epoch": 0.19473409907198294, - "grad_norm": 1193.3416748046875, - "learning_rate": 4.8699771365779453e-05, - "loss": 60.5539, - "step": 48200 - }, - { - "epoch": 0.19477450033735055, - "grad_norm": 2215.272216796875, - "learning_rate": 4.8698660066663294e-05, - "loss": 87.0759, - "step": 48210 - }, - { - "epoch": 0.1948149016027182, - "grad_norm": 1735.73828125, - "learning_rate": 4.869754830552956e-05, - "loss": 96.5737, - "step": 48220 - }, - { - "epoch": 0.19485530286808583, - "grad_norm": 375.5717468261719, - "learning_rate": 4.869643608239991e-05, - "loss": 88.8928, - "step": 48230 - }, - { - "epoch": 0.19489570413345347, - "grad_norm": 612.527587890625, - "learning_rate": 4.8695323397296044e-05, - "loss": 66.2606, - "step": 48240 - }, - { - "epoch": 0.19493610539882109, - "grad_norm": 1690.7991943359375, - "learning_rate": 4.869421025023965e-05, - "loss": 92.1369, - "step": 48250 - }, - { - "epoch": 0.19497650666418873, - "grad_norm": 1483.668212890625, - "learning_rate": 4.8693096641252424e-05, - "loss": 65.0718, - "step": 48260 - }, - { - "epoch": 0.19501690792955637, - "grad_norm": 601.1848754882812, - "learning_rate": 4.8691982570356084e-05, - "loss": 49.7445, - "step": 48270 - }, - { - "epoch": 0.19505730919492398, - "grad_norm": 1077.0008544921875, - "learning_rate": 4.8690868037572346e-05, - "loss": 71.7525, - "step": 48280 - }, - { - "epoch": 0.19509771046029162, - "grad_norm": 1292.0035400390625, - "learning_rate": 4.8689753042922935e-05, - "loss": 75.1074, - "step": 48290 - }, - { - "epoch": 0.19513811172565926, - "grad_norm": 1112.646484375, - "learning_rate": 4.8688637586429595e-05, - "loss": 82.0207, - "step": 48300 - }, - { - "epoch": 0.19517851299102687, - "grad_norm": 1545.366943359375, - "learning_rate": 4.8687521668114064e-05, - "loss": 102.4248, - "step": 48310 - }, - { - "epoch": 0.1952189142563945, - "grad_norm": 814.1812744140625, - "learning_rate": 4.8686405287998116e-05, - "loss": 111.8968, - "step": 48320 - }, - { - "epoch": 0.19525931552176215, - "grad_norm": 632.5703735351562, - "learning_rate": 4.8685288446103495e-05, - "loss": 78.0891, - "step": 48330 - }, - { - "epoch": 0.19529971678712976, - "grad_norm": 2291.05859375, - "learning_rate": 4.8684171142451986e-05, - "loss": 63.0176, - "step": 48340 - }, - { - "epoch": 0.1953401180524974, - "grad_norm": 954.6275024414062, - "learning_rate": 4.8683053377065356e-05, - "loss": 113.7088, - "step": 48350 - }, - { - "epoch": 0.19538051931786504, - "grad_norm": 619.459228515625, - "learning_rate": 4.8681935149965416e-05, - "loss": 66.4437, - "step": 48360 - }, - { - "epoch": 0.19542092058323265, - "grad_norm": 981.1046142578125, - "learning_rate": 4.868081646117395e-05, - "loss": 81.0435, - "step": 48370 - }, - { - "epoch": 0.1954613218486003, - "grad_norm": 714.146728515625, - "learning_rate": 4.867969731071279e-05, - "loss": 101.9001, - "step": 48380 - }, - { - "epoch": 0.19550172311396793, - "grad_norm": 1931.5303955078125, - "learning_rate": 4.8678577698603734e-05, - "loss": 95.807, - "step": 48390 - }, - { - "epoch": 0.19554212437933557, - "grad_norm": 3275.3095703125, - "learning_rate": 4.867745762486861e-05, - "loss": 73.3455, - "step": 48400 - }, - { - "epoch": 0.1955825256447032, - "grad_norm": 1230.974609375, - "learning_rate": 4.867633708952926e-05, - "loss": 77.9758, - "step": 48410 - }, - { - "epoch": 0.19562292691007083, - "grad_norm": 849.453125, - "learning_rate": 4.867521609260754e-05, - "loss": 73.4305, - "step": 48420 - }, - { - "epoch": 0.19566332817543847, - "grad_norm": 903.135009765625, - "learning_rate": 4.867409463412528e-05, - "loss": 56.7117, - "step": 48430 - }, - { - "epoch": 0.19570372944080608, - "grad_norm": 1139.8895263671875, - "learning_rate": 4.8672972714104357e-05, - "loss": 67.9619, - "step": 48440 - }, - { - "epoch": 0.19574413070617372, - "grad_norm": 908.2789916992188, - "learning_rate": 4.867185033256665e-05, - "loss": 62.4522, - "step": 48450 - }, - { - "epoch": 0.19578453197154136, - "grad_norm": 1399.5394287109375, - "learning_rate": 4.8670727489534034e-05, - "loss": 113.5989, - "step": 48460 - }, - { - "epoch": 0.19582493323690897, - "grad_norm": 2880.655029296875, - "learning_rate": 4.8669604185028394e-05, - "loss": 107.0346, - "step": 48470 - }, - { - "epoch": 0.1958653345022766, - "grad_norm": 689.0923461914062, - "learning_rate": 4.866848041907164e-05, - "loss": 89.6595, - "step": 48480 - }, - { - "epoch": 0.19590573576764425, - "grad_norm": 2163.55029296875, - "learning_rate": 4.866735619168568e-05, - "loss": 87.9743, - "step": 48490 - }, - { - "epoch": 0.19594613703301186, - "grad_norm": 816.2272338867188, - "learning_rate": 4.8666231502892415e-05, - "loss": 102.1195, - "step": 48500 - }, - { - "epoch": 0.1959865382983795, - "grad_norm": 3147.00830078125, - "learning_rate": 4.866510635271379e-05, - "loss": 98.8874, - "step": 48510 - }, - { - "epoch": 0.19602693956374714, - "grad_norm": 693.9053955078125, - "learning_rate": 4.8663980741171724e-05, - "loss": 76.9899, - "step": 48520 - }, - { - "epoch": 0.19606734082911476, - "grad_norm": 833.5950317382812, - "learning_rate": 4.866285466828817e-05, - "loss": 56.6829, - "step": 48530 - }, - { - "epoch": 0.1961077420944824, - "grad_norm": 945.19873046875, - "learning_rate": 4.86617281340851e-05, - "loss": 82.8859, - "step": 48540 - }, - { - "epoch": 0.19614814335985004, - "grad_norm": 607.2847290039062, - "learning_rate": 4.866060113858444e-05, - "loss": 71.8742, - "step": 48550 - }, - { - "epoch": 0.19618854462521768, - "grad_norm": 787.0588989257812, - "learning_rate": 4.865947368180818e-05, - "loss": 81.9591, - "step": 48560 - }, - { - "epoch": 0.1962289458905853, - "grad_norm": 406.9506530761719, - "learning_rate": 4.865834576377831e-05, - "loss": 42.3592, - "step": 48570 - }, - { - "epoch": 0.19626934715595293, - "grad_norm": 525.7552490234375, - "learning_rate": 4.86572173845168e-05, - "loss": 109.6562, - "step": 48580 - }, - { - "epoch": 0.19630974842132057, - "grad_norm": 1523.6407470703125, - "learning_rate": 4.865608854404566e-05, - "loss": 133.584, - "step": 48590 - }, - { - "epoch": 0.19635014968668818, - "grad_norm": 620.2904052734375, - "learning_rate": 4.8654959242386896e-05, - "loss": 72.112, - "step": 48600 - }, - { - "epoch": 0.19639055095205582, - "grad_norm": 926.3845825195312, - "learning_rate": 4.865382947956253e-05, - "loss": 106.2173, - "step": 48610 - }, - { - "epoch": 0.19643095221742346, - "grad_norm": 1164.56982421875, - "learning_rate": 4.865269925559457e-05, - "loss": 75.1725, - "step": 48620 - }, - { - "epoch": 0.19647135348279107, - "grad_norm": 946.3316650390625, - "learning_rate": 4.865156857050507e-05, - "loss": 91.8206, - "step": 48630 - }, - { - "epoch": 0.1965117547481587, - "grad_norm": 1000.8588256835938, - "learning_rate": 4.865043742431605e-05, - "loss": 68.0882, - "step": 48640 - }, - { - "epoch": 0.19655215601352635, - "grad_norm": 1161.449951171875, - "learning_rate": 4.8649305817049596e-05, - "loss": 70.0598, - "step": 48650 - }, - { - "epoch": 0.19659255727889396, - "grad_norm": 1116.1414794921875, - "learning_rate": 4.864817374872773e-05, - "loss": 100.4134, - "step": 48660 - }, - { - "epoch": 0.1966329585442616, - "grad_norm": 1149.848388671875, - "learning_rate": 4.864704121937256e-05, - "loss": 59.9185, - "step": 48670 - }, - { - "epoch": 0.19667335980962924, - "grad_norm": 932.8038940429688, - "learning_rate": 4.8645908229006135e-05, - "loss": 49.7766, - "step": 48680 - }, - { - "epoch": 0.19671376107499686, - "grad_norm": 592.9822387695312, - "learning_rate": 4.864477477765056e-05, - "loss": 46.122, - "step": 48690 - }, - { - "epoch": 0.1967541623403645, - "grad_norm": 484.1387023925781, - "learning_rate": 4.864364086532792e-05, - "loss": 60.0866, - "step": 48700 - }, - { - "epoch": 0.19679456360573214, - "grad_norm": 1089.63818359375, - "learning_rate": 4.8642506492060335e-05, - "loss": 102.3337, - "step": 48710 - }, - { - "epoch": 0.19683496487109975, - "grad_norm": 1087.8228759765625, - "learning_rate": 4.8641371657869916e-05, - "loss": 83.8455, - "step": 48720 - }, - { - "epoch": 0.1968753661364674, - "grad_norm": 465.7780456542969, - "learning_rate": 4.864023636277878e-05, - "loss": 58.4958, - "step": 48730 - }, - { - "epoch": 0.19691576740183503, - "grad_norm": 1844.8414306640625, - "learning_rate": 4.863910060680907e-05, - "loss": 84.4384, - "step": 48740 - }, - { - "epoch": 0.19695616866720267, - "grad_norm": 1077.40478515625, - "learning_rate": 4.8637964389982926e-05, - "loss": 134.2446, - "step": 48750 - }, - { - "epoch": 0.19699656993257028, - "grad_norm": 755.9237060546875, - "learning_rate": 4.863682771232248e-05, - "loss": 101.4888, - "step": 48760 - }, - { - "epoch": 0.19703697119793792, - "grad_norm": 947.1353149414062, - "learning_rate": 4.8635690573849926e-05, - "loss": 99.0996, - "step": 48770 - }, - { - "epoch": 0.19707737246330556, - "grad_norm": 758.8486938476562, - "learning_rate": 4.8634552974587414e-05, - "loss": 65.2204, - "step": 48780 - }, - { - "epoch": 0.19711777372867317, - "grad_norm": 2005.01025390625, - "learning_rate": 4.863341491455712e-05, - "loss": 119.6658, - "step": 48790 - }, - { - "epoch": 0.1971581749940408, - "grad_norm": 980.075439453125, - "learning_rate": 4.863227639378124e-05, - "loss": 92.1868, - "step": 48800 - }, - { - "epoch": 0.19719857625940845, - "grad_norm": 511.9769592285156, - "learning_rate": 4.8631137412281954e-05, - "loss": 54.6814, - "step": 48810 - }, - { - "epoch": 0.19723897752477607, - "grad_norm": 993.7817993164062, - "learning_rate": 4.862999797008149e-05, - "loss": 90.118, - "step": 48820 - }, - { - "epoch": 0.1972793787901437, - "grad_norm": 687.139404296875, - "learning_rate": 4.8628858067202045e-05, - "loss": 98.846, - "step": 48830 - }, - { - "epoch": 0.19731978005551135, - "grad_norm": 1727.8497314453125, - "learning_rate": 4.862771770366584e-05, - "loss": 91.3292, - "step": 48840 - }, - { - "epoch": 0.19736018132087896, - "grad_norm": 527.6170043945312, - "learning_rate": 4.862657687949512e-05, - "loss": 84.25, - "step": 48850 - }, - { - "epoch": 0.1974005825862466, - "grad_norm": 657.8152465820312, - "learning_rate": 4.862543559471212e-05, - "loss": 49.7685, - "step": 48860 - }, - { - "epoch": 0.19744098385161424, - "grad_norm": 537.2709350585938, - "learning_rate": 4.8624293849339095e-05, - "loss": 61.4911, - "step": 48870 - }, - { - "epoch": 0.19748138511698185, - "grad_norm": 598.13671875, - "learning_rate": 4.862315164339829e-05, - "loss": 82.7801, - "step": 48880 - }, - { - "epoch": 0.1975217863823495, - "grad_norm": 480.8949279785156, - "learning_rate": 4.862200897691199e-05, - "loss": 58.1646, - "step": 48890 - }, - { - "epoch": 0.19756218764771713, - "grad_norm": 868.3237915039062, - "learning_rate": 4.8620865849902456e-05, - "loss": 73.8393, - "step": 48900 - }, - { - "epoch": 0.19760258891308477, - "grad_norm": 451.89483642578125, - "learning_rate": 4.861972226239199e-05, - "loss": 63.7517, - "step": 48910 - }, - { - "epoch": 0.19764299017845238, - "grad_norm": 598.8991088867188, - "learning_rate": 4.861857821440287e-05, - "loss": 76.1932, - "step": 48920 - }, - { - "epoch": 0.19768339144382002, - "grad_norm": 717.087646484375, - "learning_rate": 4.861743370595741e-05, - "loss": 68.1542, - "step": 48930 - }, - { - "epoch": 0.19772379270918766, - "grad_norm": 1464.67333984375, - "learning_rate": 4.861628873707792e-05, - "loss": 86.0041, - "step": 48940 - }, - { - "epoch": 0.19776419397455527, - "grad_norm": 442.174560546875, - "learning_rate": 4.861514330778673e-05, - "loss": 81.9746, - "step": 48950 - }, - { - "epoch": 0.19780459523992291, - "grad_norm": 1277.0028076171875, - "learning_rate": 4.861399741810615e-05, - "loss": 87.9091, - "step": 48960 - }, - { - "epoch": 0.19784499650529055, - "grad_norm": 485.0671081542969, - "learning_rate": 4.8612851068058544e-05, - "loss": 57.811, - "step": 48970 - }, - { - "epoch": 0.19788539777065817, - "grad_norm": 1858.374267578125, - "learning_rate": 4.861170425766625e-05, - "loss": 95.383, - "step": 48980 - }, - { - "epoch": 0.1979257990360258, - "grad_norm": 944.0939331054688, - "learning_rate": 4.861055698695162e-05, - "loss": 104.2803, - "step": 48990 - }, - { - "epoch": 0.19796620030139345, - "grad_norm": 970.7718505859375, - "learning_rate": 4.860940925593703e-05, - "loss": 100.1417, - "step": 49000 - }, - { - "epoch": 0.19800660156676106, - "grad_norm": 1792.8084716796875, - "learning_rate": 4.860826106464484e-05, - "loss": 86.6734, - "step": 49010 - }, - { - "epoch": 0.1980470028321287, - "grad_norm": 1215.9122314453125, - "learning_rate": 4.8607112413097464e-05, - "loss": 88.0753, - "step": 49020 - }, - { - "epoch": 0.19808740409749634, - "grad_norm": 484.51873779296875, - "learning_rate": 4.860596330131727e-05, - "loss": 78.6937, - "step": 49030 - }, - { - "epoch": 0.19812780536286395, - "grad_norm": 945.8279418945312, - "learning_rate": 4.860481372932667e-05, - "loss": 69.6853, - "step": 49040 - }, - { - "epoch": 0.1981682066282316, - "grad_norm": 507.302978515625, - "learning_rate": 4.860366369714807e-05, - "loss": 91.7122, - "step": 49050 - }, - { - "epoch": 0.19820860789359923, - "grad_norm": 921.6329956054688, - "learning_rate": 4.8602513204803896e-05, - "loss": 103.2615, - "step": 49060 - }, - { - "epoch": 0.19824900915896687, - "grad_norm": 596.2329711914062, - "learning_rate": 4.8601362252316574e-05, - "loss": 103.5869, - "step": 49070 - }, - { - "epoch": 0.19828941042433448, - "grad_norm": 1672.80615234375, - "learning_rate": 4.860021083970855e-05, - "loss": 83.6912, - "step": 49080 - }, - { - "epoch": 0.19832981168970212, - "grad_norm": 271.271728515625, - "learning_rate": 4.8599058967002254e-05, - "loss": 74.6612, - "step": 49090 - }, - { - "epoch": 0.19837021295506976, - "grad_norm": 497.50048828125, - "learning_rate": 4.859790663422016e-05, - "loss": 69.9331, - "step": 49100 - }, - { - "epoch": 0.19841061422043738, - "grad_norm": 573.849365234375, - "learning_rate": 4.8596753841384735e-05, - "loss": 54.5267, - "step": 49110 - }, - { - "epoch": 0.19845101548580502, - "grad_norm": 1481.6771240234375, - "learning_rate": 4.859560058851844e-05, - "loss": 63.2541, - "step": 49120 - }, - { - "epoch": 0.19849141675117266, - "grad_norm": 3614.8828125, - "learning_rate": 4.859444687564376e-05, - "loss": 98.0111, - "step": 49130 - }, - { - "epoch": 0.19853181801654027, - "grad_norm": 1466.5594482421875, - "learning_rate": 4.859329270278319e-05, - "loss": 67.2758, - "step": 49140 - }, - { - "epoch": 0.1985722192819079, - "grad_norm": 611.2523803710938, - "learning_rate": 4.859213806995924e-05, - "loss": 99.23, - "step": 49150 - }, - { - "epoch": 0.19861262054727555, - "grad_norm": 791.8298950195312, - "learning_rate": 4.85909829771944e-05, - "loss": 92.561, - "step": 49160 - }, - { - "epoch": 0.19865302181264316, - "grad_norm": 1123.1046142578125, - "learning_rate": 4.8589827424511216e-05, - "loss": 90.6359, - "step": 49170 - }, - { - "epoch": 0.1986934230780108, - "grad_norm": 0.0, - "learning_rate": 4.858867141193219e-05, - "loss": 75.7769, - "step": 49180 - }, - { - "epoch": 0.19873382434337844, - "grad_norm": 654.7210693359375, - "learning_rate": 4.858751493947987e-05, - "loss": 39.3021, - "step": 49190 - }, - { - "epoch": 0.19877422560874605, - "grad_norm": 1822.922607421875, - "learning_rate": 4.858635800717681e-05, - "loss": 112.5853, - "step": 49200 - }, - { - "epoch": 0.1988146268741137, - "grad_norm": 422.10791015625, - "learning_rate": 4.8585200615045555e-05, - "loss": 66.1211, - "step": 49210 - }, - { - "epoch": 0.19885502813948133, - "grad_norm": 1020.6856689453125, - "learning_rate": 4.8584042763108675e-05, - "loss": 70.5579, - "step": 49220 - }, - { - "epoch": 0.19889542940484897, - "grad_norm": 628.3194580078125, - "learning_rate": 4.858288445138873e-05, - "loss": 110.4609, - "step": 49230 - }, - { - "epoch": 0.19893583067021658, - "grad_norm": 1215.436767578125, - "learning_rate": 4.8581725679908317e-05, - "loss": 145.442, - "step": 49240 - }, - { - "epoch": 0.19897623193558422, - "grad_norm": 255.2740478515625, - "learning_rate": 4.858056644869002e-05, - "loss": 74.1351, - "step": 49250 - }, - { - "epoch": 0.19901663320095186, - "grad_norm": 686.521484375, - "learning_rate": 4.8579406757756455e-05, - "loss": 57.5146, - "step": 49260 - }, - { - "epoch": 0.19905703446631948, - "grad_norm": 452.3636169433594, - "learning_rate": 4.85782466071302e-05, - "loss": 68.5748, - "step": 49270 - }, - { - "epoch": 0.19909743573168712, - "grad_norm": 2242.72412109375, - "learning_rate": 4.857708599683389e-05, - "loss": 86.5858, - "step": 49280 - }, - { - "epoch": 0.19913783699705476, - "grad_norm": 625.6463623046875, - "learning_rate": 4.8575924926890145e-05, - "loss": 50.7092, - "step": 49290 - }, - { - "epoch": 0.19917823826242237, - "grad_norm": 558.0465698242188, - "learning_rate": 4.8574763397321614e-05, - "loss": 53.2504, - "step": 49300 - }, - { - "epoch": 0.19921863952779, - "grad_norm": 932.68212890625, - "learning_rate": 4.857360140815093e-05, - "loss": 65.5452, - "step": 49310 - }, - { - "epoch": 0.19925904079315765, - "grad_norm": 965.7637939453125, - "learning_rate": 4.857243895940076e-05, - "loss": 68.6537, - "step": 49320 - }, - { - "epoch": 0.19929944205852526, - "grad_norm": 676.4684448242188, - "learning_rate": 4.857127605109374e-05, - "loss": 80.4764, - "step": 49330 - }, - { - "epoch": 0.1993398433238929, - "grad_norm": 499.67083740234375, - "learning_rate": 4.8570112683252565e-05, - "loss": 96.8114, - "step": 49340 - }, - { - "epoch": 0.19938024458926054, - "grad_norm": 1086.6888427734375, - "learning_rate": 4.856894885589991e-05, - "loss": 98.3765, - "step": 49350 - }, - { - "epoch": 0.19942064585462815, - "grad_norm": 626.739990234375, - "learning_rate": 4.856778456905846e-05, - "loss": 64.8752, - "step": 49360 - }, - { - "epoch": 0.1994610471199958, - "grad_norm": 464.48565673828125, - "learning_rate": 4.856661982275093e-05, - "loss": 52.9684, - "step": 49370 - }, - { - "epoch": 0.19950144838536343, - "grad_norm": 536.5781860351562, - "learning_rate": 4.8565454617e-05, - "loss": 71.4869, - "step": 49380 - }, - { - "epoch": 0.19954184965073107, - "grad_norm": 3326.29833984375, - "learning_rate": 4.85642889518284e-05, - "loss": 165.3745, - "step": 49390 - }, - { - "epoch": 0.19958225091609869, - "grad_norm": 2225.459716796875, - "learning_rate": 4.856312282725886e-05, - "loss": 92.2177, - "step": 49400 - }, - { - "epoch": 0.19962265218146633, - "grad_norm": 611.5123901367188, - "learning_rate": 4.85619562433141e-05, - "loss": 53.4561, - "step": 49410 - }, - { - "epoch": 0.19966305344683397, - "grad_norm": 763.9044189453125, - "learning_rate": 4.8560789200016884e-05, - "loss": 65.2781, - "step": 49420 - }, - { - "epoch": 0.19970345471220158, - "grad_norm": 1733.5440673828125, - "learning_rate": 4.8559621697389946e-05, - "loss": 149.0616, - "step": 49430 - }, - { - "epoch": 0.19974385597756922, - "grad_norm": 411.6968688964844, - "learning_rate": 4.855845373545605e-05, - "loss": 71.7242, - "step": 49440 - }, - { - "epoch": 0.19978425724293686, - "grad_norm": 258.44677734375, - "learning_rate": 4.855728531423798e-05, - "loss": 71.4265, - "step": 49450 - }, - { - "epoch": 0.19982465850830447, - "grad_norm": 1239.368896484375, - "learning_rate": 4.85561164337585e-05, - "loss": 75.7033, - "step": 49460 - }, - { - "epoch": 0.1998650597736721, - "grad_norm": 1232.6944580078125, - "learning_rate": 4.85549470940404e-05, - "loss": 54.9064, - "step": 49470 - }, - { - "epoch": 0.19990546103903975, - "grad_norm": 698.7216796875, - "learning_rate": 4.855377729510648e-05, - "loss": 64.2161, - "step": 49480 - }, - { - "epoch": 0.19994586230440736, - "grad_norm": 590.6932983398438, - "learning_rate": 4.8552607036979553e-05, - "loss": 59.4658, - "step": 49490 - }, - { - "epoch": 0.199986263569775, - "grad_norm": 1102.7227783203125, - "learning_rate": 4.855143631968242e-05, - "loss": 64.1912, - "step": 49500 - }, - { - "epoch": 0.20002666483514264, - "grad_norm": 2396.97119140625, - "learning_rate": 4.855026514323792e-05, - "loss": 101.3902, - "step": 49510 - }, - { - "epoch": 0.20006706610051025, - "grad_norm": 622.559326171875, - "learning_rate": 4.8549093507668865e-05, - "loss": 91.4989, - "step": 49520 - }, - { - "epoch": 0.2001074673658779, - "grad_norm": 1117.0147705078125, - "learning_rate": 4.854792141299811e-05, - "loss": 52.4889, - "step": 49530 - }, - { - "epoch": 0.20014786863124553, - "grad_norm": 726.4915771484375, - "learning_rate": 4.85467488592485e-05, - "loss": 69.4123, - "step": 49540 - }, - { - "epoch": 0.20018826989661317, - "grad_norm": 563.3633422851562, - "learning_rate": 4.85455758464429e-05, - "loss": 79.5041, - "step": 49550 - }, - { - "epoch": 0.2002286711619808, - "grad_norm": 1249.004150390625, - "learning_rate": 4.854440237460418e-05, - "loss": 113.2019, - "step": 49560 - }, - { - "epoch": 0.20026907242734843, - "grad_norm": 1637.2099609375, - "learning_rate": 4.854322844375522e-05, - "loss": 94.2531, - "step": 49570 - }, - { - "epoch": 0.20030947369271607, - "grad_norm": 916.0922241210938, - "learning_rate": 4.85420540539189e-05, - "loss": 74.7405, - "step": 49580 - }, - { - "epoch": 0.20034987495808368, - "grad_norm": 660.0616455078125, - "learning_rate": 4.8540879205118106e-05, - "loss": 63.3157, - "step": 49590 - }, - { - "epoch": 0.20039027622345132, - "grad_norm": 1970.4896240234375, - "learning_rate": 4.8539703897375755e-05, - "loss": 93.7624, - "step": 49600 - }, - { - "epoch": 0.20043067748881896, - "grad_norm": 765.5314331054688, - "learning_rate": 4.853852813071476e-05, - "loss": 83.952, - "step": 49610 - }, - { - "epoch": 0.20047107875418657, - "grad_norm": 883.2976684570312, - "learning_rate": 4.853735190515804e-05, - "loss": 100.3142, - "step": 49620 - }, - { - "epoch": 0.2005114800195542, - "grad_norm": 1285.146240234375, - "learning_rate": 4.853617522072853e-05, - "loss": 84.2881, - "step": 49630 - }, - { - "epoch": 0.20055188128492185, - "grad_norm": 561.2415771484375, - "learning_rate": 4.853499807744916e-05, - "loss": 67.512, - "step": 49640 - }, - { - "epoch": 0.20059228255028946, - "grad_norm": 759.5241088867188, - "learning_rate": 4.85338204753429e-05, - "loss": 70.0843, - "step": 49650 - }, - { - "epoch": 0.2006326838156571, - "grad_norm": 1011.6978759765625, - "learning_rate": 4.8532642414432674e-05, - "loss": 91.1174, - "step": 49660 - }, - { - "epoch": 0.20067308508102474, - "grad_norm": 710.1160888671875, - "learning_rate": 4.853146389474148e-05, - "loss": 78.4036, - "step": 49670 - }, - { - "epoch": 0.20071348634639236, - "grad_norm": 1832.636474609375, - "learning_rate": 4.853028491629228e-05, - "loss": 69.5313, - "step": 49680 - }, - { - "epoch": 0.20075388761176, - "grad_norm": 630.8060913085938, - "learning_rate": 4.852910547910806e-05, - "loss": 72.0492, - "step": 49690 - }, - { - "epoch": 0.20079428887712764, - "grad_norm": 891.0122680664062, - "learning_rate": 4.852792558321182e-05, - "loss": 88.1902, - "step": 49700 - }, - { - "epoch": 0.20083469014249528, - "grad_norm": 493.8849182128906, - "learning_rate": 4.852674522862656e-05, - "loss": 82.2126, - "step": 49710 - }, - { - "epoch": 0.2008750914078629, - "grad_norm": 916.6065063476562, - "learning_rate": 4.852556441537528e-05, - "loss": 73.7906, - "step": 49720 - }, - { - "epoch": 0.20091549267323053, - "grad_norm": 1729.7864990234375, - "learning_rate": 4.852438314348101e-05, - "loss": 98.6484, - "step": 49730 - }, - { - "epoch": 0.20095589393859817, - "grad_norm": 971.2913818359375, - "learning_rate": 4.852320141296679e-05, - "loss": 74.1662, - "step": 49740 - }, - { - "epoch": 0.20099629520396578, - "grad_norm": 673.4738159179688, - "learning_rate": 4.852201922385564e-05, - "loss": 87.5311, - "step": 49750 - }, - { - "epoch": 0.20103669646933342, - "grad_norm": 700.3085327148438, - "learning_rate": 4.852083657617061e-05, - "loss": 119.7827, - "step": 49760 - }, - { - "epoch": 0.20107709773470106, - "grad_norm": 574.7796630859375, - "learning_rate": 4.851965346993478e-05, - "loss": 118.024, - "step": 49770 - }, - { - "epoch": 0.20111749900006867, - "grad_norm": 1227.26904296875, - "learning_rate": 4.851846990517118e-05, - "loss": 96.7121, - "step": 49780 - }, - { - "epoch": 0.2011579002654363, - "grad_norm": 1115.0523681640625, - "learning_rate": 4.8517285881902904e-05, - "loss": 69.7559, - "step": 49790 - }, - { - "epoch": 0.20119830153080395, - "grad_norm": 820.30126953125, - "learning_rate": 4.851610140015304e-05, - "loss": 94.3278, - "step": 49800 - }, - { - "epoch": 0.20123870279617156, - "grad_norm": 3716.187744140625, - "learning_rate": 4.8514916459944666e-05, - "loss": 136.7065, - "step": 49810 - }, - { - "epoch": 0.2012791040615392, - "grad_norm": 1158.3758544921875, - "learning_rate": 4.8513731061300887e-05, - "loss": 88.3688, - "step": 49820 - }, - { - "epoch": 0.20131950532690684, - "grad_norm": 2444.747314453125, - "learning_rate": 4.851254520424482e-05, - "loss": 58.1214, - "step": 49830 - }, - { - "epoch": 0.20135990659227446, - "grad_norm": 594.2299194335938, - "learning_rate": 4.851135888879958e-05, - "loss": 50.3362, - "step": 49840 - }, - { - "epoch": 0.2014003078576421, - "grad_norm": 422.1976318359375, - "learning_rate": 4.851017211498829e-05, - "loss": 114.6325, - "step": 49850 - }, - { - "epoch": 0.20144070912300974, - "grad_norm": 348.53741455078125, - "learning_rate": 4.85089848828341e-05, - "loss": 81.1028, - "step": 49860 - }, - { - "epoch": 0.20148111038837738, - "grad_norm": 629.5589599609375, - "learning_rate": 4.8507797192360134e-05, - "loss": 47.3838, - "step": 49870 - }, - { - "epoch": 0.201521511653745, - "grad_norm": 719.5811157226562, - "learning_rate": 4.850660904358956e-05, - "loss": 57.1744, - "step": 49880 - }, - { - "epoch": 0.20156191291911263, - "grad_norm": 497.17913818359375, - "learning_rate": 4.850542043654555e-05, - "loss": 75.8821, - "step": 49890 - }, - { - "epoch": 0.20160231418448027, - "grad_norm": 569.685546875, - "learning_rate": 4.8504231371251255e-05, - "loss": 59.8405, - "step": 49900 - }, - { - "epoch": 0.20164271544984788, - "grad_norm": 503.3626708984375, - "learning_rate": 4.850304184772988e-05, - "loss": 64.7331, - "step": 49910 - }, - { - "epoch": 0.20168311671521552, - "grad_norm": 444.8017883300781, - "learning_rate": 4.85018518660046e-05, - "loss": 132.0521, - "step": 49920 - }, - { - "epoch": 0.20172351798058316, - "grad_norm": 557.6558227539062, - "learning_rate": 4.850066142609862e-05, - "loss": 66.8273, - "step": 49930 - }, - { - "epoch": 0.20176391924595077, - "grad_norm": 625.9427490234375, - "learning_rate": 4.849947052803514e-05, - "loss": 97.3501, - "step": 49940 - }, - { - "epoch": 0.2018043205113184, - "grad_norm": 413.3929748535156, - "learning_rate": 4.849827917183739e-05, - "loss": 104.1396, - "step": 49950 - }, - { - "epoch": 0.20184472177668605, - "grad_norm": 1487.6632080078125, - "learning_rate": 4.849708735752859e-05, - "loss": 84.6355, - "step": 49960 - }, - { - "epoch": 0.20188512304205367, - "grad_norm": 399.767578125, - "learning_rate": 4.849589508513197e-05, - "loss": 71.0594, - "step": 49970 - }, - { - "epoch": 0.2019255243074213, - "grad_norm": 739.0697631835938, - "learning_rate": 4.849470235467078e-05, - "loss": 91.7866, - "step": 49980 - }, - { - "epoch": 0.20196592557278895, - "grad_norm": 962.0905151367188, - "learning_rate": 4.849350916616827e-05, - "loss": 142.8859, - "step": 49990 - }, - { - "epoch": 0.20200632683815656, - "grad_norm": 556.9208374023438, - "learning_rate": 4.849231551964771e-05, - "loss": 69.2205, - "step": 50000 - }, - { - "epoch": 0.2020467281035242, - "grad_norm": 994.56298828125, - "learning_rate": 4.849112141513236e-05, - "loss": 98.4823, - "step": 50010 - }, - { - "epoch": 0.20208712936889184, - "grad_norm": 446.2982482910156, - "learning_rate": 4.8489926852645505e-05, - "loss": 91.0375, - "step": 50020 - }, - { - "epoch": 0.20212753063425948, - "grad_norm": 2271.33349609375, - "learning_rate": 4.848873183221043e-05, - "loss": 89.0778, - "step": 50030 - }, - { - "epoch": 0.2021679318996271, - "grad_norm": 1082.55810546875, - "learning_rate": 4.8487536353850444e-05, - "loss": 109.4579, - "step": 50040 - }, - { - "epoch": 0.20220833316499473, - "grad_norm": 2170.5400390625, - "learning_rate": 4.8486340417588835e-05, - "loss": 84.2964, - "step": 50050 - }, - { - "epoch": 0.20224873443036237, - "grad_norm": 454.11248779296875, - "learning_rate": 4.8485144023448936e-05, - "loss": 62.0007, - "step": 50060 - }, - { - "epoch": 0.20228913569572998, - "grad_norm": 494.5736083984375, - "learning_rate": 4.848394717145406e-05, - "loss": 87.3821, - "step": 50070 - }, - { - "epoch": 0.20232953696109762, - "grad_norm": 1059.5152587890625, - "learning_rate": 4.848274986162754e-05, - "loss": 68.3891, - "step": 50080 - }, - { - "epoch": 0.20236993822646526, - "grad_norm": 778.6331176757812, - "learning_rate": 4.848155209399272e-05, - "loss": 72.0611, - "step": 50090 - }, - { - "epoch": 0.20241033949183287, - "grad_norm": 819.3021240234375, - "learning_rate": 4.848035386857296e-05, - "loss": 67.9465, - "step": 50100 - }, - { - "epoch": 0.20245074075720051, - "grad_norm": 1114.4420166015625, - "learning_rate": 4.847915518539161e-05, - "loss": 75.596, - "step": 50110 - }, - { - "epoch": 0.20249114202256815, - "grad_norm": 1062.3887939453125, - "learning_rate": 4.847795604447204e-05, - "loss": 102.3436, - "step": 50120 - }, - { - "epoch": 0.20253154328793577, - "grad_norm": 1471.7073974609375, - "learning_rate": 4.847675644583764e-05, - "loss": 129.1764, - "step": 50130 - }, - { - "epoch": 0.2025719445533034, - "grad_norm": 1061.678955078125, - "learning_rate": 4.847555638951177e-05, - "loss": 84.9815, - "step": 50140 - }, - { - "epoch": 0.20261234581867105, - "grad_norm": 1374.27001953125, - "learning_rate": 4.8474355875517854e-05, - "loss": 88.13, - "step": 50150 - }, - { - "epoch": 0.20265274708403866, - "grad_norm": 1075.933837890625, - "learning_rate": 4.8473154903879276e-05, - "loss": 94.9892, - "step": 50160 - }, - { - "epoch": 0.2026931483494063, - "grad_norm": 314.6831359863281, - "learning_rate": 4.8471953474619466e-05, - "loss": 83.2022, - "step": 50170 - }, - { - "epoch": 0.20273354961477394, - "grad_norm": 1138.607421875, - "learning_rate": 4.847075158776183e-05, - "loss": 78.0873, - "step": 50180 - }, - { - "epoch": 0.20277395088014158, - "grad_norm": 561.6530151367188, - "learning_rate": 4.846954924332981e-05, - "loss": 57.0055, - "step": 50190 - }, - { - "epoch": 0.2028143521455092, - "grad_norm": 1306.6680908203125, - "learning_rate": 4.846834644134686e-05, - "loss": 69.6428, - "step": 50200 - }, - { - "epoch": 0.20285475341087683, - "grad_norm": 418.66046142578125, - "learning_rate": 4.846714318183639e-05, - "loss": 87.8257, - "step": 50210 - }, - { - "epoch": 0.20289515467624447, - "grad_norm": 1192.7034912109375, - "learning_rate": 4.84659394648219e-05, - "loss": 93.4679, - "step": 50220 - }, - { - "epoch": 0.20293555594161208, - "grad_norm": 1370.776611328125, - "learning_rate": 4.846473529032684e-05, - "loss": 112.6145, - "step": 50230 - }, - { - "epoch": 0.20297595720697972, - "grad_norm": 555.2098388671875, - "learning_rate": 4.846353065837467e-05, - "loss": 104.6596, - "step": 50240 - }, - { - "epoch": 0.20301635847234736, - "grad_norm": 714.0413208007812, - "learning_rate": 4.84623255689889e-05, - "loss": 63.1508, - "step": 50250 - }, - { - "epoch": 0.20305675973771498, - "grad_norm": 779.4690551757812, - "learning_rate": 4.846112002219301e-05, - "loss": 77.2726, - "step": 50260 - }, - { - "epoch": 0.20309716100308262, - "grad_norm": 857.3240966796875, - "learning_rate": 4.845991401801051e-05, - "loss": 86.2318, - "step": 50270 - }, - { - "epoch": 0.20313756226845026, - "grad_norm": 700.0541381835938, - "learning_rate": 4.845870755646491e-05, - "loss": 72.9242, - "step": 50280 - }, - { - "epoch": 0.20317796353381787, - "grad_norm": 357.010009765625, - "learning_rate": 4.8457500637579726e-05, - "loss": 53.2458, - "step": 50290 - }, - { - "epoch": 0.2032183647991855, - "grad_norm": 1076.287841796875, - "learning_rate": 4.845629326137849e-05, - "loss": 60.6979, - "step": 50300 - }, - { - "epoch": 0.20325876606455315, - "grad_norm": 1700.1820068359375, - "learning_rate": 4.845508542788474e-05, - "loss": 92.155, - "step": 50310 - }, - { - "epoch": 0.20329916732992076, - "grad_norm": 800.1776123046875, - "learning_rate": 4.845387713712203e-05, - "loss": 89.9958, - "step": 50320 - }, - { - "epoch": 0.2033395685952884, - "grad_norm": 953.5119018554688, - "learning_rate": 4.8452668389113895e-05, - "loss": 103.765, - "step": 50330 - }, - { - "epoch": 0.20337996986065604, - "grad_norm": 558.611083984375, - "learning_rate": 4.845145918388393e-05, - "loss": 48.235, - "step": 50340 - }, - { - "epoch": 0.20342037112602368, - "grad_norm": 106.23624420166016, - "learning_rate": 4.8450249521455695e-05, - "loss": 87.897, - "step": 50350 - }, - { - "epoch": 0.2034607723913913, - "grad_norm": 1008.41748046875, - "learning_rate": 4.844903940185276e-05, - "loss": 55.0348, - "step": 50360 - }, - { - "epoch": 0.20350117365675893, - "grad_norm": 1108.453369140625, - "learning_rate": 4.844782882509874e-05, - "loss": 79.4438, - "step": 50370 - }, - { - "epoch": 0.20354157492212657, - "grad_norm": 985.6517944335938, - "learning_rate": 4.844661779121723e-05, - "loss": 61.7789, - "step": 50380 - }, - { - "epoch": 0.20358197618749418, - "grad_norm": 863.3707885742188, - "learning_rate": 4.844540630023182e-05, - "loss": 75.1732, - "step": 50390 - }, - { - "epoch": 0.20362237745286182, - "grad_norm": 843.4656372070312, - "learning_rate": 4.844419435216615e-05, - "loss": 61.1795, - "step": 50400 - }, - { - "epoch": 0.20366277871822946, - "grad_norm": 739.8035888671875, - "learning_rate": 4.844298194704384e-05, - "loss": 89.2932, - "step": 50410 - }, - { - "epoch": 0.20370317998359708, - "grad_norm": 455.288818359375, - "learning_rate": 4.8441769084888534e-05, - "loss": 58.3124, - "step": 50420 - }, - { - "epoch": 0.20374358124896472, - "grad_norm": 1115.42333984375, - "learning_rate": 4.844055576572387e-05, - "loss": 70.2969, - "step": 50430 - }, - { - "epoch": 0.20378398251433236, - "grad_norm": 1393.1253662109375, - "learning_rate": 4.84393419895735e-05, - "loss": 89.79, - "step": 50440 - }, - { - "epoch": 0.20382438377969997, - "grad_norm": 1216.634033203125, - "learning_rate": 4.84381277564611e-05, - "loss": 72.9308, - "step": 50450 - }, - { - "epoch": 0.2038647850450676, - "grad_norm": 548.3377075195312, - "learning_rate": 4.8436913066410316e-05, - "loss": 73.1693, - "step": 50460 - }, - { - "epoch": 0.20390518631043525, - "grad_norm": 1117.44091796875, - "learning_rate": 4.843569791944486e-05, - "loss": 121.1625, - "step": 50470 - }, - { - "epoch": 0.20394558757580286, - "grad_norm": 1097.303955078125, - "learning_rate": 4.843448231558839e-05, - "loss": 103.4902, - "step": 50480 - }, - { - "epoch": 0.2039859888411705, - "grad_norm": 797.4445190429688, - "learning_rate": 4.843326625486464e-05, - "loss": 78.5437, - "step": 50490 - }, - { - "epoch": 0.20402639010653814, - "grad_norm": 876.6442260742188, - "learning_rate": 4.843204973729729e-05, - "loss": 72.4397, - "step": 50500 - }, - { - "epoch": 0.20406679137190578, - "grad_norm": 696.5914306640625, - "learning_rate": 4.843083276291007e-05, - "loss": 75.2865, - "step": 50510 - }, - { - "epoch": 0.2041071926372734, - "grad_norm": 202.0533447265625, - "learning_rate": 4.84296153317267e-05, - "loss": 42.6595, - "step": 50520 - }, - { - "epoch": 0.20414759390264103, - "grad_norm": 711.9306030273438, - "learning_rate": 4.8428397443770926e-05, - "loss": 76.4253, - "step": 50530 - }, - { - "epoch": 0.20418799516800867, - "grad_norm": 1101.085205078125, - "learning_rate": 4.842717909906647e-05, - "loss": 91.274, - "step": 50540 - }, - { - "epoch": 0.20422839643337629, - "grad_norm": 934.9185180664062, - "learning_rate": 4.84259602976371e-05, - "loss": 99.1536, - "step": 50550 - }, - { - "epoch": 0.20426879769874393, - "grad_norm": 1069.0614013671875, - "learning_rate": 4.8424741039506575e-05, - "loss": 76.847, - "step": 50560 - }, - { - "epoch": 0.20430919896411157, - "grad_norm": 5626.541015625, - "learning_rate": 4.842352132469867e-05, - "loss": 85.2173, - "step": 50570 - }, - { - "epoch": 0.20434960022947918, - "grad_norm": 467.4293212890625, - "learning_rate": 4.8422301153237145e-05, - "loss": 57.4749, - "step": 50580 - }, - { - "epoch": 0.20439000149484682, - "grad_norm": 721.87353515625, - "learning_rate": 4.842108052514581e-05, - "loss": 67.7757, - "step": 50590 - }, - { - "epoch": 0.20443040276021446, - "grad_norm": 1129.6400146484375, - "learning_rate": 4.841985944044845e-05, - "loss": 69.0111, - "step": 50600 - }, - { - "epoch": 0.20447080402558207, - "grad_norm": 566.9580688476562, - "learning_rate": 4.8418637899168874e-05, - "loss": 96.5319, - "step": 50610 - }, - { - "epoch": 0.2045112052909497, - "grad_norm": 1207.4417724609375, - "learning_rate": 4.8417415901330886e-05, - "loss": 86.0825, - "step": 50620 - }, - { - "epoch": 0.20455160655631735, - "grad_norm": 912.6641235351562, - "learning_rate": 4.841619344695833e-05, - "loss": 75.0874, - "step": 50630 - }, - { - "epoch": 0.20459200782168496, - "grad_norm": 619.9535522460938, - "learning_rate": 4.8414970536075024e-05, - "loss": 82.5721, - "step": 50640 - }, - { - "epoch": 0.2046324090870526, - "grad_norm": 522.5993041992188, - "learning_rate": 4.841374716870481e-05, - "loss": 95.127, - "step": 50650 - }, - { - "epoch": 0.20467281035242024, - "grad_norm": 368.2784729003906, - "learning_rate": 4.841252334487154e-05, - "loss": 61.5005, - "step": 50660 - }, - { - "epoch": 0.20471321161778788, - "grad_norm": 1060.747802734375, - "learning_rate": 4.841129906459908e-05, - "loss": 69.1981, - "step": 50670 - }, - { - "epoch": 0.2047536128831555, - "grad_norm": 1138.5040283203125, - "learning_rate": 4.841007432791129e-05, - "loss": 56.1066, - "step": 50680 - }, - { - "epoch": 0.20479401414852313, - "grad_norm": 1501.6522216796875, - "learning_rate": 4.840884913483204e-05, - "loss": 108.5628, - "step": 50690 - }, - { - "epoch": 0.20483441541389077, - "grad_norm": 1450.2125244140625, - "learning_rate": 4.8407623485385234e-05, - "loss": 89.9987, - "step": 50700 - }, - { - "epoch": 0.2048748166792584, - "grad_norm": 503.9439697265625, - "learning_rate": 4.840639737959476e-05, - "loss": 69.8462, - "step": 50710 - }, - { - "epoch": 0.20491521794462603, - "grad_norm": 702.1473388671875, - "learning_rate": 4.8405170817484515e-05, - "loss": 102.1393, - "step": 50720 - }, - { - "epoch": 0.20495561920999367, - "grad_norm": 557.0831909179688, - "learning_rate": 4.840394379907841e-05, - "loss": 70.9738, - "step": 50730 - }, - { - "epoch": 0.20499602047536128, - "grad_norm": 905.9644775390625, - "learning_rate": 4.840271632440038e-05, - "loss": 63.7002, - "step": 50740 - }, - { - "epoch": 0.20503642174072892, - "grad_norm": 1401.463623046875, - "learning_rate": 4.840148839347434e-05, - "loss": 82.3691, - "step": 50750 - }, - { - "epoch": 0.20507682300609656, - "grad_norm": 457.6974182128906, - "learning_rate": 4.8400260006324235e-05, - "loss": 64.2692, - "step": 50760 - }, - { - "epoch": 0.20511722427146417, - "grad_norm": 1235.328857421875, - "learning_rate": 4.839903116297401e-05, - "loss": 76.285, - "step": 50770 - }, - { - "epoch": 0.2051576255368318, - "grad_norm": 1098.241943359375, - "learning_rate": 4.8397801863447635e-05, - "loss": 98.041, - "step": 50780 - }, - { - "epoch": 0.20519802680219945, - "grad_norm": 1030.691650390625, - "learning_rate": 4.8396572107769066e-05, - "loss": 81.4219, - "step": 50790 - }, - { - "epoch": 0.20523842806756706, - "grad_norm": 546.6036987304688, - "learning_rate": 4.839534189596228e-05, - "loss": 62.711, - "step": 50800 - }, - { - "epoch": 0.2052788293329347, - "grad_norm": 595.7460327148438, - "learning_rate": 4.839411122805125e-05, - "loss": 78.6366, - "step": 50810 - }, - { - "epoch": 0.20531923059830234, - "grad_norm": 435.6410217285156, - "learning_rate": 4.839288010405998e-05, - "loss": 54.1102, - "step": 50820 - }, - { - "epoch": 0.20535963186366998, - "grad_norm": 756.9534301757812, - "learning_rate": 4.839164852401247e-05, - "loss": 55.358, - "step": 50830 - }, - { - "epoch": 0.2054000331290376, - "grad_norm": 1443.138671875, - "learning_rate": 4.8390416487932733e-05, - "loss": 76.8449, - "step": 50840 - }, - { - "epoch": 0.20544043439440524, - "grad_norm": 666.9951171875, - "learning_rate": 4.8389183995844785e-05, - "loss": 98.6886, - "step": 50850 - }, - { - "epoch": 0.20548083565977288, - "grad_norm": 720.92919921875, - "learning_rate": 4.838795104777265e-05, - "loss": 101.151, - "step": 50860 - }, - { - "epoch": 0.2055212369251405, - "grad_norm": 929.8797607421875, - "learning_rate": 4.8386717643740366e-05, - "loss": 87.1545, - "step": 50870 - }, - { - "epoch": 0.20556163819050813, - "grad_norm": 1356.6619873046875, - "learning_rate": 4.8385483783771986e-05, - "loss": 92.7927, - "step": 50880 - }, - { - "epoch": 0.20560203945587577, - "grad_norm": 2006.3673095703125, - "learning_rate": 4.838424946789156e-05, - "loss": 75.9321, - "step": 50890 - }, - { - "epoch": 0.20564244072124338, - "grad_norm": 741.2835693359375, - "learning_rate": 4.8383014696123144e-05, - "loss": 74.4236, - "step": 50900 - }, - { - "epoch": 0.20568284198661102, - "grad_norm": 719.8021850585938, - "learning_rate": 4.838177946849083e-05, - "loss": 100.4028, - "step": 50910 - }, - { - "epoch": 0.20572324325197866, - "grad_norm": 410.4776306152344, - "learning_rate": 4.8380543785018677e-05, - "loss": 78.5173, - "step": 50920 - }, - { - "epoch": 0.20576364451734627, - "grad_norm": 563.4188842773438, - "learning_rate": 4.8379307645730795e-05, - "loss": 116.8776, - "step": 50930 - }, - { - "epoch": 0.2058040457827139, - "grad_norm": 882.4771728515625, - "learning_rate": 4.837807105065127e-05, - "loss": 63.1256, - "step": 50940 - }, - { - "epoch": 0.20584444704808155, - "grad_norm": 576.0557861328125, - "learning_rate": 4.837683399980421e-05, - "loss": 100.8891, - "step": 50950 - }, - { - "epoch": 0.20588484831344916, - "grad_norm": 2097.596923828125, - "learning_rate": 4.837559649321374e-05, - "loss": 107.397, - "step": 50960 - }, - { - "epoch": 0.2059252495788168, - "grad_norm": 1427.4742431640625, - "learning_rate": 4.837435853090398e-05, - "loss": 68.9581, - "step": 50970 - }, - { - "epoch": 0.20596565084418444, - "grad_norm": 1769.10107421875, - "learning_rate": 4.837312011289907e-05, - "loss": 77.3765, - "step": 50980 - }, - { - "epoch": 0.20600605210955208, - "grad_norm": 745.1818237304688, - "learning_rate": 4.837188123922314e-05, - "loss": 58.5138, - "step": 50990 - }, - { - "epoch": 0.2060464533749197, - "grad_norm": 1252.1639404296875, - "learning_rate": 4.837064190990036e-05, - "loss": 87.0167, - "step": 51000 - }, - { - "epoch": 0.20608685464028734, - "grad_norm": 702.9400024414062, - "learning_rate": 4.836940212495489e-05, - "loss": 100.8639, - "step": 51010 - }, - { - "epoch": 0.20612725590565498, - "grad_norm": 1918.0904541015625, - "learning_rate": 4.836816188441089e-05, - "loss": 67.2167, - "step": 51020 - }, - { - "epoch": 0.2061676571710226, - "grad_norm": 1665.779296875, - "learning_rate": 4.8366921188292534e-05, - "loss": 70.5055, - "step": 51030 - }, - { - "epoch": 0.20620805843639023, - "grad_norm": 1276.0400390625, - "learning_rate": 4.8365680036624026e-05, - "loss": 69.3662, - "step": 51040 - }, - { - "epoch": 0.20624845970175787, - "grad_norm": 1376.8558349609375, - "learning_rate": 4.836443842942956e-05, - "loss": 88.9127, - "step": 51050 - }, - { - "epoch": 0.20628886096712548, - "grad_norm": 1290.6573486328125, - "learning_rate": 4.836319636673334e-05, - "loss": 114.1035, - "step": 51060 - }, - { - "epoch": 0.20632926223249312, - "grad_norm": 789.5421142578125, - "learning_rate": 4.836195384855957e-05, - "loss": 67.4084, - "step": 51070 - }, - { - "epoch": 0.20636966349786076, - "grad_norm": 843.2109985351562, - "learning_rate": 4.8360710874932485e-05, - "loss": 77.7251, - "step": 51080 - }, - { - "epoch": 0.20641006476322837, - "grad_norm": 873.6146850585938, - "learning_rate": 4.8359467445876314e-05, - "loss": 111.2144, - "step": 51090 - }, - { - "epoch": 0.206450466028596, - "grad_norm": 1589.0201416015625, - "learning_rate": 4.8358223561415304e-05, - "loss": 95.8929, - "step": 51100 - }, - { - "epoch": 0.20649086729396365, - "grad_norm": 914.6998291015625, - "learning_rate": 4.8356979221573696e-05, - "loss": 58.4896, - "step": 51110 - }, - { - "epoch": 0.20653126855933127, - "grad_norm": 566.5862426757812, - "learning_rate": 4.8355734426375753e-05, - "loss": 82.7033, - "step": 51120 - }, - { - "epoch": 0.2065716698246989, - "grad_norm": 1035.8038330078125, - "learning_rate": 4.835448917584574e-05, - "loss": 93.2203, - "step": 51130 - }, - { - "epoch": 0.20661207109006655, - "grad_norm": 645.23388671875, - "learning_rate": 4.8353243470007944e-05, - "loss": 84.6093, - "step": 51140 - }, - { - "epoch": 0.20665247235543419, - "grad_norm": 919.4197998046875, - "learning_rate": 4.835199730888664e-05, - "loss": 70.3317, - "step": 51150 - }, - { - "epoch": 0.2066928736208018, - "grad_norm": 647.1675415039062, - "learning_rate": 4.835075069250613e-05, - "loss": 70.5222, - "step": 51160 - }, - { - "epoch": 0.20673327488616944, - "grad_norm": 689.3214111328125, - "learning_rate": 4.8349503620890705e-05, - "loss": 73.7844, - "step": 51170 - }, - { - "epoch": 0.20677367615153708, - "grad_norm": 974.9696044921875, - "learning_rate": 4.8348256094064695e-05, - "loss": 113.3237, - "step": 51180 - }, - { - "epoch": 0.2068140774169047, - "grad_norm": 595.2847290039062, - "learning_rate": 4.834700811205241e-05, - "loss": 65.6414, - "step": 51190 - }, - { - "epoch": 0.20685447868227233, - "grad_norm": 1128.4539794921875, - "learning_rate": 4.834575967487817e-05, - "loss": 105.4969, - "step": 51200 - }, - { - "epoch": 0.20689487994763997, - "grad_norm": 445.4708557128906, - "learning_rate": 4.834451078256634e-05, - "loss": 62.121, - "step": 51210 - }, - { - "epoch": 0.20693528121300758, - "grad_norm": 308.1874694824219, - "learning_rate": 4.8343261435141244e-05, - "loss": 74.7087, - "step": 51220 - }, - { - "epoch": 0.20697568247837522, - "grad_norm": 579.1897583007812, - "learning_rate": 4.8342011632627254e-05, - "loss": 115.5755, - "step": 51230 - }, - { - "epoch": 0.20701608374374286, - "grad_norm": 1018.190185546875, - "learning_rate": 4.834076137504873e-05, - "loss": 77.1754, - "step": 51240 - }, - { - "epoch": 0.20705648500911047, - "grad_norm": 560.5921020507812, - "learning_rate": 4.8339510662430046e-05, - "loss": 89.5999, - "step": 51250 - }, - { - "epoch": 0.20709688627447811, - "grad_norm": 554.189453125, - "learning_rate": 4.833825949479558e-05, - "loss": 60.0793, - "step": 51260 - }, - { - "epoch": 0.20713728753984575, - "grad_norm": 666.8161010742188, - "learning_rate": 4.8337007872169735e-05, - "loss": 79.2109, - "step": 51270 - }, - { - "epoch": 0.20717768880521337, - "grad_norm": 758.923828125, - "learning_rate": 4.833575579457691e-05, - "loss": 71.3804, - "step": 51280 - }, - { - "epoch": 0.207218090070581, - "grad_norm": 768.92236328125, - "learning_rate": 4.8334503262041505e-05, - "loss": 54.9832, - "step": 51290 - }, - { - "epoch": 0.20725849133594865, - "grad_norm": 453.7713317871094, - "learning_rate": 4.833325027458795e-05, - "loss": 86.5132, - "step": 51300 - }, - { - "epoch": 0.2072988926013163, - "grad_norm": 688.4905395507812, - "learning_rate": 4.8331996832240675e-05, - "loss": 62.2691, - "step": 51310 - }, - { - "epoch": 0.2073392938666839, - "grad_norm": 941.0972900390625, - "learning_rate": 4.83307429350241e-05, - "loss": 68.9129, - "step": 51320 - }, - { - "epoch": 0.20737969513205154, - "grad_norm": 553.0337524414062, - "learning_rate": 4.832948858296268e-05, - "loss": 70.7225, - "step": 51330 - }, - { - "epoch": 0.20742009639741918, - "grad_norm": 736.6832885742188, - "learning_rate": 4.832823377608087e-05, - "loss": 90.5628, - "step": 51340 - }, - { - "epoch": 0.2074604976627868, - "grad_norm": 1483.525390625, - "learning_rate": 4.832697851440313e-05, - "loss": 60.9628, - "step": 51350 - }, - { - "epoch": 0.20750089892815443, - "grad_norm": 1290.9232177734375, - "learning_rate": 4.8325722797953945e-05, - "loss": 56.6402, - "step": 51360 - }, - { - "epoch": 0.20754130019352207, - "grad_norm": 1492.0635986328125, - "learning_rate": 4.8324466626757775e-05, - "loss": 88.9837, - "step": 51370 - }, - { - "epoch": 0.20758170145888968, - "grad_norm": 1464.2392578125, - "learning_rate": 4.8323210000839124e-05, - "loss": 82.0623, - "step": 51380 - }, - { - "epoch": 0.20762210272425732, - "grad_norm": 771.36767578125, - "learning_rate": 4.832195292022249e-05, - "loss": 99.1351, - "step": 51390 - }, - { - "epoch": 0.20766250398962496, - "grad_norm": 513.316650390625, - "learning_rate": 4.832069538493237e-05, - "loss": 54.0263, - "step": 51400 - }, - { - "epoch": 0.20770290525499258, - "grad_norm": 813.9430541992188, - "learning_rate": 4.831943739499328e-05, - "loss": 79.7442, - "step": 51410 - }, - { - "epoch": 0.20774330652036022, - "grad_norm": 1018.91650390625, - "learning_rate": 4.831817895042977e-05, - "loss": 103.3693, - "step": 51420 - }, - { - "epoch": 0.20778370778572786, - "grad_norm": 1169.8072509765625, - "learning_rate": 4.8316920051266343e-05, - "loss": 68.9814, - "step": 51430 - }, - { - "epoch": 0.20782410905109547, - "grad_norm": 739.5846557617188, - "learning_rate": 4.8315660697527566e-05, - "loss": 47.6177, - "step": 51440 - }, - { - "epoch": 0.2078645103164631, - "grad_norm": 996.7125854492188, - "learning_rate": 4.831440088923797e-05, - "loss": 72.8957, - "step": 51450 - }, - { - "epoch": 0.20790491158183075, - "grad_norm": 765.3619384765625, - "learning_rate": 4.8313140626422125e-05, - "loss": 56.8616, - "step": 51460 - }, - { - "epoch": 0.2079453128471984, - "grad_norm": 2611.9541015625, - "learning_rate": 4.831187990910461e-05, - "loss": 117.1232, - "step": 51470 - }, - { - "epoch": 0.207985714112566, - "grad_norm": 1099.97314453125, - "learning_rate": 4.831061873730999e-05, - "loss": 65.8406, - "step": 51480 - }, - { - "epoch": 0.20802611537793364, - "grad_norm": 672.7211303710938, - "learning_rate": 4.8309357111062856e-05, - "loss": 101.9847, - "step": 51490 - }, - { - "epoch": 0.20806651664330128, - "grad_norm": 559.6053466796875, - "learning_rate": 4.830809503038781e-05, - "loss": 60.1119, - "step": 51500 - }, - { - "epoch": 0.2081069179086689, - "grad_norm": 611.0339965820312, - "learning_rate": 4.8306832495309445e-05, - "loss": 77.4869, - "step": 51510 - }, - { - "epoch": 0.20814731917403653, - "grad_norm": 955.992919921875, - "learning_rate": 4.830556950585238e-05, - "loss": 94.2499, - "step": 51520 - }, - { - "epoch": 0.20818772043940417, - "grad_norm": 1458.7508544921875, - "learning_rate": 4.830430606204125e-05, - "loss": 107.4706, - "step": 51530 - }, - { - "epoch": 0.20822812170477178, - "grad_norm": 550.4443359375, - "learning_rate": 4.830304216390066e-05, - "loss": 58.3049, - "step": 51540 - }, - { - "epoch": 0.20826852297013942, - "grad_norm": 758.5188598632812, - "learning_rate": 4.8301777811455276e-05, - "loss": 82.5834, - "step": 51550 - }, - { - "epoch": 0.20830892423550706, - "grad_norm": 715.7566528320312, - "learning_rate": 4.8300513004729735e-05, - "loss": 47.3867, - "step": 51560 - }, - { - "epoch": 0.20834932550087468, - "grad_norm": 517.6349487304688, - "learning_rate": 4.82992477437487e-05, - "loss": 60.1919, - "step": 51570 - }, - { - "epoch": 0.20838972676624232, - "grad_norm": 1060.4306640625, - "learning_rate": 4.8297982028536826e-05, - "loss": 106.7914, - "step": 51580 - }, - { - "epoch": 0.20843012803160996, - "grad_norm": 603.7183227539062, - "learning_rate": 4.82967158591188e-05, - "loss": 59.8083, - "step": 51590 - }, - { - "epoch": 0.20847052929697757, - "grad_norm": 1019.84326171875, - "learning_rate": 4.829544923551931e-05, - "loss": 63.1892, - "step": 51600 - }, - { - "epoch": 0.2085109305623452, - "grad_norm": 648.5969848632812, - "learning_rate": 4.8294182157763044e-05, - "loss": 95.7722, - "step": 51610 - }, - { - "epoch": 0.20855133182771285, - "grad_norm": 2398.117431640625, - "learning_rate": 4.82929146258747e-05, - "loss": 65.186, - "step": 51620 - }, - { - "epoch": 0.20859173309308046, - "grad_norm": 342.774658203125, - "learning_rate": 4.8291646639878995e-05, - "loss": 81.3063, - "step": 51630 - }, - { - "epoch": 0.2086321343584481, - "grad_norm": 1235.5203857421875, - "learning_rate": 4.829037819980065e-05, - "loss": 44.4151, - "step": 51640 - }, - { - "epoch": 0.20867253562381574, - "grad_norm": 470.635498046875, - "learning_rate": 4.828910930566439e-05, - "loss": 62.5535, - "step": 51650 - }, - { - "epoch": 0.20871293688918338, - "grad_norm": 830.7872924804688, - "learning_rate": 4.828783995749495e-05, - "loss": 80.7936, - "step": 51660 - }, - { - "epoch": 0.208753338154551, - "grad_norm": 1270.7835693359375, - "learning_rate": 4.828657015531709e-05, - "loss": 89.139, - "step": 51670 - }, - { - "epoch": 0.20879373941991863, - "grad_norm": 726.8765258789062, - "learning_rate": 4.828529989915555e-05, - "loss": 53.5606, - "step": 51680 - }, - { - "epoch": 0.20883414068528627, - "grad_norm": 2210.846435546875, - "learning_rate": 4.8284029189035094e-05, - "loss": 97.6787, - "step": 51690 - }, - { - "epoch": 0.20887454195065389, - "grad_norm": 0.0, - "learning_rate": 4.828275802498051e-05, - "loss": 71.8659, - "step": 51700 - }, - { - "epoch": 0.20891494321602153, - "grad_norm": 935.220458984375, - "learning_rate": 4.828148640701657e-05, - "loss": 65.6444, - "step": 51710 - }, - { - "epoch": 0.20895534448138917, - "grad_norm": 287.4408264160156, - "learning_rate": 4.828021433516806e-05, - "loss": 61.6178, - "step": 51720 - }, - { - "epoch": 0.20899574574675678, - "grad_norm": 477.3240966796875, - "learning_rate": 4.82789418094598e-05, - "loss": 93.1937, - "step": 51730 - }, - { - "epoch": 0.20903614701212442, - "grad_norm": 516.9016723632812, - "learning_rate": 4.827766882991657e-05, - "loss": 77.4756, - "step": 51740 - }, - { - "epoch": 0.20907654827749206, - "grad_norm": 1626.0362548828125, - "learning_rate": 4.827639539656321e-05, - "loss": 100.3807, - "step": 51750 - }, - { - "epoch": 0.20911694954285967, - "grad_norm": 530.7556762695312, - "learning_rate": 4.827512150942454e-05, - "loss": 96.0756, - "step": 51760 - }, - { - "epoch": 0.2091573508082273, - "grad_norm": 865.0823974609375, - "learning_rate": 4.827384716852539e-05, - "loss": 50.7269, - "step": 51770 - }, - { - "epoch": 0.20919775207359495, - "grad_norm": 1538.5042724609375, - "learning_rate": 4.82725723738906e-05, - "loss": 147.1135, - "step": 51780 - }, - { - "epoch": 0.20923815333896256, - "grad_norm": 574.5728149414062, - "learning_rate": 4.827129712554504e-05, - "loss": 60.3232, - "step": 51790 - }, - { - "epoch": 0.2092785546043302, - "grad_norm": 674.2813110351562, - "learning_rate": 4.8270021423513554e-05, - "loss": 58.9629, - "step": 51800 - }, - { - "epoch": 0.20931895586969784, - "grad_norm": 937.812255859375, - "learning_rate": 4.826874526782103e-05, - "loss": 83.8374, - "step": 51810 - }, - { - "epoch": 0.20935935713506548, - "grad_norm": 713.9767456054688, - "learning_rate": 4.8267468658492335e-05, - "loss": 63.4093, - "step": 51820 - }, - { - "epoch": 0.2093997584004331, - "grad_norm": 1410.48583984375, - "learning_rate": 4.826619159555236e-05, - "loss": 77.9979, - "step": 51830 - }, - { - "epoch": 0.20944015966580073, - "grad_norm": 845.0786743164062, - "learning_rate": 4.826491407902599e-05, - "loss": 86.6751, - "step": 51840 - }, - { - "epoch": 0.20948056093116837, - "grad_norm": 2135.817626953125, - "learning_rate": 4.8263636108938156e-05, - "loss": 96.0103, - "step": 51850 - }, - { - "epoch": 0.209520962196536, - "grad_norm": 1159.19775390625, - "learning_rate": 4.8262357685313754e-05, - "loss": 135.6379, - "step": 51860 - }, - { - "epoch": 0.20956136346190363, - "grad_norm": 962.7410278320312, - "learning_rate": 4.826107880817771e-05, - "loss": 65.2435, - "step": 51870 - }, - { - "epoch": 0.20960176472727127, - "grad_norm": 958.7470703125, - "learning_rate": 4.8259799477554965e-05, - "loss": 99.5246, - "step": 51880 - }, - { - "epoch": 0.20964216599263888, - "grad_norm": 675.7080078125, - "learning_rate": 4.825851969347045e-05, - "loss": 61.0314, - "step": 51890 - }, - { - "epoch": 0.20968256725800652, - "grad_norm": 577.6461791992188, - "learning_rate": 4.8257239455949124e-05, - "loss": 58.5346, - "step": 51900 - }, - { - "epoch": 0.20972296852337416, - "grad_norm": 1043.0440673828125, - "learning_rate": 4.825595876501593e-05, - "loss": 90.7892, - "step": 51910 - }, - { - "epoch": 0.20976336978874177, - "grad_norm": 597.583984375, - "learning_rate": 4.825467762069585e-05, - "loss": 89.8225, - "step": 51920 - }, - { - "epoch": 0.2098037710541094, - "grad_norm": 588.63427734375, - "learning_rate": 4.825339602301387e-05, - "loss": 76.6868, - "step": 51930 - }, - { - "epoch": 0.20984417231947705, - "grad_norm": 1137.5469970703125, - "learning_rate": 4.825211397199495e-05, - "loss": 77.4429, - "step": 51940 - }, - { - "epoch": 0.20988457358484466, - "grad_norm": 1342.8388671875, - "learning_rate": 4.82508314676641e-05, - "loss": 87.7724, - "step": 51950 - }, - { - "epoch": 0.2099249748502123, - "grad_norm": 549.8995971679688, - "learning_rate": 4.824954851004633e-05, - "loss": 62.8191, - "step": 51960 - }, - { - "epoch": 0.20996537611557994, - "grad_norm": 347.0243835449219, - "learning_rate": 4.8248265099166634e-05, - "loss": 68.7288, - "step": 51970 - }, - { - "epoch": 0.21000577738094758, - "grad_norm": 938.3527221679688, - "learning_rate": 4.824698123505004e-05, - "loss": 57.8288, - "step": 51980 - }, - { - "epoch": 0.2100461786463152, - "grad_norm": 869.6561279296875, - "learning_rate": 4.824569691772158e-05, - "loss": 82.7138, - "step": 51990 - }, - { - "epoch": 0.21008657991168284, - "grad_norm": 940.8391723632812, - "learning_rate": 4.8244412147206284e-05, - "loss": 87.2308, - "step": 52000 - }, - { - "epoch": 0.21012698117705048, - "grad_norm": 1123.21826171875, - "learning_rate": 4.8243126923529214e-05, - "loss": 62.064, - "step": 52010 - }, - { - "epoch": 0.2101673824424181, - "grad_norm": 919.0745849609375, - "learning_rate": 4.824184124671542e-05, - "loss": 80.0747, - "step": 52020 - }, - { - "epoch": 0.21020778370778573, - "grad_norm": 1110.951904296875, - "learning_rate": 4.8240555116789964e-05, - "loss": 58.8613, - "step": 52030 - }, - { - "epoch": 0.21024818497315337, - "grad_norm": 1179.7130126953125, - "learning_rate": 4.823926853377791e-05, - "loss": 85.1923, - "step": 52040 - }, - { - "epoch": 0.21028858623852098, - "grad_norm": 451.0613708496094, - "learning_rate": 4.8237981497704365e-05, - "loss": 63.9672, - "step": 52050 - }, - { - "epoch": 0.21032898750388862, - "grad_norm": 1503.4912109375, - "learning_rate": 4.8236694008594405e-05, - "loss": 85.0293, - "step": 52060 - }, - { - "epoch": 0.21036938876925626, - "grad_norm": 691.0103149414062, - "learning_rate": 4.823540606647313e-05, - "loss": 83.7888, - "step": 52070 - }, - { - "epoch": 0.21040979003462387, - "grad_norm": 393.8356628417969, - "learning_rate": 4.823411767136565e-05, - "loss": 71.635, - "step": 52080 - }, - { - "epoch": 0.2104501912999915, - "grad_norm": 828.97119140625, - "learning_rate": 4.8232828823297085e-05, - "loss": 88.108, - "step": 52090 - }, - { - "epoch": 0.21049059256535915, - "grad_norm": 1106.6741943359375, - "learning_rate": 4.8231539522292564e-05, - "loss": 95.024, - "step": 52100 - }, - { - "epoch": 0.21053099383072676, - "grad_norm": 996.2150268554688, - "learning_rate": 4.823024976837721e-05, - "loss": 63.8951, - "step": 52110 - }, - { - "epoch": 0.2105713950960944, - "grad_norm": 1018.7054443359375, - "learning_rate": 4.822895956157619e-05, - "loss": 83.2238, - "step": 52120 - }, - { - "epoch": 0.21061179636146204, - "grad_norm": 648.1104736328125, - "learning_rate": 4.8227668901914636e-05, - "loss": 123.0206, - "step": 52130 - }, - { - "epoch": 0.21065219762682968, - "grad_norm": 781.1571655273438, - "learning_rate": 4.822637778941772e-05, - "loss": 43.9732, - "step": 52140 - }, - { - "epoch": 0.2106925988921973, - "grad_norm": 539.1663208007812, - "learning_rate": 4.8225086224110615e-05, - "loss": 77.1991, - "step": 52150 - }, - { - "epoch": 0.21073300015756494, - "grad_norm": 654.8760986328125, - "learning_rate": 4.822379420601849e-05, - "loss": 95.2022, - "step": 52160 - }, - { - "epoch": 0.21077340142293258, - "grad_norm": 666.3115234375, - "learning_rate": 4.822250173516655e-05, - "loss": 101.8235, - "step": 52170 - }, - { - "epoch": 0.2108138026883002, - "grad_norm": 8625.4052734375, - "learning_rate": 4.822120881157998e-05, - "loss": 73.1473, - "step": 52180 - }, - { - "epoch": 0.21085420395366783, - "grad_norm": 1041.82470703125, - "learning_rate": 4.821991543528398e-05, - "loss": 101.8729, - "step": 52190 - }, - { - "epoch": 0.21089460521903547, - "grad_norm": 0.0, - "learning_rate": 4.821862160630378e-05, - "loss": 49.5271, - "step": 52200 - }, - { - "epoch": 0.21093500648440308, - "grad_norm": 347.1128234863281, - "learning_rate": 4.8217327324664595e-05, - "loss": 109.6646, - "step": 52210 - }, - { - "epoch": 0.21097540774977072, - "grad_norm": 786.9452514648438, - "learning_rate": 4.821603259039167e-05, - "loss": 69.5838, - "step": 52220 - }, - { - "epoch": 0.21101580901513836, - "grad_norm": 308.2260437011719, - "learning_rate": 4.821473740351023e-05, - "loss": 41.9356, - "step": 52230 - }, - { - "epoch": 0.21105621028050597, - "grad_norm": 730.7586669921875, - "learning_rate": 4.821344176404554e-05, - "loss": 64.5068, - "step": 52240 - }, - { - "epoch": 0.2110966115458736, - "grad_norm": 724.8195190429688, - "learning_rate": 4.8212145672022844e-05, - "loss": 75.1824, - "step": 52250 - }, - { - "epoch": 0.21113701281124125, - "grad_norm": 1213.788818359375, - "learning_rate": 4.821084912746742e-05, - "loss": 99.653, - "step": 52260 - }, - { - "epoch": 0.21117741407660887, - "grad_norm": 886.8688354492188, - "learning_rate": 4.820955213040454e-05, - "loss": 85.2024, - "step": 52270 - }, - { - "epoch": 0.2112178153419765, - "grad_norm": 746.9656372070312, - "learning_rate": 4.8208254680859494e-05, - "loss": 62.4096, - "step": 52280 - }, - { - "epoch": 0.21125821660734415, - "grad_norm": 695.8228759765625, - "learning_rate": 4.820695677885757e-05, - "loss": 123.5433, - "step": 52290 - }, - { - "epoch": 0.21129861787271179, - "grad_norm": 642.0867309570312, - "learning_rate": 4.820565842442408e-05, - "loss": 82.3376, - "step": 52300 - }, - { - "epoch": 0.2113390191380794, - "grad_norm": 448.39202880859375, - "learning_rate": 4.8204359617584336e-05, - "loss": 58.77, - "step": 52310 - }, - { - "epoch": 0.21137942040344704, - "grad_norm": 906.605712890625, - "learning_rate": 4.820306035836365e-05, - "loss": 79.2194, - "step": 52320 - }, - { - "epoch": 0.21141982166881468, - "grad_norm": 478.28643798828125, - "learning_rate": 4.8201760646787366e-05, - "loss": 56.0789, - "step": 52330 - }, - { - "epoch": 0.2114602229341823, - "grad_norm": 1141.816650390625, - "learning_rate": 4.82004604828808e-05, - "loss": 110.6768, - "step": 52340 - }, - { - "epoch": 0.21150062419954993, - "grad_norm": 280.15692138671875, - "learning_rate": 4.819915986666932e-05, - "loss": 93.5777, - "step": 52350 - }, - { - "epoch": 0.21154102546491757, - "grad_norm": 1224.0416259765625, - "learning_rate": 4.819785879817827e-05, - "loss": 104.5719, - "step": 52360 - }, - { - "epoch": 0.21158142673028518, - "grad_norm": 960.8833618164062, - "learning_rate": 4.819655727743302e-05, - "loss": 88.1145, - "step": 52370 - }, - { - "epoch": 0.21162182799565282, - "grad_norm": 717.3423461914062, - "learning_rate": 4.8195255304458945e-05, - "loss": 138.9197, - "step": 52380 - }, - { - "epoch": 0.21166222926102046, - "grad_norm": 902.6900634765625, - "learning_rate": 4.819395287928143e-05, - "loss": 72.1466, - "step": 52390 - }, - { - "epoch": 0.21170263052638807, - "grad_norm": 988.1195068359375, - "learning_rate": 4.8192650001925855e-05, - "loss": 63.8846, - "step": 52400 - }, - { - "epoch": 0.21174303179175571, - "grad_norm": 1469.6943359375, - "learning_rate": 4.8191346672417633e-05, - "loss": 68.6236, - "step": 52410 - }, - { - "epoch": 0.21178343305712335, - "grad_norm": 529.160888671875, - "learning_rate": 4.819004289078217e-05, - "loss": 71.5383, - "step": 52420 - }, - { - "epoch": 0.21182383432249097, - "grad_norm": 1089.6658935546875, - "learning_rate": 4.818873865704487e-05, - "loss": 101.7536, - "step": 52430 - }, - { - "epoch": 0.2118642355878586, - "grad_norm": 1817.579833984375, - "learning_rate": 4.818743397123119e-05, - "loss": 111.1048, - "step": 52440 - }, - { - "epoch": 0.21190463685322625, - "grad_norm": 1221.6204833984375, - "learning_rate": 4.818612883336654e-05, - "loss": 59.6956, - "step": 52450 - }, - { - "epoch": 0.2119450381185939, - "grad_norm": 572.4197998046875, - "learning_rate": 4.8184823243476364e-05, - "loss": 69.0064, - "step": 52460 - }, - { - "epoch": 0.2119854393839615, - "grad_norm": 617.7356567382812, - "learning_rate": 4.818351720158613e-05, - "loss": 50.4712, - "step": 52470 - }, - { - "epoch": 0.21202584064932914, - "grad_norm": 530.05224609375, - "learning_rate": 4.8182210707721284e-05, - "loss": 63.9792, - "step": 52480 - }, - { - "epoch": 0.21206624191469678, - "grad_norm": 2283.126953125, - "learning_rate": 4.8180903761907315e-05, - "loss": 76.8574, - "step": 52490 - }, - { - "epoch": 0.2121066431800644, - "grad_norm": 721.9110717773438, - "learning_rate": 4.817959636416969e-05, - "loss": 61.0024, - "step": 52500 - }, - { - "epoch": 0.21214704444543203, - "grad_norm": 993.3989868164062, - "learning_rate": 4.81782885145339e-05, - "loss": 97.0186, - "step": 52510 - }, - { - "epoch": 0.21218744571079967, - "grad_norm": 897.0438232421875, - "learning_rate": 4.8176980213025434e-05, - "loss": 83.1266, - "step": 52520 - }, - { - "epoch": 0.21222784697616728, - "grad_norm": 677.8610229492188, - "learning_rate": 4.817567145966982e-05, - "loss": 46.0433, - "step": 52530 - }, - { - "epoch": 0.21226824824153492, - "grad_norm": 893.5686645507812, - "learning_rate": 4.817436225449255e-05, - "loss": 66.1131, - "step": 52540 - }, - { - "epoch": 0.21230864950690256, - "grad_norm": 879.1597290039062, - "learning_rate": 4.817305259751916e-05, - "loss": 56.4979, - "step": 52550 - }, - { - "epoch": 0.21234905077227018, - "grad_norm": 1273.606689453125, - "learning_rate": 4.817174248877518e-05, - "loss": 68.6022, - "step": 52560 - }, - { - "epoch": 0.21238945203763782, - "grad_norm": 958.6312255859375, - "learning_rate": 4.8170431928286155e-05, - "loss": 73.1606, - "step": 52570 - }, - { - "epoch": 0.21242985330300546, - "grad_norm": 2562.6357421875, - "learning_rate": 4.816912091607762e-05, - "loss": 97.7864, - "step": 52580 - }, - { - "epoch": 0.21247025456837307, - "grad_norm": 305.3066711425781, - "learning_rate": 4.816780945217515e-05, - "loss": 77.4546, - "step": 52590 - }, - { - "epoch": 0.2125106558337407, - "grad_norm": 721.4779663085938, - "learning_rate": 4.81664975366043e-05, - "loss": 87.7981, - "step": 52600 - }, - { - "epoch": 0.21255105709910835, - "grad_norm": 1225.9923095703125, - "learning_rate": 4.816518516939067e-05, - "loss": 82.7573, - "step": 52610 - }, - { - "epoch": 0.212591458364476, - "grad_norm": 729.4354248046875, - "learning_rate": 4.8163872350559816e-05, - "loss": 66.5677, - "step": 52620 - }, - { - "epoch": 0.2126318596298436, - "grad_norm": 1146.287109375, - "learning_rate": 4.8162559080137346e-05, - "loss": 108.5152, - "step": 52630 - }, - { - "epoch": 0.21267226089521124, - "grad_norm": 1008.0455322265625, - "learning_rate": 4.8161245358148866e-05, - "loss": 91.5322, - "step": 52640 - }, - { - "epoch": 0.21271266216057888, - "grad_norm": 193.93905639648438, - "learning_rate": 4.815993118461998e-05, - "loss": 40.234, - "step": 52650 - }, - { - "epoch": 0.2127530634259465, - "grad_norm": 1779.3536376953125, - "learning_rate": 4.815861655957632e-05, - "loss": 82.0843, - "step": 52660 - }, - { - "epoch": 0.21279346469131413, - "grad_norm": 761.0687255859375, - "learning_rate": 4.81573014830435e-05, - "loss": 98.4412, - "step": 52670 - }, - { - "epoch": 0.21283386595668177, - "grad_norm": 639.0739135742188, - "learning_rate": 4.815598595504717e-05, - "loss": 103.2698, - "step": 52680 - }, - { - "epoch": 0.21287426722204938, - "grad_norm": 1339.623291015625, - "learning_rate": 4.8154669975612966e-05, - "loss": 97.1118, - "step": 52690 - }, - { - "epoch": 0.21291466848741702, - "grad_norm": 1258.3170166015625, - "learning_rate": 4.8153353544766553e-05, - "loss": 73.1002, - "step": 52700 - }, - { - "epoch": 0.21295506975278466, - "grad_norm": 783.255859375, - "learning_rate": 4.815203666253359e-05, - "loss": 60.4868, - "step": 52710 - }, - { - "epoch": 0.21299547101815228, - "grad_norm": 621.6766357421875, - "learning_rate": 4.8150719328939755e-05, - "loss": 53.9103, - "step": 52720 - }, - { - "epoch": 0.21303587228351992, - "grad_norm": 899.034912109375, - "learning_rate": 4.814940154401073e-05, - "loss": 90.3724, - "step": 52730 - }, - { - "epoch": 0.21307627354888756, - "grad_norm": 1244.728515625, - "learning_rate": 4.81480833077722e-05, - "loss": 57.8157, - "step": 52740 - }, - { - "epoch": 0.21311667481425517, - "grad_norm": 809.1812744140625, - "learning_rate": 4.814676462024988e-05, - "loss": 110.7006, - "step": 52750 - }, - { - "epoch": 0.2131570760796228, - "grad_norm": 3029.4775390625, - "learning_rate": 4.814544548146945e-05, - "loss": 63.5398, - "step": 52760 - }, - { - "epoch": 0.21319747734499045, - "grad_norm": 392.19464111328125, - "learning_rate": 4.814412589145665e-05, - "loss": 84.5926, - "step": 52770 - }, - { - "epoch": 0.2132378786103581, - "grad_norm": 883.9444580078125, - "learning_rate": 4.814280585023721e-05, - "loss": 73.6628, - "step": 52780 - }, - { - "epoch": 0.2132782798757257, - "grad_norm": 1597.8800048828125, - "learning_rate": 4.814148535783684e-05, - "loss": 137.1192, - "step": 52790 - }, - { - "epoch": 0.21331868114109334, - "grad_norm": 390.61407470703125, - "learning_rate": 4.8140164414281306e-05, - "loss": 58.5544, - "step": 52800 - }, - { - "epoch": 0.21335908240646098, - "grad_norm": 1327.6656494140625, - "learning_rate": 4.813884301959635e-05, - "loss": 62.8842, - "step": 52810 - }, - { - "epoch": 0.2133994836718286, - "grad_norm": 1165.1925048828125, - "learning_rate": 4.813752117380774e-05, - "loss": 73.2491, - "step": 52820 - }, - { - "epoch": 0.21343988493719623, - "grad_norm": 974.8809204101562, - "learning_rate": 4.813619887694124e-05, - "loss": 86.8438, - "step": 52830 - }, - { - "epoch": 0.21348028620256387, - "grad_norm": 658.647216796875, - "learning_rate": 4.813487612902264e-05, - "loss": 75.7994, - "step": 52840 - }, - { - "epoch": 0.21352068746793149, - "grad_norm": 619.7402954101562, - "learning_rate": 4.8133552930077716e-05, - "loss": 47.0964, - "step": 52850 - }, - { - "epoch": 0.21356108873329913, - "grad_norm": 865.9132690429688, - "learning_rate": 4.813222928013226e-05, - "loss": 47.7462, - "step": 52860 - }, - { - "epoch": 0.21360148999866677, - "grad_norm": 983.5340576171875, - "learning_rate": 4.813090517921209e-05, - "loss": 48.6633, - "step": 52870 - }, - { - "epoch": 0.21364189126403438, - "grad_norm": 607.2757568359375, - "learning_rate": 4.812958062734302e-05, - "loss": 93.2731, - "step": 52880 - }, - { - "epoch": 0.21368229252940202, - "grad_norm": 732.9096069335938, - "learning_rate": 4.812825562455086e-05, - "loss": 83.7247, - "step": 52890 - }, - { - "epoch": 0.21372269379476966, - "grad_norm": 493.807373046875, - "learning_rate": 4.812693017086145e-05, - "loss": 71.3714, - "step": 52900 - }, - { - "epoch": 0.21376309506013727, - "grad_norm": 880.7445678710938, - "learning_rate": 4.8125604266300636e-05, - "loss": 69.891, - "step": 52910 - }, - { - "epoch": 0.2138034963255049, - "grad_norm": 1000.0642700195312, - "learning_rate": 4.812427791089426e-05, - "loss": 106.0305, - "step": 52920 - }, - { - "epoch": 0.21384389759087255, - "grad_norm": 649.3963012695312, - "learning_rate": 4.812295110466817e-05, - "loss": 71.4997, - "step": 52930 - }, - { - "epoch": 0.2138842988562402, - "grad_norm": 539.8707885742188, - "learning_rate": 4.812162384764826e-05, - "loss": 81.3335, - "step": 52940 - }, - { - "epoch": 0.2139247001216078, - "grad_norm": 822.1153564453125, - "learning_rate": 4.8120296139860376e-05, - "loss": 67.337, - "step": 52950 - }, - { - "epoch": 0.21396510138697544, - "grad_norm": 2130.726806640625, - "learning_rate": 4.811896798133042e-05, - "loss": 111.1284, - "step": 52960 - }, - { - "epoch": 0.21400550265234308, - "grad_norm": 467.0498046875, - "learning_rate": 4.811763937208428e-05, - "loss": 84.292, - "step": 52970 - }, - { - "epoch": 0.2140459039177107, - "grad_norm": 1501.177978515625, - "learning_rate": 4.811631031214786e-05, - "loss": 63.4714, - "step": 52980 - }, - { - "epoch": 0.21408630518307833, - "grad_norm": 628.075439453125, - "learning_rate": 4.811498080154707e-05, - "loss": 113.7706, - "step": 52990 - }, - { - "epoch": 0.21412670644844597, - "grad_norm": 792.900146484375, - "learning_rate": 4.8113650840307834e-05, - "loss": 55.3442, - "step": 53000 - }, - { - "epoch": 0.2141671077138136, - "grad_norm": 989.729736328125, - "learning_rate": 4.811232042845607e-05, - "loss": 88.5752, - "step": 53010 - }, - { - "epoch": 0.21420750897918123, - "grad_norm": 1206.408447265625, - "learning_rate": 4.8110989566017716e-05, - "loss": 50.2796, - "step": 53020 - }, - { - "epoch": 0.21424791024454887, - "grad_norm": 854.8396606445312, - "learning_rate": 4.810965825301873e-05, - "loss": 85.1104, - "step": 53030 - }, - { - "epoch": 0.21428831150991648, - "grad_norm": 841.41357421875, - "learning_rate": 4.810832648948505e-05, - "loss": 64.3325, - "step": 53040 - }, - { - "epoch": 0.21432871277528412, - "grad_norm": 1686.3604736328125, - "learning_rate": 4.810699427544265e-05, - "loss": 73.6608, - "step": 53050 - }, - { - "epoch": 0.21436911404065176, - "grad_norm": 1173.083740234375, - "learning_rate": 4.810566161091751e-05, - "loss": 80.8808, - "step": 53060 - }, - { - "epoch": 0.21440951530601937, - "grad_norm": 1014.2672119140625, - "learning_rate": 4.810432849593559e-05, - "loss": 109.5157, - "step": 53070 - }, - { - "epoch": 0.214449916571387, - "grad_norm": 848.516357421875, - "learning_rate": 4.810299493052289e-05, - "loss": 86.9813, - "step": 53080 - }, - { - "epoch": 0.21449031783675465, - "grad_norm": 2046.2830810546875, - "learning_rate": 4.810166091470542e-05, - "loss": 71.7423, - "step": 53090 - }, - { - "epoch": 0.2145307191021223, - "grad_norm": 856.9232177734375, - "learning_rate": 4.810032644850917e-05, - "loss": 50.4441, - "step": 53100 - }, - { - "epoch": 0.2145711203674899, - "grad_norm": 868.9803466796875, - "learning_rate": 4.809899153196017e-05, - "loss": 115.6349, - "step": 53110 - }, - { - "epoch": 0.21461152163285754, - "grad_norm": 0.0, - "learning_rate": 4.809765616508443e-05, - "loss": 59.3985, - "step": 53120 - }, - { - "epoch": 0.21465192289822518, - "grad_norm": 2540.765625, - "learning_rate": 4.8096320347908e-05, - "loss": 75.7832, - "step": 53130 - }, - { - "epoch": 0.2146923241635928, - "grad_norm": 927.6019897460938, - "learning_rate": 4.8094984080456904e-05, - "loss": 65.1414, - "step": 53140 - }, - { - "epoch": 0.21473272542896044, - "grad_norm": 863.6876831054688, - "learning_rate": 4.8093647362757206e-05, - "loss": 163.4252, - "step": 53150 - }, - { - "epoch": 0.21477312669432808, - "grad_norm": 542.0109252929688, - "learning_rate": 4.809231019483497e-05, - "loss": 81.4207, - "step": 53160 - }, - { - "epoch": 0.2148135279596957, - "grad_norm": 738.8974609375, - "learning_rate": 4.809097257671625e-05, - "loss": 71.7718, - "step": 53170 - }, - { - "epoch": 0.21485392922506333, - "grad_norm": 1300.409423828125, - "learning_rate": 4.808963450842713e-05, - "loss": 63.6445, - "step": 53180 - }, - { - "epoch": 0.21489433049043097, - "grad_norm": 710.06103515625, - "learning_rate": 4.80882959899937e-05, - "loss": 75.5958, - "step": 53190 - }, - { - "epoch": 0.21493473175579858, - "grad_norm": 1211.570556640625, - "learning_rate": 4.808695702144206e-05, - "loss": 96.9039, - "step": 53200 - }, - { - "epoch": 0.21497513302116622, - "grad_norm": 4932.537109375, - "learning_rate": 4.808561760279831e-05, - "loss": 62.6395, - "step": 53210 - }, - { - "epoch": 0.21501553428653386, - "grad_norm": 1449.601806640625, - "learning_rate": 4.8084277734088544e-05, - "loss": 76.1311, - "step": 53220 - }, - { - "epoch": 0.21505593555190147, - "grad_norm": 1062.4901123046875, - "learning_rate": 4.808293741533891e-05, - "loss": 94.2731, - "step": 53230 - }, - { - "epoch": 0.2150963368172691, - "grad_norm": 766.841552734375, - "learning_rate": 4.808159664657552e-05, - "loss": 59.1123, - "step": 53240 - }, - { - "epoch": 0.21513673808263675, - "grad_norm": 503.95635986328125, - "learning_rate": 4.808025542782453e-05, - "loss": 71.4319, - "step": 53250 - }, - { - "epoch": 0.2151771393480044, - "grad_norm": 2950.0439453125, - "learning_rate": 4.8078913759112066e-05, - "loss": 90.5065, - "step": 53260 - }, - { - "epoch": 0.215217540613372, - "grad_norm": 1027.7640380859375, - "learning_rate": 4.80775716404643e-05, - "loss": 56.2461, - "step": 53270 - }, - { - "epoch": 0.21525794187873964, - "grad_norm": 507.6009521484375, - "learning_rate": 4.8076229071907397e-05, - "loss": 66.3295, - "step": 53280 - }, - { - "epoch": 0.21529834314410728, - "grad_norm": 665.6806640625, - "learning_rate": 4.807488605346753e-05, - "loss": 80.3044, - "step": 53290 - }, - { - "epoch": 0.2153387444094749, - "grad_norm": 989.35791015625, - "learning_rate": 4.8073542585170877e-05, - "loss": 68.0304, - "step": 53300 - }, - { - "epoch": 0.21537914567484254, - "grad_norm": 6701.48046875, - "learning_rate": 4.8072198667043635e-05, - "loss": 130.3542, - "step": 53310 - }, - { - "epoch": 0.21541954694021018, - "grad_norm": 626.8556518554688, - "learning_rate": 4.8070854299111994e-05, - "loss": 69.6508, - "step": 53320 - }, - { - "epoch": 0.2154599482055778, - "grad_norm": 889.5938720703125, - "learning_rate": 4.806950948140217e-05, - "loss": 94.4536, - "step": 53330 - }, - { - "epoch": 0.21550034947094543, - "grad_norm": 470.2935485839844, - "learning_rate": 4.8068164213940393e-05, - "loss": 65.35, - "step": 53340 - }, - { - "epoch": 0.21554075073631307, - "grad_norm": 1064.505859375, - "learning_rate": 4.8066818496752875e-05, - "loss": 88.5015, - "step": 53350 - }, - { - "epoch": 0.21558115200168068, - "grad_norm": 673.2662963867188, - "learning_rate": 4.8065472329865854e-05, - "loss": 63.5901, - "step": 53360 - }, - { - "epoch": 0.21562155326704832, - "grad_norm": 880.4266967773438, - "learning_rate": 4.806412571330557e-05, - "loss": 63.0849, - "step": 53370 - }, - { - "epoch": 0.21566195453241596, - "grad_norm": 761.7822875976562, - "learning_rate": 4.8062778647098284e-05, - "loss": 78.9925, - "step": 53380 - }, - { - "epoch": 0.21570235579778357, - "grad_norm": 974.259521484375, - "learning_rate": 4.806143113127025e-05, - "loss": 93.895, - "step": 53390 - }, - { - "epoch": 0.2157427570631512, - "grad_norm": 555.7957153320312, - "learning_rate": 4.8060083165847754e-05, - "loss": 74.5153, - "step": 53400 - }, - { - "epoch": 0.21578315832851885, - "grad_norm": 925.432373046875, - "learning_rate": 4.805873475085706e-05, - "loss": 119.6515, - "step": 53410 - }, - { - "epoch": 0.2158235595938865, - "grad_norm": 817.2948608398438, - "learning_rate": 4.805738588632446e-05, - "loss": 90.0423, - "step": 53420 - }, - { - "epoch": 0.2158639608592541, - "grad_norm": 921.9326782226562, - "learning_rate": 4.805603657227625e-05, - "loss": 106.7092, - "step": 53430 - }, - { - "epoch": 0.21590436212462175, - "grad_norm": 1191.692626953125, - "learning_rate": 4.805468680873874e-05, - "loss": 57.5349, - "step": 53440 - }, - { - "epoch": 0.21594476338998939, - "grad_norm": 327.62908935546875, - "learning_rate": 4.8053336595738236e-05, - "loss": 84.3568, - "step": 53450 - }, - { - "epoch": 0.215985164655357, - "grad_norm": 752.6683349609375, - "learning_rate": 4.805198593330107e-05, - "loss": 63.1536, - "step": 53460 - }, - { - "epoch": 0.21602556592072464, - "grad_norm": 711.6024780273438, - "learning_rate": 4.8050634821453565e-05, - "loss": 62.8601, - "step": 53470 - }, - { - "epoch": 0.21606596718609228, - "grad_norm": 466.1304931640625, - "learning_rate": 4.8049283260222075e-05, - "loss": 77.9144, - "step": 53480 - }, - { - "epoch": 0.2161063684514599, - "grad_norm": 1117.0677490234375, - "learning_rate": 4.804793124963294e-05, - "loss": 76.5366, - "step": 53490 - }, - { - "epoch": 0.21614676971682753, - "grad_norm": 637.1599731445312, - "learning_rate": 4.8046578789712515e-05, - "loss": 67.9231, - "step": 53500 - }, - { - "epoch": 0.21618717098219517, - "grad_norm": 568.0106811523438, - "learning_rate": 4.804522588048718e-05, - "loss": 75.5774, - "step": 53510 - }, - { - "epoch": 0.21622757224756278, - "grad_norm": 432.543212890625, - "learning_rate": 4.8043872521983294e-05, - "loss": 59.4265, - "step": 53520 - }, - { - "epoch": 0.21626797351293042, - "grad_norm": 454.713134765625, - "learning_rate": 4.804251871422725e-05, - "loss": 96.0723, - "step": 53530 - }, - { - "epoch": 0.21630837477829806, - "grad_norm": 1082.2203369140625, - "learning_rate": 4.804116445724543e-05, - "loss": 79.7681, - "step": 53540 - }, - { - "epoch": 0.21634877604366567, - "grad_norm": 621.678466796875, - "learning_rate": 4.803980975106427e-05, - "loss": 85.7473, - "step": 53550 - }, - { - "epoch": 0.21638917730903331, - "grad_norm": 890.781005859375, - "learning_rate": 4.803845459571014e-05, - "loss": 86.9005, - "step": 53560 - }, - { - "epoch": 0.21642957857440095, - "grad_norm": 649.1585083007812, - "learning_rate": 4.8037098991209484e-05, - "loss": 61.2427, - "step": 53570 - }, - { - "epoch": 0.2164699798397686, - "grad_norm": 850.4962768554688, - "learning_rate": 4.8035742937588724e-05, - "loss": 59.2555, - "step": 53580 - }, - { - "epoch": 0.2165103811051362, - "grad_norm": 787.1389770507812, - "learning_rate": 4.803438643487429e-05, - "loss": 82.7354, - "step": 53590 - }, - { - "epoch": 0.21655078237050385, - "grad_norm": 743.615234375, - "learning_rate": 4.803302948309264e-05, - "loss": 72.4777, - "step": 53600 - }, - { - "epoch": 0.2165911836358715, - "grad_norm": 1553.5562744140625, - "learning_rate": 4.8031672082270216e-05, - "loss": 85.1181, - "step": 53610 - }, - { - "epoch": 0.2166315849012391, - "grad_norm": 525.2081298828125, - "learning_rate": 4.803031423243349e-05, - "loss": 65.5073, - "step": 53620 - }, - { - "epoch": 0.21667198616660674, - "grad_norm": 718.9132080078125, - "learning_rate": 4.802895593360893e-05, - "loss": 169.8341, - "step": 53630 - }, - { - "epoch": 0.21671238743197438, - "grad_norm": 663.7149658203125, - "learning_rate": 4.8027597185823016e-05, - "loss": 54.949, - "step": 53640 - }, - { - "epoch": 0.216752788697342, - "grad_norm": 558.2113037109375, - "learning_rate": 4.802623798910224e-05, - "loss": 72.456, - "step": 53650 - }, - { - "epoch": 0.21679318996270963, - "grad_norm": 3290.45068359375, - "learning_rate": 4.802487834347311e-05, - "loss": 80.3636, - "step": 53660 - }, - { - "epoch": 0.21683359122807727, - "grad_norm": 601.9519653320312, - "learning_rate": 4.802351824896211e-05, - "loss": 82.0123, - "step": 53670 - }, - { - "epoch": 0.21687399249344488, - "grad_norm": 0.0, - "learning_rate": 4.802215770559577e-05, - "loss": 58.6228, - "step": 53680 - }, - { - "epoch": 0.21691439375881252, - "grad_norm": 610.6953125, - "learning_rate": 4.802079671340062e-05, - "loss": 60.981, - "step": 53690 - }, - { - "epoch": 0.21695479502418016, - "grad_norm": 774.2489624023438, - "learning_rate": 4.801943527240318e-05, - "loss": 94.8428, - "step": 53700 - }, - { - "epoch": 0.21699519628954778, - "grad_norm": 1123.4874267578125, - "learning_rate": 4.801807338263e-05, - "loss": 76.9702, - "step": 53710 - }, - { - "epoch": 0.21703559755491542, - "grad_norm": 1223.6903076171875, - "learning_rate": 4.801671104410763e-05, - "loss": 101.9046, - "step": 53720 - }, - { - "epoch": 0.21707599882028306, - "grad_norm": 1136.135009765625, - "learning_rate": 4.801534825686263e-05, - "loss": 98.1998, - "step": 53730 - }, - { - "epoch": 0.2171164000856507, - "grad_norm": 1007.6103515625, - "learning_rate": 4.801398502092156e-05, - "loss": 79.4994, - "step": 53740 - }, - { - "epoch": 0.2171568013510183, - "grad_norm": 1269.972900390625, - "learning_rate": 4.8012621336311016e-05, - "loss": 63.9924, - "step": 53750 - }, - { - "epoch": 0.21719720261638595, - "grad_norm": 643.7221069335938, - "learning_rate": 4.8011257203057556e-05, - "loss": 55.9485, - "step": 53760 - }, - { - "epoch": 0.2172376038817536, - "grad_norm": 1297.72412109375, - "learning_rate": 4.80098926211878e-05, - "loss": 92.5376, - "step": 53770 - }, - { - "epoch": 0.2172780051471212, - "grad_norm": 1075.8739013671875, - "learning_rate": 4.800852759072833e-05, - "loss": 59.2101, - "step": 53780 - }, - { - "epoch": 0.21731840641248884, - "grad_norm": 1902.2833251953125, - "learning_rate": 4.800716211170578e-05, - "loss": 95.2259, - "step": 53790 - }, - { - "epoch": 0.21735880767785648, - "grad_norm": 1468.7894287109375, - "learning_rate": 4.800579618414676e-05, - "loss": 94.4642, - "step": 53800 - }, - { - "epoch": 0.2173992089432241, - "grad_norm": 640.5420532226562, - "learning_rate": 4.80044298080779e-05, - "loss": 86.4085, - "step": 53810 - }, - { - "epoch": 0.21743961020859173, - "grad_norm": 872.8704833984375, - "learning_rate": 4.800306298352583e-05, - "loss": 64.3737, - "step": 53820 - }, - { - "epoch": 0.21748001147395937, - "grad_norm": 586.101806640625, - "learning_rate": 4.800169571051721e-05, - "loss": 89.3042, - "step": 53830 - }, - { - "epoch": 0.21752041273932698, - "grad_norm": 966.0181884765625, - "learning_rate": 4.800032798907869e-05, - "loss": 90.1948, - "step": 53840 - }, - { - "epoch": 0.21756081400469462, - "grad_norm": 795.524658203125, - "learning_rate": 4.799895981923693e-05, - "loss": 110.5656, - "step": 53850 - }, - { - "epoch": 0.21760121527006226, - "grad_norm": 496.3986511230469, - "learning_rate": 4.799759120101861e-05, - "loss": 51.413, - "step": 53860 - }, - { - "epoch": 0.21764161653542988, - "grad_norm": 596.7861328125, - "learning_rate": 4.799622213445041e-05, - "loss": 44.7856, - "step": 53870 - }, - { - "epoch": 0.21768201780079752, - "grad_norm": 1061.037353515625, - "learning_rate": 4.7994852619559016e-05, - "loss": 93.6382, - "step": 53880 - }, - { - "epoch": 0.21772241906616516, - "grad_norm": 386.0953063964844, - "learning_rate": 4.7993482656371135e-05, - "loss": 70.8924, - "step": 53890 - }, - { - "epoch": 0.2177628203315328, - "grad_norm": 1275.1336669921875, - "learning_rate": 4.799211224491348e-05, - "loss": 77.7109, - "step": 53900 - }, - { - "epoch": 0.2178032215969004, - "grad_norm": 534.1307373046875, - "learning_rate": 4.799074138521274e-05, - "loss": 47.0972, - "step": 53910 - }, - { - "epoch": 0.21784362286226805, - "grad_norm": 915.685791015625, - "learning_rate": 4.798937007729568e-05, - "loss": 67.5173, - "step": 53920 - }, - { - "epoch": 0.2178840241276357, - "grad_norm": 965.9025268554688, - "learning_rate": 4.7987998321189e-05, - "loss": 60.5081, - "step": 53930 - }, - { - "epoch": 0.2179244253930033, - "grad_norm": 2893.699462890625, - "learning_rate": 4.798662611691947e-05, - "loss": 109.9214, - "step": 53940 - }, - { - "epoch": 0.21796482665837094, - "grad_norm": 468.1808776855469, - "learning_rate": 4.7985253464513825e-05, - "loss": 56.8471, - "step": 53950 - }, - { - "epoch": 0.21800522792373858, - "grad_norm": 547.9169311523438, - "learning_rate": 4.798388036399883e-05, - "loss": 57.3491, - "step": 53960 - }, - { - "epoch": 0.2180456291891062, - "grad_norm": 482.74652099609375, - "learning_rate": 4.7982506815401254e-05, - "loss": 69.7557, - "step": 53970 - }, - { - "epoch": 0.21808603045447383, - "grad_norm": 1082.1453857421875, - "learning_rate": 4.7981132818747876e-05, - "loss": 96.0472, - "step": 53980 - }, - { - "epoch": 0.21812643171984147, - "grad_norm": 1212.4945068359375, - "learning_rate": 4.797975837406547e-05, - "loss": 63.2927, - "step": 53990 - }, - { - "epoch": 0.21816683298520909, - "grad_norm": 921.6978149414062, - "learning_rate": 4.797838348138086e-05, - "loss": 57.7244, - "step": 54000 - }, - { - "epoch": 0.21820723425057673, - "grad_norm": 873.8997802734375, - "learning_rate": 4.797700814072083e-05, - "loss": 66.1426, - "step": 54010 - }, - { - "epoch": 0.21824763551594437, - "grad_norm": 760.1358032226562, - "learning_rate": 4.7975632352112195e-05, - "loss": 77.4176, - "step": 54020 - }, - { - "epoch": 0.21828803678131198, - "grad_norm": 1324.942626953125, - "learning_rate": 4.7974256115581785e-05, - "loss": 69.078, - "step": 54030 - }, - { - "epoch": 0.21832843804667962, - "grad_norm": 1516.5565185546875, - "learning_rate": 4.797287943115641e-05, - "loss": 64.1911, - "step": 54040 - }, - { - "epoch": 0.21836883931204726, - "grad_norm": 1137.977294921875, - "learning_rate": 4.7971502298862936e-05, - "loss": 102.8754, - "step": 54050 - }, - { - "epoch": 0.2184092405774149, - "grad_norm": 813.7706909179688, - "learning_rate": 4.7970124718728193e-05, - "loss": 82.1068, - "step": 54060 - }, - { - "epoch": 0.2184496418427825, - "grad_norm": 635.2130737304688, - "learning_rate": 4.7968746690779044e-05, - "loss": 58.6373, - "step": 54070 - }, - { - "epoch": 0.21849004310815015, - "grad_norm": 642.5101928710938, - "learning_rate": 4.796736821504235e-05, - "loss": 74.7753, - "step": 54080 - }, - { - "epoch": 0.2185304443735178, - "grad_norm": 849.4306030273438, - "learning_rate": 4.7965989291545e-05, - "loss": 91.7941, - "step": 54090 - }, - { - "epoch": 0.2185708456388854, - "grad_norm": 740.3421630859375, - "learning_rate": 4.796460992031385e-05, - "loss": 72.5141, - "step": 54100 - }, - { - "epoch": 0.21861124690425304, - "grad_norm": 606.4213256835938, - "learning_rate": 4.7963230101375814e-05, - "loss": 90.6276, - "step": 54110 - }, - { - "epoch": 0.21865164816962068, - "grad_norm": 534.52197265625, - "learning_rate": 4.7961849834757786e-05, - "loss": 141.8116, - "step": 54120 - }, - { - "epoch": 0.2186920494349883, - "grad_norm": 2459.29443359375, - "learning_rate": 4.7960469120486674e-05, - "loss": 104.6196, - "step": 54130 - }, - { - "epoch": 0.21873245070035593, - "grad_norm": 804.312744140625, - "learning_rate": 4.7959087958589386e-05, - "loss": 71.0338, - "step": 54140 - }, - { - "epoch": 0.21877285196572357, - "grad_norm": 884.2997436523438, - "learning_rate": 4.7957706349092865e-05, - "loss": 80.0751, - "step": 54150 - }, - { - "epoch": 0.2188132532310912, - "grad_norm": 1428.2335205078125, - "learning_rate": 4.795632429202405e-05, - "loss": 73.554, - "step": 54160 - }, - { - "epoch": 0.21885365449645883, - "grad_norm": 555.4033203125, - "learning_rate": 4.795494178740986e-05, - "loss": 52.6659, - "step": 54170 - }, - { - "epoch": 0.21889405576182647, - "grad_norm": 479.893310546875, - "learning_rate": 4.795355883527727e-05, - "loss": 81.4304, - "step": 54180 - }, - { - "epoch": 0.21893445702719408, - "grad_norm": 738.9581909179688, - "learning_rate": 4.7952175435653226e-05, - "loss": 83.4163, - "step": 54190 - }, - { - "epoch": 0.21897485829256172, - "grad_norm": 599.0054321289062, - "learning_rate": 4.79507915885647e-05, - "loss": 62.3013, - "step": 54200 - }, - { - "epoch": 0.21901525955792936, - "grad_norm": 672.7980346679688, - "learning_rate": 4.794940729403869e-05, - "loss": 73.0649, - "step": 54210 - }, - { - "epoch": 0.219055660823297, - "grad_norm": 1021.8695068359375, - "learning_rate": 4.794802255210217e-05, - "loss": 74.2551, - "step": 54220 - }, - { - "epoch": 0.2190960620886646, - "grad_norm": 2235.082763671875, - "learning_rate": 4.794663736278212e-05, - "loss": 100.521, - "step": 54230 - }, - { - "epoch": 0.21913646335403225, - "grad_norm": 454.6265563964844, - "learning_rate": 4.794525172610558e-05, - "loss": 64.2982, - "step": 54240 - }, - { - "epoch": 0.2191768646193999, - "grad_norm": 2061.634765625, - "learning_rate": 4.794386564209953e-05, - "loss": 80.8011, - "step": 54250 - }, - { - "epoch": 0.2192172658847675, - "grad_norm": 1272.87451171875, - "learning_rate": 4.7942479110791015e-05, - "loss": 75.4505, - "step": 54260 - }, - { - "epoch": 0.21925766715013514, - "grad_norm": 732.8099975585938, - "learning_rate": 4.7941092132207056e-05, - "loss": 76.4685, - "step": 54270 - }, - { - "epoch": 0.21929806841550278, - "grad_norm": 1131.0069580078125, - "learning_rate": 4.793970470637469e-05, - "loss": 95.1893, - "step": 54280 - }, - { - "epoch": 0.2193384696808704, - "grad_norm": 707.0789794921875, - "learning_rate": 4.793831683332098e-05, - "loss": 79.5281, - "step": 54290 - }, - { - "epoch": 0.21937887094623804, - "grad_norm": 1276.7493896484375, - "learning_rate": 4.7936928513072964e-05, - "loss": 63.4697, - "step": 54300 - }, - { - "epoch": 0.21941927221160568, - "grad_norm": 169.8773193359375, - "learning_rate": 4.793553974565773e-05, - "loss": 65.7283, - "step": 54310 - }, - { - "epoch": 0.2194596734769733, - "grad_norm": 554.336181640625, - "learning_rate": 4.793415053110233e-05, - "loss": 56.5391, - "step": 54320 - }, - { - "epoch": 0.21950007474234093, - "grad_norm": 848.8544921875, - "learning_rate": 4.7932760869433865e-05, - "loss": 66.7205, - "step": 54330 - }, - { - "epoch": 0.21954047600770857, - "grad_norm": 1150.58349609375, - "learning_rate": 4.793137076067942e-05, - "loss": 70.9918, - "step": 54340 - }, - { - "epoch": 0.21958087727307618, - "grad_norm": 1347.6571044921875, - "learning_rate": 4.792998020486609e-05, - "loss": 75.1297, - "step": 54350 - }, - { - "epoch": 0.21962127853844382, - "grad_norm": 796.1422119140625, - "learning_rate": 4.792858920202099e-05, - "loss": 91.2722, - "step": 54360 - }, - { - "epoch": 0.21966167980381146, - "grad_norm": 978.01025390625, - "learning_rate": 4.792719775217124e-05, - "loss": 79.3633, - "step": 54370 - }, - { - "epoch": 0.2197020810691791, - "grad_norm": 676.4722900390625, - "learning_rate": 4.7925805855343975e-05, - "loss": 76.3162, - "step": 54380 - }, - { - "epoch": 0.2197424823345467, - "grad_norm": 820.4541625976562, - "learning_rate": 4.7924413511566315e-05, - "loss": 43.4046, - "step": 54390 - }, - { - "epoch": 0.21978288359991435, - "grad_norm": 598.8165893554688, - "learning_rate": 4.7923020720865414e-05, - "loss": 57.7346, - "step": 54400 - }, - { - "epoch": 0.219823284865282, - "grad_norm": 2260.6591796875, - "learning_rate": 4.792162748326841e-05, - "loss": 97.9996, - "step": 54410 - }, - { - "epoch": 0.2198636861306496, - "grad_norm": 1380.8861083984375, - "learning_rate": 4.792023379880249e-05, - "loss": 88.7846, - "step": 54420 - }, - { - "epoch": 0.21990408739601724, - "grad_norm": 1240.204833984375, - "learning_rate": 4.791883966749482e-05, - "loss": 78.3506, - "step": 54430 - }, - { - "epoch": 0.21994448866138488, - "grad_norm": 436.11798095703125, - "learning_rate": 4.791744508937256e-05, - "loss": 80.6678, - "step": 54440 - }, - { - "epoch": 0.2199848899267525, - "grad_norm": 1142.2906494140625, - "learning_rate": 4.791605006446291e-05, - "loss": 63.2783, - "step": 54450 - }, - { - "epoch": 0.22002529119212014, - "grad_norm": 815.8292846679688, - "learning_rate": 4.7914654592793065e-05, - "loss": 77.567, - "step": 54460 - }, - { - "epoch": 0.22006569245748778, - "grad_norm": 920.00439453125, - "learning_rate": 4.791325867439024e-05, - "loss": 57.8406, - "step": 54470 - }, - { - "epoch": 0.2201060937228554, - "grad_norm": 752.0264282226562, - "learning_rate": 4.791186230928163e-05, - "loss": 94.0675, - "step": 54480 - }, - { - "epoch": 0.22014649498822303, - "grad_norm": 1893.0584716796875, - "learning_rate": 4.7910465497494474e-05, - "loss": 136.9929, - "step": 54490 - }, - { - "epoch": 0.22018689625359067, - "grad_norm": 634.4692993164062, - "learning_rate": 4.790906823905599e-05, - "loss": 69.6565, - "step": 54500 - }, - { - "epoch": 0.22022729751895828, - "grad_norm": 395.0136413574219, - "learning_rate": 4.790767053399343e-05, - "loss": 33.5299, - "step": 54510 - }, - { - "epoch": 0.22026769878432592, - "grad_norm": 1267.1798095703125, - "learning_rate": 4.790627238233405e-05, - "loss": 52.7564, - "step": 54520 - }, - { - "epoch": 0.22030810004969356, - "grad_norm": 1663.51611328125, - "learning_rate": 4.790487378410509e-05, - "loss": 97.8893, - "step": 54530 - }, - { - "epoch": 0.2203485013150612, - "grad_norm": 279.7654113769531, - "learning_rate": 4.790347473933382e-05, - "loss": 49.2227, - "step": 54540 - }, - { - "epoch": 0.2203889025804288, - "grad_norm": 1237.12255859375, - "learning_rate": 4.7902075248047515e-05, - "loss": 93.2389, - "step": 54550 - }, - { - "epoch": 0.22042930384579645, - "grad_norm": 1792.2801513671875, - "learning_rate": 4.7900675310273466e-05, - "loss": 65.3986, - "step": 54560 - }, - { - "epoch": 0.2204697051111641, - "grad_norm": 923.925048828125, - "learning_rate": 4.7899274926038976e-05, - "loss": 60.5506, - "step": 54570 - }, - { - "epoch": 0.2205101063765317, - "grad_norm": 624.0264282226562, - "learning_rate": 4.789787409537131e-05, - "loss": 46.6642, - "step": 54580 - }, - { - "epoch": 0.22055050764189935, - "grad_norm": 732.738037109375, - "learning_rate": 4.789647281829781e-05, - "loss": 118.6461, - "step": 54590 - }, - { - "epoch": 0.22059090890726699, - "grad_norm": 654.3661499023438, - "learning_rate": 4.789507109484579e-05, - "loss": 108.6401, - "step": 54600 - }, - { - "epoch": 0.2206313101726346, - "grad_norm": 628.9659423828125, - "learning_rate": 4.7893668925042565e-05, - "loss": 56.0631, - "step": 54610 - }, - { - "epoch": 0.22067171143800224, - "grad_norm": 1165.976806640625, - "learning_rate": 4.789226630891548e-05, - "loss": 67.4176, - "step": 54620 - }, - { - "epoch": 0.22071211270336988, - "grad_norm": 2275.9443359375, - "learning_rate": 4.789086324649187e-05, - "loss": 81.9733, - "step": 54630 - }, - { - "epoch": 0.2207525139687375, - "grad_norm": 1239.550537109375, - "learning_rate": 4.78894597377991e-05, - "loss": 81.8693, - "step": 54640 - }, - { - "epoch": 0.22079291523410513, - "grad_norm": 474.0306701660156, - "learning_rate": 4.788805578286454e-05, - "loss": 70.7014, - "step": 54650 - }, - { - "epoch": 0.22083331649947277, - "grad_norm": 1004.7076416015625, - "learning_rate": 4.788665138171553e-05, - "loss": 79.816, - "step": 54660 - }, - { - "epoch": 0.22087371776484038, - "grad_norm": 536.8333129882812, - "learning_rate": 4.788524653437948e-05, - "loss": 107.741, - "step": 54670 - }, - { - "epoch": 0.22091411903020802, - "grad_norm": 1520.8243408203125, - "learning_rate": 4.7883841240883766e-05, - "loss": 67.9091, - "step": 54680 - }, - { - "epoch": 0.22095452029557566, - "grad_norm": 978.5899047851562, - "learning_rate": 4.7882435501255785e-05, - "loss": 61.7931, - "step": 54690 - }, - { - "epoch": 0.22099492156094327, - "grad_norm": 2291.6357421875, - "learning_rate": 4.788102931552294e-05, - "loss": 66.0947, - "step": 54700 - }, - { - "epoch": 0.22103532282631091, - "grad_norm": 1223.5504150390625, - "learning_rate": 4.787962268371266e-05, - "loss": 96.6182, - "step": 54710 - }, - { - "epoch": 0.22107572409167855, - "grad_norm": 753.4441528320312, - "learning_rate": 4.7878215605852336e-05, - "loss": 78.4969, - "step": 54720 - }, - { - "epoch": 0.2211161253570462, - "grad_norm": 2007.7423095703125, - "learning_rate": 4.7876808081969436e-05, - "loss": 88.9049, - "step": 54730 - }, - { - "epoch": 0.2211565266224138, - "grad_norm": 520.70361328125, - "learning_rate": 4.787540011209138e-05, - "loss": 55.8599, - "step": 54740 - }, - { - "epoch": 0.22119692788778145, - "grad_norm": 509.8031005859375, - "learning_rate": 4.7873991696245624e-05, - "loss": 68.4668, - "step": 54750 - }, - { - "epoch": 0.2212373291531491, - "grad_norm": 1531.3848876953125, - "learning_rate": 4.787258283445962e-05, - "loss": 109.6678, - "step": 54760 - }, - { - "epoch": 0.2212777304185167, - "grad_norm": 1034.49755859375, - "learning_rate": 4.7871173526760835e-05, - "loss": 82.1401, - "step": 54770 - }, - { - "epoch": 0.22131813168388434, - "grad_norm": 1044.2027587890625, - "learning_rate": 4.7869763773176756e-05, - "loss": 55.1931, - "step": 54780 - }, - { - "epoch": 0.22135853294925198, - "grad_norm": 937.3899536132812, - "learning_rate": 4.786835357373486e-05, - "loss": 86.6882, - "step": 54790 - }, - { - "epoch": 0.2213989342146196, - "grad_norm": 816.9603881835938, - "learning_rate": 4.7866942928462625e-05, - "loss": 68.9065, - "step": 54800 - }, - { - "epoch": 0.22143933547998723, - "grad_norm": 1069.583984375, - "learning_rate": 4.7865531837387576e-05, - "loss": 79.5692, - "step": 54810 - }, - { - "epoch": 0.22147973674535487, - "grad_norm": 859.8712768554688, - "learning_rate": 4.7864120300537206e-05, - "loss": 52.4455, - "step": 54820 - }, - { - "epoch": 0.22152013801072248, - "grad_norm": 861.6782836914062, - "learning_rate": 4.786270831793904e-05, - "loss": 67.6935, - "step": 54830 - }, - { - "epoch": 0.22156053927609012, - "grad_norm": 382.9268798828125, - "learning_rate": 4.786129588962061e-05, - "loss": 85.1282, - "step": 54840 - }, - { - "epoch": 0.22160094054145776, - "grad_norm": 1050.475830078125, - "learning_rate": 4.785988301560944e-05, - "loss": 103.8309, - "step": 54850 - }, - { - "epoch": 0.22164134180682538, - "grad_norm": 691.6054077148438, - "learning_rate": 4.785846969593308e-05, - "loss": 68.233, - "step": 54860 - }, - { - "epoch": 0.22168174307219302, - "grad_norm": 2130.216064453125, - "learning_rate": 4.785705593061909e-05, - "loss": 68.9335, - "step": 54870 - }, - { - "epoch": 0.22172214433756066, - "grad_norm": 804.7174072265625, - "learning_rate": 4.7855641719695023e-05, - "loss": 60.727, - "step": 54880 - }, - { - "epoch": 0.2217625456029283, - "grad_norm": 634.82080078125, - "learning_rate": 4.785422706318846e-05, - "loss": 88.999, - "step": 54890 - }, - { - "epoch": 0.2218029468682959, - "grad_norm": 597.0457763671875, - "learning_rate": 4.785281196112698e-05, - "loss": 64.0293, - "step": 54900 - }, - { - "epoch": 0.22184334813366355, - "grad_norm": 1046.802001953125, - "learning_rate": 4.785139641353815e-05, - "loss": 70.1819, - "step": 54910 - }, - { - "epoch": 0.2218837493990312, - "grad_norm": 753.67236328125, - "learning_rate": 4.7849980420449594e-05, - "loss": 66.1747, - "step": 54920 - }, - { - "epoch": 0.2219241506643988, - "grad_norm": 1037.9305419921875, - "learning_rate": 4.7848563981888893e-05, - "loss": 108.7891, - "step": 54930 - }, - { - "epoch": 0.22196455192976644, - "grad_norm": 1170.2239990234375, - "learning_rate": 4.784714709788368e-05, - "loss": 102.6976, - "step": 54940 - }, - { - "epoch": 0.22200495319513408, - "grad_norm": 894.5665283203125, - "learning_rate": 4.7845729768461576e-05, - "loss": 54.2852, - "step": 54950 - }, - { - "epoch": 0.2220453544605017, - "grad_norm": 584.92431640625, - "learning_rate": 4.7844311993650205e-05, - "loss": 75.9472, - "step": 54960 - }, - { - "epoch": 0.22208575572586933, - "grad_norm": 1176.026123046875, - "learning_rate": 4.784289377347721e-05, - "loss": 88.7713, - "step": 54970 - }, - { - "epoch": 0.22212615699123697, - "grad_norm": 834.7158203125, - "learning_rate": 4.7841475107970244e-05, - "loss": 86.6488, - "step": 54980 - }, - { - "epoch": 0.22216655825660458, - "grad_norm": 1956.4654541015625, - "learning_rate": 4.784005599715696e-05, - "loss": 53.2461, - "step": 54990 - }, - { - "epoch": 0.22220695952197222, - "grad_norm": 1810.177734375, - "learning_rate": 4.783863644106502e-05, - "loss": 141.2733, - "step": 55000 - }, - { - "epoch": 0.22224736078733986, - "grad_norm": 641.3799438476562, - "learning_rate": 4.783721643972211e-05, - "loss": 62.0612, - "step": 55010 - }, - { - "epoch": 0.22228776205270748, - "grad_norm": 1118.1512451171875, - "learning_rate": 4.783579599315591e-05, - "loss": 80.4928, - "step": 55020 - }, - { - "epoch": 0.22232816331807512, - "grad_norm": 1295.068115234375, - "learning_rate": 4.783437510139411e-05, - "loss": 75.4781, - "step": 55030 - }, - { - "epoch": 0.22236856458344276, - "grad_norm": 489.80078125, - "learning_rate": 4.7832953764464405e-05, - "loss": 67.9461, - "step": 55040 - }, - { - "epoch": 0.2224089658488104, - "grad_norm": 432.6492614746094, - "learning_rate": 4.783153198239452e-05, - "loss": 66.1955, - "step": 55050 - }, - { - "epoch": 0.222449367114178, - "grad_norm": 0.0, - "learning_rate": 4.783010975521216e-05, - "loss": 61.2556, - "step": 55060 - }, - { - "epoch": 0.22248976837954565, - "grad_norm": 656.0430908203125, - "learning_rate": 4.7828687082945054e-05, - "loss": 50.6344, - "step": 55070 - }, - { - "epoch": 0.2225301696449133, - "grad_norm": 799.9983520507812, - "learning_rate": 4.782726396562094e-05, - "loss": 63.7389, - "step": 55080 - }, - { - "epoch": 0.2225705709102809, - "grad_norm": 848.791748046875, - "learning_rate": 4.782584040326757e-05, - "loss": 83.2984, - "step": 55090 - }, - { - "epoch": 0.22261097217564854, - "grad_norm": 749.4911499023438, - "learning_rate": 4.7824416395912686e-05, - "loss": 84.4783, - "step": 55100 - }, - { - "epoch": 0.22265137344101618, - "grad_norm": 546.4697265625, - "learning_rate": 4.782299194358405e-05, - "loss": 80.4849, - "step": 55110 - }, - { - "epoch": 0.2226917747063838, - "grad_norm": 587.6947021484375, - "learning_rate": 4.782156704630944e-05, - "loss": 79.875, - "step": 55120 - }, - { - "epoch": 0.22273217597175143, - "grad_norm": 1178.3653564453125, - "learning_rate": 4.782014170411663e-05, - "loss": 77.4586, - "step": 55130 - }, - { - "epoch": 0.22277257723711907, - "grad_norm": 641.4794921875, - "learning_rate": 4.781871591703341e-05, - "loss": 70.9194, - "step": 55140 - }, - { - "epoch": 0.22281297850248669, - "grad_norm": 450.2964172363281, - "learning_rate": 4.7817289685087577e-05, - "loss": 92.5157, - "step": 55150 - }, - { - "epoch": 0.22285337976785433, - "grad_norm": 1155.9171142578125, - "learning_rate": 4.781586300830693e-05, - "loss": 125.689, - "step": 55160 - }, - { - "epoch": 0.22289378103322197, - "grad_norm": 684.2454833984375, - "learning_rate": 4.781443588671929e-05, - "loss": 61.9113, - "step": 55170 - }, - { - "epoch": 0.22293418229858958, - "grad_norm": 0.0, - "learning_rate": 4.781300832035247e-05, - "loss": 66.496, - "step": 55180 - }, - { - "epoch": 0.22297458356395722, - "grad_norm": 546.8045043945312, - "learning_rate": 4.7811580309234314e-05, - "loss": 74.1194, - "step": 55190 - }, - { - "epoch": 0.22301498482932486, - "grad_norm": 559.2355346679688, - "learning_rate": 4.781015185339266e-05, - "loss": 63.7682, - "step": 55200 - }, - { - "epoch": 0.2230553860946925, - "grad_norm": 848.5513916015625, - "learning_rate": 4.7808722952855344e-05, - "loss": 59.4205, - "step": 55210 - }, - { - "epoch": 0.2230957873600601, - "grad_norm": 513.669677734375, - "learning_rate": 4.780729360765024e-05, - "loss": 69.9661, - "step": 55220 - }, - { - "epoch": 0.22313618862542775, - "grad_norm": 1898.05419921875, - "learning_rate": 4.7805863817805196e-05, - "loss": 60.6413, - "step": 55230 - }, - { - "epoch": 0.2231765898907954, - "grad_norm": 304.1728820800781, - "learning_rate": 4.78044335833481e-05, - "loss": 65.169, - "step": 55240 - }, - { - "epoch": 0.223216991156163, - "grad_norm": 1311.47265625, - "learning_rate": 4.780300290430682e-05, - "loss": 75.9994, - "step": 55250 - }, - { - "epoch": 0.22325739242153064, - "grad_norm": 867.0023193359375, - "learning_rate": 4.780157178070928e-05, - "loss": 70.7438, - "step": 55260 - }, - { - "epoch": 0.22329779368689828, - "grad_norm": 749.3482055664062, - "learning_rate": 4.780014021258334e-05, - "loss": 87.707, - "step": 55270 - }, - { - "epoch": 0.2233381949522659, - "grad_norm": 717.660888671875, - "learning_rate": 4.779870819995694e-05, - "loss": 90.9197, - "step": 55280 - }, - { - "epoch": 0.22337859621763353, - "grad_norm": 677.76708984375, - "learning_rate": 4.779727574285798e-05, - "loss": 69.9488, - "step": 55290 - }, - { - "epoch": 0.22341899748300117, - "grad_norm": 781.3521728515625, - "learning_rate": 4.77958428413144e-05, - "loss": 83.5576, - "step": 55300 - }, - { - "epoch": 0.2234593987483688, - "grad_norm": 678.9473266601562, - "learning_rate": 4.779440949535412e-05, - "loss": 74.7966, - "step": 55310 - }, - { - "epoch": 0.22349980001373643, - "grad_norm": 1800.738037109375, - "learning_rate": 4.779297570500509e-05, - "loss": 97.8668, - "step": 55320 - }, - { - "epoch": 0.22354020127910407, - "grad_norm": 642.32568359375, - "learning_rate": 4.779154147029527e-05, - "loss": 38.1043, - "step": 55330 - }, - { - "epoch": 0.22358060254447168, - "grad_norm": 955.9810180664062, - "learning_rate": 4.7790106791252614e-05, - "loss": 99.5178, - "step": 55340 - }, - { - "epoch": 0.22362100380983932, - "grad_norm": 2772.03564453125, - "learning_rate": 4.7788671667905096e-05, - "loss": 103.198, - "step": 55350 - }, - { - "epoch": 0.22366140507520696, - "grad_norm": 896.0523071289062, - "learning_rate": 4.7787236100280685e-05, - "loss": 104.9025, - "step": 55360 - }, - { - "epoch": 0.2237018063405746, - "grad_norm": 657.1527709960938, - "learning_rate": 4.7785800088407376e-05, - "loss": 64.4315, - "step": 55370 - }, - { - "epoch": 0.2237422076059422, - "grad_norm": 1311.0645751953125, - "learning_rate": 4.7784363632313166e-05, - "loss": 82.5705, - "step": 55380 - }, - { - "epoch": 0.22378260887130985, - "grad_norm": 636.6400146484375, - "learning_rate": 4.778292673202606e-05, - "loss": 69.7723, - "step": 55390 - }, - { - "epoch": 0.2238230101366775, - "grad_norm": 768.77392578125, - "learning_rate": 4.778148938757406e-05, - "loss": 88.6552, - "step": 55400 - }, - { - "epoch": 0.2238634114020451, - "grad_norm": 494.320068359375, - "learning_rate": 4.7780051598985196e-05, - "loss": 78.5235, - "step": 55410 - }, - { - "epoch": 0.22390381266741274, - "grad_norm": 525.7529907226562, - "learning_rate": 4.7778613366287505e-05, - "loss": 106.9901, - "step": 55420 - }, - { - "epoch": 0.22394421393278038, - "grad_norm": 574.625732421875, - "learning_rate": 4.7777174689509006e-05, - "loss": 81.0395, - "step": 55430 - }, - { - "epoch": 0.223984615198148, - "grad_norm": 599.45947265625, - "learning_rate": 4.7775735568677775e-05, - "loss": 68.9037, - "step": 55440 - }, - { - "epoch": 0.22402501646351564, - "grad_norm": 653.6656494140625, - "learning_rate": 4.777429600382185e-05, - "loss": 79.6398, - "step": 55450 - }, - { - "epoch": 0.22406541772888328, - "grad_norm": 372.3857116699219, - "learning_rate": 4.777285599496929e-05, - "loss": 47.1804, - "step": 55460 - }, - { - "epoch": 0.2241058189942509, - "grad_norm": 444.17919921875, - "learning_rate": 4.777141554214819e-05, - "loss": 79.5894, - "step": 55470 - }, - { - "epoch": 0.22414622025961853, - "grad_norm": 952.4342041015625, - "learning_rate": 4.776997464538662e-05, - "loss": 79.2442, - "step": 55480 - }, - { - "epoch": 0.22418662152498617, - "grad_norm": 1239.525146484375, - "learning_rate": 4.776853330471266e-05, - "loss": 70.9746, - "step": 55490 - }, - { - "epoch": 0.22422702279035378, - "grad_norm": 5761.12841796875, - "learning_rate": 4.776709152015443e-05, - "loss": 110.6402, - "step": 55500 - }, - { - "epoch": 0.22426742405572142, - "grad_norm": 5974.0244140625, - "learning_rate": 4.776564929174003e-05, - "loss": 88.7858, - "step": 55510 - }, - { - "epoch": 0.22430782532108906, - "grad_norm": 702.3343505859375, - "learning_rate": 4.776420661949758e-05, - "loss": 80.6855, - "step": 55520 - }, - { - "epoch": 0.2243482265864567, - "grad_norm": 736.3425903320312, - "learning_rate": 4.776276350345519e-05, - "loss": 74.7216, - "step": 55530 - }, - { - "epoch": 0.2243886278518243, - "grad_norm": 412.60272216796875, - "learning_rate": 4.776131994364102e-05, - "loss": 80.698, - "step": 55540 - }, - { - "epoch": 0.22442902911719195, - "grad_norm": 723.0787353515625, - "learning_rate": 4.775987594008319e-05, - "loss": 106.2656, - "step": 55550 - }, - { - "epoch": 0.2244694303825596, - "grad_norm": 598.0833129882812, - "learning_rate": 4.775843149280986e-05, - "loss": 60.9806, - "step": 55560 - }, - { - "epoch": 0.2245098316479272, - "grad_norm": 1235.12060546875, - "learning_rate": 4.775698660184919e-05, - "loss": 80.7508, - "step": 55570 - }, - { - "epoch": 0.22455023291329484, - "grad_norm": 874.450927734375, - "learning_rate": 4.775554126722935e-05, - "loss": 42.9898, - "step": 55580 - }, - { - "epoch": 0.22459063417866248, - "grad_norm": 589.5454711914062, - "learning_rate": 4.775409548897853e-05, - "loss": 94.5234, - "step": 55590 - }, - { - "epoch": 0.2246310354440301, - "grad_norm": 918.5469360351562, - "learning_rate": 4.775264926712489e-05, - "loss": 48.5788, - "step": 55600 - }, - { - "epoch": 0.22467143670939774, - "grad_norm": 1047.6563720703125, - "learning_rate": 4.775120260169665e-05, - "loss": 88.8342, - "step": 55610 - }, - { - "epoch": 0.22471183797476538, - "grad_norm": 932.34912109375, - "learning_rate": 4.774975549272199e-05, - "loss": 48.4354, - "step": 55620 - }, - { - "epoch": 0.224752239240133, - "grad_norm": 614.5720825195312, - "learning_rate": 4.774830794022915e-05, - "loss": 93.6599, - "step": 55630 - }, - { - "epoch": 0.22479264050550063, - "grad_norm": 503.9685363769531, - "learning_rate": 4.7746859944246325e-05, - "loss": 85.0896, - "step": 55640 - }, - { - "epoch": 0.22483304177086827, - "grad_norm": 662.5595703125, - "learning_rate": 4.774541150480175e-05, - "loss": 103.3519, - "step": 55650 - }, - { - "epoch": 0.22487344303623588, - "grad_norm": 982.870849609375, - "learning_rate": 4.7743962621923674e-05, - "loss": 82.5962, - "step": 55660 - }, - { - "epoch": 0.22491384430160352, - "grad_norm": 1002.5994873046875, - "learning_rate": 4.774251329564034e-05, - "loss": 64.0428, - "step": 55670 - }, - { - "epoch": 0.22495424556697116, - "grad_norm": 360.1722412109375, - "learning_rate": 4.7741063525980004e-05, - "loss": 75.0625, - "step": 55680 - }, - { - "epoch": 0.2249946468323388, - "grad_norm": 1272.3038330078125, - "learning_rate": 4.773961331297092e-05, - "loss": 114.2109, - "step": 55690 - }, - { - "epoch": 0.2250350480977064, - "grad_norm": 6723.4404296875, - "learning_rate": 4.773816265664136e-05, - "loss": 103.1604, - "step": 55700 - }, - { - "epoch": 0.22507544936307405, - "grad_norm": 2476.1787109375, - "learning_rate": 4.7736711557019617e-05, - "loss": 101.4048, - "step": 55710 - }, - { - "epoch": 0.2251158506284417, - "grad_norm": 1094.4051513671875, - "learning_rate": 4.7735260014133986e-05, - "loss": 80.1582, - "step": 55720 - }, - { - "epoch": 0.2251562518938093, - "grad_norm": 990.44677734375, - "learning_rate": 4.773380802801275e-05, - "loss": 80.2596, - "step": 55730 - }, - { - "epoch": 0.22519665315917695, - "grad_norm": 700.4454956054688, - "learning_rate": 4.773235559868422e-05, - "loss": 78.9232, - "step": 55740 - }, - { - "epoch": 0.22523705442454459, - "grad_norm": 1065.6510009765625, - "learning_rate": 4.773090272617672e-05, - "loss": 85.6272, - "step": 55750 - }, - { - "epoch": 0.2252774556899122, - "grad_norm": 810.5206909179688, - "learning_rate": 4.772944941051856e-05, - "loss": 83.4951, - "step": 55760 - }, - { - "epoch": 0.22531785695527984, - "grad_norm": 508.2962341308594, - "learning_rate": 4.772799565173809e-05, - "loss": 63.031, - "step": 55770 - }, - { - "epoch": 0.22535825822064748, - "grad_norm": 371.3773498535156, - "learning_rate": 4.772654144986364e-05, - "loss": 96.568, - "step": 55780 - }, - { - "epoch": 0.2253986594860151, - "grad_norm": 492.9206237792969, - "learning_rate": 4.772508680492356e-05, - "loss": 76.7317, - "step": 55790 - }, - { - "epoch": 0.22543906075138273, - "grad_norm": 368.9469909667969, - "learning_rate": 4.772363171694622e-05, - "loss": 62.3136, - "step": 55800 - }, - { - "epoch": 0.22547946201675037, - "grad_norm": 445.32501220703125, - "learning_rate": 4.7722176185959974e-05, - "loss": 47.5343, - "step": 55810 - }, - { - "epoch": 0.22551986328211798, - "grad_norm": 385.32098388671875, - "learning_rate": 4.772072021199321e-05, - "loss": 86.4635, - "step": 55820 - }, - { - "epoch": 0.22556026454748562, - "grad_norm": 828.4990844726562, - "learning_rate": 4.771926379507431e-05, - "loss": 79.7598, - "step": 55830 - }, - { - "epoch": 0.22560066581285326, - "grad_norm": 881.7483520507812, - "learning_rate": 4.7717806935231665e-05, - "loss": 54.1686, - "step": 55840 - }, - { - "epoch": 0.2256410670782209, - "grad_norm": 882.0201416015625, - "learning_rate": 4.7716349632493674e-05, - "loss": 61.8202, - "step": 55850 - }, - { - "epoch": 0.22568146834358851, - "grad_norm": 1207.105712890625, - "learning_rate": 4.7714891886888756e-05, - "loss": 78.0542, - "step": 55860 - }, - { - "epoch": 0.22572186960895615, - "grad_norm": 1286.038330078125, - "learning_rate": 4.771343369844532e-05, - "loss": 84.5764, - "step": 55870 - }, - { - "epoch": 0.2257622708743238, - "grad_norm": 518.7008056640625, - "learning_rate": 4.771197506719181e-05, - "loss": 68.4155, - "step": 55880 - }, - { - "epoch": 0.2258026721396914, - "grad_norm": 1427.2728271484375, - "learning_rate": 4.7710515993156645e-05, - "loss": 89.9567, - "step": 55890 - }, - { - "epoch": 0.22584307340505905, - "grad_norm": 964.0196533203125, - "learning_rate": 4.770905647636828e-05, - "loss": 68.5106, - "step": 55900 - }, - { - "epoch": 0.2258834746704267, - "grad_norm": 452.55487060546875, - "learning_rate": 4.770759651685517e-05, - "loss": 56.08, - "step": 55910 - }, - { - "epoch": 0.2259238759357943, - "grad_norm": 1233.5595703125, - "learning_rate": 4.770613611464577e-05, - "loss": 74.0022, - "step": 55920 - }, - { - "epoch": 0.22596427720116194, - "grad_norm": 1455.181640625, - "learning_rate": 4.7704675269768565e-05, - "loss": 91.9318, - "step": 55930 - }, - { - "epoch": 0.22600467846652958, - "grad_norm": 735.1796264648438, - "learning_rate": 4.7703213982252016e-05, - "loss": 85.3958, - "step": 55940 - }, - { - "epoch": 0.2260450797318972, - "grad_norm": 670.2793579101562, - "learning_rate": 4.770175225212463e-05, - "loss": 60.0693, - "step": 55950 - }, - { - "epoch": 0.22608548099726483, - "grad_norm": 820.1773071289062, - "learning_rate": 4.7700290079414896e-05, - "loss": 75.0173, - "step": 55960 - }, - { - "epoch": 0.22612588226263247, - "grad_norm": 640.2984619140625, - "learning_rate": 4.769882746415132e-05, - "loss": 58.8079, - "step": 55970 - }, - { - "epoch": 0.22616628352800008, - "grad_norm": 1904.8134765625, - "learning_rate": 4.769736440636241e-05, - "loss": 59.5396, - "step": 55980 - }, - { - "epoch": 0.22620668479336772, - "grad_norm": 764.6589965820312, - "learning_rate": 4.76959009060767e-05, - "loss": 47.2034, - "step": 55990 - }, - { - "epoch": 0.22624708605873536, - "grad_norm": 312.55859375, - "learning_rate": 4.769443696332272e-05, - "loss": 62.7167, - "step": 56000 - }, - { - "epoch": 0.226287487324103, - "grad_norm": 560.5413818359375, - "learning_rate": 4.7692972578129005e-05, - "loss": 79.5518, - "step": 56010 - }, - { - "epoch": 0.22632788858947062, - "grad_norm": 664.6395874023438, - "learning_rate": 4.769150775052411e-05, - "loss": 63.6301, - "step": 56020 - }, - { - "epoch": 0.22636828985483826, - "grad_norm": 1466.82666015625, - "learning_rate": 4.769004248053658e-05, - "loss": 63.513, - "step": 56030 - }, - { - "epoch": 0.2264086911202059, - "grad_norm": 994.0587768554688, - "learning_rate": 4.7688576768194994e-05, - "loss": 59.1224, - "step": 56040 - }, - { - "epoch": 0.2264490923855735, - "grad_norm": 700.6870727539062, - "learning_rate": 4.7687110613527926e-05, - "loss": 85.2521, - "step": 56050 - }, - { - "epoch": 0.22648949365094115, - "grad_norm": 1606.5682373046875, - "learning_rate": 4.7685644016563956e-05, - "loss": 97.3078, - "step": 56060 - }, - { - "epoch": 0.2265298949163088, - "grad_norm": 559.5364379882812, - "learning_rate": 4.7684176977331674e-05, - "loss": 75.0282, - "step": 56070 - }, - { - "epoch": 0.2265702961816764, - "grad_norm": 750.7931518554688, - "learning_rate": 4.768270949585968e-05, - "loss": 89.0941, - "step": 56080 - }, - { - "epoch": 0.22661069744704404, - "grad_norm": 843.3683471679688, - "learning_rate": 4.7681241572176596e-05, - "loss": 76.073, - "step": 56090 - }, - { - "epoch": 0.22665109871241168, - "grad_norm": 759.1117553710938, - "learning_rate": 4.767977320631103e-05, - "loss": 72.5775, - "step": 56100 - }, - { - "epoch": 0.2266914999777793, - "grad_norm": 687.8123168945312, - "learning_rate": 4.76783043982916e-05, - "loss": 53.4036, - "step": 56110 - }, - { - "epoch": 0.22673190124314693, - "grad_norm": 996.044921875, - "learning_rate": 4.767683514814696e-05, - "loss": 56.8859, - "step": 56120 - }, - { - "epoch": 0.22677230250851457, - "grad_norm": 821.6911010742188, - "learning_rate": 4.767536545590574e-05, - "loss": 66.1622, - "step": 56130 - }, - { - "epoch": 0.22681270377388218, - "grad_norm": 1311.89892578125, - "learning_rate": 4.767389532159659e-05, - "loss": 69.1287, - "step": 56140 - }, - { - "epoch": 0.22685310503924982, - "grad_norm": 734.8349609375, - "learning_rate": 4.7672424745248176e-05, - "loss": 54.9413, - "step": 56150 - }, - { - "epoch": 0.22689350630461746, - "grad_norm": 854.7682495117188, - "learning_rate": 4.767095372688918e-05, - "loss": 98.0047, - "step": 56160 - }, - { - "epoch": 0.2269339075699851, - "grad_norm": 969.171630859375, - "learning_rate": 4.7669482266548264e-05, - "loss": 98.2503, - "step": 56170 - }, - { - "epoch": 0.22697430883535272, - "grad_norm": 1126.5001220703125, - "learning_rate": 4.7668010364254124e-05, - "loss": 63.528, - "step": 56180 - }, - { - "epoch": 0.22701471010072036, - "grad_norm": 1452.1922607421875, - "learning_rate": 4.7666538020035445e-05, - "loss": 68.9039, - "step": 56190 - }, - { - "epoch": 0.227055111366088, - "grad_norm": 1715.3773193359375, - "learning_rate": 4.7665065233920945e-05, - "loss": 109.966, - "step": 56200 - }, - { - "epoch": 0.2270955126314556, - "grad_norm": 631.9252319335938, - "learning_rate": 4.766359200593933e-05, - "loss": 49.2157, - "step": 56210 - }, - { - "epoch": 0.22713591389682325, - "grad_norm": 1373.4630126953125, - "learning_rate": 4.766211833611931e-05, - "loss": 74.802, - "step": 56220 - }, - { - "epoch": 0.2271763151621909, - "grad_norm": 2058.58740234375, - "learning_rate": 4.766064422448964e-05, - "loss": 75.4964, - "step": 56230 - }, - { - "epoch": 0.2272167164275585, - "grad_norm": 2785.7197265625, - "learning_rate": 4.765916967107903e-05, - "loss": 77.155, - "step": 56240 - }, - { - "epoch": 0.22725711769292614, - "grad_norm": 593.6171264648438, - "learning_rate": 4.765769467591625e-05, - "loss": 60.3693, - "step": 56250 - }, - { - "epoch": 0.22729751895829378, - "grad_norm": 1533.2698974609375, - "learning_rate": 4.7656219239030046e-05, - "loss": 95.2811, - "step": 56260 - }, - { - "epoch": 0.2273379202236614, - "grad_norm": 806.5111083984375, - "learning_rate": 4.7654743360449186e-05, - "loss": 65.6008, - "step": 56270 - }, - { - "epoch": 0.22737832148902903, - "grad_norm": 1836.31787109375, - "learning_rate": 4.7653267040202436e-05, - "loss": 80.5326, - "step": 56280 - }, - { - "epoch": 0.22741872275439667, - "grad_norm": 733.4277954101562, - "learning_rate": 4.765179027831858e-05, - "loss": 81.6788, - "step": 56290 - }, - { - "epoch": 0.22745912401976429, - "grad_norm": 1102.4857177734375, - "learning_rate": 4.7650313074826425e-05, - "loss": 63.4363, - "step": 56300 - }, - { - "epoch": 0.22749952528513193, - "grad_norm": 1184.0565185546875, - "learning_rate": 4.764883542975475e-05, - "loss": 113.9402, - "step": 56310 - }, - { - "epoch": 0.22753992655049957, - "grad_norm": 901.2548828125, - "learning_rate": 4.764735734313236e-05, - "loss": 61.9935, - "step": 56320 - }, - { - "epoch": 0.2275803278158672, - "grad_norm": 750.1239624023438, - "learning_rate": 4.7645878814988075e-05, - "loss": 82.1202, - "step": 56330 - }, - { - "epoch": 0.22762072908123482, - "grad_norm": 1717.6053466796875, - "learning_rate": 4.764439984535074e-05, - "loss": 54.0498, - "step": 56340 - }, - { - "epoch": 0.22766113034660246, - "grad_norm": 755.3112182617188, - "learning_rate": 4.764292043424916e-05, - "loss": 46.0645, - "step": 56350 - }, - { - "epoch": 0.2277015316119701, - "grad_norm": 819.234375, - "learning_rate": 4.764144058171219e-05, - "loss": 68.3072, - "step": 56360 - }, - { - "epoch": 0.2277419328773377, - "grad_norm": 1272.74462890625, - "learning_rate": 4.763996028776868e-05, - "loss": 81.0632, - "step": 56370 - }, - { - "epoch": 0.22778233414270535, - "grad_norm": 970.59765625, - "learning_rate": 4.763847955244749e-05, - "loss": 45.2182, - "step": 56380 - }, - { - "epoch": 0.227822735408073, - "grad_norm": 895.1033325195312, - "learning_rate": 4.7636998375777486e-05, - "loss": 78.1244, - "step": 56390 - }, - { - "epoch": 0.2278631366734406, - "grad_norm": 1142.0018310546875, - "learning_rate": 4.763551675778755e-05, - "loss": 93.2416, - "step": 56400 - }, - { - "epoch": 0.22790353793880824, - "grad_norm": 694.2943725585938, - "learning_rate": 4.7634034698506545e-05, - "loss": 66.8523, - "step": 56410 - }, - { - "epoch": 0.22794393920417588, - "grad_norm": 649.4854736328125, - "learning_rate": 4.76325521979634e-05, - "loss": 94.2294, - "step": 56420 - }, - { - "epoch": 0.2279843404695435, - "grad_norm": 1363.1820068359375, - "learning_rate": 4.7631069256186986e-05, - "loss": 68.8548, - "step": 56430 - }, - { - "epoch": 0.22802474173491113, - "grad_norm": 766.2236328125, - "learning_rate": 4.7629585873206226e-05, - "loss": 98.7785, - "step": 56440 - }, - { - "epoch": 0.22806514300027877, - "grad_norm": 1092.222900390625, - "learning_rate": 4.7628102049050036e-05, - "loss": 68.331, - "step": 56450 - }, - { - "epoch": 0.2281055442656464, - "grad_norm": 745.677978515625, - "learning_rate": 4.7626617783747364e-05, - "loss": 86.9224, - "step": 56460 - }, - { - "epoch": 0.22814594553101403, - "grad_norm": 848.4603881835938, - "learning_rate": 4.762513307732711e-05, - "loss": 97.7969, - "step": 56470 - }, - { - "epoch": 0.22818634679638167, - "grad_norm": 924.7987670898438, - "learning_rate": 4.762364792981825e-05, - "loss": 68.205, - "step": 56480 - }, - { - "epoch": 0.2282267480617493, - "grad_norm": 442.6836242675781, - "learning_rate": 4.762216234124972e-05, - "loss": 125.6712, - "step": 56490 - }, - { - "epoch": 0.22826714932711692, - "grad_norm": 716.2772827148438, - "learning_rate": 4.762067631165049e-05, - "loss": 126.1072, - "step": 56500 - }, - { - "epoch": 0.22830755059248456, - "grad_norm": 956.81005859375, - "learning_rate": 4.761918984104953e-05, - "loss": 51.0658, - "step": 56510 - }, - { - "epoch": 0.2283479518578522, - "grad_norm": 379.62493896484375, - "learning_rate": 4.761770292947582e-05, - "loss": 97.7945, - "step": 56520 - }, - { - "epoch": 0.2283883531232198, - "grad_norm": 1116.5791015625, - "learning_rate": 4.761621557695834e-05, - "loss": 77.2991, - "step": 56530 - }, - { - "epoch": 0.22842875438858745, - "grad_norm": 544.430419921875, - "learning_rate": 4.76147277835261e-05, - "loss": 94.4459, - "step": 56540 - }, - { - "epoch": 0.2284691556539551, - "grad_norm": 827.6028442382812, - "learning_rate": 4.7613239549208106e-05, - "loss": 53.5641, - "step": 56550 - }, - { - "epoch": 0.2285095569193227, - "grad_norm": 1755.43505859375, - "learning_rate": 4.7611750874033356e-05, - "loss": 97.6019, - "step": 56560 - }, - { - "epoch": 0.22854995818469034, - "grad_norm": 456.3388671875, - "learning_rate": 4.7610261758030886e-05, - "loss": 65.4111, - "step": 56570 - }, - { - "epoch": 0.22859035945005798, - "grad_norm": 710.4873046875, - "learning_rate": 4.760877220122971e-05, - "loss": 70.0858, - "step": 56580 - }, - { - "epoch": 0.2286307607154256, - "grad_norm": 717.0205688476562, - "learning_rate": 4.76072822036589e-05, - "loss": 80.4853, - "step": 56590 - }, - { - "epoch": 0.22867116198079324, - "grad_norm": 813.551513671875, - "learning_rate": 4.760579176534747e-05, - "loss": 72.0974, - "step": 56600 - }, - { - "epoch": 0.22871156324616088, - "grad_norm": 1653.39111328125, - "learning_rate": 4.7604300886324496e-05, - "loss": 37.6728, - "step": 56610 - }, - { - "epoch": 0.2287519645115285, - "grad_norm": 1388.64501953125, - "learning_rate": 4.760280956661903e-05, - "loss": 102.1589, - "step": 56620 - }, - { - "epoch": 0.22879236577689613, - "grad_norm": 1519.29931640625, - "learning_rate": 4.760131780626017e-05, - "loss": 107.9805, - "step": 56630 - }, - { - "epoch": 0.22883276704226377, - "grad_norm": 967.7742919921875, - "learning_rate": 4.759982560527698e-05, - "loss": 86.072, - "step": 56640 - }, - { - "epoch": 0.2288731683076314, - "grad_norm": 1597.58642578125, - "learning_rate": 4.7598332963698545e-05, - "loss": 95.1602, - "step": 56650 - }, - { - "epoch": 0.22891356957299902, - "grad_norm": 616.4041137695312, - "learning_rate": 4.7596839881553976e-05, - "loss": 83.347, - "step": 56660 - }, - { - "epoch": 0.22895397083836666, - "grad_norm": 728.1845703125, - "learning_rate": 4.75953463588724e-05, - "loss": 58.7279, - "step": 56670 - }, - { - "epoch": 0.2289943721037343, - "grad_norm": 1369.0509033203125, - "learning_rate": 4.759385239568289e-05, - "loss": 104.2516, - "step": 56680 - }, - { - "epoch": 0.2290347733691019, - "grad_norm": 445.80267333984375, - "learning_rate": 4.75923579920146e-05, - "loss": 69.9564, - "step": 56690 - }, - { - "epoch": 0.22907517463446955, - "grad_norm": 1323.3516845703125, - "learning_rate": 4.7590863147896666e-05, - "loss": 57.2469, - "step": 56700 - }, - { - "epoch": 0.2291155758998372, - "grad_norm": 1134.4810791015625, - "learning_rate": 4.7589367863358225e-05, - "loss": 74.4222, - "step": 56710 - }, - { - "epoch": 0.2291559771652048, - "grad_norm": 1246.8629150390625, - "learning_rate": 4.758787213842842e-05, - "loss": 97.5686, - "step": 56720 - }, - { - "epoch": 0.22919637843057244, - "grad_norm": 1165.90478515625, - "learning_rate": 4.758637597313642e-05, - "loss": 84.3428, - "step": 56730 - }, - { - "epoch": 0.22923677969594008, - "grad_norm": 683.56689453125, - "learning_rate": 4.7584879367511395e-05, - "loss": 61.9244, - "step": 56740 - }, - { - "epoch": 0.2292771809613077, - "grad_norm": 2286.635498046875, - "learning_rate": 4.758338232158252e-05, - "loss": 90.8525, - "step": 56750 - }, - { - "epoch": 0.22931758222667534, - "grad_norm": 1139.4610595703125, - "learning_rate": 4.758188483537898e-05, - "loss": 93.7091, - "step": 56760 - }, - { - "epoch": 0.22935798349204298, - "grad_norm": 517.6661376953125, - "learning_rate": 4.758038690892997e-05, - "loss": 62.2874, - "step": 56770 - }, - { - "epoch": 0.2293983847574106, - "grad_norm": 657.8548583984375, - "learning_rate": 4.7578888542264686e-05, - "loss": 90.596, - "step": 56780 - }, - { - "epoch": 0.22943878602277823, - "grad_norm": 1066.094482421875, - "learning_rate": 4.757738973541236e-05, - "loss": 54.5622, - "step": 56790 - }, - { - "epoch": 0.22947918728814587, - "grad_norm": 1410.8668212890625, - "learning_rate": 4.7575890488402185e-05, - "loss": 44.4098, - "step": 56800 - }, - { - "epoch": 0.2295195885535135, - "grad_norm": 1515.7193603515625, - "learning_rate": 4.75743908012634e-05, - "loss": 89.9203, - "step": 56810 - }, - { - "epoch": 0.22955998981888112, - "grad_norm": 1256.6802978515625, - "learning_rate": 4.757289067402525e-05, - "loss": 65.3287, - "step": 56820 - }, - { - "epoch": 0.22960039108424876, - "grad_norm": 520.7095947265625, - "learning_rate": 4.757139010671697e-05, - "loss": 67.0209, - "step": 56830 - }, - { - "epoch": 0.2296407923496164, - "grad_norm": 1852.7618408203125, - "learning_rate": 4.7569889099367824e-05, - "loss": 90.3647, - "step": 56840 - }, - { - "epoch": 0.229681193614984, - "grad_norm": 888.56787109375, - "learning_rate": 4.7568387652007075e-05, - "loss": 97.766, - "step": 56850 - }, - { - "epoch": 0.22972159488035165, - "grad_norm": 515.2661743164062, - "learning_rate": 4.756688576466398e-05, - "loss": 52.9156, - "step": 56860 - }, - { - "epoch": 0.2297619961457193, - "grad_norm": 779.8905639648438, - "learning_rate": 4.756538343736784e-05, - "loss": 85.2727, - "step": 56870 - }, - { - "epoch": 0.2298023974110869, - "grad_norm": 808.3165893554688, - "learning_rate": 4.756388067014792e-05, - "loss": 89.8556, - "step": 56880 - }, - { - "epoch": 0.22984279867645455, - "grad_norm": 1065.549072265625, - "learning_rate": 4.7562377463033536e-05, - "loss": 81.1974, - "step": 56890 - }, - { - "epoch": 0.22988319994182219, - "grad_norm": 1067.1673583984375, - "learning_rate": 4.7560873816053984e-05, - "loss": 61.5579, - "step": 56900 - }, - { - "epoch": 0.2299236012071898, - "grad_norm": 768.210205078125, - "learning_rate": 4.755936972923859e-05, - "loss": 75.4602, - "step": 56910 - }, - { - "epoch": 0.22996400247255744, - "grad_norm": 333.94232177734375, - "learning_rate": 4.7557865202616656e-05, - "loss": 65.8191, - "step": 56920 - }, - { - "epoch": 0.23000440373792508, - "grad_norm": 1091.112548828125, - "learning_rate": 4.7556360236217534e-05, - "loss": 112.1934, - "step": 56930 - }, - { - "epoch": 0.2300448050032927, - "grad_norm": 1078.4996337890625, - "learning_rate": 4.755485483007056e-05, - "loss": 80.1494, - "step": 56940 - }, - { - "epoch": 0.23008520626866033, - "grad_norm": 663.4862670898438, - "learning_rate": 4.755334898420507e-05, - "loss": 81.2759, - "step": 56950 - }, - { - "epoch": 0.23012560753402797, - "grad_norm": 1670.8721923828125, - "learning_rate": 4.7551842698650436e-05, - "loss": 98.7399, - "step": 56960 - }, - { - "epoch": 0.2301660087993956, - "grad_norm": 707.2112426757812, - "learning_rate": 4.755033597343602e-05, - "loss": 84.5847, - "step": 56970 - }, - { - "epoch": 0.23020641006476322, - "grad_norm": 913.306884765625, - "learning_rate": 4.7548828808591195e-05, - "loss": 78.6086, - "step": 56980 - }, - { - "epoch": 0.23024681133013086, - "grad_norm": 390.49578857421875, - "learning_rate": 4.754732120414534e-05, - "loss": 81.2955, - "step": 56990 - }, - { - "epoch": 0.2302872125954985, - "grad_norm": 593.7713623046875, - "learning_rate": 4.754581316012785e-05, - "loss": 59.7465, - "step": 57000 - }, - { - "epoch": 0.23032761386086611, - "grad_norm": 704.7691650390625, - "learning_rate": 4.754430467656812e-05, - "loss": 61.9324, - "step": 57010 - }, - { - "epoch": 0.23036801512623375, - "grad_norm": 770.904296875, - "learning_rate": 4.7542795753495574e-05, - "loss": 68.9715, - "step": 57020 - }, - { - "epoch": 0.2304084163916014, - "grad_norm": 630.1139526367188, - "learning_rate": 4.754128639093961e-05, - "loss": 82.8383, - "step": 57030 - }, - { - "epoch": 0.230448817656969, - "grad_norm": 1761.17626953125, - "learning_rate": 4.753977658892967e-05, - "loss": 59.8425, - "step": 57040 - }, - { - "epoch": 0.23048921892233665, - "grad_norm": 1344.968017578125, - "learning_rate": 4.753826634749518e-05, - "loss": 89.4451, - "step": 57050 - }, - { - "epoch": 0.2305296201877043, - "grad_norm": 448.2859802246094, - "learning_rate": 4.753675566666558e-05, - "loss": 79.9184, - "step": 57060 - }, - { - "epoch": 0.2305700214530719, - "grad_norm": 999.4277954101562, - "learning_rate": 4.7535244546470325e-05, - "loss": 112.2555, - "step": 57070 - }, - { - "epoch": 0.23061042271843954, - "grad_norm": 1350.092041015625, - "learning_rate": 4.753373298693888e-05, - "loss": 84.6386, - "step": 57080 - }, - { - "epoch": 0.23065082398380718, - "grad_norm": 318.74359130859375, - "learning_rate": 4.753222098810071e-05, - "loss": 48.7075, - "step": 57090 - }, - { - "epoch": 0.2306912252491748, - "grad_norm": 1049.048095703125, - "learning_rate": 4.7530708549985287e-05, - "loss": 55.4899, - "step": 57100 - }, - { - "epoch": 0.23073162651454243, - "grad_norm": 2994.60107421875, - "learning_rate": 4.75291956726221e-05, - "loss": 82.0188, - "step": 57110 - }, - { - "epoch": 0.23077202777991007, - "grad_norm": 1358.6580810546875, - "learning_rate": 4.752768235604065e-05, - "loss": 84.3785, - "step": 57120 - }, - { - "epoch": 0.2308124290452777, - "grad_norm": 309.6406555175781, - "learning_rate": 4.7526168600270435e-05, - "loss": 73.9946, - "step": 57130 - }, - { - "epoch": 0.23085283031064532, - "grad_norm": 1002.0283203125, - "learning_rate": 4.752465440534096e-05, - "loss": 104.0093, - "step": 57140 - }, - { - "epoch": 0.23089323157601296, - "grad_norm": 697.1533813476562, - "learning_rate": 4.752313977128175e-05, - "loss": 45.6771, - "step": 57150 - }, - { - "epoch": 0.2309336328413806, - "grad_norm": 1255.6212158203125, - "learning_rate": 4.752162469812234e-05, - "loss": 77.5493, - "step": 57160 - }, - { - "epoch": 0.23097403410674822, - "grad_norm": 5676.9970703125, - "learning_rate": 4.752010918589226e-05, - "loss": 64.4288, - "step": 57170 - }, - { - "epoch": 0.23101443537211586, - "grad_norm": 564.4259033203125, - "learning_rate": 4.7518593234621056e-05, - "loss": 98.8771, - "step": 57180 - }, - { - "epoch": 0.2310548366374835, - "grad_norm": 1186.0013427734375, - "learning_rate": 4.7517076844338285e-05, - "loss": 67.5268, - "step": 57190 - }, - { - "epoch": 0.2310952379028511, - "grad_norm": 1269.7178955078125, - "learning_rate": 4.7515560015073514e-05, - "loss": 89.8433, - "step": 57200 - }, - { - "epoch": 0.23113563916821875, - "grad_norm": 0.0, - "learning_rate": 4.75140427468563e-05, - "loss": 60.9059, - "step": 57210 - }, - { - "epoch": 0.2311760404335864, - "grad_norm": 1260.9530029296875, - "learning_rate": 4.751252503971624e-05, - "loss": 68.3741, - "step": 57220 - }, - { - "epoch": 0.231216441698954, - "grad_norm": 1468.294677734375, - "learning_rate": 4.75110068936829e-05, - "loss": 86.1376, - "step": 57230 - }, - { - "epoch": 0.23125684296432164, - "grad_norm": 631.3265991210938, - "learning_rate": 4.7509488308785905e-05, - "loss": 92.6364, - "step": 57240 - }, - { - "epoch": 0.23129724422968928, - "grad_norm": 391.2497863769531, - "learning_rate": 4.7507969285054845e-05, - "loss": 57.7668, - "step": 57250 - }, - { - "epoch": 0.2313376454950569, - "grad_norm": 1009.0247192382812, - "learning_rate": 4.750644982251933e-05, - "loss": 60.969, - "step": 57260 - }, - { - "epoch": 0.23137804676042453, - "grad_norm": 787.4730834960938, - "learning_rate": 4.7504929921208984e-05, - "loss": 105.2391, - "step": 57270 - }, - { - "epoch": 0.23141844802579217, - "grad_norm": 649.8621215820312, - "learning_rate": 4.750340958115346e-05, - "loss": 62.9108, - "step": 57280 - }, - { - "epoch": 0.2314588492911598, - "grad_norm": 1550.597900390625, - "learning_rate": 4.7501888802382365e-05, - "loss": 96.6346, - "step": 57290 - }, - { - "epoch": 0.23149925055652743, - "grad_norm": 1971.5574951171875, - "learning_rate": 4.750036758492537e-05, - "loss": 85.7584, - "step": 57300 - }, - { - "epoch": 0.23153965182189506, - "grad_norm": 386.6150817871094, - "learning_rate": 4.749884592881212e-05, - "loss": 109.9105, - "step": 57310 - }, - { - "epoch": 0.2315800530872627, - "grad_norm": 1582.1597900390625, - "learning_rate": 4.749732383407229e-05, - "loss": 112.0731, - "step": 57320 - }, - { - "epoch": 0.23162045435263032, - "grad_norm": 0.0, - "learning_rate": 4.7495801300735554e-05, - "loss": 52.8345, - "step": 57330 - }, - { - "epoch": 0.23166085561799796, - "grad_norm": 489.4527893066406, - "learning_rate": 4.7494278328831584e-05, - "loss": 69.7233, - "step": 57340 - }, - { - "epoch": 0.2317012568833656, - "grad_norm": 483.29364013671875, - "learning_rate": 4.7492754918390074e-05, - "loss": 89.4249, - "step": 57350 - }, - { - "epoch": 0.2317416581487332, - "grad_norm": 2289.152099609375, - "learning_rate": 4.749123106944073e-05, - "loss": 75.3741, - "step": 57360 - }, - { - "epoch": 0.23178205941410085, - "grad_norm": 730.8201904296875, - "learning_rate": 4.748970678201326e-05, - "loss": 76.5618, - "step": 57370 - }, - { - "epoch": 0.2318224606794685, - "grad_norm": 604.216064453125, - "learning_rate": 4.7488182056137374e-05, - "loss": 64.0703, - "step": 57380 - }, - { - "epoch": 0.2318628619448361, - "grad_norm": 850.9320068359375, - "learning_rate": 4.74866568918428e-05, - "loss": 82.7347, - "step": 57390 - }, - { - "epoch": 0.23190326321020374, - "grad_norm": 948.7066650390625, - "learning_rate": 4.7485131289159276e-05, - "loss": 76.1124, - "step": 57400 - }, - { - "epoch": 0.23194366447557138, - "grad_norm": 985.8638916015625, - "learning_rate": 4.7483605248116544e-05, - "loss": 90.0347, - "step": 57410 - }, - { - "epoch": 0.231984065740939, - "grad_norm": 322.2492370605469, - "learning_rate": 4.7482078768744345e-05, - "loss": 97.9077, - "step": 57420 - }, - { - "epoch": 0.23202446700630663, - "grad_norm": 1223.1875, - "learning_rate": 4.7480551851072454e-05, - "loss": 53.8952, - "step": 57430 - }, - { - "epoch": 0.23206486827167427, - "grad_norm": 455.7870788574219, - "learning_rate": 4.747902449513063e-05, - "loss": 78.5205, - "step": 57440 - }, - { - "epoch": 0.23210526953704191, - "grad_norm": 1016.0787963867188, - "learning_rate": 4.747749670094864e-05, - "loss": 86.3612, - "step": 57450 - }, - { - "epoch": 0.23214567080240953, - "grad_norm": 1260.7244873046875, - "learning_rate": 4.7475968468556295e-05, - "loss": 84.6346, - "step": 57460 - }, - { - "epoch": 0.23218607206777717, - "grad_norm": 437.6191711425781, - "learning_rate": 4.7474439797983364e-05, - "loss": 107.3411, - "step": 57470 - }, - { - "epoch": 0.2322264733331448, - "grad_norm": 516.1616821289062, - "learning_rate": 4.7472910689259655e-05, - "loss": 71.8174, - "step": 57480 - }, - { - "epoch": 0.23226687459851242, - "grad_norm": 662.67822265625, - "learning_rate": 4.747138114241499e-05, - "loss": 73.287, - "step": 57490 - }, - { - "epoch": 0.23230727586388006, - "grad_norm": 1739.5025634765625, - "learning_rate": 4.7469851157479177e-05, - "loss": 92.4851, - "step": 57500 - }, - { - "epoch": 0.2323476771292477, - "grad_norm": 682.4597778320312, - "learning_rate": 4.746832073448205e-05, - "loss": 88.4044, - "step": 57510 - }, - { - "epoch": 0.2323880783946153, - "grad_norm": 797.5489501953125, - "learning_rate": 4.7466789873453444e-05, - "loss": 86.4066, - "step": 57520 - }, - { - "epoch": 0.23242847965998295, - "grad_norm": 423.3626708984375, - "learning_rate": 4.74652585744232e-05, - "loss": 62.8509, - "step": 57530 - }, - { - "epoch": 0.2324688809253506, - "grad_norm": 712.4830322265625, - "learning_rate": 4.746372683742117e-05, - "loss": 42.4057, - "step": 57540 - }, - { - "epoch": 0.2325092821907182, - "grad_norm": 799.061767578125, - "learning_rate": 4.746219466247722e-05, - "loss": 52.0032, - "step": 57550 - }, - { - "epoch": 0.23254968345608584, - "grad_norm": 945.8649291992188, - "learning_rate": 4.746066204962123e-05, - "loss": 88.8774, - "step": 57560 - }, - { - "epoch": 0.23259008472145348, - "grad_norm": 803.7813110351562, - "learning_rate": 4.745912899888306e-05, - "loss": 93.842, - "step": 57570 - }, - { - "epoch": 0.2326304859868211, - "grad_norm": 696.2904663085938, - "learning_rate": 4.745759551029261e-05, - "loss": 101.2722, - "step": 57580 - }, - { - "epoch": 0.23267088725218874, - "grad_norm": 857.1948852539062, - "learning_rate": 4.745606158387978e-05, - "loss": 61.6568, - "step": 57590 - }, - { - "epoch": 0.23271128851755638, - "grad_norm": 548.427734375, - "learning_rate": 4.745452721967446e-05, - "loss": 96.5383, - "step": 57600 - }, - { - "epoch": 0.23275168978292402, - "grad_norm": 1423.155517578125, - "learning_rate": 4.745299241770658e-05, - "loss": 85.5254, - "step": 57610 - }, - { - "epoch": 0.23279209104829163, - "grad_norm": 496.381591796875, - "learning_rate": 4.745145717800605e-05, - "loss": 67.1594, - "step": 57620 - }, - { - "epoch": 0.23283249231365927, - "grad_norm": 629.1244506835938, - "learning_rate": 4.74499215006028e-05, - "loss": 64.9573, - "step": 57630 - }, - { - "epoch": 0.2328728935790269, - "grad_norm": 903.4159545898438, - "learning_rate": 4.744838538552677e-05, - "loss": 76.1402, - "step": 57640 - }, - { - "epoch": 0.23291329484439452, - "grad_norm": 504.8460693359375, - "learning_rate": 4.744684883280792e-05, - "loss": 82.0837, - "step": 57650 - }, - { - "epoch": 0.23295369610976216, - "grad_norm": 923.25732421875, - "learning_rate": 4.744531184247619e-05, - "loss": 61.5699, - "step": 57660 - }, - { - "epoch": 0.2329940973751298, - "grad_norm": 869.8147583007812, - "learning_rate": 4.744377441456155e-05, - "loss": 68.7058, - "step": 57670 - }, - { - "epoch": 0.2330344986404974, - "grad_norm": 924.66845703125, - "learning_rate": 4.744223654909397e-05, - "loss": 109.6333, - "step": 57680 - }, - { - "epoch": 0.23307489990586505, - "grad_norm": 1206.5703125, - "learning_rate": 4.744069824610344e-05, - "loss": 95.6685, - "step": 57690 - }, - { - "epoch": 0.2331153011712327, - "grad_norm": 777.4462280273438, - "learning_rate": 4.743915950561994e-05, - "loss": 65.7932, - "step": 57700 - }, - { - "epoch": 0.2331557024366003, - "grad_norm": 741.0814819335938, - "learning_rate": 4.743762032767348e-05, - "loss": 51.2344, - "step": 57710 - }, - { - "epoch": 0.23319610370196794, - "grad_norm": 898.6174926757812, - "learning_rate": 4.743608071229405e-05, - "loss": 97.9571, - "step": 57720 - }, - { - "epoch": 0.23323650496733558, - "grad_norm": 949.9400024414062, - "learning_rate": 4.743454065951168e-05, - "loss": 88.1819, - "step": 57730 - }, - { - "epoch": 0.2332769062327032, - "grad_norm": 6401.75244140625, - "learning_rate": 4.743300016935639e-05, - "loss": 86.2424, - "step": 57740 - }, - { - "epoch": 0.23331730749807084, - "grad_norm": 1315.59130859375, - "learning_rate": 4.743145924185821e-05, - "loss": 78.7651, - "step": 57750 - }, - { - "epoch": 0.23335770876343848, - "grad_norm": 968.3670043945312, - "learning_rate": 4.742991787704719e-05, - "loss": 53.8725, - "step": 57760 - }, - { - "epoch": 0.2333981100288061, - "grad_norm": 281.5350036621094, - "learning_rate": 4.7428376074953365e-05, - "loss": 88.1397, - "step": 57770 - }, - { - "epoch": 0.23343851129417373, - "grad_norm": 1783.01318359375, - "learning_rate": 4.7426833835606806e-05, - "loss": 111.0655, - "step": 57780 - }, - { - "epoch": 0.23347891255954137, - "grad_norm": 971.6173095703125, - "learning_rate": 4.7425291159037575e-05, - "loss": 71.0971, - "step": 57790 - }, - { - "epoch": 0.233519313824909, - "grad_norm": 966.53662109375, - "learning_rate": 4.742374804527575e-05, - "loss": 85.1464, - "step": 57800 - }, - { - "epoch": 0.23355971509027662, - "grad_norm": 1798.930419921875, - "learning_rate": 4.742220449435141e-05, - "loss": 84.1092, - "step": 57810 - }, - { - "epoch": 0.23360011635564426, - "grad_norm": 1772.7415771484375, - "learning_rate": 4.742066050629465e-05, - "loss": 50.9059, - "step": 57820 - }, - { - "epoch": 0.2336405176210119, - "grad_norm": 942.0059814453125, - "learning_rate": 4.741911608113557e-05, - "loss": 77.9877, - "step": 57830 - }, - { - "epoch": 0.2336809188863795, - "grad_norm": 472.93890380859375, - "learning_rate": 4.741757121890428e-05, - "loss": 67.7341, - "step": 57840 - }, - { - "epoch": 0.23372132015174715, - "grad_norm": 731.5870971679688, - "learning_rate": 4.7416025919630904e-05, - "loss": 96.6892, - "step": 57850 - }, - { - "epoch": 0.2337617214171148, - "grad_norm": 973.33935546875, - "learning_rate": 4.741448018334555e-05, - "loss": 106.4843, - "step": 57860 - }, - { - "epoch": 0.2338021226824824, - "grad_norm": 1150.518310546875, - "learning_rate": 4.741293401007837e-05, - "loss": 57.7009, - "step": 57870 - }, - { - "epoch": 0.23384252394785005, - "grad_norm": 879.5131225585938, - "learning_rate": 4.741138739985951e-05, - "loss": 75.2222, - "step": 57880 - }, - { - "epoch": 0.23388292521321769, - "grad_norm": 1559.7349853515625, - "learning_rate": 4.7409840352719106e-05, - "loss": 95.9505, - "step": 57890 - }, - { - "epoch": 0.2339233264785853, - "grad_norm": 1342.956298828125, - "learning_rate": 4.740829286868733e-05, - "loss": 74.7077, - "step": 57900 - }, - { - "epoch": 0.23396372774395294, - "grad_norm": 1618.971923828125, - "learning_rate": 4.740674494779435e-05, - "loss": 72.2942, - "step": 57910 - }, - { - "epoch": 0.23400412900932058, - "grad_norm": 598.3280029296875, - "learning_rate": 4.740519659007033e-05, - "loss": 71.8578, - "step": 57920 - }, - { - "epoch": 0.2340445302746882, - "grad_norm": 1528.6131591796875, - "learning_rate": 4.7403647795545484e-05, - "loss": 109.1537, - "step": 57930 - }, - { - "epoch": 0.23408493154005583, - "grad_norm": 1394.4090576171875, - "learning_rate": 4.7402098564249974e-05, - "loss": 58.2658, - "step": 57940 - }, - { - "epoch": 0.23412533280542347, - "grad_norm": 415.70263671875, - "learning_rate": 4.7400548896214024e-05, - "loss": 67.7115, - "step": 57950 - }, - { - "epoch": 0.2341657340707911, - "grad_norm": 928.4489135742188, - "learning_rate": 4.739899879146785e-05, - "loss": 54.671, - "step": 57960 - }, - { - "epoch": 0.23420613533615872, - "grad_norm": 995.4201049804688, - "learning_rate": 4.739744825004165e-05, - "loss": 85.7247, - "step": 57970 - }, - { - "epoch": 0.23424653660152636, - "grad_norm": 950.1658935546875, - "learning_rate": 4.739589727196568e-05, - "loss": 53.7052, - "step": 57980 - }, - { - "epoch": 0.234286937866894, - "grad_norm": 431.6142883300781, - "learning_rate": 4.739434585727015e-05, - "loss": 56.5389, - "step": 57990 - }, - { - "epoch": 0.23432733913226161, - "grad_norm": 1080.2391357421875, - "learning_rate": 4.7392794005985326e-05, - "loss": 82.0306, - "step": 58000 - }, - { - "epoch": 0.23436774039762925, - "grad_norm": 1104.7730712890625, - "learning_rate": 4.739124171814145e-05, - "loss": 64.9178, - "step": 58010 - }, - { - "epoch": 0.2344081416629969, - "grad_norm": 1969.99755859375, - "learning_rate": 4.7389688993768786e-05, - "loss": 97.3219, - "step": 58020 - }, - { - "epoch": 0.2344485429283645, - "grad_norm": 792.9730224609375, - "learning_rate": 4.738813583289762e-05, - "loss": 55.4297, - "step": 58030 - }, - { - "epoch": 0.23448894419373215, - "grad_norm": 1705.9310302734375, - "learning_rate": 4.7386582235558205e-05, - "loss": 105.6232, - "step": 58040 - }, - { - "epoch": 0.2345293454590998, - "grad_norm": 630.877197265625, - "learning_rate": 4.738502820178085e-05, - "loss": 72.6224, - "step": 58050 - }, - { - "epoch": 0.2345697467244674, - "grad_norm": 656.330322265625, - "learning_rate": 4.738347373159585e-05, - "loss": 106.0807, - "step": 58060 - }, - { - "epoch": 0.23461014798983504, - "grad_norm": 917.6051635742188, - "learning_rate": 4.73819188250335e-05, - "loss": 82.6257, - "step": 58070 - }, - { - "epoch": 0.23465054925520268, - "grad_norm": 574.9983520507812, - "learning_rate": 4.738036348212412e-05, - "loss": 86.4343, - "step": 58080 - }, - { - "epoch": 0.2346909505205703, - "grad_norm": 593.380859375, - "learning_rate": 4.737880770289803e-05, - "loss": 84.0278, - "step": 58090 - }, - { - "epoch": 0.23473135178593793, - "grad_norm": 342.4287109375, - "learning_rate": 4.737725148738557e-05, - "loss": 48.0205, - "step": 58100 - }, - { - "epoch": 0.23477175305130557, - "grad_norm": 520.4551391601562, - "learning_rate": 4.737569483561707e-05, - "loss": 48.0131, - "step": 58110 - }, - { - "epoch": 0.2348121543166732, - "grad_norm": 431.37115478515625, - "learning_rate": 4.737413774762287e-05, - "loss": 49.1139, - "step": 58120 - }, - { - "epoch": 0.23485255558204082, - "grad_norm": 1225.8043212890625, - "learning_rate": 4.737258022343335e-05, - "loss": 55.6529, - "step": 58130 - }, - { - "epoch": 0.23489295684740846, - "grad_norm": 475.2739562988281, - "learning_rate": 4.737102226307884e-05, - "loss": 72.1621, - "step": 58140 - }, - { - "epoch": 0.2349333581127761, - "grad_norm": 5526.28271484375, - "learning_rate": 4.736946386658976e-05, - "loss": 81.7198, - "step": 58150 - }, - { - "epoch": 0.23497375937814372, - "grad_norm": 836.2157592773438, - "learning_rate": 4.7367905033996445e-05, - "loss": 64.377, - "step": 58160 - }, - { - "epoch": 0.23501416064351136, - "grad_norm": 880.4158935546875, - "learning_rate": 4.736634576532931e-05, - "loss": 92.249, - "step": 58170 - }, - { - "epoch": 0.235054561908879, - "grad_norm": 582.840576171875, - "learning_rate": 4.736478606061875e-05, - "loss": 52.3027, - "step": 58180 - }, - { - "epoch": 0.2350949631742466, - "grad_norm": 587.2545776367188, - "learning_rate": 4.7363225919895185e-05, - "loss": 57.8036, - "step": 58190 - }, - { - "epoch": 0.23513536443961425, - "grad_norm": 975.4487915039062, - "learning_rate": 4.7361665343189e-05, - "loss": 74.3185, - "step": 58200 - }, - { - "epoch": 0.2351757657049819, - "grad_norm": 1377.298583984375, - "learning_rate": 4.736010433053064e-05, - "loss": 80.2555, - "step": 58210 - }, - { - "epoch": 0.2352161669703495, - "grad_norm": 806.4180297851562, - "learning_rate": 4.735854288195054e-05, - "loss": 63.3192, - "step": 58220 - }, - { - "epoch": 0.23525656823571714, - "grad_norm": 1805.80810546875, - "learning_rate": 4.735698099747913e-05, - "loss": 73.7252, - "step": 58230 - }, - { - "epoch": 0.23529696950108478, - "grad_norm": 1094.813232421875, - "learning_rate": 4.735541867714687e-05, - "loss": 114.6661, - "step": 58240 - }, - { - "epoch": 0.2353373707664524, - "grad_norm": 3966.76123046875, - "learning_rate": 4.73538559209842e-05, - "loss": 168.1956, - "step": 58250 - }, - { - "epoch": 0.23537777203182003, - "grad_norm": 496.0440673828125, - "learning_rate": 4.735229272902162e-05, - "loss": 90.3166, - "step": 58260 - }, - { - "epoch": 0.23541817329718767, - "grad_norm": 639.0549926757812, - "learning_rate": 4.735072910128957e-05, - "loss": 101.5113, - "step": 58270 - }, - { - "epoch": 0.2354585745625553, - "grad_norm": 643.226318359375, - "learning_rate": 4.734916503781856e-05, - "loss": 46.3206, - "step": 58280 - }, - { - "epoch": 0.23549897582792292, - "grad_norm": 1588.71142578125, - "learning_rate": 4.7347600538639067e-05, - "loss": 82.4462, - "step": 58290 - }, - { - "epoch": 0.23553937709329056, - "grad_norm": 306.2060852050781, - "learning_rate": 4.73460356037816e-05, - "loss": 86.551, - "step": 58300 - }, - { - "epoch": 0.2355797783586582, - "grad_norm": 838.1617431640625, - "learning_rate": 4.734447023327666e-05, - "loss": 62.018, - "step": 58310 - }, - { - "epoch": 0.23562017962402582, - "grad_norm": 839.5139770507812, - "learning_rate": 4.7342904427154766e-05, - "loss": 68.918, - "step": 58320 - }, - { - "epoch": 0.23566058088939346, - "grad_norm": 2884.371826171875, - "learning_rate": 4.734133818544645e-05, - "loss": 93.363, - "step": 58330 - }, - { - "epoch": 0.2357009821547611, - "grad_norm": 1337.994873046875, - "learning_rate": 4.733977150818225e-05, - "loss": 80.4189, - "step": 58340 - }, - { - "epoch": 0.2357413834201287, - "grad_norm": 916.3220825195312, - "learning_rate": 4.7338204395392694e-05, - "loss": 64.318, - "step": 58350 - }, - { - "epoch": 0.23578178468549635, - "grad_norm": 4077.7041015625, - "learning_rate": 4.733663684710835e-05, - "loss": 106.2886, - "step": 58360 - }, - { - "epoch": 0.235822185950864, - "grad_norm": 471.49566650390625, - "learning_rate": 4.7335068863359764e-05, - "loss": 76.7145, - "step": 58370 - }, - { - "epoch": 0.2358625872162316, - "grad_norm": 821.7672119140625, - "learning_rate": 4.733350044417752e-05, - "loss": 59.775, - "step": 58380 - }, - { - "epoch": 0.23590298848159924, - "grad_norm": 628.004638671875, - "learning_rate": 4.733193158959218e-05, - "loss": 65.8933, - "step": 58390 - }, - { - "epoch": 0.23594338974696688, - "grad_norm": 487.54107666015625, - "learning_rate": 4.733036229963435e-05, - "loss": 78.1683, - "step": 58400 - }, - { - "epoch": 0.2359837910123345, - "grad_norm": 967.7349243164062, - "learning_rate": 4.732879257433459e-05, - "loss": 87.9057, - "step": 58410 - }, - { - "epoch": 0.23602419227770213, - "grad_norm": 834.74755859375, - "learning_rate": 4.7327222413723536e-05, - "loss": 74.7745, - "step": 58420 - }, - { - "epoch": 0.23606459354306977, - "grad_norm": 1843.8294677734375, - "learning_rate": 4.7325651817831784e-05, - "loss": 69.399, - "step": 58430 - }, - { - "epoch": 0.2361049948084374, - "grad_norm": 900.3565063476562, - "learning_rate": 4.732408078668995e-05, - "loss": 70.5233, - "step": 58440 - }, - { - "epoch": 0.23614539607380503, - "grad_norm": 420.2508544921875, - "learning_rate": 4.7322509320328675e-05, - "loss": 67.6304, - "step": 58450 - }, - { - "epoch": 0.23618579733917267, - "grad_norm": 1126.9072265625, - "learning_rate": 4.732093741877859e-05, - "loss": 95.9313, - "step": 58460 - }, - { - "epoch": 0.2362261986045403, - "grad_norm": 496.6501770019531, - "learning_rate": 4.731936508207033e-05, - "loss": 65.5002, - "step": 58470 - }, - { - "epoch": 0.23626659986990792, - "grad_norm": 511.6619567871094, - "learning_rate": 4.731779231023456e-05, - "loss": 68.3283, - "step": 58480 - }, - { - "epoch": 0.23630700113527556, - "grad_norm": 728.8594970703125, - "learning_rate": 4.731621910330194e-05, - "loss": 71.2413, - "step": 58490 - }, - { - "epoch": 0.2363474024006432, - "grad_norm": 721.872802734375, - "learning_rate": 4.731464546130314e-05, - "loss": 64.1135, - "step": 58500 - }, - { - "epoch": 0.2363878036660108, - "grad_norm": 557.9456787109375, - "learning_rate": 4.7313071384268836e-05, - "loss": 71.9643, - "step": 58510 - }, - { - "epoch": 0.23642820493137845, - "grad_norm": 952.9915771484375, - "learning_rate": 4.731149687222972e-05, - "loss": 72.1631, - "step": 58520 - }, - { - "epoch": 0.2364686061967461, - "grad_norm": 406.0745544433594, - "learning_rate": 4.7309921925216484e-05, - "loss": 87.8844, - "step": 58530 - }, - { - "epoch": 0.2365090074621137, - "grad_norm": 6206.943359375, - "learning_rate": 4.730834654325984e-05, - "loss": 117.5255, - "step": 58540 - }, - { - "epoch": 0.23654940872748134, - "grad_norm": 0.0, - "learning_rate": 4.7306770726390496e-05, - "loss": 52.3261, - "step": 58550 - }, - { - "epoch": 0.23658980999284898, - "grad_norm": 1959.0308837890625, - "learning_rate": 4.730519447463916e-05, - "loss": 87.7869, - "step": 58560 - }, - { - "epoch": 0.2366302112582166, - "grad_norm": 1353.8363037109375, - "learning_rate": 4.730361778803658e-05, - "loss": 92.4961, - "step": 58570 - }, - { - "epoch": 0.23667061252358423, - "grad_norm": 1127.2203369140625, - "learning_rate": 4.730204066661349e-05, - "loss": 101.3056, - "step": 58580 - }, - { - "epoch": 0.23671101378895187, - "grad_norm": 1524.7484130859375, - "learning_rate": 4.730046311040064e-05, - "loss": 144.1944, - "step": 58590 - }, - { - "epoch": 0.23675141505431951, - "grad_norm": 831.2205200195312, - "learning_rate": 4.7298885119428773e-05, - "loss": 70.5365, - "step": 58600 - }, - { - "epoch": 0.23679181631968713, - "grad_norm": 913.74853515625, - "learning_rate": 4.729730669372866e-05, - "loss": 68.5036, - "step": 58610 - }, - { - "epoch": 0.23683221758505477, - "grad_norm": 2814.275146484375, - "learning_rate": 4.729572783333108e-05, - "loss": 90.047, - "step": 58620 - }, - { - "epoch": 0.2368726188504224, - "grad_norm": 1022.5120849609375, - "learning_rate": 4.72941485382668e-05, - "loss": 84.8219, - "step": 58630 - }, - { - "epoch": 0.23691302011579002, - "grad_norm": 665.8732299804688, - "learning_rate": 4.729256880856662e-05, - "loss": 63.3056, - "step": 58640 - }, - { - "epoch": 0.23695342138115766, - "grad_norm": 2222.1650390625, - "learning_rate": 4.7290988644261336e-05, - "loss": 77.9888, - "step": 58650 - }, - { - "epoch": 0.2369938226465253, - "grad_norm": 780.3485717773438, - "learning_rate": 4.728940804538176e-05, - "loss": 58.1334, - "step": 58660 - }, - { - "epoch": 0.2370342239118929, - "grad_norm": 568.1907958984375, - "learning_rate": 4.728782701195869e-05, - "loss": 53.3181, - "step": 58670 - }, - { - "epoch": 0.23707462517726055, - "grad_norm": 662.433349609375, - "learning_rate": 4.728624554402295e-05, - "loss": 67.1239, - "step": 58680 - }, - { - "epoch": 0.2371150264426282, - "grad_norm": 742.7056274414062, - "learning_rate": 4.7284663641605384e-05, - "loss": 56.0102, - "step": 58690 - }, - { - "epoch": 0.2371554277079958, - "grad_norm": 1382.75439453125, - "learning_rate": 4.728308130473683e-05, - "loss": 52.9646, - "step": 58700 - }, - { - "epoch": 0.23719582897336344, - "grad_norm": 1125.7822265625, - "learning_rate": 4.7281498533448136e-05, - "loss": 69.9835, - "step": 58710 - }, - { - "epoch": 0.23723623023873108, - "grad_norm": 446.69677734375, - "learning_rate": 4.7279915327770155e-05, - "loss": 54.0501, - "step": 58720 - }, - { - "epoch": 0.2372766315040987, - "grad_norm": 2016.18310546875, - "learning_rate": 4.7278331687733754e-05, - "loss": 111.3491, - "step": 58730 - }, - { - "epoch": 0.23731703276946634, - "grad_norm": 1517.979736328125, - "learning_rate": 4.727674761336981e-05, - "loss": 95.3152, - "step": 58740 - }, - { - "epoch": 0.23735743403483398, - "grad_norm": 965.1806030273438, - "learning_rate": 4.72751631047092e-05, - "loss": 77.6278, - "step": 58750 - }, - { - "epoch": 0.23739783530020162, - "grad_norm": 697.4866333007812, - "learning_rate": 4.727357816178282e-05, - "loss": 86.2706, - "step": 58760 - }, - { - "epoch": 0.23743823656556923, - "grad_norm": 521.2189331054688, - "learning_rate": 4.727199278462156e-05, - "loss": 59.1101, - "step": 58770 - }, - { - "epoch": 0.23747863783093687, - "grad_norm": 456.9306945800781, - "learning_rate": 4.727040697325634e-05, - "loss": 57.2227, - "step": 58780 - }, - { - "epoch": 0.2375190390963045, - "grad_norm": 1488.9251708984375, - "learning_rate": 4.726882072771807e-05, - "loss": 83.9987, - "step": 58790 - }, - { - "epoch": 0.23755944036167212, - "grad_norm": 816.0877075195312, - "learning_rate": 4.7267234048037664e-05, - "loss": 75.2146, - "step": 58800 - }, - { - "epoch": 0.23759984162703976, - "grad_norm": 1049.84375, - "learning_rate": 4.726564693424608e-05, - "loss": 85.2812, - "step": 58810 - }, - { - "epoch": 0.2376402428924074, - "grad_norm": 660.714599609375, - "learning_rate": 4.7264059386374236e-05, - "loss": 50.8552, - "step": 58820 - }, - { - "epoch": 0.237680644157775, - "grad_norm": 835.5247192382812, - "learning_rate": 4.72624714044531e-05, - "loss": 85.9735, - "step": 58830 - }, - { - "epoch": 0.23772104542314265, - "grad_norm": 934.1461181640625, - "learning_rate": 4.7260882988513624e-05, - "loss": 82.1574, - "step": 58840 - }, - { - "epoch": 0.2377614466885103, - "grad_norm": 253.60675048828125, - "learning_rate": 4.725929413858677e-05, - "loss": 86.5755, - "step": 58850 - }, - { - "epoch": 0.2378018479538779, - "grad_norm": 476.08544921875, - "learning_rate": 4.725770485470351e-05, - "loss": 79.0762, - "step": 58860 - }, - { - "epoch": 0.23784224921924554, - "grad_norm": 1652.21484375, - "learning_rate": 4.725611513689485e-05, - "loss": 76.6932, - "step": 58870 - }, - { - "epoch": 0.23788265048461318, - "grad_norm": 504.9270324707031, - "learning_rate": 4.725452498519175e-05, - "loss": 76.0411, - "step": 58880 - }, - { - "epoch": 0.2379230517499808, - "grad_norm": 1309.692626953125, - "learning_rate": 4.7252934399625234e-05, - "loss": 52.6607, - "step": 58890 - }, - { - "epoch": 0.23796345301534844, - "grad_norm": 1309.8441162109375, - "learning_rate": 4.725134338022631e-05, - "loss": 88.8201, - "step": 58900 - }, - { - "epoch": 0.23800385428071608, - "grad_norm": 1632.8326416015625, - "learning_rate": 4.7249751927025996e-05, - "loss": 88.7722, - "step": 58910 - }, - { - "epoch": 0.23804425554608372, - "grad_norm": 535.5170288085938, - "learning_rate": 4.7248160040055304e-05, - "loss": 47.7584, - "step": 58920 - }, - { - "epoch": 0.23808465681145133, - "grad_norm": 708.9509887695312, - "learning_rate": 4.724656771934528e-05, - "loss": 83.726, - "step": 58930 - }, - { - "epoch": 0.23812505807681897, - "grad_norm": 579.57275390625, - "learning_rate": 4.7244974964926965e-05, - "loss": 62.5475, - "step": 58940 - }, - { - "epoch": 0.2381654593421866, - "grad_norm": 651.062744140625, - "learning_rate": 4.724338177683141e-05, - "loss": 63.3703, - "step": 58950 - }, - { - "epoch": 0.23820586060755422, - "grad_norm": 1036.9290771484375, - "learning_rate": 4.724178815508967e-05, - "loss": 59.7281, - "step": 58960 - }, - { - "epoch": 0.23824626187292186, - "grad_norm": 406.64044189453125, - "learning_rate": 4.724019409973283e-05, - "loss": 75.1738, - "step": 58970 - }, - { - "epoch": 0.2382866631382895, - "grad_norm": 533.7338256835938, - "learning_rate": 4.723859961079195e-05, - "loss": 76.1549, - "step": 58980 - }, - { - "epoch": 0.2383270644036571, - "grad_norm": 595.7689819335938, - "learning_rate": 4.7237004688298125e-05, - "loss": 134.3011, - "step": 58990 - }, - { - "epoch": 0.23836746566902475, - "grad_norm": 1294.3282470703125, - "learning_rate": 4.723540933228244e-05, - "loss": 100.5167, - "step": 59000 - }, - { - "epoch": 0.2384078669343924, - "grad_norm": 612.8972778320312, - "learning_rate": 4.7233813542776006e-05, - "loss": 75.6335, - "step": 59010 - }, - { - "epoch": 0.23844826819976, - "grad_norm": 1156.35498046875, - "learning_rate": 4.723221731980993e-05, - "loss": 70.4552, - "step": 59020 - }, - { - "epoch": 0.23848866946512765, - "grad_norm": 1819.1259765625, - "learning_rate": 4.723062066341533e-05, - "loss": 64.2368, - "step": 59030 - }, - { - "epoch": 0.23852907073049529, - "grad_norm": 765.05615234375, - "learning_rate": 4.722902357362333e-05, - "loss": 80.1383, - "step": 59040 - }, - { - "epoch": 0.2385694719958629, - "grad_norm": 623.2485961914062, - "learning_rate": 4.7227426050465084e-05, - "loss": 73.9087, - "step": 59050 - }, - { - "epoch": 0.23860987326123054, - "grad_norm": 719.6350708007812, - "learning_rate": 4.722582809397171e-05, - "loss": 59.0293, - "step": 59060 - }, - { - "epoch": 0.23865027452659818, - "grad_norm": 410.1199645996094, - "learning_rate": 4.722422970417438e-05, - "loss": 58.2241, - "step": 59070 - }, - { - "epoch": 0.23869067579196582, - "grad_norm": 598.96240234375, - "learning_rate": 4.722263088110426e-05, - "loss": 79.149, - "step": 59080 - }, - { - "epoch": 0.23873107705733343, - "grad_norm": 504.0932922363281, - "learning_rate": 4.72210316247925e-05, - "loss": 67.0265, - "step": 59090 - }, - { - "epoch": 0.23877147832270107, - "grad_norm": 1157.12060546875, - "learning_rate": 4.721943193527029e-05, - "loss": 89.9269, - "step": 59100 - }, - { - "epoch": 0.2388118795880687, - "grad_norm": 1023.6410522460938, - "learning_rate": 4.7217831812568815e-05, - "loss": 59.8878, - "step": 59110 - }, - { - "epoch": 0.23885228085343632, - "grad_norm": 299.01300048828125, - "learning_rate": 4.721623125671927e-05, - "loss": 106.7344, - "step": 59120 - }, - { - "epoch": 0.23889268211880396, - "grad_norm": 490.2772521972656, - "learning_rate": 4.7214630267752856e-05, - "loss": 84.1761, - "step": 59130 - }, - { - "epoch": 0.2389330833841716, - "grad_norm": 689.8060913085938, - "learning_rate": 4.721302884570079e-05, - "loss": 61.8113, - "step": 59140 - }, - { - "epoch": 0.23897348464953921, - "grad_norm": 573.6192016601562, - "learning_rate": 4.7211426990594296e-05, - "loss": 84.5191, - "step": 59150 - }, - { - "epoch": 0.23901388591490685, - "grad_norm": 554.744384765625, - "learning_rate": 4.720982470246459e-05, - "loss": 68.6441, - "step": 59160 - }, - { - "epoch": 0.2390542871802745, - "grad_norm": 279.83795166015625, - "learning_rate": 4.720822198134293e-05, - "loss": 70.0726, - "step": 59170 - }, - { - "epoch": 0.2390946884456421, - "grad_norm": 1090.882568359375, - "learning_rate": 4.7206618827260534e-05, - "loss": 41.3233, - "step": 59180 - }, - { - "epoch": 0.23913508971100975, - "grad_norm": 2077.9443359375, - "learning_rate": 4.720501524024867e-05, - "loss": 81.099, - "step": 59190 - }, - { - "epoch": 0.2391754909763774, - "grad_norm": 537.9066772460938, - "learning_rate": 4.720341122033862e-05, - "loss": 71.1957, - "step": 59200 - }, - { - "epoch": 0.239215892241745, - "grad_norm": 1677.8251953125, - "learning_rate": 4.720180676756162e-05, - "loss": 70.0094, - "step": 59210 - }, - { - "epoch": 0.23925629350711264, - "grad_norm": 618.0469970703125, - "learning_rate": 4.720020188194897e-05, - "loss": 60.1108, - "step": 59220 - }, - { - "epoch": 0.23929669477248028, - "grad_norm": 843.9923095703125, - "learning_rate": 4.719859656353196e-05, - "loss": 54.7157, - "step": 59230 - }, - { - "epoch": 0.23933709603784792, - "grad_norm": 1101.4031982421875, - "learning_rate": 4.719699081234188e-05, - "loss": 101.9431, - "step": 59240 - }, - { - "epoch": 0.23937749730321553, - "grad_norm": 541.9828491210938, - "learning_rate": 4.719538462841003e-05, - "loss": 85.9788, - "step": 59250 - }, - { - "epoch": 0.23941789856858317, - "grad_norm": 470.3389587402344, - "learning_rate": 4.719377801176774e-05, - "loss": 73.3981, - "step": 59260 - }, - { - "epoch": 0.2394582998339508, - "grad_norm": 867.8907470703125, - "learning_rate": 4.719217096244631e-05, - "loss": 56.2378, - "step": 59270 - }, - { - "epoch": 0.23949870109931842, - "grad_norm": 1353.7008056640625, - "learning_rate": 4.7190563480477095e-05, - "loss": 57.5405, - "step": 59280 - }, - { - "epoch": 0.23953910236468606, - "grad_norm": 968.8933715820312, - "learning_rate": 4.718895556589141e-05, - "loss": 55.8982, - "step": 59290 - }, - { - "epoch": 0.2395795036300537, - "grad_norm": 1031.3507080078125, - "learning_rate": 4.718734721872062e-05, - "loss": 72.3141, - "step": 59300 - }, - { - "epoch": 0.23961990489542132, - "grad_norm": 531.7722778320312, - "learning_rate": 4.718573843899607e-05, - "loss": 79.3897, - "step": 59310 - }, - { - "epoch": 0.23966030616078896, - "grad_norm": 1232.718994140625, - "learning_rate": 4.718412922674913e-05, - "loss": 79.8532, - "step": 59320 - }, - { - "epoch": 0.2397007074261566, - "grad_norm": 3018.275390625, - "learning_rate": 4.718251958201117e-05, - "loss": 88.7309, - "step": 59330 - }, - { - "epoch": 0.2397411086915242, - "grad_norm": 1827.9061279296875, - "learning_rate": 4.718090950481356e-05, - "loss": 101.574, - "step": 59340 - }, - { - "epoch": 0.23978150995689185, - "grad_norm": 715.8681640625, - "learning_rate": 4.71792989951877e-05, - "loss": 72.9536, - "step": 59350 - }, - { - "epoch": 0.2398219112222595, - "grad_norm": 1006.0107421875, - "learning_rate": 4.717768805316501e-05, - "loss": 67.8689, - "step": 59360 - }, - { - "epoch": 0.2398623124876271, - "grad_norm": 1562.623779296875, - "learning_rate": 4.717607667877685e-05, - "loss": 94.7435, - "step": 59370 - }, - { - "epoch": 0.23990271375299474, - "grad_norm": 822.7528686523438, - "learning_rate": 4.717446487205466e-05, - "loss": 71.5322, - "step": 59380 - }, - { - "epoch": 0.23994311501836238, - "grad_norm": 733.3274536132812, - "learning_rate": 4.717285263302987e-05, - "loss": 63.2148, - "step": 59390 - }, - { - "epoch": 0.23998351628373002, - "grad_norm": 1009.9617919921875, - "learning_rate": 4.71712399617339e-05, - "loss": 92.269, - "step": 59400 - }, - { - "epoch": 0.24002391754909763, - "grad_norm": 768.0454711914062, - "learning_rate": 4.716962685819819e-05, - "loss": 46.8281, - "step": 59410 - }, - { - "epoch": 0.24006431881446527, - "grad_norm": 392.7835693359375, - "learning_rate": 4.716801332245419e-05, - "loss": 57.8384, - "step": 59420 - }, - { - "epoch": 0.2401047200798329, - "grad_norm": 1200.4879150390625, - "learning_rate": 4.7166399354533365e-05, - "loss": 63.8987, - "step": 59430 - }, - { - "epoch": 0.24014512134520052, - "grad_norm": 540.96142578125, - "learning_rate": 4.7164784954467166e-05, - "loss": 86.5465, - "step": 59440 - }, - { - "epoch": 0.24018552261056816, - "grad_norm": 679.489501953125, - "learning_rate": 4.716317012228707e-05, - "loss": 52.6366, - "step": 59450 - }, - { - "epoch": 0.2402259238759358, - "grad_norm": 1019.4818115234375, - "learning_rate": 4.716155485802457e-05, - "loss": 55.8305, - "step": 59460 - }, - { - "epoch": 0.24026632514130342, - "grad_norm": 330.5364990234375, - "learning_rate": 4.715993916171114e-05, - "loss": 42.9955, - "step": 59470 - }, - { - "epoch": 0.24030672640667106, - "grad_norm": 1147.410888671875, - "learning_rate": 4.715832303337829e-05, - "loss": 93.0537, - "step": 59480 - }, - { - "epoch": 0.2403471276720387, - "grad_norm": 1502.9691162109375, - "learning_rate": 4.715670647305753e-05, - "loss": 66.4446, - "step": 59490 - }, - { - "epoch": 0.2403875289374063, - "grad_norm": 1007.5733032226562, - "learning_rate": 4.715508948078037e-05, - "loss": 68.1062, - "step": 59500 - }, - { - "epoch": 0.24042793020277395, - "grad_norm": 1082.7093505859375, - "learning_rate": 4.715347205657833e-05, - "loss": 85.7707, - "step": 59510 - }, - { - "epoch": 0.2404683314681416, - "grad_norm": 591.158447265625, - "learning_rate": 4.715185420048295e-05, - "loss": 53.6973, - "step": 59520 - }, - { - "epoch": 0.2405087327335092, - "grad_norm": 357.9864196777344, - "learning_rate": 4.715023591252576e-05, - "loss": 70.244, - "step": 59530 - }, - { - "epoch": 0.24054913399887684, - "grad_norm": 975.7410888671875, - "learning_rate": 4.714861719273833e-05, - "loss": 84.0754, - "step": 59540 - }, - { - "epoch": 0.24058953526424448, - "grad_norm": 986.4381103515625, - "learning_rate": 4.7146998041152204e-05, - "loss": 64.928, - "step": 59550 - }, - { - "epoch": 0.24062993652961212, - "grad_norm": 996.6936645507812, - "learning_rate": 4.714537845779894e-05, - "loss": 47.7855, - "step": 59560 - }, - { - "epoch": 0.24067033779497973, - "grad_norm": 864.1629638671875, - "learning_rate": 4.7143758442710124e-05, - "loss": 65.1874, - "step": 59570 - }, - { - "epoch": 0.24071073906034737, - "grad_norm": 291.1817321777344, - "learning_rate": 4.7142137995917336e-05, - "loss": 57.6154, - "step": 59580 - }, - { - "epoch": 0.240751140325715, - "grad_norm": 353.8094177246094, - "learning_rate": 4.714051711745217e-05, - "loss": 62.8998, - "step": 59590 - }, - { - "epoch": 0.24079154159108263, - "grad_norm": 667.8562622070312, - "learning_rate": 4.713889580734623e-05, - "loss": 61.3621, - "step": 59600 - }, - { - "epoch": 0.24083194285645027, - "grad_norm": 600.4000854492188, - "learning_rate": 4.713727406563111e-05, - "loss": 101.2895, - "step": 59610 - }, - { - "epoch": 0.2408723441218179, - "grad_norm": 1322.2716064453125, - "learning_rate": 4.713565189233844e-05, - "loss": 73.4781, - "step": 59620 - }, - { - "epoch": 0.24091274538718552, - "grad_norm": 378.68841552734375, - "learning_rate": 4.7134029287499834e-05, - "loss": 68.5723, - "step": 59630 - }, - { - "epoch": 0.24095314665255316, - "grad_norm": 1208.138916015625, - "learning_rate": 4.7132406251146935e-05, - "loss": 66.9278, - "step": 59640 - }, - { - "epoch": 0.2409935479179208, - "grad_norm": 402.3450927734375, - "learning_rate": 4.713078278331138e-05, - "loss": 84.2316, - "step": 59650 - }, - { - "epoch": 0.2410339491832884, - "grad_norm": 785.4451293945312, - "learning_rate": 4.712915888402483e-05, - "loss": 82.4133, - "step": 59660 - }, - { - "epoch": 0.24107435044865605, - "grad_norm": 485.10504150390625, - "learning_rate": 4.7127534553318925e-05, - "loss": 62.9998, - "step": 59670 - }, - { - "epoch": 0.2411147517140237, - "grad_norm": 791.6461791992188, - "learning_rate": 4.712590979122534e-05, - "loss": 59.8595, - "step": 59680 - }, - { - "epoch": 0.2411551529793913, - "grad_norm": 417.98040771484375, - "learning_rate": 4.712428459777576e-05, - "loss": 71.6051, - "step": 59690 - }, - { - "epoch": 0.24119555424475894, - "grad_norm": 866.4073486328125, - "learning_rate": 4.712265897300186e-05, - "loss": 50.0575, - "step": 59700 - }, - { - "epoch": 0.24123595551012658, - "grad_norm": 1018.1526489257812, - "learning_rate": 4.712103291693533e-05, - "loss": 88.681, - "step": 59710 - }, - { - "epoch": 0.24127635677549422, - "grad_norm": 2852.052978515625, - "learning_rate": 4.7119406429607885e-05, - "loss": 85.578, - "step": 59720 - }, - { - "epoch": 0.24131675804086183, - "grad_norm": 682.5413208007812, - "learning_rate": 4.711777951105121e-05, - "loss": 73.2862, - "step": 59730 - }, - { - "epoch": 0.24135715930622947, - "grad_norm": 1790.8092041015625, - "learning_rate": 4.7116152161297045e-05, - "loss": 67.5014, - "step": 59740 - }, - { - "epoch": 0.24139756057159711, - "grad_norm": 671.518310546875, - "learning_rate": 4.71145243803771e-05, - "loss": 67.0482, - "step": 59750 - }, - { - "epoch": 0.24143796183696473, - "grad_norm": 248.55958557128906, - "learning_rate": 4.711289616832312e-05, - "loss": 52.1543, - "step": 59760 - }, - { - "epoch": 0.24147836310233237, - "grad_norm": 676.2057495117188, - "learning_rate": 4.7111267525166845e-05, - "loss": 89.2593, - "step": 59770 - }, - { - "epoch": 0.2415187643677, - "grad_norm": 846.4784545898438, - "learning_rate": 4.710963845094003e-05, - "loss": 94.6082, - "step": 59780 - }, - { - "epoch": 0.24155916563306762, - "grad_norm": 735.9619750976562, - "learning_rate": 4.710800894567443e-05, - "loss": 85.0373, - "step": 59790 - }, - { - "epoch": 0.24159956689843526, - "grad_norm": 0.0, - "learning_rate": 4.710637900940181e-05, - "loss": 56.9569, - "step": 59800 - }, - { - "epoch": 0.2416399681638029, - "grad_norm": 1803.2908935546875, - "learning_rate": 4.7104748642153954e-05, - "loss": 76.5792, - "step": 59810 - }, - { - "epoch": 0.2416803694291705, - "grad_norm": 886.1052856445312, - "learning_rate": 4.710311784396264e-05, - "loss": 44.95, - "step": 59820 - }, - { - "epoch": 0.24172077069453815, - "grad_norm": 2984.022705078125, - "learning_rate": 4.710148661485966e-05, - "loss": 104.791, - "step": 59830 - }, - { - "epoch": 0.2417611719599058, - "grad_norm": 1253.9434814453125, - "learning_rate": 4.709985495487682e-05, - "loss": 71.7768, - "step": 59840 - }, - { - "epoch": 0.2418015732252734, - "grad_norm": 785.1651000976562, - "learning_rate": 4.7098222864045945e-05, - "loss": 81.442, - "step": 59850 - }, - { - "epoch": 0.24184197449064104, - "grad_norm": 1157.2420654296875, - "learning_rate": 4.709659034239883e-05, - "loss": 91.4549, - "step": 59860 - }, - { - "epoch": 0.24188237575600868, - "grad_norm": 2114.29833984375, - "learning_rate": 4.7094957389967306e-05, - "loss": 82.0255, - "step": 59870 - }, - { - "epoch": 0.24192277702137632, - "grad_norm": 1235.954345703125, - "learning_rate": 4.7093324006783214e-05, - "loss": 95.1471, - "step": 59880 - }, - { - "epoch": 0.24196317828674394, - "grad_norm": 990.7355346679688, - "learning_rate": 4.709169019287839e-05, - "loss": 83.1672, - "step": 59890 - }, - { - "epoch": 0.24200357955211158, - "grad_norm": 790.0713500976562, - "learning_rate": 4.7090055948284706e-05, - "loss": 75.6788, - "step": 59900 - }, - { - "epoch": 0.24204398081747922, - "grad_norm": 613.9937133789062, - "learning_rate": 4.7088421273034e-05, - "loss": 74.3221, - "step": 59910 - }, - { - "epoch": 0.24208438208284683, - "grad_norm": 1069.763427734375, - "learning_rate": 4.708678616715815e-05, - "loss": 94.9927, - "step": 59920 - }, - { - "epoch": 0.24212478334821447, - "grad_norm": 1685.7093505859375, - "learning_rate": 4.7085150630689034e-05, - "loss": 79.8483, - "step": 59930 - }, - { - "epoch": 0.2421651846135821, - "grad_norm": 1826.313720703125, - "learning_rate": 4.7083514663658536e-05, - "loss": 110.4784, - "step": 59940 - }, - { - "epoch": 0.24220558587894972, - "grad_norm": 2508.780029296875, - "learning_rate": 4.7081878266098545e-05, - "loss": 95.3383, - "step": 59950 - }, - { - "epoch": 0.24224598714431736, - "grad_norm": 659.6832275390625, - "learning_rate": 4.708024143804097e-05, - "loss": 79.6695, - "step": 59960 - }, - { - "epoch": 0.242286388409685, - "grad_norm": 570.7511596679688, - "learning_rate": 4.707860417951773e-05, - "loss": 68.3188, - "step": 59970 - }, - { - "epoch": 0.2423267896750526, - "grad_norm": 1179.564208984375, - "learning_rate": 4.707696649056073e-05, - "loss": 109.7361, - "step": 59980 - }, - { - "epoch": 0.24236719094042025, - "grad_norm": 632.1577758789062, - "learning_rate": 4.70753283712019e-05, - "loss": 86.0636, - "step": 59990 - }, - { - "epoch": 0.2424075922057879, - "grad_norm": 975.7955322265625, - "learning_rate": 4.707368982147318e-05, - "loss": 87.0066, - "step": 60000 - }, - { - "epoch": 0.2424479934711555, - "grad_norm": 3105.386962890625, - "learning_rate": 4.707205084140651e-05, - "loss": 77.5594, - "step": 60010 - }, - { - "epoch": 0.24248839473652314, - "grad_norm": 834.739013671875, - "learning_rate": 4.707041143103384e-05, - "loss": 72.7049, - "step": 60020 - }, - { - "epoch": 0.24252879600189078, - "grad_norm": 481.8167419433594, - "learning_rate": 4.706877159038715e-05, - "loss": 91.9634, - "step": 60030 - }, - { - "epoch": 0.24256919726725842, - "grad_norm": 651.197021484375, - "learning_rate": 4.706713131949839e-05, - "loss": 57.6539, - "step": 60040 - }, - { - "epoch": 0.24260959853262604, - "grad_norm": 821.2056884765625, - "learning_rate": 4.706549061839954e-05, - "loss": 67.2985, - "step": 60050 - }, - { - "epoch": 0.24264999979799368, - "grad_norm": 445.24578857421875, - "learning_rate": 4.70638494871226e-05, - "loss": 70.1479, - "step": 60060 - }, - { - "epoch": 0.24269040106336132, - "grad_norm": 902.0319213867188, - "learning_rate": 4.7062207925699544e-05, - "loss": 85.1113, - "step": 60070 - }, - { - "epoch": 0.24273080232872893, - "grad_norm": 845.628662109375, - "learning_rate": 4.7060565934162394e-05, - "loss": 86.7892, - "step": 60080 - }, - { - "epoch": 0.24277120359409657, - "grad_norm": 936.5902709960938, - "learning_rate": 4.7058923512543154e-05, - "loss": 84.2543, - "step": 60090 - }, - { - "epoch": 0.2428116048594642, - "grad_norm": 770.0076904296875, - "learning_rate": 4.7057280660873835e-05, - "loss": 77.7724, - "step": 60100 - }, - { - "epoch": 0.24285200612483182, - "grad_norm": 1131.8125, - "learning_rate": 4.705563737918648e-05, - "loss": 69.4776, - "step": 60110 - }, - { - "epoch": 0.24289240739019946, - "grad_norm": 940.4691772460938, - "learning_rate": 4.705399366751312e-05, - "loss": 88.3803, - "step": 60120 - }, - { - "epoch": 0.2429328086555671, - "grad_norm": 763.4826049804688, - "learning_rate": 4.705234952588579e-05, - "loss": 79.9678, - "step": 60130 - }, - { - "epoch": 0.2429732099209347, - "grad_norm": 712.3192749023438, - "learning_rate": 4.705070495433657e-05, - "loss": 71.1181, - "step": 60140 - }, - { - "epoch": 0.24301361118630235, - "grad_norm": 1741.696044921875, - "learning_rate": 4.704905995289749e-05, - "loss": 70.7245, - "step": 60150 - }, - { - "epoch": 0.24305401245167, - "grad_norm": 588.7029418945312, - "learning_rate": 4.7047414521600644e-05, - "loss": 57.2675, - "step": 60160 - }, - { - "epoch": 0.2430944137170376, - "grad_norm": 2938.370361328125, - "learning_rate": 4.704576866047808e-05, - "loss": 61.0899, - "step": 60170 - }, - { - "epoch": 0.24313481498240525, - "grad_norm": 936.1781005859375, - "learning_rate": 4.704412236956193e-05, - "loss": 62.6005, - "step": 60180 - }, - { - "epoch": 0.24317521624777289, - "grad_norm": 804.415771484375, - "learning_rate": 4.7042475648884254e-05, - "loss": 79.235, - "step": 60190 - }, - { - "epoch": 0.24321561751314053, - "grad_norm": 729.1420288085938, - "learning_rate": 4.704082849847718e-05, - "loss": 92.9888, - "step": 60200 - }, - { - "epoch": 0.24325601877850814, - "grad_norm": 604.2557983398438, - "learning_rate": 4.703918091837279e-05, - "loss": 55.6201, - "step": 60210 - }, - { - "epoch": 0.24329642004387578, - "grad_norm": 580.6284790039062, - "learning_rate": 4.703753290860323e-05, - "loss": 45.9183, - "step": 60220 - }, - { - "epoch": 0.24333682130924342, - "grad_norm": 361.0988464355469, - "learning_rate": 4.703588446920062e-05, - "loss": 96.7986, - "step": 60230 - }, - { - "epoch": 0.24337722257461103, - "grad_norm": 5288.78271484375, - "learning_rate": 4.70342356001971e-05, - "loss": 62.3638, - "step": 60240 - }, - { - "epoch": 0.24341762383997867, - "grad_norm": 981.030517578125, - "learning_rate": 4.70325863016248e-05, - "loss": 72.3735, - "step": 60250 - }, - { - "epoch": 0.2434580251053463, - "grad_norm": 1044.311279296875, - "learning_rate": 4.703093657351591e-05, - "loss": 101.13, - "step": 60260 - }, - { - "epoch": 0.24349842637071392, - "grad_norm": 695.3463134765625, - "learning_rate": 4.702928641590255e-05, - "loss": 61.2876, - "step": 60270 - }, - { - "epoch": 0.24353882763608156, - "grad_norm": 825.5418701171875, - "learning_rate": 4.702763582881692e-05, - "loss": 77.4805, - "step": 60280 - }, - { - "epoch": 0.2435792289014492, - "grad_norm": 568.7557983398438, - "learning_rate": 4.702598481229118e-05, - "loss": 52.6858, - "step": 60290 - }, - { - "epoch": 0.24361963016681681, - "grad_norm": 1038.3533935546875, - "learning_rate": 4.702433336635753e-05, - "loss": 72.8244, - "step": 60300 - }, - { - "epoch": 0.24366003143218445, - "grad_norm": 534.8966674804688, - "learning_rate": 4.702268149104816e-05, - "loss": 54.4095, - "step": 60310 - }, - { - "epoch": 0.2437004326975521, - "grad_norm": 683.1052856445312, - "learning_rate": 4.702102918639528e-05, - "loss": 55.7349, - "step": 60320 - }, - { - "epoch": 0.2437408339629197, - "grad_norm": 483.41552734375, - "learning_rate": 4.70193764524311e-05, - "loss": 74.4342, - "step": 60330 - }, - { - "epoch": 0.24378123522828735, - "grad_norm": 688.1597900390625, - "learning_rate": 4.701772328918784e-05, - "loss": 56.6066, - "step": 60340 - }, - { - "epoch": 0.243821636493655, - "grad_norm": 874.982177734375, - "learning_rate": 4.701606969669773e-05, - "loss": 62.7406, - "step": 60350 - }, - { - "epoch": 0.24386203775902263, - "grad_norm": 1608.3681640625, - "learning_rate": 4.7014415674993e-05, - "loss": 87.2219, - "step": 60360 - }, - { - "epoch": 0.24390243902439024, - "grad_norm": 852.6019287109375, - "learning_rate": 4.701276122410591e-05, - "loss": 65.5959, - "step": 60370 - }, - { - "epoch": 0.24394284028975788, - "grad_norm": 796.290771484375, - "learning_rate": 4.70111063440687e-05, - "loss": 64.5564, - "step": 60380 - }, - { - "epoch": 0.24398324155512552, - "grad_norm": 1192.7855224609375, - "learning_rate": 4.7009451034913645e-05, - "loss": 56.2537, - "step": 60390 - }, - { - "epoch": 0.24402364282049313, - "grad_norm": 691.683837890625, - "learning_rate": 4.7007795296673006e-05, - "loss": 58.2113, - "step": 60400 - }, - { - "epoch": 0.24406404408586077, - "grad_norm": 586.52685546875, - "learning_rate": 4.700613912937907e-05, - "loss": 47.3722, - "step": 60410 - }, - { - "epoch": 0.2441044453512284, - "grad_norm": 1319.3663330078125, - "learning_rate": 4.700448253306412e-05, - "loss": 75.1673, - "step": 60420 - }, - { - "epoch": 0.24414484661659602, - "grad_norm": 1594.365234375, - "learning_rate": 4.7002825507760465e-05, - "loss": 120.8496, - "step": 60430 - }, - { - "epoch": 0.24418524788196366, - "grad_norm": 789.883544921875, - "learning_rate": 4.700116805350039e-05, - "loss": 65.3508, - "step": 60440 - }, - { - "epoch": 0.2442256491473313, - "grad_norm": 627.7055053710938, - "learning_rate": 4.699951017031621e-05, - "loss": 80.6547, - "step": 60450 - }, - { - "epoch": 0.24426605041269892, - "grad_norm": 564.2537841796875, - "learning_rate": 4.699785185824026e-05, - "loss": 73.5011, - "step": 60460 - }, - { - "epoch": 0.24430645167806656, - "grad_norm": 589.3305053710938, - "learning_rate": 4.6996193117304864e-05, - "loss": 50.1976, - "step": 60470 - }, - { - "epoch": 0.2443468529434342, - "grad_norm": 2720.202392578125, - "learning_rate": 4.699453394754236e-05, - "loss": 65.688, - "step": 60480 - }, - { - "epoch": 0.2443872542088018, - "grad_norm": 926.1817016601562, - "learning_rate": 4.6992874348985093e-05, - "loss": 99.6495, - "step": 60490 - }, - { - "epoch": 0.24442765547416945, - "grad_norm": 845.654541015625, - "learning_rate": 4.6991214321665414e-05, - "loss": 63.7764, - "step": 60500 - }, - { - "epoch": 0.2444680567395371, - "grad_norm": 458.4206237792969, - "learning_rate": 4.698955386561569e-05, - "loss": 53.1651, - "step": 60510 - }, - { - "epoch": 0.24450845800490473, - "grad_norm": 1341.775146484375, - "learning_rate": 4.6987892980868296e-05, - "loss": 85.8457, - "step": 60520 - }, - { - "epoch": 0.24454885927027234, - "grad_norm": 1214.4681396484375, - "learning_rate": 4.6986231667455605e-05, - "loss": 59.1112, - "step": 60530 - }, - { - "epoch": 0.24458926053563998, - "grad_norm": 791.0181884765625, - "learning_rate": 4.6984569925410016e-05, - "loss": 113.4298, - "step": 60540 - }, - { - "epoch": 0.24462966180100762, - "grad_norm": 1435.430908203125, - "learning_rate": 4.6982907754763906e-05, - "loss": 106.5237, - "step": 60550 - }, - { - "epoch": 0.24467006306637523, - "grad_norm": 671.5120239257812, - "learning_rate": 4.69812451555497e-05, - "loss": 75.014, - "step": 60560 - }, - { - "epoch": 0.24471046433174287, - "grad_norm": 419.3539733886719, - "learning_rate": 4.697958212779981e-05, - "loss": 40.2964, - "step": 60570 - }, - { - "epoch": 0.2447508655971105, - "grad_norm": 1304.94189453125, - "learning_rate": 4.697791867154663e-05, - "loss": 52.5812, - "step": 60580 - }, - { - "epoch": 0.24479126686247812, - "grad_norm": 577.18505859375, - "learning_rate": 4.697625478682263e-05, - "loss": 82.3342, - "step": 60590 - }, - { - "epoch": 0.24483166812784576, - "grad_norm": 455.48480224609375, - "learning_rate": 4.6974590473660216e-05, - "loss": 55.9024, - "step": 60600 - }, - { - "epoch": 0.2448720693932134, - "grad_norm": 592.9940185546875, - "learning_rate": 4.697292573209185e-05, - "loss": 65.5786, - "step": 60610 - }, - { - "epoch": 0.24491247065858102, - "grad_norm": 368.27581787109375, - "learning_rate": 4.697126056214999e-05, - "loss": 34.5167, - "step": 60620 - }, - { - "epoch": 0.24495287192394866, - "grad_norm": 400.2873840332031, - "learning_rate": 4.6969594963867084e-05, - "loss": 104.06, - "step": 60630 - }, - { - "epoch": 0.2449932731893163, - "grad_norm": 744.5037231445312, - "learning_rate": 4.696792893727562e-05, - "loss": 87.9777, - "step": 60640 - }, - { - "epoch": 0.2450336744546839, - "grad_norm": 654.1832885742188, - "learning_rate": 4.696626248240807e-05, - "loss": 54.0729, - "step": 60650 - }, - { - "epoch": 0.24507407572005155, - "grad_norm": 339.2250671386719, - "learning_rate": 4.6964595599296926e-05, - "loss": 40.1769, - "step": 60660 - }, - { - "epoch": 0.2451144769854192, - "grad_norm": 632.5075073242188, - "learning_rate": 4.696292828797468e-05, - "loss": 79.759, - "step": 60670 - }, - { - "epoch": 0.24515487825078683, - "grad_norm": 884.93310546875, - "learning_rate": 4.696126054847385e-05, - "loss": 73.9001, - "step": 60680 - }, - { - "epoch": 0.24519527951615444, - "grad_norm": 1115.2449951171875, - "learning_rate": 4.695959238082692e-05, - "loss": 70.6572, - "step": 60690 - }, - { - "epoch": 0.24523568078152208, - "grad_norm": 1520.8741455078125, - "learning_rate": 4.6957923785066445e-05, - "loss": 90.2716, - "step": 60700 - }, - { - "epoch": 0.24527608204688972, - "grad_norm": 1127.73876953125, - "learning_rate": 4.6956254761224936e-05, - "loss": 70.0457, - "step": 60710 - }, - { - "epoch": 0.24531648331225733, - "grad_norm": 1073.5108642578125, - "learning_rate": 4.695458530933494e-05, - "loss": 100.9174, - "step": 60720 - }, - { - "epoch": 0.24535688457762497, - "grad_norm": 773.4358520507812, - "learning_rate": 4.6952915429429e-05, - "loss": 82.9172, - "step": 60730 - }, - { - "epoch": 0.2453972858429926, - "grad_norm": 1714.5184326171875, - "learning_rate": 4.6951245121539675e-05, - "loss": 74.2328, - "step": 60740 - }, - { - "epoch": 0.24543768710836023, - "grad_norm": 1113.5059814453125, - "learning_rate": 4.694957438569951e-05, - "loss": 68.1501, - "step": 60750 - }, - { - "epoch": 0.24547808837372787, - "grad_norm": 1123.2255859375, - "learning_rate": 4.694790322194111e-05, - "loss": 57.4253, - "step": 60760 - }, - { - "epoch": 0.2455184896390955, - "grad_norm": 1276.2410888671875, - "learning_rate": 4.6946231630297036e-05, - "loss": 110.3862, - "step": 60770 - }, - { - "epoch": 0.24555889090446312, - "grad_norm": 695.6715087890625, - "learning_rate": 4.694455961079987e-05, - "loss": 70.8457, - "step": 60780 - }, - { - "epoch": 0.24559929216983076, - "grad_norm": 1895.229736328125, - "learning_rate": 4.694288716348221e-05, - "loss": 62.3386, - "step": 60790 - }, - { - "epoch": 0.2456396934351984, - "grad_norm": 894.3408813476562, - "learning_rate": 4.694121428837668e-05, - "loss": 73.6542, - "step": 60800 - }, - { - "epoch": 0.245680094700566, - "grad_norm": 773.7852172851562, - "learning_rate": 4.693954098551587e-05, - "loss": 57.4668, - "step": 60810 - }, - { - "epoch": 0.24572049596593365, - "grad_norm": 873.3363037109375, - "learning_rate": 4.693786725493242e-05, - "loss": 88.7946, - "step": 60820 - }, - { - "epoch": 0.2457608972313013, - "grad_norm": 787.7183227539062, - "learning_rate": 4.6936193096658955e-05, - "loss": 95.2631, - "step": 60830 - }, - { - "epoch": 0.2458012984966689, - "grad_norm": 1632.471923828125, - "learning_rate": 4.693451851072811e-05, - "loss": 76.3312, - "step": 60840 - }, - { - "epoch": 0.24584169976203654, - "grad_norm": 624.8790283203125, - "learning_rate": 4.693284349717254e-05, - "loss": 60.229, - "step": 60850 - }, - { - "epoch": 0.24588210102740418, - "grad_norm": 776.8783569335938, - "learning_rate": 4.693116805602489e-05, - "loss": 67.0748, - "step": 60860 - }, - { - "epoch": 0.24592250229277182, - "grad_norm": 357.4325866699219, - "learning_rate": 4.692949218731782e-05, - "loss": 75.164, - "step": 60870 - }, - { - "epoch": 0.24596290355813943, - "grad_norm": 934.4722290039062, - "learning_rate": 4.692781589108402e-05, - "loss": 104.851, - "step": 60880 - }, - { - "epoch": 0.24600330482350707, - "grad_norm": 788.913330078125, - "learning_rate": 4.692613916735615e-05, - "loss": 76.9564, - "step": 60890 - }, - { - "epoch": 0.24604370608887471, - "grad_norm": 1184.293701171875, - "learning_rate": 4.692446201616692e-05, - "loss": 104.7225, - "step": 60900 - }, - { - "epoch": 0.24608410735424233, - "grad_norm": 768.3632202148438, - "learning_rate": 4.692278443754901e-05, - "loss": 96.9056, - "step": 60910 - }, - { - "epoch": 0.24612450861960997, - "grad_norm": 1289.7335205078125, - "learning_rate": 4.6921106431535135e-05, - "loss": 51.5294, - "step": 60920 - }, - { - "epoch": 0.2461649098849776, - "grad_norm": 537.193359375, - "learning_rate": 4.6919427998158e-05, - "loss": 87.4695, - "step": 60930 - }, - { - "epoch": 0.24620531115034522, - "grad_norm": 3655.863525390625, - "learning_rate": 4.691774913745033e-05, - "loss": 92.1189, - "step": 60940 - }, - { - "epoch": 0.24624571241571286, - "grad_norm": 465.6102600097656, - "learning_rate": 4.691606984944486e-05, - "loss": 79.8114, - "step": 60950 - }, - { - "epoch": 0.2462861136810805, - "grad_norm": 531.8803100585938, - "learning_rate": 4.691439013417433e-05, - "loss": 62.0595, - "step": 60960 - }, - { - "epoch": 0.2463265149464481, - "grad_norm": 1052.3104248046875, - "learning_rate": 4.691270999167147e-05, - "loss": 67.6164, - "step": 60970 - }, - { - "epoch": 0.24636691621181575, - "grad_norm": 624.75390625, - "learning_rate": 4.691102942196906e-05, - "loss": 67.1735, - "step": 60980 - }, - { - "epoch": 0.2464073174771834, - "grad_norm": 626.8027954101562, - "learning_rate": 4.6909348425099835e-05, - "loss": 64.8553, - "step": 60990 - }, - { - "epoch": 0.246447718742551, - "grad_norm": 250.1268768310547, - "learning_rate": 4.690766700109659e-05, - "loss": 63.7221, - "step": 61000 - }, - { - "epoch": 0.24648812000791864, - "grad_norm": 978.392333984375, - "learning_rate": 4.6905985149992107e-05, - "loss": 138.7521, - "step": 61010 - }, - { - "epoch": 0.24652852127328628, - "grad_norm": 770.6021728515625, - "learning_rate": 4.690430287181915e-05, - "loss": 104.2015, - "step": 61020 - }, - { - "epoch": 0.24656892253865392, - "grad_norm": 1836.369873046875, - "learning_rate": 4.690262016661054e-05, - "loss": 64.2337, - "step": 61030 - }, - { - "epoch": 0.24660932380402154, - "grad_norm": 1369.9937744140625, - "learning_rate": 4.690093703439907e-05, - "loss": 114.4457, - "step": 61040 - }, - { - "epoch": 0.24664972506938918, - "grad_norm": 756.3161010742188, - "learning_rate": 4.689925347521757e-05, - "loss": 49.0843, - "step": 61050 - }, - { - "epoch": 0.24669012633475682, - "grad_norm": 446.1201171875, - "learning_rate": 4.689756948909884e-05, - "loss": 59.4647, - "step": 61060 - }, - { - "epoch": 0.24673052760012443, - "grad_norm": 1100.18994140625, - "learning_rate": 4.689588507607572e-05, - "loss": 55.1963, - "step": 61070 - }, - { - "epoch": 0.24677092886549207, - "grad_norm": 863.7999877929688, - "learning_rate": 4.689420023618104e-05, - "loss": 67.0081, - "step": 61080 - }, - { - "epoch": 0.2468113301308597, - "grad_norm": 696.3552856445312, - "learning_rate": 4.6892514969447664e-05, - "loss": 80.1907, - "step": 61090 - }, - { - "epoch": 0.24685173139622732, - "grad_norm": 695.3787231445312, - "learning_rate": 4.6890829275908434e-05, - "loss": 70.8243, - "step": 61100 - }, - { - "epoch": 0.24689213266159496, - "grad_norm": 893.7193603515625, - "learning_rate": 4.6889143155596214e-05, - "loss": 99.1561, - "step": 61110 - }, - { - "epoch": 0.2469325339269626, - "grad_norm": 1360.45263671875, - "learning_rate": 4.688745660854388e-05, - "loss": 103.6161, - "step": 61120 - }, - { - "epoch": 0.2469729351923302, - "grad_norm": 726.0472412109375, - "learning_rate": 4.688576963478432e-05, - "loss": 77.2796, - "step": 61130 - }, - { - "epoch": 0.24701333645769785, - "grad_norm": 120.09856414794922, - "learning_rate": 4.68840822343504e-05, - "loss": 77.299, - "step": 61140 - }, - { - "epoch": 0.2470537377230655, - "grad_norm": 1099.0858154296875, - "learning_rate": 4.6882394407275044e-05, - "loss": 103.8713, - "step": 61150 - }, - { - "epoch": 0.2470941389884331, - "grad_norm": 704.1102294921875, - "learning_rate": 4.688070615359114e-05, - "loss": 100.1313, - "step": 61160 - }, - { - "epoch": 0.24713454025380074, - "grad_norm": 429.2733459472656, - "learning_rate": 4.6879017473331595e-05, - "loss": 105.0034, - "step": 61170 - }, - { - "epoch": 0.24717494151916838, - "grad_norm": 715.7286376953125, - "learning_rate": 4.6877328366529346e-05, - "loss": 57.8755, - "step": 61180 - }, - { - "epoch": 0.24721534278453602, - "grad_norm": 708.1912841796875, - "learning_rate": 4.687563883321732e-05, - "loss": 67.7243, - "step": 61190 - }, - { - "epoch": 0.24725574404990364, - "grad_norm": 885.6137084960938, - "learning_rate": 4.687394887342845e-05, - "loss": 63.6241, - "step": 61200 - }, - { - "epoch": 0.24729614531527128, - "grad_norm": 549.2717895507812, - "learning_rate": 4.687225848719568e-05, - "loss": 66.4841, - "step": 61210 - }, - { - "epoch": 0.24733654658063892, - "grad_norm": 390.741455078125, - "learning_rate": 4.687056767455198e-05, - "loss": 59.3939, - "step": 61220 - }, - { - "epoch": 0.24737694784600653, - "grad_norm": 770.025390625, - "learning_rate": 4.6868876435530296e-05, - "loss": 78.1586, - "step": 61230 - }, - { - "epoch": 0.24741734911137417, - "grad_norm": 565.1214599609375, - "learning_rate": 4.686718477016361e-05, - "loss": 68.8117, - "step": 61240 - }, - { - "epoch": 0.2474577503767418, - "grad_norm": 944.383544921875, - "learning_rate": 4.6865492678484895e-05, - "loss": 72.7522, - "step": 61250 - }, - { - "epoch": 0.24749815164210942, - "grad_norm": 2328.01513671875, - "learning_rate": 4.6863800160527147e-05, - "loss": 86.5814, - "step": 61260 - }, - { - "epoch": 0.24753855290747706, - "grad_norm": 648.8901977539062, - "learning_rate": 4.686210721632336e-05, - "loss": 85.4312, - "step": 61270 - }, - { - "epoch": 0.2475789541728447, - "grad_norm": 1613.290283203125, - "learning_rate": 4.6860413845906534e-05, - "loss": 80.4138, - "step": 61280 - }, - { - "epoch": 0.2476193554382123, - "grad_norm": 515.4049682617188, - "learning_rate": 4.685872004930969e-05, - "loss": 48.0804, - "step": 61290 - }, - { - "epoch": 0.24765975670357995, - "grad_norm": 258.1792297363281, - "learning_rate": 4.685702582656584e-05, - "loss": 48.0407, - "step": 61300 - }, - { - "epoch": 0.2477001579689476, - "grad_norm": 1516.9779052734375, - "learning_rate": 4.685533117770803e-05, - "loss": 76.3524, - "step": 61310 - }, - { - "epoch": 0.2477405592343152, - "grad_norm": 1197.6373291015625, - "learning_rate": 4.6853636102769274e-05, - "loss": 95.2485, - "step": 61320 - }, - { - "epoch": 0.24778096049968285, - "grad_norm": 433.1604309082031, - "learning_rate": 4.6851940601782635e-05, - "loss": 83.6786, - "step": 61330 - }, - { - "epoch": 0.24782136176505049, - "grad_norm": 622.607666015625, - "learning_rate": 4.685024467478116e-05, - "loss": 75.2844, - "step": 61340 - }, - { - "epoch": 0.24786176303041813, - "grad_norm": 710.9769287109375, - "learning_rate": 4.684854832179792e-05, - "loss": 106.4003, - "step": 61350 - }, - { - "epoch": 0.24790216429578574, - "grad_norm": 1580.35009765625, - "learning_rate": 4.684685154286599e-05, - "loss": 58.8193, - "step": 61360 - }, - { - "epoch": 0.24794256556115338, - "grad_norm": 917.3915405273438, - "learning_rate": 4.684515433801843e-05, - "loss": 78.9511, - "step": 61370 - }, - { - "epoch": 0.24798296682652102, - "grad_norm": 1568.43408203125, - "learning_rate": 4.684345670728834e-05, - "loss": 82.2572, - "step": 61380 - }, - { - "epoch": 0.24802336809188863, - "grad_norm": 768.3124389648438, - "learning_rate": 4.6841758650708824e-05, - "loss": 83.1386, - "step": 61390 - }, - { - "epoch": 0.24806376935725627, - "grad_norm": 972.3724975585938, - "learning_rate": 4.684006016831297e-05, - "loss": 111.0157, - "step": 61400 - }, - { - "epoch": 0.2481041706226239, - "grad_norm": 710.788330078125, - "learning_rate": 4.68383612601339e-05, - "loss": 70.9548, - "step": 61410 - }, - { - "epoch": 0.24814457188799152, - "grad_norm": 1372.3570556640625, - "learning_rate": 4.6836661926204736e-05, - "loss": 57.5385, - "step": 61420 - }, - { - "epoch": 0.24818497315335916, - "grad_norm": 2235.90380859375, - "learning_rate": 4.6834962166558605e-05, - "loss": 86.6557, - "step": 61430 - }, - { - "epoch": 0.2482253744187268, - "grad_norm": 1524.7860107421875, - "learning_rate": 4.6833261981228646e-05, - "loss": 86.687, - "step": 61440 - }, - { - "epoch": 0.24826577568409441, - "grad_norm": 514.3629760742188, - "learning_rate": 4.683156137024801e-05, - "loss": 73.8575, - "step": 61450 - }, - { - "epoch": 0.24830617694946205, - "grad_norm": 830.9341430664062, - "learning_rate": 4.6829860333649836e-05, - "loss": 63.1829, - "step": 61460 - }, - { - "epoch": 0.2483465782148297, - "grad_norm": 0.0, - "learning_rate": 4.68281588714673e-05, - "loss": 49.3014, - "step": 61470 - }, - { - "epoch": 0.2483869794801973, - "grad_norm": 1122.3907470703125, - "learning_rate": 4.682645698373357e-05, - "loss": 56.4632, - "step": 61480 - }, - { - "epoch": 0.24842738074556495, - "grad_norm": 838.9703369140625, - "learning_rate": 4.682475467048182e-05, - "loss": 51.4873, - "step": 61490 - }, - { - "epoch": 0.2484677820109326, - "grad_norm": 966.208251953125, - "learning_rate": 4.682305193174524e-05, - "loss": 92.0763, - "step": 61500 - }, - { - "epoch": 0.24850818327630023, - "grad_norm": 623.809326171875, - "learning_rate": 4.682134876755704e-05, - "loss": 87.1039, - "step": 61510 - }, - { - "epoch": 0.24854858454166784, - "grad_norm": 769.28759765625, - "learning_rate": 4.68196451779504e-05, - "loss": 98.8148, - "step": 61520 - }, - { - "epoch": 0.24858898580703548, - "grad_norm": 437.263671875, - "learning_rate": 4.6817941162958544e-05, - "loss": 78.6662, - "step": 61530 - }, - { - "epoch": 0.24862938707240312, - "grad_norm": 566.79638671875, - "learning_rate": 4.681623672261469e-05, - "loss": 67.6416, - "step": 61540 - }, - { - "epoch": 0.24866978833777073, - "grad_norm": 378.28082275390625, - "learning_rate": 4.6814531856952084e-05, - "loss": 51.0772, - "step": 61550 - }, - { - "epoch": 0.24871018960313837, - "grad_norm": 2680.483154296875, - "learning_rate": 4.6812826566003934e-05, - "loss": 102.0762, - "step": 61560 - }, - { - "epoch": 0.248750590868506, - "grad_norm": 787.8114624023438, - "learning_rate": 4.68111208498035e-05, - "loss": 40.7656, - "step": 61570 - }, - { - "epoch": 0.24879099213387362, - "grad_norm": 701.6543579101562, - "learning_rate": 4.6809414708384046e-05, - "loss": 71.9813, - "step": 61580 - }, - { - "epoch": 0.24883139339924126, - "grad_norm": 643.1692504882812, - "learning_rate": 4.680770814177882e-05, - "loss": 84.7958, - "step": 61590 - }, - { - "epoch": 0.2488717946646089, - "grad_norm": 857.9775390625, - "learning_rate": 4.68060011500211e-05, - "loss": 78.1685, - "step": 61600 - }, - { - "epoch": 0.24891219592997652, - "grad_norm": 897.5280151367188, - "learning_rate": 4.680429373314415e-05, - "loss": 57.9258, - "step": 61610 - }, - { - "epoch": 0.24895259719534416, - "grad_norm": 756.8377685546875, - "learning_rate": 4.680258589118128e-05, - "loss": 90.2208, - "step": 61620 - }, - { - "epoch": 0.2489929984607118, - "grad_norm": 650.5374145507812, - "learning_rate": 4.680087762416576e-05, - "loss": 81.8518, - "step": 61630 - }, - { - "epoch": 0.2490333997260794, - "grad_norm": 675.606201171875, - "learning_rate": 4.6799168932130915e-05, - "loss": 72.3285, - "step": 61640 - }, - { - "epoch": 0.24907380099144705, - "grad_norm": 932.6596069335938, - "learning_rate": 4.679745981511005e-05, - "loss": 51.2554, - "step": 61650 - }, - { - "epoch": 0.2491142022568147, - "grad_norm": 891.9270629882812, - "learning_rate": 4.679575027313649e-05, - "loss": 54.2277, - "step": 61660 - }, - { - "epoch": 0.24915460352218233, - "grad_norm": 927.309814453125, - "learning_rate": 4.6794040306243545e-05, - "loss": 68.7457, - "step": 61670 - }, - { - "epoch": 0.24919500478754994, - "grad_norm": 2662.16845703125, - "learning_rate": 4.679232991446456e-05, - "loss": 63.2462, - "step": 61680 - }, - { - "epoch": 0.24923540605291758, - "grad_norm": 604.02978515625, - "learning_rate": 4.67906190978329e-05, - "loss": 37.8021, - "step": 61690 - }, - { - "epoch": 0.24927580731828522, - "grad_norm": 291.47430419921875, - "learning_rate": 4.6788907856381895e-05, - "loss": 131.591, - "step": 61700 - }, - { - "epoch": 0.24931620858365283, - "grad_norm": 640.17529296875, - "learning_rate": 4.678719619014491e-05, - "loss": 86.4894, - "step": 61710 - }, - { - "epoch": 0.24935660984902047, - "grad_norm": 745.0997924804688, - "learning_rate": 4.678548409915532e-05, - "loss": 62.8037, - "step": 61720 - }, - { - "epoch": 0.2493970111143881, - "grad_norm": 576.8112182617188, - "learning_rate": 4.67837715834465e-05, - "loss": 96.3769, - "step": 61730 - }, - { - "epoch": 0.24943741237975572, - "grad_norm": 687.8386840820312, - "learning_rate": 4.678205864305184e-05, - "loss": 78.1074, - "step": 61740 - }, - { - "epoch": 0.24947781364512336, - "grad_norm": 542.1369018554688, - "learning_rate": 4.678034527800474e-05, - "loss": 70.0832, - "step": 61750 - }, - { - "epoch": 0.249518214910491, - "grad_norm": 945.9921875, - "learning_rate": 4.677863148833859e-05, - "loss": 87.9454, - "step": 61760 - }, - { - "epoch": 0.24955861617585862, - "grad_norm": 443.54583740234375, - "learning_rate": 4.6776917274086806e-05, - "loss": 63.2674, - "step": 61770 - }, - { - "epoch": 0.24959901744122626, - "grad_norm": 485.80865478515625, - "learning_rate": 4.67752026352828e-05, - "loss": 86.9183, - "step": 61780 - }, - { - "epoch": 0.2496394187065939, - "grad_norm": 514.6878051757812, - "learning_rate": 4.677348757196002e-05, - "loss": 95.4372, - "step": 61790 - }, - { - "epoch": 0.2496798199719615, - "grad_norm": 1996.046142578125, - "learning_rate": 4.6771772084151885e-05, - "loss": 93.6957, - "step": 61800 - }, - { - "epoch": 0.24972022123732915, - "grad_norm": 893.337646484375, - "learning_rate": 4.6770056171891846e-05, - "loss": 75.4777, - "step": 61810 - }, - { - "epoch": 0.2497606225026968, - "grad_norm": 999.063232421875, - "learning_rate": 4.676833983521335e-05, - "loss": 133.2376, - "step": 61820 - }, - { - "epoch": 0.24980102376806443, - "grad_norm": 1203.1588134765625, - "learning_rate": 4.676662307414987e-05, - "loss": 92.8217, - "step": 61830 - }, - { - "epoch": 0.24984142503343204, - "grad_norm": 1015.3056030273438, - "learning_rate": 4.676490588873486e-05, - "loss": 71.3694, - "step": 61840 - }, - { - "epoch": 0.24988182629879968, - "grad_norm": 504.0675354003906, - "learning_rate": 4.6763188279001804e-05, - "loss": 37.9027, - "step": 61850 - }, - { - "epoch": 0.24992222756416732, - "grad_norm": 1208.43310546875, - "learning_rate": 4.6761470244984196e-05, - "loss": 79.1104, - "step": 61860 - }, - { - "epoch": 0.24996262882953493, - "grad_norm": 1380.7022705078125, - "learning_rate": 4.675975178671551e-05, - "loss": 59.1448, - "step": 61870 - }, - { - "epoch": 0.25000303009490255, - "grad_norm": 348.9476623535156, - "learning_rate": 4.675803290422927e-05, - "loss": 85.426, - "step": 61880 - }, - { - "epoch": 0.2500434313602702, - "grad_norm": 709.4434204101562, - "learning_rate": 4.6756313597558977e-05, - "loss": 52.583, - "step": 61890 - }, - { - "epoch": 0.2500838326256378, - "grad_norm": 490.9687805175781, - "learning_rate": 4.675459386673815e-05, - "loss": 84.7173, - "step": 61900 - }, - { - "epoch": 0.25012423389100547, - "grad_norm": 935.8824462890625, - "learning_rate": 4.6752873711800306e-05, - "loss": 61.2777, - "step": 61910 - }, - { - "epoch": 0.2501646351563731, - "grad_norm": 1041.913818359375, - "learning_rate": 4.6751153132779e-05, - "loss": 90.0399, - "step": 61920 - }, - { - "epoch": 0.25020503642174075, - "grad_norm": 516.0934448242188, - "learning_rate": 4.674943212970776e-05, - "loss": 53.4762, - "step": 61930 - }, - { - "epoch": 0.2502454376871084, - "grad_norm": 1382.7197265625, - "learning_rate": 4.674771070262014e-05, - "loss": 128.2385, - "step": 61940 - }, - { - "epoch": 0.25028583895247597, - "grad_norm": 759.5361938476562, - "learning_rate": 4.67459888515497e-05, - "loss": 56.0513, - "step": 61950 - }, - { - "epoch": 0.2503262402178436, - "grad_norm": 1789.28466796875, - "learning_rate": 4.674426657653003e-05, - "loss": 59.7396, - "step": 61960 - }, - { - "epoch": 0.25036664148321125, - "grad_norm": 456.0893859863281, - "learning_rate": 4.6742543877594675e-05, - "loss": 42.1051, - "step": 61970 - }, - { - "epoch": 0.2504070427485789, - "grad_norm": 1111.8485107421875, - "learning_rate": 4.6740820754777235e-05, - "loss": 62.1872, - "step": 61980 - }, - { - "epoch": 0.25044744401394653, - "grad_norm": 927.1618041992188, - "learning_rate": 4.6739097208111306e-05, - "loss": 67.374, - "step": 61990 - }, - { - "epoch": 0.25048784527931417, - "grad_norm": 898.595458984375, - "learning_rate": 4.6737373237630476e-05, - "loss": 97.9774, - "step": 62000 - }, - { - "epoch": 0.25052824654468175, - "grad_norm": 0.0, - "learning_rate": 4.6735648843368376e-05, - "loss": 87.7824, - "step": 62010 - }, - { - "epoch": 0.2505686478100494, - "grad_norm": 850.822265625, - "learning_rate": 4.6733924025358597e-05, - "loss": 58.0294, - "step": 62020 - }, - { - "epoch": 0.25060904907541703, - "grad_norm": 1727.8907470703125, - "learning_rate": 4.673219878363479e-05, - "loss": 71.4364, - "step": 62030 - }, - { - "epoch": 0.2506494503407847, - "grad_norm": 878.0247802734375, - "learning_rate": 4.6730473118230575e-05, - "loss": 90.9596, - "step": 62040 - }, - { - "epoch": 0.2506898516061523, - "grad_norm": 1600.9791259765625, - "learning_rate": 4.67287470291796e-05, - "loss": 64.3531, - "step": 62050 - }, - { - "epoch": 0.25073025287151995, - "grad_norm": 617.0396118164062, - "learning_rate": 4.672702051651552e-05, - "loss": 60.0307, - "step": 62060 - }, - { - "epoch": 0.2507706541368876, - "grad_norm": 583.3132934570312, - "learning_rate": 4.672529358027198e-05, - "loss": 82.0429, - "step": 62070 - }, - { - "epoch": 0.2508110554022552, - "grad_norm": 579.264892578125, - "learning_rate": 4.6723566220482664e-05, - "loss": 71.0199, - "step": 62080 - }, - { - "epoch": 0.2508514566676228, - "grad_norm": 723.0690307617188, - "learning_rate": 4.672183843718123e-05, - "loss": 72.1286, - "step": 62090 - }, - { - "epoch": 0.25089185793299046, - "grad_norm": 543.1493530273438, - "learning_rate": 4.672011023040138e-05, - "loss": 64.0717, - "step": 62100 - }, - { - "epoch": 0.2509322591983581, - "grad_norm": 724.9541625976562, - "learning_rate": 4.671838160017681e-05, - "loss": 63.9457, - "step": 62110 - }, - { - "epoch": 0.25097266046372574, - "grad_norm": 565.6871948242188, - "learning_rate": 4.6716652546541194e-05, - "loss": 52.1139, - "step": 62120 - }, - { - "epoch": 0.2510130617290934, - "grad_norm": 1354.7672119140625, - "learning_rate": 4.671492306952826e-05, - "loss": 76.9495, - "step": 62130 - }, - { - "epoch": 0.25105346299446096, - "grad_norm": 636.530029296875, - "learning_rate": 4.6713193169171724e-05, - "loss": 55.6664, - "step": 62140 - }, - { - "epoch": 0.2510938642598286, - "grad_norm": 1726.2562255859375, - "learning_rate": 4.6711462845505304e-05, - "loss": 81.8684, - "step": 62150 - }, - { - "epoch": 0.25113426552519624, - "grad_norm": 742.2718505859375, - "learning_rate": 4.6709732098562745e-05, - "loss": 68.1843, - "step": 62160 - }, - { - "epoch": 0.2511746667905639, - "grad_norm": 725.1959228515625, - "learning_rate": 4.670800092837777e-05, - "loss": 62.3066, - "step": 62170 - }, - { - "epoch": 0.2512150680559315, - "grad_norm": 1269.651123046875, - "learning_rate": 4.670626933498415e-05, - "loss": 75.0705, - "step": 62180 - }, - { - "epoch": 0.25125546932129916, - "grad_norm": 1048.4793701171875, - "learning_rate": 4.670453731841563e-05, - "loss": 67.2275, - "step": 62190 - }, - { - "epoch": 0.25129587058666675, - "grad_norm": 1658.6903076171875, - "learning_rate": 4.670280487870598e-05, - "loss": 87.6034, - "step": 62200 - }, - { - "epoch": 0.2513362718520344, - "grad_norm": 706.1924438476562, - "learning_rate": 4.670107201588898e-05, - "loss": 66.6542, - "step": 62210 - }, - { - "epoch": 0.251376673117402, - "grad_norm": 1182.62939453125, - "learning_rate": 4.669933872999841e-05, - "loss": 61.4396, - "step": 62220 - }, - { - "epoch": 0.25141707438276967, - "grad_norm": 973.4239501953125, - "learning_rate": 4.669760502106805e-05, - "loss": 49.4658, - "step": 62230 - }, - { - "epoch": 0.2514574756481373, - "grad_norm": 954.9755859375, - "learning_rate": 4.6695870889131724e-05, - "loss": 58.9961, - "step": 62240 - }, - { - "epoch": 0.25149787691350495, - "grad_norm": 983.7535400390625, - "learning_rate": 4.669413633422322e-05, - "loss": 67.7966, - "step": 62250 - }, - { - "epoch": 0.2515382781788726, - "grad_norm": 934.3507690429688, - "learning_rate": 4.669240135637635e-05, - "loss": 64.6385, - "step": 62260 - }, - { - "epoch": 0.25157867944424017, - "grad_norm": 535.4703369140625, - "learning_rate": 4.669066595562496e-05, - "loss": 99.6696, - "step": 62270 - }, - { - "epoch": 0.2516190807096078, - "grad_norm": 887.987060546875, - "learning_rate": 4.668893013200286e-05, - "loss": 66.7841, - "step": 62280 - }, - { - "epoch": 0.25165948197497545, - "grad_norm": 1214.275146484375, - "learning_rate": 4.66871938855439e-05, - "loss": 75.537, - "step": 62290 - }, - { - "epoch": 0.2516998832403431, - "grad_norm": 572.87158203125, - "learning_rate": 4.6685457216281936e-05, - "loss": 67.1349, - "step": 62300 - }, - { - "epoch": 0.25174028450571073, - "grad_norm": 507.9439697265625, - "learning_rate": 4.668372012425082e-05, - "loss": 79.3798, - "step": 62310 - }, - { - "epoch": 0.25178068577107837, - "grad_norm": 265.6028747558594, - "learning_rate": 4.6681982609484416e-05, - "loss": 57.9518, - "step": 62320 - }, - { - "epoch": 0.25182108703644596, - "grad_norm": 2266.7421875, - "learning_rate": 4.6680244672016595e-05, - "loss": 74.0739, - "step": 62330 - }, - { - "epoch": 0.2518614883018136, - "grad_norm": 727.9891967773438, - "learning_rate": 4.6678506311881245e-05, - "loss": 107.9434, - "step": 62340 - }, - { - "epoch": 0.25190188956718124, - "grad_norm": 769.7603759765625, - "learning_rate": 4.667676752911225e-05, - "loss": 62.9798, - "step": 62350 - }, - { - "epoch": 0.2519422908325489, - "grad_norm": 367.056396484375, - "learning_rate": 4.667502832374352e-05, - "loss": 76.6132, - "step": 62360 - }, - { - "epoch": 0.2519826920979165, - "grad_norm": 1385.6729736328125, - "learning_rate": 4.667328869580895e-05, - "loss": 56.0849, - "step": 62370 - }, - { - "epoch": 0.25202309336328416, - "grad_norm": 509.8416442871094, - "learning_rate": 4.6671548645342456e-05, - "loss": 59.8157, - "step": 62380 - }, - { - "epoch": 0.2520634946286518, - "grad_norm": 1469.747314453125, - "learning_rate": 4.666980817237797e-05, - "loss": 89.1393, - "step": 62390 - }, - { - "epoch": 0.2521038958940194, - "grad_norm": 1213.3016357421875, - "learning_rate": 4.6668067276949414e-05, - "loss": 59.1252, - "step": 62400 - }, - { - "epoch": 0.252144297159387, - "grad_norm": 990.9741821289062, - "learning_rate": 4.666632595909072e-05, - "loss": 68.1975, - "step": 62410 - }, - { - "epoch": 0.25218469842475466, - "grad_norm": 916.1857299804688, - "learning_rate": 4.666458421883586e-05, - "loss": 66.8495, - "step": 62420 - }, - { - "epoch": 0.2522250996901223, - "grad_norm": 381.66485595703125, - "learning_rate": 4.666284205621877e-05, - "loss": 91.8923, - "step": 62430 - }, - { - "epoch": 0.25226550095548994, - "grad_norm": 894.6453247070312, - "learning_rate": 4.666109947127343e-05, - "loss": 76.0347, - "step": 62440 - }, - { - "epoch": 0.2523059022208576, - "grad_norm": 455.5608215332031, - "learning_rate": 4.6659356464033795e-05, - "loss": 54.6976, - "step": 62450 - }, - { - "epoch": 0.25234630348622517, - "grad_norm": 581.934814453125, - "learning_rate": 4.6657613034533866e-05, - "loss": 60.0454, - "step": 62460 - }, - { - "epoch": 0.2523867047515928, - "grad_norm": 911.0252685546875, - "learning_rate": 4.665586918280761e-05, - "loss": 102.2392, - "step": 62470 - }, - { - "epoch": 0.25242710601696045, - "grad_norm": 1507.02685546875, - "learning_rate": 4.665412490888904e-05, - "loss": 90.9116, - "step": 62480 - }, - { - "epoch": 0.2524675072823281, - "grad_norm": 2237.4755859375, - "learning_rate": 4.6652380212812155e-05, - "loss": 93.2901, - "step": 62490 - }, - { - "epoch": 0.2525079085476957, - "grad_norm": 816.4346923828125, - "learning_rate": 4.665063509461097e-05, - "loss": 80.2981, - "step": 62500 - }, - { - "epoch": 0.25254830981306337, - "grad_norm": 769.7588500976562, - "learning_rate": 4.6648889554319506e-05, - "loss": 55.1148, - "step": 62510 - }, - { - "epoch": 0.25258871107843095, - "grad_norm": 401.32989501953125, - "learning_rate": 4.66471435919718e-05, - "loss": 67.0569, - "step": 62520 - }, - { - "epoch": 0.2526291123437986, - "grad_norm": 888.2326049804688, - "learning_rate": 4.6645397207601884e-05, - "loss": 78.4589, - "step": 62530 - }, - { - "epoch": 0.25266951360916623, - "grad_norm": 310.5882568359375, - "learning_rate": 4.66436504012438e-05, - "loss": 98.2279, - "step": 62540 - }, - { - "epoch": 0.25270991487453387, - "grad_norm": 984.7472534179688, - "learning_rate": 4.664190317293161e-05, - "loss": 97.2269, - "step": 62550 - }, - { - "epoch": 0.2527503161399015, - "grad_norm": 793.189697265625, - "learning_rate": 4.6640155522699374e-05, - "loss": 127.4911, - "step": 62560 - }, - { - "epoch": 0.25279071740526915, - "grad_norm": 2629.53466796875, - "learning_rate": 4.6638407450581165e-05, - "loss": 95.0568, - "step": 62570 - }, - { - "epoch": 0.2528311186706368, - "grad_norm": 380.42779541015625, - "learning_rate": 4.663665895661107e-05, - "loss": 81.795, - "step": 62580 - }, - { - "epoch": 0.2528715199360044, - "grad_norm": 690.9551391601562, - "learning_rate": 4.663491004082316e-05, - "loss": 50.0963, - "step": 62590 - }, - { - "epoch": 0.252911921201372, - "grad_norm": 1449.210693359375, - "learning_rate": 4.6633160703251554e-05, - "loss": 65.8343, - "step": 62600 - }, - { - "epoch": 0.25295232246673965, - "grad_norm": 1021.6344604492188, - "learning_rate": 4.6631410943930334e-05, - "loss": 107.6034, - "step": 62610 - }, - { - "epoch": 0.2529927237321073, - "grad_norm": 869.527099609375, - "learning_rate": 4.662966076289362e-05, - "loss": 102.0092, - "step": 62620 - }, - { - "epoch": 0.25303312499747493, - "grad_norm": 584.1009521484375, - "learning_rate": 4.662791016017554e-05, - "loss": 69.7825, - "step": 62630 - }, - { - "epoch": 0.2530735262628426, - "grad_norm": 802.0068359375, - "learning_rate": 4.6626159135810205e-05, - "loss": 72.057, - "step": 62640 - }, - { - "epoch": 0.25311392752821016, - "grad_norm": 2002.407958984375, - "learning_rate": 4.662440768983177e-05, - "loss": 76.6827, - "step": 62650 - }, - { - "epoch": 0.2531543287935778, - "grad_norm": 573.6917724609375, - "learning_rate": 4.662265582227438e-05, - "loss": 116.2082, - "step": 62660 - }, - { - "epoch": 0.25319473005894544, - "grad_norm": 1771.079345703125, - "learning_rate": 4.662090353317217e-05, - "loss": 81.9311, - "step": 62670 - }, - { - "epoch": 0.2532351313243131, - "grad_norm": 1918.1868896484375, - "learning_rate": 4.661915082255932e-05, - "loss": 109.1544, - "step": 62680 - }, - { - "epoch": 0.2532755325896807, - "grad_norm": 1021.0386962890625, - "learning_rate": 4.661739769047e-05, - "loss": 71.0243, - "step": 62690 - }, - { - "epoch": 0.25331593385504836, - "grad_norm": 1025.5999755859375, - "learning_rate": 4.6615644136938375e-05, - "loss": 69.5765, - "step": 62700 - }, - { - "epoch": 0.253356335120416, - "grad_norm": 441.26617431640625, - "learning_rate": 4.661389016199864e-05, - "loss": 48.4432, - "step": 62710 - }, - { - "epoch": 0.2533967363857836, - "grad_norm": 2817.1181640625, - "learning_rate": 4.6612135765685e-05, - "loss": 43.8955, - "step": 62720 - }, - { - "epoch": 0.2534371376511512, - "grad_norm": 539.3783569335938, - "learning_rate": 4.6610380948031627e-05, - "loss": 63.5467, - "step": 62730 - }, - { - "epoch": 0.25347753891651886, - "grad_norm": 735.4268188476562, - "learning_rate": 4.660862570907277e-05, - "loss": 107.1801, - "step": 62740 - }, - { - "epoch": 0.2535179401818865, - "grad_norm": 1085.6962890625, - "learning_rate": 4.6606870048842624e-05, - "loss": 88.6567, - "step": 62750 - }, - { - "epoch": 0.25355834144725414, - "grad_norm": 488.97369384765625, - "learning_rate": 4.660511396737541e-05, - "loss": 74.2245, - "step": 62760 - }, - { - "epoch": 0.2535987427126218, - "grad_norm": 3399.495361328125, - "learning_rate": 4.660335746470539e-05, - "loss": 188.06, - "step": 62770 - }, - { - "epoch": 0.25363914397798937, - "grad_norm": 515.9884033203125, - "learning_rate": 4.6601600540866794e-05, - "loss": 61.2756, - "step": 62780 - }, - { - "epoch": 0.253679545243357, - "grad_norm": 328.14276123046875, - "learning_rate": 4.659984319589387e-05, - "loss": 66.6262, - "step": 62790 - }, - { - "epoch": 0.25371994650872465, - "grad_norm": 1215.5234375, - "learning_rate": 4.659808542982088e-05, - "loss": 107.4222, - "step": 62800 - }, - { - "epoch": 0.2537603477740923, - "grad_norm": 1078.8668212890625, - "learning_rate": 4.65963272426821e-05, - "loss": 101.8976, - "step": 62810 - }, - { - "epoch": 0.2538007490394599, - "grad_norm": 620.238037109375, - "learning_rate": 4.659456863451181e-05, - "loss": 49.1838, - "step": 62820 - }, - { - "epoch": 0.25384115030482757, - "grad_norm": 357.8902587890625, - "learning_rate": 4.6592809605344276e-05, - "loss": 76.9179, - "step": 62830 - }, - { - "epoch": 0.25388155157019515, - "grad_norm": 763.6502075195312, - "learning_rate": 4.65910501552138e-05, - "loss": 68.4214, - "step": 62840 - }, - { - "epoch": 0.2539219528355628, - "grad_norm": 650.4541015625, - "learning_rate": 4.658929028415468e-05, - "loss": 67.5361, - "step": 62850 - }, - { - "epoch": 0.25396235410093043, - "grad_norm": 1105.059814453125, - "learning_rate": 4.658752999220125e-05, - "loss": 107.3979, - "step": 62860 - }, - { - "epoch": 0.25400275536629807, - "grad_norm": 1753.42822265625, - "learning_rate": 4.65857692793878e-05, - "loss": 59.3314, - "step": 62870 - }, - { - "epoch": 0.2540431566316657, - "grad_norm": 610.6260375976562, - "learning_rate": 4.6584008145748656e-05, - "loss": 75.3817, - "step": 62880 - }, - { - "epoch": 0.25408355789703335, - "grad_norm": 815.94482421875, - "learning_rate": 4.6582246591318175e-05, - "loss": 49.2156, - "step": 62890 - }, - { - "epoch": 0.254123959162401, - "grad_norm": 802.24072265625, - "learning_rate": 4.658048461613068e-05, - "loss": 84.062, - "step": 62900 - }, - { - "epoch": 0.2541643604277686, - "grad_norm": 517.5693969726562, - "learning_rate": 4.6578722220220525e-05, - "loss": 101.054, - "step": 62910 - }, - { - "epoch": 0.2542047616931362, - "grad_norm": 948.2103271484375, - "learning_rate": 4.657695940362207e-05, - "loss": 78.7945, - "step": 62920 - }, - { - "epoch": 0.25424516295850386, - "grad_norm": 751.4588623046875, - "learning_rate": 4.657519616636968e-05, - "loss": 68.9439, - "step": 62930 - }, - { - "epoch": 0.2542855642238715, - "grad_norm": 209.2197723388672, - "learning_rate": 4.6573432508497735e-05, - "loss": 68.4884, - "step": 62940 - }, - { - "epoch": 0.25432596548923914, - "grad_norm": 793.4098510742188, - "learning_rate": 4.6571668430040625e-05, - "loss": 97.053, - "step": 62950 - }, - { - "epoch": 0.2543663667546068, - "grad_norm": 606.1543579101562, - "learning_rate": 4.6569903931032735e-05, - "loss": 69.4006, - "step": 62960 - }, - { - "epoch": 0.25440676801997436, - "grad_norm": 757.6336059570312, - "learning_rate": 4.656813901150845e-05, - "loss": 79.5474, - "step": 62970 - }, - { - "epoch": 0.254447169285342, - "grad_norm": 866.605712890625, - "learning_rate": 4.6566373671502196e-05, - "loss": 56.2643, - "step": 62980 - }, - { - "epoch": 0.25448757055070964, - "grad_norm": 121.89437866210938, - "learning_rate": 4.656460791104839e-05, - "loss": 48.1909, - "step": 62990 - }, - { - "epoch": 0.2545279718160773, - "grad_norm": 1390.025390625, - "learning_rate": 4.656284173018144e-05, - "loss": 107.6447, - "step": 63000 - }, - { - "epoch": 0.2545683730814449, - "grad_norm": 1153.6959228515625, - "learning_rate": 4.656107512893579e-05, - "loss": 75.1273, - "step": 63010 - }, - { - "epoch": 0.25460877434681256, - "grad_norm": 441.3359069824219, - "learning_rate": 4.655930810734589e-05, - "loss": 46.4667, - "step": 63020 - }, - { - "epoch": 0.2546491756121802, - "grad_norm": 914.875732421875, - "learning_rate": 4.655754066544617e-05, - "loss": 58.5283, - "step": 63030 - }, - { - "epoch": 0.2546895768775478, - "grad_norm": 1101.917236328125, - "learning_rate": 4.65557728032711e-05, - "loss": 84.4799, - "step": 63040 - }, - { - "epoch": 0.2547299781429154, - "grad_norm": 985.5469360351562, - "learning_rate": 4.655400452085514e-05, - "loss": 63.2999, - "step": 63050 - }, - { - "epoch": 0.25477037940828307, - "grad_norm": 596.2723999023438, - "learning_rate": 4.6552235818232764e-05, - "loss": 72.8403, - "step": 63060 - }, - { - "epoch": 0.2548107806736507, - "grad_norm": 654.357666015625, - "learning_rate": 4.655046669543845e-05, - "loss": 57.4079, - "step": 63070 - }, - { - "epoch": 0.25485118193901835, - "grad_norm": 634.196044921875, - "learning_rate": 4.6548697152506705e-05, - "loss": 80.194, - "step": 63080 - }, - { - "epoch": 0.254891583204386, - "grad_norm": 701.0132446289062, - "learning_rate": 4.6546927189472014e-05, - "loss": 46.6557, - "step": 63090 - }, - { - "epoch": 0.25493198446975357, - "grad_norm": 1064.72412109375, - "learning_rate": 4.654515680636888e-05, - "loss": 73.7024, - "step": 63100 - }, - { - "epoch": 0.2549723857351212, - "grad_norm": 967.7996826171875, - "learning_rate": 4.654338600323182e-05, - "loss": 62.9957, - "step": 63110 - }, - { - "epoch": 0.25501278700048885, - "grad_norm": 533.4601440429688, - "learning_rate": 4.654161478009536e-05, - "loss": 63.6593, - "step": 63120 - }, - { - "epoch": 0.2550531882658565, - "grad_norm": 588.319580078125, - "learning_rate": 4.6539843136994036e-05, - "loss": 60.8479, - "step": 63130 - }, - { - "epoch": 0.25509358953122413, - "grad_norm": 1351.3424072265625, - "learning_rate": 4.653807107396237e-05, - "loss": 87.2913, - "step": 63140 - }, - { - "epoch": 0.25513399079659177, - "grad_norm": 780.9151000976562, - "learning_rate": 4.653629859103492e-05, - "loss": 64.7784, - "step": 63150 - }, - { - "epoch": 0.25517439206195935, - "grad_norm": 618.2050170898438, - "learning_rate": 4.653452568824625e-05, - "loss": 44.7988, - "step": 63160 - }, - { - "epoch": 0.255214793327327, - "grad_norm": 295.4721984863281, - "learning_rate": 4.653275236563091e-05, - "loss": 83.7096, - "step": 63170 - }, - { - "epoch": 0.25525519459269463, - "grad_norm": 567.467041015625, - "learning_rate": 4.653097862322348e-05, - "loss": 47.7101, - "step": 63180 - }, - { - "epoch": 0.2552955958580623, - "grad_norm": 1230.712646484375, - "learning_rate": 4.652920446105853e-05, - "loss": 82.6227, - "step": 63190 - }, - { - "epoch": 0.2553359971234299, - "grad_norm": 1003.6646118164062, - "learning_rate": 4.652742987917066e-05, - "loss": 82.4499, - "step": 63200 - }, - { - "epoch": 0.25537639838879755, - "grad_norm": 488.0013122558594, - "learning_rate": 4.652565487759446e-05, - "loss": 69.6767, - "step": 63210 - }, - { - "epoch": 0.2554167996541652, - "grad_norm": 423.2750244140625, - "learning_rate": 4.652387945636454e-05, - "loss": 99.7746, - "step": 63220 - }, - { - "epoch": 0.2554572009195328, - "grad_norm": 531.3796997070312, - "learning_rate": 4.65221036155155e-05, - "loss": 65.6895, - "step": 63230 - }, - { - "epoch": 0.2554976021849004, - "grad_norm": 664.51513671875, - "learning_rate": 4.652032735508198e-05, - "loss": 76.6833, - "step": 63240 - }, - { - "epoch": 0.25553800345026806, - "grad_norm": 1186.4224853515625, - "learning_rate": 4.65185506750986e-05, - "loss": 75.191, - "step": 63250 - }, - { - "epoch": 0.2555784047156357, - "grad_norm": 1025.0635986328125, - "learning_rate": 4.651677357559998e-05, - "loss": 79.5044, - "step": 63260 - }, - { - "epoch": 0.25561880598100334, - "grad_norm": 1971.0733642578125, - "learning_rate": 4.65149960566208e-05, - "loss": 78.158, - "step": 63270 - }, - { - "epoch": 0.255659207246371, - "grad_norm": 601.2109375, - "learning_rate": 4.651321811819568e-05, - "loss": 109.8638, - "step": 63280 - }, - { - "epoch": 0.25569960851173856, - "grad_norm": 926.903076171875, - "learning_rate": 4.65114397603593e-05, - "loss": 67.0008, - "step": 63290 - }, - { - "epoch": 0.2557400097771062, - "grad_norm": 543.5036010742188, - "learning_rate": 4.6509660983146334e-05, - "loss": 90.5155, - "step": 63300 - }, - { - "epoch": 0.25578041104247384, - "grad_norm": 1474.077392578125, - "learning_rate": 4.650788178659146e-05, - "loss": 76.4021, - "step": 63310 - }, - { - "epoch": 0.2558208123078415, - "grad_norm": 1020.8681030273438, - "learning_rate": 4.650610217072934e-05, - "loss": 78.0826, - "step": 63320 - }, - { - "epoch": 0.2558612135732091, - "grad_norm": 963.8372192382812, - "learning_rate": 4.650432213559469e-05, - "loss": 63.825, - "step": 63330 - }, - { - "epoch": 0.25590161483857676, - "grad_norm": 1577.4927978515625, - "learning_rate": 4.650254168122222e-05, - "loss": 76.4572, - "step": 63340 - }, - { - "epoch": 0.2559420161039444, - "grad_norm": 985.75439453125, - "learning_rate": 4.650076080764662e-05, - "loss": 45.7849, - "step": 63350 - }, - { - "epoch": 0.255982417369312, - "grad_norm": 651.5304565429688, - "learning_rate": 4.649897951490262e-05, - "loss": 86.52, - "step": 63360 - }, - { - "epoch": 0.2560228186346796, - "grad_norm": 1333.999267578125, - "learning_rate": 4.649719780302495e-05, - "loss": 59.2886, - "step": 63370 - }, - { - "epoch": 0.25606321990004727, - "grad_norm": 926.7659912109375, - "learning_rate": 4.649541567204834e-05, - "loss": 91.8786, - "step": 63380 - }, - { - "epoch": 0.2561036211654149, - "grad_norm": 720.6255493164062, - "learning_rate": 4.649363312200753e-05, - "loss": 76.3271, - "step": 63390 - }, - { - "epoch": 0.25614402243078255, - "grad_norm": 0.0, - "learning_rate": 4.649185015293728e-05, - "loss": 68.2173, - "step": 63400 - }, - { - "epoch": 0.2561844236961502, - "grad_norm": 905.1610717773438, - "learning_rate": 4.649006676487234e-05, - "loss": 72.3964, - "step": 63410 - }, - { - "epoch": 0.25622482496151777, - "grad_norm": 1136.3037109375, - "learning_rate": 4.6488282957847494e-05, - "loss": 102.3837, - "step": 63420 - }, - { - "epoch": 0.2562652262268854, - "grad_norm": 1587.19921875, - "learning_rate": 4.648649873189751e-05, - "loss": 70.8869, - "step": 63430 - }, - { - "epoch": 0.25630562749225305, - "grad_norm": 683.1766967773438, - "learning_rate": 4.648471408705717e-05, - "loss": 78.3768, - "step": 63440 - }, - { - "epoch": 0.2563460287576207, - "grad_norm": 719.8806762695312, - "learning_rate": 4.648292902336126e-05, - "loss": 81.2503, - "step": 63450 - }, - { - "epoch": 0.25638643002298833, - "grad_norm": 1979.6317138671875, - "learning_rate": 4.648114354084459e-05, - "loss": 129.6083, - "step": 63460 - }, - { - "epoch": 0.25642683128835597, - "grad_norm": 565.4490356445312, - "learning_rate": 4.647935763954198e-05, - "loss": 78.8221, - "step": 63470 - }, - { - "epoch": 0.25646723255372356, - "grad_norm": 446.4573669433594, - "learning_rate": 4.647757131948822e-05, - "loss": 103.6806, - "step": 63480 - }, - { - "epoch": 0.2565076338190912, - "grad_norm": 1135.3199462890625, - "learning_rate": 4.6475784580718155e-05, - "loss": 97.5097, - "step": 63490 - }, - { - "epoch": 0.25654803508445884, - "grad_norm": 725.8003540039062, - "learning_rate": 4.6473997423266614e-05, - "loss": 65.7066, - "step": 63500 - }, - { - "epoch": 0.2565884363498265, - "grad_norm": 844.005859375, - "learning_rate": 4.6472209847168435e-05, - "loss": 75.9603, - "step": 63510 - }, - { - "epoch": 0.2566288376151941, - "grad_norm": 555.097412109375, - "learning_rate": 4.647042185245847e-05, - "loss": 68.8893, - "step": 63520 - }, - { - "epoch": 0.25666923888056176, - "grad_norm": 434.0204772949219, - "learning_rate": 4.646863343917158e-05, - "loss": 82.4939, - "step": 63530 - }, - { - "epoch": 0.2567096401459294, - "grad_norm": 918.038818359375, - "learning_rate": 4.646684460734263e-05, - "loss": 61.4218, - "step": 63540 - }, - { - "epoch": 0.256750041411297, - "grad_norm": 617.525390625, - "learning_rate": 4.646505535700649e-05, - "loss": 55.3601, - "step": 63550 - }, - { - "epoch": 0.2567904426766646, - "grad_norm": 864.6348266601562, - "learning_rate": 4.6463265688198044e-05, - "loss": 76.1844, - "step": 63560 - }, - { - "epoch": 0.25683084394203226, - "grad_norm": 2091.900390625, - "learning_rate": 4.6461475600952184e-05, - "loss": 76.1381, - "step": 63570 - }, - { - "epoch": 0.2568712452073999, - "grad_norm": 1115.5269775390625, - "learning_rate": 4.645968509530381e-05, - "loss": 69.8563, - "step": 63580 - }, - { - "epoch": 0.25691164647276754, - "grad_norm": 808.4495849609375, - "learning_rate": 4.645789417128783e-05, - "loss": 65.0377, - "step": 63590 - }, - { - "epoch": 0.2569520477381352, - "grad_norm": 867.1176147460938, - "learning_rate": 4.645610282893915e-05, - "loss": 78.228, - "step": 63600 - }, - { - "epoch": 0.25699244900350277, - "grad_norm": 452.4241943359375, - "learning_rate": 4.64543110682927e-05, - "loss": 80.4785, - "step": 63610 - }, - { - "epoch": 0.2570328502688704, - "grad_norm": 626.6029663085938, - "learning_rate": 4.6452518889383414e-05, - "loss": 68.1861, - "step": 63620 - }, - { - "epoch": 0.25707325153423805, - "grad_norm": 561.9072265625, - "learning_rate": 4.645072629224622e-05, - "loss": 68.4941, - "step": 63630 - }, - { - "epoch": 0.2571136527996057, - "grad_norm": 495.2870788574219, - "learning_rate": 4.6448933276916076e-05, - "loss": 78.5616, - "step": 63640 - }, - { - "epoch": 0.2571540540649733, - "grad_norm": 757.0482788085938, - "learning_rate": 4.644713984342793e-05, - "loss": 61.9383, - "step": 63650 - }, - { - "epoch": 0.25719445533034097, - "grad_norm": 666.8660278320312, - "learning_rate": 4.644534599181677e-05, - "loss": 60.4049, - "step": 63660 - }, - { - "epoch": 0.2572348565957086, - "grad_norm": 487.714111328125, - "learning_rate": 4.644355172211753e-05, - "loss": 65.7735, - "step": 63670 - }, - { - "epoch": 0.2572752578610762, - "grad_norm": 403.6894836425781, - "learning_rate": 4.644175703436522e-05, - "loss": 74.5089, - "step": 63680 - }, - { - "epoch": 0.25731565912644383, - "grad_norm": 806.9922485351562, - "learning_rate": 4.643996192859481e-05, - "loss": 47.5238, - "step": 63690 - }, - { - "epoch": 0.25735606039181147, - "grad_norm": 1357.157470703125, - "learning_rate": 4.643816640484131e-05, - "loss": 86.1968, - "step": 63700 - }, - { - "epoch": 0.2573964616571791, - "grad_norm": 1357.3590087890625, - "learning_rate": 4.643637046313972e-05, - "loss": 82.4154, - "step": 63710 - }, - { - "epoch": 0.25743686292254675, - "grad_norm": 1421.4974365234375, - "learning_rate": 4.6434574103525044e-05, - "loss": 91.1663, - "step": 63720 - }, - { - "epoch": 0.2574772641879144, - "grad_norm": 1042.117431640625, - "learning_rate": 4.6432777326032316e-05, - "loss": 86.4203, - "step": 63730 - }, - { - "epoch": 0.257517665453282, - "grad_norm": 1068.4251708984375, - "learning_rate": 4.6430980130696555e-05, - "loss": 79.4296, - "step": 63740 - }, - { - "epoch": 0.2575580667186496, - "grad_norm": 709.8405151367188, - "learning_rate": 4.642918251755281e-05, - "loss": 59.4794, - "step": 63750 - }, - { - "epoch": 0.25759846798401725, - "grad_norm": 752.4713745117188, - "learning_rate": 4.6427384486636113e-05, - "loss": 80.5621, - "step": 63760 - }, - { - "epoch": 0.2576388692493849, - "grad_norm": 500.284423828125, - "learning_rate": 4.6425586037981526e-05, - "loss": 70.1939, - "step": 63770 - }, - { - "epoch": 0.25767927051475253, - "grad_norm": 1230.3875732421875, - "learning_rate": 4.6423787171624114e-05, - "loss": 93.3144, - "step": 63780 - }, - { - "epoch": 0.2577196717801202, - "grad_norm": 375.65838623046875, - "learning_rate": 4.642198788759894e-05, - "loss": 83.3093, - "step": 63790 - }, - { - "epoch": 0.25776007304548776, - "grad_norm": 1165.2003173828125, - "learning_rate": 4.642018818594107e-05, - "loss": 67.3937, - "step": 63800 - }, - { - "epoch": 0.2578004743108554, - "grad_norm": 1046.146728515625, - "learning_rate": 4.641838806668562e-05, - "loss": 72.0059, - "step": 63810 - }, - { - "epoch": 0.25784087557622304, - "grad_norm": 695.942138671875, - "learning_rate": 4.6416587529867664e-05, - "loss": 54.245, - "step": 63820 - }, - { - "epoch": 0.2578812768415907, - "grad_norm": 1145.5577392578125, - "learning_rate": 4.6414786575522306e-05, - "loss": 88.9208, - "step": 63830 - }, - { - "epoch": 0.2579216781069583, - "grad_norm": 453.61181640625, - "learning_rate": 4.6412985203684654e-05, - "loss": 66.1485, - "step": 63840 - }, - { - "epoch": 0.25796207937232596, - "grad_norm": 765.479248046875, - "learning_rate": 4.6411183414389837e-05, - "loss": 74.9149, - "step": 63850 - }, - { - "epoch": 0.2580024806376936, - "grad_norm": 794.6134643554688, - "learning_rate": 4.6409381207672974e-05, - "loss": 58.8936, - "step": 63860 - }, - { - "epoch": 0.2580428819030612, - "grad_norm": 582.8801879882812, - "learning_rate": 4.64075785835692e-05, - "loss": 95.4306, - "step": 63870 - }, - { - "epoch": 0.2580832831684288, - "grad_norm": 1164.59619140625, - "learning_rate": 4.640577554211366e-05, - "loss": 64.1848, - "step": 63880 - }, - { - "epoch": 0.25812368443379646, - "grad_norm": 1310.0181884765625, - "learning_rate": 4.640397208334151e-05, - "loss": 63.126, - "step": 63890 - }, - { - "epoch": 0.2581640856991641, - "grad_norm": 571.4081420898438, - "learning_rate": 4.64021682072879e-05, - "loss": 42.7484, - "step": 63900 - }, - { - "epoch": 0.25820448696453174, - "grad_norm": 726.766845703125, - "learning_rate": 4.640036391398801e-05, - "loss": 43.3208, - "step": 63910 - }, - { - "epoch": 0.2582448882298994, - "grad_norm": 754.6783447265625, - "learning_rate": 4.639855920347701e-05, - "loss": 110.9717, - "step": 63920 - }, - { - "epoch": 0.25828528949526697, - "grad_norm": 1137.88818359375, - "learning_rate": 4.639675407579007e-05, - "loss": 70.8244, - "step": 63930 - }, - { - "epoch": 0.2583256907606346, - "grad_norm": 508.732421875, - "learning_rate": 4.6394948530962396e-05, - "loss": 55.6226, - "step": 63940 - }, - { - "epoch": 0.25836609202600225, - "grad_norm": 405.8502197265625, - "learning_rate": 4.639314256902919e-05, - "loss": 51.9442, - "step": 63950 - }, - { - "epoch": 0.2584064932913699, - "grad_norm": 941.8868408203125, - "learning_rate": 4.6391336190025644e-05, - "loss": 97.1299, - "step": 63960 - }, - { - "epoch": 0.2584468945567375, - "grad_norm": 368.6508483886719, - "learning_rate": 4.6389529393987e-05, - "loss": 52.9645, - "step": 63970 - }, - { - "epoch": 0.25848729582210517, - "grad_norm": 846.4256591796875, - "learning_rate": 4.638772218094847e-05, - "loss": 76.5249, - "step": 63980 - }, - { - "epoch": 0.25852769708747275, - "grad_norm": 552.8169555664062, - "learning_rate": 4.638591455094527e-05, - "loss": 58.1772, - "step": 63990 - }, - { - "epoch": 0.2585680983528404, - "grad_norm": 1205.1046142578125, - "learning_rate": 4.638410650401267e-05, - "loss": 59.0313, - "step": 64000 - }, - { - "epoch": 0.25860849961820803, - "grad_norm": 979.0299682617188, - "learning_rate": 4.63822980401859e-05, - "loss": 75.0273, - "step": 64010 - }, - { - "epoch": 0.25864890088357567, - "grad_norm": 1404.583740234375, - "learning_rate": 4.638048915950022e-05, - "loss": 77.1647, - "step": 64020 - }, - { - "epoch": 0.2586893021489433, - "grad_norm": 1869.6007080078125, - "learning_rate": 4.637867986199089e-05, - "loss": 57.802, - "step": 64030 - }, - { - "epoch": 0.25872970341431095, - "grad_norm": 502.42138671875, - "learning_rate": 4.6376870147693196e-05, - "loss": 62.1255, - "step": 64040 - }, - { - "epoch": 0.2587701046796786, - "grad_norm": 700.3970947265625, - "learning_rate": 4.6375060016642415e-05, - "loss": 72.0223, - "step": 64050 - }, - { - "epoch": 0.2588105059450462, - "grad_norm": 1100.4659423828125, - "learning_rate": 4.6373249468873833e-05, - "loss": 84.0306, - "step": 64060 - }, - { - "epoch": 0.2588509072104138, - "grad_norm": 818.85595703125, - "learning_rate": 4.637143850442275e-05, - "loss": 82.6929, - "step": 64070 - }, - { - "epoch": 0.25889130847578146, - "grad_norm": 373.8831787109375, - "learning_rate": 4.6369627123324465e-05, - "loss": 65.1804, - "step": 64080 - }, - { - "epoch": 0.2589317097411491, - "grad_norm": 932.3283081054688, - "learning_rate": 4.6367815325614306e-05, - "loss": 72.5139, - "step": 64090 - }, - { - "epoch": 0.25897211100651674, - "grad_norm": 1563.8875732421875, - "learning_rate": 4.636600311132758e-05, - "loss": 100.7779, - "step": 64100 - }, - { - "epoch": 0.2590125122718844, - "grad_norm": 1865.01220703125, - "learning_rate": 4.6364190480499624e-05, - "loss": 105.2925, - "step": 64110 - }, - { - "epoch": 0.25905291353725196, - "grad_norm": 947.768798828125, - "learning_rate": 4.636237743316578e-05, - "loss": 58.8598, - "step": 64120 - }, - { - "epoch": 0.2590933148026196, - "grad_norm": 663.779052734375, - "learning_rate": 4.636056396936138e-05, - "loss": 97.7154, - "step": 64130 - }, - { - "epoch": 0.25913371606798724, - "grad_norm": 814.3782958984375, - "learning_rate": 4.6358750089121795e-05, - "loss": 72.49, - "step": 64140 - }, - { - "epoch": 0.2591741173333549, - "grad_norm": 634.1887817382812, - "learning_rate": 4.635693579248238e-05, - "loss": 64.081, - "step": 64150 - }, - { - "epoch": 0.2592145185987225, - "grad_norm": 1665.0477294921875, - "learning_rate": 4.635512107947851e-05, - "loss": 60.826, - "step": 64160 - }, - { - "epoch": 0.25925491986409016, - "grad_norm": 587.009765625, - "learning_rate": 4.635330595014555e-05, - "loss": 62.3891, - "step": 64170 - }, - { - "epoch": 0.2592953211294578, - "grad_norm": 1239.5069580078125, - "learning_rate": 4.635149040451891e-05, - "loss": 83.9093, - "step": 64180 - }, - { - "epoch": 0.2593357223948254, - "grad_norm": 743.6141357421875, - "learning_rate": 4.634967444263397e-05, - "loss": 111.2367, - "step": 64190 - }, - { - "epoch": 0.259376123660193, - "grad_norm": 848.8331909179688, - "learning_rate": 4.6347858064526125e-05, - "loss": 56.9608, - "step": 64200 - }, - { - "epoch": 0.25941652492556067, - "grad_norm": 628.3904418945312, - "learning_rate": 4.6346041270230804e-05, - "loss": 72.7587, - "step": 64210 - }, - { - "epoch": 0.2594569261909283, - "grad_norm": 849.9680786132812, - "learning_rate": 4.634422405978342e-05, - "loss": 47.3868, - "step": 64220 - }, - { - "epoch": 0.25949732745629595, - "grad_norm": 474.90264892578125, - "learning_rate": 4.6342406433219394e-05, - "loss": 102.6614, - "step": 64230 - }, - { - "epoch": 0.2595377287216636, - "grad_norm": 2507.705322265625, - "learning_rate": 4.634058839057417e-05, - "loss": 70.4811, - "step": 64240 - }, - { - "epoch": 0.25957812998703117, - "grad_norm": 1063.095458984375, - "learning_rate": 4.6338769931883185e-05, - "loss": 78.3297, - "step": 64250 - }, - { - "epoch": 0.2596185312523988, - "grad_norm": 622.4473266601562, - "learning_rate": 4.63369510571819e-05, - "loss": 77.0825, - "step": 64260 - }, - { - "epoch": 0.25965893251776645, - "grad_norm": 674.91455078125, - "learning_rate": 4.633513176650577e-05, - "loss": 60.1044, - "step": 64270 - }, - { - "epoch": 0.2596993337831341, - "grad_norm": 857.1925659179688, - "learning_rate": 4.6333312059890256e-05, - "loss": 75.2282, - "step": 64280 - }, - { - "epoch": 0.25973973504850173, - "grad_norm": 1186.2684326171875, - "learning_rate": 4.633149193737084e-05, - "loss": 90.6651, - "step": 64290 - }, - { - "epoch": 0.25978013631386937, - "grad_norm": 503.4192810058594, - "learning_rate": 4.632967139898301e-05, - "loss": 52.2867, - "step": 64300 - }, - { - "epoch": 0.25982053757923695, - "grad_norm": 0.0, - "learning_rate": 4.632785044476225e-05, - "loss": 54.2169, - "step": 64310 - }, - { - "epoch": 0.2598609388446046, - "grad_norm": 808.451904296875, - "learning_rate": 4.6326029074744074e-05, - "loss": 89.7694, - "step": 64320 - }, - { - "epoch": 0.25990134010997223, - "grad_norm": 3421.571533203125, - "learning_rate": 4.6324207288963974e-05, - "loss": 94.0056, - "step": 64330 - }, - { - "epoch": 0.2599417413753399, - "grad_norm": 300.9707336425781, - "learning_rate": 4.632238508745748e-05, - "loss": 72.748, - "step": 64340 - }, - { - "epoch": 0.2599821426407075, - "grad_norm": 1028.27197265625, - "learning_rate": 4.632056247026011e-05, - "loss": 66.6853, - "step": 64350 - }, - { - "epoch": 0.26002254390607515, - "grad_norm": 730.0326538085938, - "learning_rate": 4.63187394374074e-05, - "loss": 90.4567, - "step": 64360 - }, - { - "epoch": 0.2600629451714428, - "grad_norm": 859.37890625, - "learning_rate": 4.6316915988934884e-05, - "loss": 76.2773, - "step": 64370 - }, - { - "epoch": 0.2601033464368104, - "grad_norm": 956.5285034179688, - "learning_rate": 4.631509212487811e-05, - "loss": 70.2754, - "step": 64380 - }, - { - "epoch": 0.260143747702178, - "grad_norm": 627.6688232421875, - "learning_rate": 4.6313267845272656e-05, - "loss": 61.4442, - "step": 64390 - }, - { - "epoch": 0.26018414896754566, - "grad_norm": 716.159423828125, - "learning_rate": 4.631144315015407e-05, - "loss": 72.7093, - "step": 64400 - }, - { - "epoch": 0.2602245502329133, - "grad_norm": 447.9820861816406, - "learning_rate": 4.630961803955792e-05, - "loss": 66.9453, - "step": 64410 - }, - { - "epoch": 0.26026495149828094, - "grad_norm": 588.1676025390625, - "learning_rate": 4.63077925135198e-05, - "loss": 44.5305, - "step": 64420 - }, - { - "epoch": 0.2603053527636486, - "grad_norm": 594.3401489257812, - "learning_rate": 4.6305966572075286e-05, - "loss": 60.6098, - "step": 64430 - }, - { - "epoch": 0.26034575402901616, - "grad_norm": 1224.4241943359375, - "learning_rate": 4.630414021525999e-05, - "loss": 95.3115, - "step": 64440 - }, - { - "epoch": 0.2603861552943838, - "grad_norm": 1307.3175048828125, - "learning_rate": 4.6302313443109526e-05, - "loss": 59.6623, - "step": 64450 - }, - { - "epoch": 0.26042655655975144, - "grad_norm": 667.8453979492188, - "learning_rate": 4.6300486255659484e-05, - "loss": 68.9579, - "step": 64460 - }, - { - "epoch": 0.2604669578251191, - "grad_norm": 648.3145141601562, - "learning_rate": 4.6298658652945494e-05, - "loss": 72.2152, - "step": 64470 - }, - { - "epoch": 0.2605073590904867, - "grad_norm": 1166.9095458984375, - "learning_rate": 4.629683063500319e-05, - "loss": 56.1489, - "step": 64480 - }, - { - "epoch": 0.26054776035585436, - "grad_norm": 856.700927734375, - "learning_rate": 4.629500220186821e-05, - "loss": 92.3313, - "step": 64490 - }, - { - "epoch": 0.260588161621222, - "grad_norm": 1207.9443359375, - "learning_rate": 4.629317335357619e-05, - "loss": 62.1099, - "step": 64500 - }, - { - "epoch": 0.2606285628865896, - "grad_norm": 618.7341918945312, - "learning_rate": 4.6291344090162804e-05, - "loss": 89.1145, - "step": 64510 - }, - { - "epoch": 0.2606689641519572, - "grad_norm": 1234.6435546875, - "learning_rate": 4.62895144116637e-05, - "loss": 81.7492, - "step": 64520 - }, - { - "epoch": 0.26070936541732487, - "grad_norm": 1199.2274169921875, - "learning_rate": 4.628768431811455e-05, - "loss": 93.2701, - "step": 64530 - }, - { - "epoch": 0.2607497666826925, - "grad_norm": 787.503662109375, - "learning_rate": 4.6285853809551036e-05, - "loss": 67.4107, - "step": 64540 - }, - { - "epoch": 0.26079016794806015, - "grad_norm": 1185.4876708984375, - "learning_rate": 4.6284022886008836e-05, - "loss": 137.017, - "step": 64550 - }, - { - "epoch": 0.2608305692134278, - "grad_norm": 1550.175048828125, - "learning_rate": 4.628219154752367e-05, - "loss": 105.3924, - "step": 64560 - }, - { - "epoch": 0.26087097047879537, - "grad_norm": 474.1405334472656, - "learning_rate": 4.628035979413121e-05, - "loss": 63.6596, - "step": 64570 - }, - { - "epoch": 0.260911371744163, - "grad_norm": 631.3251953125, - "learning_rate": 4.627852762586718e-05, - "loss": 88.0774, - "step": 64580 - }, - { - "epoch": 0.26095177300953065, - "grad_norm": 932.6629638671875, - "learning_rate": 4.627669504276731e-05, - "loss": 69.9022, - "step": 64590 - }, - { - "epoch": 0.2609921742748983, - "grad_norm": 462.3409423828125, - "learning_rate": 4.6274862044867304e-05, - "loss": 105.5615, - "step": 64600 - }, - { - "epoch": 0.26103257554026593, - "grad_norm": 421.7193908691406, - "learning_rate": 4.627302863220291e-05, - "loss": 83.5629, - "step": 64610 - }, - { - "epoch": 0.26107297680563357, - "grad_norm": 849.374267578125, - "learning_rate": 4.627119480480987e-05, - "loss": 108.5556, - "step": 64620 - }, - { - "epoch": 0.26111337807100116, - "grad_norm": 851.120849609375, - "learning_rate": 4.626936056272394e-05, - "loss": 76.4113, - "step": 64630 - }, - { - "epoch": 0.2611537793363688, - "grad_norm": 587.6618041992188, - "learning_rate": 4.626752590598088e-05, - "loss": 97.3091, - "step": 64640 - }, - { - "epoch": 0.26119418060173644, - "grad_norm": 1624.475341796875, - "learning_rate": 4.6265690834616446e-05, - "loss": 66.4535, - "step": 64650 - }, - { - "epoch": 0.2612345818671041, - "grad_norm": 194.85707092285156, - "learning_rate": 4.626385534866642e-05, - "loss": 51.6759, - "step": 64660 - }, - { - "epoch": 0.2612749831324717, - "grad_norm": 1180.03369140625, - "learning_rate": 4.626201944816659e-05, - "loss": 51.8923, - "step": 64670 - }, - { - "epoch": 0.26131538439783936, - "grad_norm": 951.8189697265625, - "learning_rate": 4.626018313315275e-05, - "loss": 76.8008, - "step": 64680 - }, - { - "epoch": 0.261355785663207, - "grad_norm": 894.181396484375, - "learning_rate": 4.625834640366068e-05, - "loss": 93.1069, - "step": 64690 - }, - { - "epoch": 0.2613961869285746, - "grad_norm": 575.2242431640625, - "learning_rate": 4.625650925972622e-05, - "loss": 85.9263, - "step": 64700 - }, - { - "epoch": 0.2614365881939422, - "grad_norm": 925.6211547851562, - "learning_rate": 4.6254671701385154e-05, - "loss": 69.3511, - "step": 64710 - }, - { - "epoch": 0.26147698945930986, - "grad_norm": 588.945068359375, - "learning_rate": 4.625283372867333e-05, - "loss": 36.42, - "step": 64720 - }, - { - "epoch": 0.2615173907246775, - "grad_norm": 1043.0775146484375, - "learning_rate": 4.625099534162656e-05, - "loss": 81.0917, - "step": 64730 - }, - { - "epoch": 0.26155779199004514, - "grad_norm": 1658.7032470703125, - "learning_rate": 4.62491565402807e-05, - "loss": 62.2384, - "step": 64740 - }, - { - "epoch": 0.2615981932554128, - "grad_norm": 1211.529052734375, - "learning_rate": 4.6247317324671605e-05, - "loss": 74.7915, - "step": 64750 - }, - { - "epoch": 0.26163859452078037, - "grad_norm": 1145.8216552734375, - "learning_rate": 4.6245477694835106e-05, - "loss": 52.6177, - "step": 64760 - }, - { - "epoch": 0.261678995786148, - "grad_norm": 758.6449584960938, - "learning_rate": 4.6243637650807086e-05, - "loss": 85.0178, - "step": 64770 - }, - { - "epoch": 0.26171939705151565, - "grad_norm": 427.8953857421875, - "learning_rate": 4.624179719262342e-05, - "loss": 76.1282, - "step": 64780 - }, - { - "epoch": 0.2617597983168833, - "grad_norm": 750.1713256835938, - "learning_rate": 4.623995632031997e-05, - "loss": 74.4636, - "step": 64790 - }, - { - "epoch": 0.2618001995822509, - "grad_norm": 421.75958251953125, - "learning_rate": 4.6238115033932636e-05, - "loss": 56.7678, - "step": 64800 - }, - { - "epoch": 0.26184060084761857, - "grad_norm": 1215.94482421875, - "learning_rate": 4.623627333349732e-05, - "loss": 72.2328, - "step": 64810 - }, - { - "epoch": 0.2618810021129862, - "grad_norm": 0.0, - "learning_rate": 4.623443121904992e-05, - "loss": 68.4072, - "step": 64820 - }, - { - "epoch": 0.2619214033783538, - "grad_norm": 739.1293334960938, - "learning_rate": 4.623258869062636e-05, - "loss": 58.8318, - "step": 64830 - }, - { - "epoch": 0.26196180464372143, - "grad_norm": 760.8939819335938, - "learning_rate": 4.623074574826254e-05, - "loss": 85.5972, - "step": 64840 - }, - { - "epoch": 0.26200220590908907, - "grad_norm": 902.6166381835938, - "learning_rate": 4.622890239199441e-05, - "loss": 77.9647, - "step": 64850 - }, - { - "epoch": 0.2620426071744567, - "grad_norm": 504.2374572753906, - "learning_rate": 4.622705862185789e-05, - "loss": 61.3637, - "step": 64860 - }, - { - "epoch": 0.26208300843982435, - "grad_norm": 550.212890625, - "learning_rate": 4.622521443788894e-05, - "loss": 58.025, - "step": 64870 - }, - { - "epoch": 0.262123409705192, - "grad_norm": 1491.5367431640625, - "learning_rate": 4.622336984012351e-05, - "loss": 119.6012, - "step": 64880 - }, - { - "epoch": 0.2621638109705596, - "grad_norm": 1074.4013671875, - "learning_rate": 4.622152482859755e-05, - "loss": 53.4368, - "step": 64890 - }, - { - "epoch": 0.2622042122359272, - "grad_norm": 1384.9111328125, - "learning_rate": 4.621967940334705e-05, - "loss": 73.9773, - "step": 64900 - }, - { - "epoch": 0.26224461350129485, - "grad_norm": 594.1865234375, - "learning_rate": 4.621783356440796e-05, - "loss": 59.9528, - "step": 64910 - }, - { - "epoch": 0.2622850147666625, - "grad_norm": 1251.0433349609375, - "learning_rate": 4.621598731181629e-05, - "loss": 69.5674, - "step": 64920 - }, - { - "epoch": 0.26232541603203013, - "grad_norm": 799.28564453125, - "learning_rate": 4.621414064560803e-05, - "loss": 90.669, - "step": 64930 - }, - { - "epoch": 0.2623658172973978, - "grad_norm": 766.1452026367188, - "learning_rate": 4.6212293565819166e-05, - "loss": 65.2333, - "step": 64940 - }, - { - "epoch": 0.26240621856276536, - "grad_norm": 2778.8935546875, - "learning_rate": 4.6210446072485725e-05, - "loss": 89.6359, - "step": 64950 - }, - { - "epoch": 0.262446619828133, - "grad_norm": 1777.0819091796875, - "learning_rate": 4.6208598165643715e-05, - "loss": 106.0751, - "step": 64960 - }, - { - "epoch": 0.26248702109350064, - "grad_norm": 607.8473510742188, - "learning_rate": 4.6206749845329164e-05, - "loss": 65.859, - "step": 64970 - }, - { - "epoch": 0.2625274223588683, - "grad_norm": 640.949951171875, - "learning_rate": 4.62049011115781e-05, - "loss": 93.8094, - "step": 64980 - }, - { - "epoch": 0.2625678236242359, - "grad_norm": 449.35650634765625, - "learning_rate": 4.620305196442659e-05, - "loss": 52.2666, - "step": 64990 - }, - { - "epoch": 0.26260822488960356, - "grad_norm": 1173.4410400390625, - "learning_rate": 4.620120240391065e-05, - "loss": 93.0513, - "step": 65000 - }, - { - "epoch": 0.2626486261549712, - "grad_norm": 489.27716064453125, - "learning_rate": 4.619935243006636e-05, - "loss": 64.3064, - "step": 65010 - }, - { - "epoch": 0.2626890274203388, - "grad_norm": 265.78076171875, - "learning_rate": 4.619750204292978e-05, - "loss": 80.1383, - "step": 65020 - }, - { - "epoch": 0.2627294286857064, - "grad_norm": 581.3447265625, - "learning_rate": 4.619565124253698e-05, - "loss": 41.3353, - "step": 65030 - }, - { - "epoch": 0.26276982995107406, - "grad_norm": 3157.60595703125, - "learning_rate": 4.619380002892406e-05, - "loss": 86.7737, - "step": 65040 - }, - { - "epoch": 0.2628102312164417, - "grad_norm": 679.6066284179688, - "learning_rate": 4.619194840212708e-05, - "loss": 73.4557, - "step": 65050 - }, - { - "epoch": 0.26285063248180934, - "grad_norm": 352.4366760253906, - "learning_rate": 4.6190096362182167e-05, - "loss": 84.3029, - "step": 65060 - }, - { - "epoch": 0.262891033747177, - "grad_norm": 1345.6533203125, - "learning_rate": 4.618824390912541e-05, - "loss": 76.0789, - "step": 65070 - }, - { - "epoch": 0.26293143501254457, - "grad_norm": 606.6847534179688, - "learning_rate": 4.618639104299294e-05, - "loss": 84.0097, - "step": 65080 - }, - { - "epoch": 0.2629718362779122, - "grad_norm": 777.6514892578125, - "learning_rate": 4.618453776382086e-05, - "loss": 54.7092, - "step": 65090 - }, - { - "epoch": 0.26301223754327985, - "grad_norm": 686.7308959960938, - "learning_rate": 4.61826840716453e-05, - "loss": 61.8434, - "step": 65100 - }, - { - "epoch": 0.2630526388086475, - "grad_norm": 538.2279052734375, - "learning_rate": 4.618082996650243e-05, - "loss": 49.7606, - "step": 65110 - }, - { - "epoch": 0.2630930400740151, - "grad_norm": 537.077392578125, - "learning_rate": 4.617897544842836e-05, - "loss": 66.0555, - "step": 65120 - }, - { - "epoch": 0.26313344133938277, - "grad_norm": 676.6751098632812, - "learning_rate": 4.617712051745927e-05, - "loss": 64.1107, - "step": 65130 - }, - { - "epoch": 0.2631738426047504, - "grad_norm": 587.270263671875, - "learning_rate": 4.61752651736313e-05, - "loss": 63.3736, - "step": 65140 - }, - { - "epoch": 0.263214243870118, - "grad_norm": 1263.456787109375, - "learning_rate": 4.617340941698064e-05, - "loss": 59.6258, - "step": 65150 - }, - { - "epoch": 0.26325464513548563, - "grad_norm": 427.99737548828125, - "learning_rate": 4.617155324754346e-05, - "loss": 52.6464, - "step": 65160 - }, - { - "epoch": 0.26329504640085327, - "grad_norm": 0.0, - "learning_rate": 4.616969666535596e-05, - "loss": 73.0998, - "step": 65170 - }, - { - "epoch": 0.2633354476662209, - "grad_norm": 617.71337890625, - "learning_rate": 4.6167839670454315e-05, - "loss": 44.5023, - "step": 65180 - }, - { - "epoch": 0.26337584893158855, - "grad_norm": 517.5345458984375, - "learning_rate": 4.616598226287474e-05, - "loss": 95.0356, - "step": 65190 - }, - { - "epoch": 0.2634162501969562, - "grad_norm": 1026.675537109375, - "learning_rate": 4.616412444265345e-05, - "loss": 67.167, - "step": 65200 - }, - { - "epoch": 0.2634566514623238, - "grad_norm": 1128.9427490234375, - "learning_rate": 4.616226620982665e-05, - "loss": 89.6982, - "step": 65210 - }, - { - "epoch": 0.2634970527276914, - "grad_norm": 809.5255737304688, - "learning_rate": 4.6160407564430574e-05, - "loss": 65.6736, - "step": 65220 - }, - { - "epoch": 0.26353745399305906, - "grad_norm": 1231.8623046875, - "learning_rate": 4.6158548506501464e-05, - "loss": 86.9045, - "step": 65230 - }, - { - "epoch": 0.2635778552584267, - "grad_norm": 1171.7540283203125, - "learning_rate": 4.6156689036075555e-05, - "loss": 77.3384, - "step": 65240 - }, - { - "epoch": 0.26361825652379434, - "grad_norm": 619.1737060546875, - "learning_rate": 4.615482915318911e-05, - "loss": 65.3949, - "step": 65250 - }, - { - "epoch": 0.263658657789162, - "grad_norm": 518.4265747070312, - "learning_rate": 4.6152968857878366e-05, - "loss": 55.1953, - "step": 65260 - }, - { - "epoch": 0.26369905905452956, - "grad_norm": 1298.9517822265625, - "learning_rate": 4.615110815017961e-05, - "loss": 56.1695, - "step": 65270 - }, - { - "epoch": 0.2637394603198972, - "grad_norm": 452.3489074707031, - "learning_rate": 4.614924703012911e-05, - "loss": 58.1403, - "step": 65280 - }, - { - "epoch": 0.26377986158526484, - "grad_norm": 809.9728393554688, - "learning_rate": 4.614738549776315e-05, - "loss": 49.3975, - "step": 65290 - }, - { - "epoch": 0.2638202628506325, - "grad_norm": 703.8427124023438, - "learning_rate": 4.614552355311802e-05, - "loss": 54.3999, - "step": 65300 - }, - { - "epoch": 0.2638606641160001, - "grad_norm": 518.8963623046875, - "learning_rate": 4.6143661196230026e-05, - "loss": 59.6565, - "step": 65310 - }, - { - "epoch": 0.26390106538136776, - "grad_norm": 780.0380249023438, - "learning_rate": 4.614179842713547e-05, - "loss": 85.9944, - "step": 65320 - }, - { - "epoch": 0.2639414666467354, - "grad_norm": 855.755126953125, - "learning_rate": 4.613993524587067e-05, - "loss": 57.039, - "step": 65330 - }, - { - "epoch": 0.263981867912103, - "grad_norm": 722.5667114257812, - "learning_rate": 4.613807165247195e-05, - "loss": 77.862, - "step": 65340 - }, - { - "epoch": 0.2640222691774706, - "grad_norm": 833.8828125, - "learning_rate": 4.613620764697564e-05, - "loss": 70.7339, - "step": 65350 - }, - { - "epoch": 0.26406267044283827, - "grad_norm": 827.6605224609375, - "learning_rate": 4.6134343229418075e-05, - "loss": 91.2179, - "step": 65360 - }, - { - "epoch": 0.2641030717082059, - "grad_norm": 398.13287353515625, - "learning_rate": 4.613247839983561e-05, - "loss": 85.9632, - "step": 65370 - }, - { - "epoch": 0.26414347297357355, - "grad_norm": 661.8615112304688, - "learning_rate": 4.613061315826461e-05, - "loss": 89.6971, - "step": 65380 - }, - { - "epoch": 0.2641838742389412, - "grad_norm": 425.14483642578125, - "learning_rate": 4.612874750474142e-05, - "loss": 46.4946, - "step": 65390 - }, - { - "epoch": 0.26422427550430877, - "grad_norm": 709.1620483398438, - "learning_rate": 4.612688143930242e-05, - "loss": 97.9462, - "step": 65400 - }, - { - "epoch": 0.2642646767696764, - "grad_norm": 888.79248046875, - "learning_rate": 4.612501496198398e-05, - "loss": 94.5246, - "step": 65410 - }, - { - "epoch": 0.26430507803504405, - "grad_norm": 629.85791015625, - "learning_rate": 4.612314807282251e-05, - "loss": 47.7306, - "step": 65420 - }, - { - "epoch": 0.2643454793004117, - "grad_norm": 76.55754089355469, - "learning_rate": 4.612128077185439e-05, - "loss": 79.3505, - "step": 65430 - }, - { - "epoch": 0.26438588056577933, - "grad_norm": 1617.7518310546875, - "learning_rate": 4.611941305911602e-05, - "loss": 100.8537, - "step": 65440 - }, - { - "epoch": 0.26442628183114697, - "grad_norm": 653.6577758789062, - "learning_rate": 4.611754493464383e-05, - "loss": 61.9429, - "step": 65450 - }, - { - "epoch": 0.2644666830965146, - "grad_norm": 1721.381103515625, - "learning_rate": 4.611567639847422e-05, - "loss": 72.449, - "step": 65460 - }, - { - "epoch": 0.2645070843618822, - "grad_norm": 2042.4002685546875, - "learning_rate": 4.611380745064363e-05, - "loss": 66.475, - "step": 65470 - }, - { - "epoch": 0.26454748562724983, - "grad_norm": 603.5230712890625, - "learning_rate": 4.61119380911885e-05, - "loss": 81.908, - "step": 65480 - }, - { - "epoch": 0.2645878868926175, - "grad_norm": 507.7789001464844, - "learning_rate": 4.611006832014526e-05, - "loss": 57.1852, - "step": 65490 - }, - { - "epoch": 0.2646282881579851, - "grad_norm": 646.4906005859375, - "learning_rate": 4.610819813755038e-05, - "loss": 48.6094, - "step": 65500 - }, - { - "epoch": 0.26466868942335275, - "grad_norm": 783.2797241210938, - "learning_rate": 4.61063275434403e-05, - "loss": 67.8832, - "step": 65510 - }, - { - "epoch": 0.2647090906887204, - "grad_norm": 698.7806396484375, - "learning_rate": 4.610445653785151e-05, - "loss": 53.7803, - "step": 65520 - }, - { - "epoch": 0.264749491954088, - "grad_norm": 563.870361328125, - "learning_rate": 4.610258512082046e-05, - "loss": 54.008, - "step": 65530 - }, - { - "epoch": 0.2647898932194556, - "grad_norm": 734.525146484375, - "learning_rate": 4.610071329238366e-05, - "loss": 59.3586, - "step": 65540 - }, - { - "epoch": 0.26483029448482326, - "grad_norm": 568.80419921875, - "learning_rate": 4.6098841052577583e-05, - "loss": 52.5479, - "step": 65550 - }, - { - "epoch": 0.2648706957501909, - "grad_norm": 473.8629150390625, - "learning_rate": 4.6096968401438745e-05, - "loss": 88.4755, - "step": 65560 - }, - { - "epoch": 0.26491109701555854, - "grad_norm": 804.3765258789062, - "learning_rate": 4.609509533900364e-05, - "loss": 82.4179, - "step": 65570 - }, - { - "epoch": 0.2649514982809262, - "grad_norm": 556.0540161132812, - "learning_rate": 4.6093221865308786e-05, - "loss": 108.8011, - "step": 65580 - }, - { - "epoch": 0.26499189954629376, - "grad_norm": 761.5044555664062, - "learning_rate": 4.609134798039073e-05, - "loss": 61.9983, - "step": 65590 - }, - { - "epoch": 0.2650323008116614, - "grad_norm": 1213.9462890625, - "learning_rate": 4.6089473684285974e-05, - "loss": 93.0749, - "step": 65600 - }, - { - "epoch": 0.26507270207702904, - "grad_norm": 691.1778564453125, - "learning_rate": 4.608759897703107e-05, - "loss": 55.7611, - "step": 65610 - }, - { - "epoch": 0.2651131033423967, - "grad_norm": 1190.127685546875, - "learning_rate": 4.608572385866257e-05, - "loss": 56.0981, - "step": 65620 - }, - { - "epoch": 0.2651535046077643, - "grad_norm": 1123.38916015625, - "learning_rate": 4.6083848329217026e-05, - "loss": 88.4509, - "step": 65630 - }, - { - "epoch": 0.26519390587313196, - "grad_norm": 358.7076110839844, - "learning_rate": 4.608197238873101e-05, - "loss": 70.7137, - "step": 65640 - }, - { - "epoch": 0.2652343071384996, - "grad_norm": 1214.915283203125, - "learning_rate": 4.6080096037241086e-05, - "loss": 89.7655, - "step": 65650 - }, - { - "epoch": 0.2652747084038672, - "grad_norm": 890.3026123046875, - "learning_rate": 4.607821927478383e-05, - "loss": 86.5065, - "step": 65660 - }, - { - "epoch": 0.2653151096692348, - "grad_norm": 835.5069580078125, - "learning_rate": 4.607634210139584e-05, - "loss": 84.1165, - "step": 65670 - }, - { - "epoch": 0.26535551093460247, - "grad_norm": 350.78350830078125, - "learning_rate": 4.607446451711372e-05, - "loss": 67.6091, - "step": 65680 - }, - { - "epoch": 0.2653959121999701, - "grad_norm": 1582.5438232421875, - "learning_rate": 4.607258652197406e-05, - "loss": 86.531, - "step": 65690 - }, - { - "epoch": 0.26543631346533775, - "grad_norm": 1221.990234375, - "learning_rate": 4.6070708116013476e-05, - "loss": 70.8549, - "step": 65700 - }, - { - "epoch": 0.2654767147307054, - "grad_norm": 931.9036865234375, - "learning_rate": 4.606882929926858e-05, - "loss": 47.7265, - "step": 65710 - }, - { - "epoch": 0.26551711599607297, - "grad_norm": 787.3499755859375, - "learning_rate": 4.6066950071776015e-05, - "loss": 82.7044, - "step": 65720 - }, - { - "epoch": 0.2655575172614406, - "grad_norm": 3718.5693359375, - "learning_rate": 4.606507043357242e-05, - "loss": 120.9925, - "step": 65730 - }, - { - "epoch": 0.26559791852680825, - "grad_norm": 836.9452514648438, - "learning_rate": 4.606319038469443e-05, - "loss": 59.5213, - "step": 65740 - }, - { - "epoch": 0.2656383197921759, - "grad_norm": 694.1288452148438, - "learning_rate": 4.606130992517869e-05, - "loss": 70.1149, - "step": 65750 - }, - { - "epoch": 0.26567872105754353, - "grad_norm": 1005.6154174804688, - "learning_rate": 4.605942905506188e-05, - "loss": 49.102, - "step": 65760 - }, - { - "epoch": 0.26571912232291117, - "grad_norm": 684.3242797851562, - "learning_rate": 4.605754777438065e-05, - "loss": 79.9185, - "step": 65770 - }, - { - "epoch": 0.2657595235882788, - "grad_norm": 1068.396240234375, - "learning_rate": 4.605566608317169e-05, - "loss": 109.3487, - "step": 65780 - }, - { - "epoch": 0.2657999248536464, - "grad_norm": 2943.5751953125, - "learning_rate": 4.6053783981471675e-05, - "loss": 78.831, - "step": 65790 - }, - { - "epoch": 0.26584032611901404, - "grad_norm": 386.4954833984375, - "learning_rate": 4.605190146931731e-05, - "loss": 91.7346, - "step": 65800 - }, - { - "epoch": 0.2658807273843817, - "grad_norm": 1254.3067626953125, - "learning_rate": 4.605001854674529e-05, - "loss": 88.2726, - "step": 65810 - }, - { - "epoch": 0.2659211286497493, - "grad_norm": 553.072509765625, - "learning_rate": 4.604813521379231e-05, - "loss": 71.1216, - "step": 65820 - }, - { - "epoch": 0.26596152991511696, - "grad_norm": 780.3787841796875, - "learning_rate": 4.60462514704951e-05, - "loss": 71.7296, - "step": 65830 - }, - { - "epoch": 0.2660019311804846, - "grad_norm": 1093.623046875, - "learning_rate": 4.6044367316890386e-05, - "loss": 53.4292, - "step": 65840 - }, - { - "epoch": 0.2660423324458522, - "grad_norm": 986.8058471679688, - "learning_rate": 4.6042482753014895e-05, - "loss": 70.1599, - "step": 65850 - }, - { - "epoch": 0.2660827337112198, - "grad_norm": 614.8383178710938, - "learning_rate": 4.604059777890537e-05, - "loss": 68.2766, - "step": 65860 - }, - { - "epoch": 0.26612313497658746, - "grad_norm": 608.2391357421875, - "learning_rate": 4.603871239459856e-05, - "loss": 51.4506, - "step": 65870 - }, - { - "epoch": 0.2661635362419551, - "grad_norm": 1213.140869140625, - "learning_rate": 4.6036826600131216e-05, - "loss": 66.3176, - "step": 65880 - }, - { - "epoch": 0.26620393750732274, - "grad_norm": 1492.8607177734375, - "learning_rate": 4.603494039554011e-05, - "loss": 83.3855, - "step": 65890 - }, - { - "epoch": 0.2662443387726904, - "grad_norm": 2795.365478515625, - "learning_rate": 4.603305378086201e-05, - "loss": 86.1449, - "step": 65900 - }, - { - "epoch": 0.26628474003805797, - "grad_norm": 429.2622985839844, - "learning_rate": 4.60311667561337e-05, - "loss": 93.9114, - "step": 65910 - }, - { - "epoch": 0.2663251413034256, - "grad_norm": 1739.33544921875, - "learning_rate": 4.602927932139197e-05, - "loss": 72.7662, - "step": 65920 - }, - { - "epoch": 0.26636554256879325, - "grad_norm": 850.324951171875, - "learning_rate": 4.6027391476673606e-05, - "loss": 87.2089, - "step": 65930 - }, - { - "epoch": 0.2664059438341609, - "grad_norm": 248.5250244140625, - "learning_rate": 4.602550322201542e-05, - "loss": 73.9269, - "step": 65940 - }, - { - "epoch": 0.2664463450995285, - "grad_norm": 414.97100830078125, - "learning_rate": 4.602361455745423e-05, - "loss": 38.258, - "step": 65950 - }, - { - "epoch": 0.26648674636489617, - "grad_norm": 821.9193725585938, - "learning_rate": 4.602172548302684e-05, - "loss": 51.6796, - "step": 65960 - }, - { - "epoch": 0.2665271476302638, - "grad_norm": 972.9696044921875, - "learning_rate": 4.60198359987701e-05, - "loss": 75.2604, - "step": 65970 - }, - { - "epoch": 0.2665675488956314, - "grad_norm": 589.0814819335938, - "learning_rate": 4.6017946104720836e-05, - "loss": 86.6934, - "step": 65980 - }, - { - "epoch": 0.26660795016099903, - "grad_norm": 812.1279296875, - "learning_rate": 4.6016055800915884e-05, - "loss": 77.9313, - "step": 65990 - }, - { - "epoch": 0.26664835142636667, - "grad_norm": 456.3207702636719, - "learning_rate": 4.601416508739211e-05, - "loss": 49.5027, - "step": 66000 - }, - { - "epoch": 0.2666887526917343, - "grad_norm": 1623.6546630859375, - "learning_rate": 4.6012273964186365e-05, - "loss": 64.7836, - "step": 66010 - }, - { - "epoch": 0.26672915395710195, - "grad_norm": 779.8026733398438, - "learning_rate": 4.601038243133552e-05, - "loss": 108.0493, - "step": 66020 - }, - { - "epoch": 0.2667695552224696, - "grad_norm": 685.0618286132812, - "learning_rate": 4.600849048887646e-05, - "loss": 79.6456, - "step": 66030 - }, - { - "epoch": 0.2668099564878372, - "grad_norm": 1468.7335205078125, - "learning_rate": 4.6006598136846056e-05, - "loss": 63.8021, - "step": 66040 - }, - { - "epoch": 0.2668503577532048, - "grad_norm": 932.735107421875, - "learning_rate": 4.600470537528121e-05, - "loss": 51.6305, - "step": 66050 - }, - { - "epoch": 0.26689075901857245, - "grad_norm": 815.7344970703125, - "learning_rate": 4.6002812204218816e-05, - "loss": 66.5829, - "step": 66060 - }, - { - "epoch": 0.2669311602839401, - "grad_norm": 606.532958984375, - "learning_rate": 4.600091862369579e-05, - "loss": 77.9135, - "step": 66070 - }, - { - "epoch": 0.26697156154930773, - "grad_norm": 760.3806762695312, - "learning_rate": 4.599902463374903e-05, - "loss": 64.1426, - "step": 66080 - }, - { - "epoch": 0.2670119628146754, - "grad_norm": 1549.1610107421875, - "learning_rate": 4.599713023441549e-05, - "loss": 67.7165, - "step": 66090 - }, - { - "epoch": 0.267052364080043, - "grad_norm": 500.93182373046875, - "learning_rate": 4.599523542573207e-05, - "loss": 65.0257, - "step": 66100 - }, - { - "epoch": 0.2670927653454106, - "grad_norm": 301.22589111328125, - "learning_rate": 4.599334020773574e-05, - "loss": 69.3244, - "step": 66110 - }, - { - "epoch": 0.26713316661077824, - "grad_norm": 374.9650573730469, - "learning_rate": 4.599144458046343e-05, - "loss": 57.7532, - "step": 66120 - }, - { - "epoch": 0.2671735678761459, - "grad_norm": 521.96337890625, - "learning_rate": 4.59895485439521e-05, - "loss": 80.4386, - "step": 66130 - }, - { - "epoch": 0.2672139691415135, - "grad_norm": 1903.363525390625, - "learning_rate": 4.5987652098238714e-05, - "loss": 68.9257, - "step": 66140 - }, - { - "epoch": 0.26725437040688116, - "grad_norm": 561.215087890625, - "learning_rate": 4.598575524336025e-05, - "loss": 56.7605, - "step": 66150 - }, - { - "epoch": 0.2672947716722488, - "grad_norm": 502.40240478515625, - "learning_rate": 4.598385797935368e-05, - "loss": 61.1385, - "step": 66160 - }, - { - "epoch": 0.2673351729376164, - "grad_norm": 948.8161010742188, - "learning_rate": 4.5981960306255996e-05, - "loss": 71.2674, - "step": 66170 - }, - { - "epoch": 0.267375574202984, - "grad_norm": 683.00244140625, - "learning_rate": 4.598006222410419e-05, - "loss": 56.7152, - "step": 66180 - }, - { - "epoch": 0.26741597546835166, - "grad_norm": 916.79833984375, - "learning_rate": 4.597816373293528e-05, - "loss": 75.4204, - "step": 66190 - }, - { - "epoch": 0.2674563767337193, - "grad_norm": 406.5462341308594, - "learning_rate": 4.597626483278625e-05, - "loss": 63.3937, - "step": 66200 - }, - { - "epoch": 0.26749677799908694, - "grad_norm": 953.0973510742188, - "learning_rate": 4.5974365523694155e-05, - "loss": 55.9855, - "step": 66210 - }, - { - "epoch": 0.2675371792644546, - "grad_norm": 664.46923828125, - "learning_rate": 4.5972465805695996e-05, - "loss": 100.492, - "step": 66220 - }, - { - "epoch": 0.26757758052982217, - "grad_norm": 810.0478515625, - "learning_rate": 4.597056567882883e-05, - "loss": 68.5419, - "step": 66230 - }, - { - "epoch": 0.2676179817951898, - "grad_norm": 821.2228393554688, - "learning_rate": 4.596866514312967e-05, - "loss": 71.8012, - "step": 66240 - }, - { - "epoch": 0.26765838306055745, - "grad_norm": 1174.8955078125, - "learning_rate": 4.5966764198635606e-05, - "loss": 92.9574, - "step": 66250 - }, - { - "epoch": 0.2676987843259251, - "grad_norm": 1086.9854736328125, - "learning_rate": 4.596486284538367e-05, - "loss": 57.3918, - "step": 66260 - }, - { - "epoch": 0.2677391855912927, - "grad_norm": 690.481689453125, - "learning_rate": 4.5962961083410946e-05, - "loss": 71.6719, - "step": 66270 - }, - { - "epoch": 0.26777958685666037, - "grad_norm": 1423.809814453125, - "learning_rate": 4.596105891275449e-05, - "loss": 66.3168, - "step": 66280 - }, - { - "epoch": 0.267819988122028, - "grad_norm": 1662.720947265625, - "learning_rate": 4.595915633345141e-05, - "loss": 94.1194, - "step": 66290 - }, - { - "epoch": 0.2678603893873956, - "grad_norm": 687.7420654296875, - "learning_rate": 4.595725334553879e-05, - "loss": 125.3564, - "step": 66300 - }, - { - "epoch": 0.26790079065276323, - "grad_norm": 580.8500366210938, - "learning_rate": 4.595534994905372e-05, - "loss": 45.3163, - "step": 66310 - }, - { - "epoch": 0.26794119191813087, - "grad_norm": 0.0, - "learning_rate": 4.5953446144033316e-05, - "loss": 59.1748, - "step": 66320 - }, - { - "epoch": 0.2679815931834985, - "grad_norm": 292.1687927246094, - "learning_rate": 4.595154193051469e-05, - "loss": 62.6477, - "step": 66330 - }, - { - "epoch": 0.26802199444886615, - "grad_norm": 379.4153747558594, - "learning_rate": 4.594963730853497e-05, - "loss": 90.7049, - "step": 66340 - }, - { - "epoch": 0.2680623957142338, - "grad_norm": 2087.564208984375, - "learning_rate": 4.5947732278131286e-05, - "loss": 75.824, - "step": 66350 - }, - { - "epoch": 0.2681027969796014, - "grad_norm": 1189.827880859375, - "learning_rate": 4.594582683934078e-05, - "loss": 92.8753, - "step": 66360 - }, - { - "epoch": 0.268143198244969, - "grad_norm": 524.31005859375, - "learning_rate": 4.5943920992200585e-05, - "loss": 57.8241, - "step": 66370 - }, - { - "epoch": 0.26818359951033666, - "grad_norm": 1099.4833984375, - "learning_rate": 4.5942014736747875e-05, - "loss": 65.1616, - "step": 66380 - }, - { - "epoch": 0.2682240007757043, - "grad_norm": 1009.6073608398438, - "learning_rate": 4.59401080730198e-05, - "loss": 68.1652, - "step": 66390 - }, - { - "epoch": 0.26826440204107194, - "grad_norm": 4793.3828125, - "learning_rate": 4.593820100105355e-05, - "loss": 111.5321, - "step": 66400 - }, - { - "epoch": 0.2683048033064396, - "grad_norm": 940.0222778320312, - "learning_rate": 4.5936293520886275e-05, - "loss": 83.8232, - "step": 66410 - }, - { - "epoch": 0.2683452045718072, - "grad_norm": 1047.490478515625, - "learning_rate": 4.59343856325552e-05, - "loss": 58.4591, - "step": 66420 - }, - { - "epoch": 0.2683856058371748, - "grad_norm": 874.4302978515625, - "learning_rate": 4.593247733609748e-05, - "loss": 104.9535, - "step": 66430 - }, - { - "epoch": 0.26842600710254244, - "grad_norm": 742.1166381835938, - "learning_rate": 4.593056863155034e-05, - "loss": 84.062, - "step": 66440 - }, - { - "epoch": 0.2684664083679101, - "grad_norm": 313.023681640625, - "learning_rate": 4.5928659518951e-05, - "loss": 44.992, - "step": 66450 - }, - { - "epoch": 0.2685068096332777, - "grad_norm": 1163.264892578125, - "learning_rate": 4.592674999833666e-05, - "loss": 71.2603, - "step": 66460 - }, - { - "epoch": 0.26854721089864536, - "grad_norm": 1168.4986572265625, - "learning_rate": 4.592484006974456e-05, - "loss": 46.842, - "step": 66470 - }, - { - "epoch": 0.268587612164013, - "grad_norm": 2281.073974609375, - "learning_rate": 4.5922929733211926e-05, - "loss": 54.4491, - "step": 66480 - }, - { - "epoch": 0.2686280134293806, - "grad_norm": 656.7672729492188, - "learning_rate": 4.5921018988776e-05, - "loss": 88.2362, - "step": 66490 - }, - { - "epoch": 0.2686684146947482, - "grad_norm": 787.104736328125, - "learning_rate": 4.591910783647404e-05, - "loss": 102.6616, - "step": 66500 - }, - { - "epoch": 0.26870881596011587, - "grad_norm": 602.447265625, - "learning_rate": 4.591719627634331e-05, - "loss": 70.4162, - "step": 66510 - }, - { - "epoch": 0.2687492172254835, - "grad_norm": 607.5802612304688, - "learning_rate": 4.591528430842107e-05, - "loss": 67.1125, - "step": 66520 - }, - { - "epoch": 0.26878961849085115, - "grad_norm": 1654.757080078125, - "learning_rate": 4.5913371932744584e-05, - "loss": 82.985, - "step": 66530 - }, - { - "epoch": 0.2688300197562188, - "grad_norm": 711.9852905273438, - "learning_rate": 4.591145914935116e-05, - "loss": 66.6757, - "step": 66540 - }, - { - "epoch": 0.26887042102158637, - "grad_norm": 956.3167114257812, - "learning_rate": 4.590954595827806e-05, - "loss": 78.8319, - "step": 66550 - }, - { - "epoch": 0.268910822286954, - "grad_norm": 1775.7945556640625, - "learning_rate": 4.59076323595626e-05, - "loss": 88.6002, - "step": 66560 - }, - { - "epoch": 0.26895122355232165, - "grad_norm": 776.41015625, - "learning_rate": 4.5905718353242086e-05, - "loss": 70.2173, - "step": 66570 - }, - { - "epoch": 0.2689916248176893, - "grad_norm": 934.8427124023438, - "learning_rate": 4.590380393935383e-05, - "loss": 71.2888, - "step": 66580 - }, - { - "epoch": 0.26903202608305693, - "grad_norm": 1015.8541259765625, - "learning_rate": 4.5901889117935153e-05, - "loss": 94.4522, - "step": 66590 - }, - { - "epoch": 0.26907242734842457, - "grad_norm": 0.0, - "learning_rate": 4.589997388902338e-05, - "loss": 66.415, - "step": 66600 - }, - { - "epoch": 0.2691128286137922, - "grad_norm": 1408.8917236328125, - "learning_rate": 4.589805825265587e-05, - "loss": 83.8427, - "step": 66610 - }, - { - "epoch": 0.2691532298791598, - "grad_norm": 654.0909423828125, - "learning_rate": 4.5896142208869954e-05, - "loss": 59.3255, - "step": 66620 - }, - { - "epoch": 0.26919363114452743, - "grad_norm": 644.368896484375, - "learning_rate": 4.589422575770298e-05, - "loss": 51.3406, - "step": 66630 - }, - { - "epoch": 0.2692340324098951, - "grad_norm": 2795.22998046875, - "learning_rate": 4.589230889919232e-05, - "loss": 64.9199, - "step": 66640 - }, - { - "epoch": 0.2692744336752627, - "grad_norm": 940.733642578125, - "learning_rate": 4.589039163337534e-05, - "loss": 102.8163, - "step": 66650 - }, - { - "epoch": 0.26931483494063035, - "grad_norm": 847.7637939453125, - "learning_rate": 4.588847396028942e-05, - "loss": 77.151, - "step": 66660 - }, - { - "epoch": 0.269355236205998, - "grad_norm": 710.346435546875, - "learning_rate": 4.588655587997195e-05, - "loss": 57.7109, - "step": 66670 - }, - { - "epoch": 0.2693956374713656, - "grad_norm": 607.9664306640625, - "learning_rate": 4.5884637392460314e-05, - "loss": 71.083, - "step": 66680 - }, - { - "epoch": 0.2694360387367332, - "grad_norm": 615.1112060546875, - "learning_rate": 4.588271849779192e-05, - "loss": 63.7689, - "step": 66690 - }, - { - "epoch": 0.26947644000210086, - "grad_norm": 664.7433471679688, - "learning_rate": 4.588079919600419e-05, - "loss": 80.6283, - "step": 66700 - }, - { - "epoch": 0.2695168412674685, - "grad_norm": 407.6193542480469, - "learning_rate": 4.587887948713452e-05, - "loss": 49.1637, - "step": 66710 - }, - { - "epoch": 0.26955724253283614, - "grad_norm": 785.1792602539062, - "learning_rate": 4.5876959371220344e-05, - "loss": 64.5263, - "step": 66720 - }, - { - "epoch": 0.2695976437982038, - "grad_norm": 565.7954711914062, - "learning_rate": 4.587503884829909e-05, - "loss": 86.9049, - "step": 66730 - }, - { - "epoch": 0.2696380450635714, - "grad_norm": 1209.4056396484375, - "learning_rate": 4.587311791840822e-05, - "loss": 103.0452, - "step": 66740 - }, - { - "epoch": 0.269678446328939, - "grad_norm": 384.4944152832031, - "learning_rate": 4.5871196581585166e-05, - "loss": 46.5085, - "step": 66750 - }, - { - "epoch": 0.26971884759430664, - "grad_norm": 818.3886108398438, - "learning_rate": 4.5869274837867394e-05, - "loss": 100.6729, - "step": 66760 - }, - { - "epoch": 0.2697592488596743, - "grad_norm": 964.607421875, - "learning_rate": 4.5867352687292355e-05, - "loss": 64.0361, - "step": 66770 - }, - { - "epoch": 0.2697996501250419, - "grad_norm": 641.611328125, - "learning_rate": 4.5865430129897536e-05, - "loss": 44.5035, - "step": 66780 - }, - { - "epoch": 0.26984005139040956, - "grad_norm": 822.2906494140625, - "learning_rate": 4.5863507165720415e-05, - "loss": 63.5622, - "step": 66790 - }, - { - "epoch": 0.2698804526557772, - "grad_norm": 370.7065124511719, - "learning_rate": 4.586158379479848e-05, - "loss": 57.749, - "step": 66800 - }, - { - "epoch": 0.2699208539211448, - "grad_norm": 565.5477905273438, - "learning_rate": 4.5859660017169224e-05, - "loss": 80.5675, - "step": 66810 - }, - { - "epoch": 0.2699612551865124, - "grad_norm": 869.1512451171875, - "learning_rate": 4.5857735832870166e-05, - "loss": 76.6965, - "step": 66820 - }, - { - "epoch": 0.27000165645188007, - "grad_norm": 390.1742858886719, - "learning_rate": 4.5855811241938806e-05, - "loss": 87.9975, - "step": 66830 - }, - { - "epoch": 0.2700420577172477, - "grad_norm": 815.9105834960938, - "learning_rate": 4.585388624441267e-05, - "loss": 60.8064, - "step": 66840 - }, - { - "epoch": 0.27008245898261535, - "grad_norm": 777.6511840820312, - "learning_rate": 4.585196084032928e-05, - "loss": 50.49, - "step": 66850 - }, - { - "epoch": 0.270122860247983, - "grad_norm": 975.7098999023438, - "learning_rate": 4.585003502972618e-05, - "loss": 82.6168, - "step": 66860 - }, - { - "epoch": 0.27016326151335057, - "grad_norm": 773.058349609375, - "learning_rate": 4.584810881264092e-05, - "loss": 58.9796, - "step": 66870 - }, - { - "epoch": 0.2702036627787182, - "grad_norm": 342.78265380859375, - "learning_rate": 4.5846182189111035e-05, - "loss": 77.3388, - "step": 66880 - }, - { - "epoch": 0.27024406404408585, - "grad_norm": 759.47314453125, - "learning_rate": 4.584425515917411e-05, - "loss": 53.6399, - "step": 66890 - }, - { - "epoch": 0.2702844653094535, - "grad_norm": 392.1265869140625, - "learning_rate": 4.584232772286768e-05, - "loss": 48.1632, - "step": 66900 - }, - { - "epoch": 0.27032486657482113, - "grad_norm": 704.0643920898438, - "learning_rate": 4.5840399880229354e-05, - "loss": 73.1397, - "step": 66910 - }, - { - "epoch": 0.27036526784018877, - "grad_norm": 828.9876098632812, - "learning_rate": 4.58384716312967e-05, - "loss": 60.1899, - "step": 66920 - }, - { - "epoch": 0.2704056691055564, - "grad_norm": 529.6195678710938, - "learning_rate": 4.583654297610731e-05, - "loss": 59.9641, - "step": 66930 - }, - { - "epoch": 0.270446070370924, - "grad_norm": 733.8816528320312, - "learning_rate": 4.583461391469879e-05, - "loss": 90.0057, - "step": 66940 - }, - { - "epoch": 0.27048647163629164, - "grad_norm": 1088.7060546875, - "learning_rate": 4.583268444710875e-05, - "loss": 103.6154, - "step": 66950 - }, - { - "epoch": 0.2705268729016593, - "grad_norm": 952.4298095703125, - "learning_rate": 4.583075457337479e-05, - "loss": 78.6486, - "step": 66960 - }, - { - "epoch": 0.2705672741670269, - "grad_norm": 490.1788024902344, - "learning_rate": 4.5828824293534555e-05, - "loss": 64.2271, - "step": 66970 - }, - { - "epoch": 0.27060767543239456, - "grad_norm": 1667.7532958984375, - "learning_rate": 4.5826893607625665e-05, - "loss": 64.5124, - "step": 66980 - }, - { - "epoch": 0.2706480766977622, - "grad_norm": 823.8807373046875, - "learning_rate": 4.582496251568576e-05, - "loss": 71.0763, - "step": 66990 - }, - { - "epoch": 0.2706884779631298, - "grad_norm": 0.0, - "learning_rate": 4.5823031017752485e-05, - "loss": 65.76, - "step": 67000 - }, - { - "epoch": 0.2707288792284974, - "grad_norm": 962.5149536132812, - "learning_rate": 4.5821099113863506e-05, - "loss": 87.7109, - "step": 67010 - }, - { - "epoch": 0.27076928049386506, - "grad_norm": 1748.1005859375, - "learning_rate": 4.581916680405648e-05, - "loss": 62.5524, - "step": 67020 - }, - { - "epoch": 0.2708096817592327, - "grad_norm": 1395.0408935546875, - "learning_rate": 4.581723408836908e-05, - "loss": 70.1472, - "step": 67030 - }, - { - "epoch": 0.27085008302460034, - "grad_norm": 1065.451904296875, - "learning_rate": 4.581530096683898e-05, - "loss": 65.4421, - "step": 67040 - }, - { - "epoch": 0.270890484289968, - "grad_norm": 1557.5567626953125, - "learning_rate": 4.5813367439503875e-05, - "loss": 50.4025, - "step": 67050 - }, - { - "epoch": 0.27093088555533557, - "grad_norm": 990.9954223632812, - "learning_rate": 4.5811433506401456e-05, - "loss": 60.8415, - "step": 67060 - }, - { - "epoch": 0.2709712868207032, - "grad_norm": 535.752685546875, - "learning_rate": 4.580949916756942e-05, - "loss": 56.8241, - "step": 67070 - }, - { - "epoch": 0.27101168808607085, - "grad_norm": 589.4478149414062, - "learning_rate": 4.580756442304549e-05, - "loss": 43.5019, - "step": 67080 - }, - { - "epoch": 0.2710520893514385, - "grad_norm": 471.8979187011719, - "learning_rate": 4.580562927286738e-05, - "loss": 83.2732, - "step": 67090 - }, - { - "epoch": 0.2710924906168061, - "grad_norm": 1266.64794921875, - "learning_rate": 4.5803693717072815e-05, - "loss": 97.8621, - "step": 67100 - }, - { - "epoch": 0.27113289188217377, - "grad_norm": 939.3637084960938, - "learning_rate": 4.5801757755699534e-05, - "loss": 73.3667, - "step": 67110 - }, - { - "epoch": 0.2711732931475414, - "grad_norm": 516.1859741210938, - "learning_rate": 4.579982138878527e-05, - "loss": 64.6031, - "step": 67120 - }, - { - "epoch": 0.271213694412909, - "grad_norm": 688.78125, - "learning_rate": 4.579788461636778e-05, - "loss": 86.9311, - "step": 67130 - }, - { - "epoch": 0.27125409567827663, - "grad_norm": 726.7952270507812, - "learning_rate": 4.579594743848482e-05, - "loss": 97.9171, - "step": 67140 - }, - { - "epoch": 0.27129449694364427, - "grad_norm": 1185.85498046875, - "learning_rate": 4.579400985517416e-05, - "loss": 92.5374, - "step": 67150 - }, - { - "epoch": 0.2713348982090119, - "grad_norm": 928.0086059570312, - "learning_rate": 4.579207186647357e-05, - "loss": 46.1306, - "step": 67160 - }, - { - "epoch": 0.27137529947437955, - "grad_norm": 5591.84228515625, - "learning_rate": 4.579013347242085e-05, - "loss": 74.2452, - "step": 67170 - }, - { - "epoch": 0.2714157007397472, - "grad_norm": 886.691650390625, - "learning_rate": 4.5788194673053756e-05, - "loss": 55.0841, - "step": 67180 - }, - { - "epoch": 0.2714561020051148, - "grad_norm": 1341.62109375, - "learning_rate": 4.578625546841011e-05, - "loss": 62.2266, - "step": 67190 - }, - { - "epoch": 0.2714965032704824, - "grad_norm": 839.7449951171875, - "learning_rate": 4.5784315858527715e-05, - "loss": 73.6595, - "step": 67200 - }, - { - "epoch": 0.27153690453585005, - "grad_norm": 570.7716674804688, - "learning_rate": 4.578237584344438e-05, - "loss": 68.3076, - "step": 67210 - }, - { - "epoch": 0.2715773058012177, - "grad_norm": 673.6726684570312, - "learning_rate": 4.578043542319793e-05, - "loss": 59.9907, - "step": 67220 - }, - { - "epoch": 0.27161770706658533, - "grad_norm": 966.4290161132812, - "learning_rate": 4.577849459782619e-05, - "loss": 56.2375, - "step": 67230 - }, - { - "epoch": 0.271658108331953, - "grad_norm": 516.3497924804688, - "learning_rate": 4.5776553367367e-05, - "loss": 86.4351, - "step": 67240 - }, - { - "epoch": 0.2716985095973206, - "grad_norm": 2003.4329833984375, - "learning_rate": 4.577461173185821e-05, - "loss": 76.1077, - "step": 67250 - }, - { - "epoch": 0.2717389108626882, - "grad_norm": 829.5421752929688, - "learning_rate": 4.5772669691337665e-05, - "loss": 67.1411, - "step": 67260 - }, - { - "epoch": 0.27177931212805584, - "grad_norm": 783.5784912109375, - "learning_rate": 4.577072724584323e-05, - "loss": 72.7227, - "step": 67270 - }, - { - "epoch": 0.2718197133934235, - "grad_norm": 875.1643676757812, - "learning_rate": 4.576878439541278e-05, - "loss": 80.1554, - "step": 67280 - }, - { - "epoch": 0.2718601146587911, - "grad_norm": 432.68487548828125, - "learning_rate": 4.576684114008418e-05, - "loss": 44.3341, - "step": 67290 - }, - { - "epoch": 0.27190051592415876, - "grad_norm": 427.4791259765625, - "learning_rate": 4.5764897479895317e-05, - "loss": 74.892, - "step": 67300 - }, - { - "epoch": 0.2719409171895264, - "grad_norm": 1177.6551513671875, - "learning_rate": 4.576295341488409e-05, - "loss": 68.8277, - "step": 67310 - }, - { - "epoch": 0.271981318454894, - "grad_norm": 739.357421875, - "learning_rate": 4.57610089450884e-05, - "loss": 83.5637, - "step": 67320 - }, - { - "epoch": 0.2720217197202616, - "grad_norm": 464.8116760253906, - "learning_rate": 4.575906407054615e-05, - "loss": 69.9499, - "step": 67330 - }, - { - "epoch": 0.27206212098562926, - "grad_norm": 439.9012145996094, - "learning_rate": 4.5757118791295264e-05, - "loss": 54.5363, - "step": 67340 - }, - { - "epoch": 0.2721025222509969, - "grad_norm": 415.9515380859375, - "learning_rate": 4.575517310737365e-05, - "loss": 73.2503, - "step": 67350 - }, - { - "epoch": 0.27214292351636454, - "grad_norm": 784.4149780273438, - "learning_rate": 4.575322701881926e-05, - "loss": 64.8702, - "step": 67360 - }, - { - "epoch": 0.2721833247817322, - "grad_norm": 389.4613342285156, - "learning_rate": 4.575128052567002e-05, - "loss": 76.2548, - "step": 67370 - }, - { - "epoch": 0.27222372604709977, - "grad_norm": 942.818603515625, - "learning_rate": 4.5749333627963884e-05, - "loss": 76.8624, - "step": 67380 - }, - { - "epoch": 0.2722641273124674, - "grad_norm": 895.9558715820312, - "learning_rate": 4.574738632573881e-05, - "loss": 69.907, - "step": 67390 - }, - { - "epoch": 0.27230452857783505, - "grad_norm": 585.9174194335938, - "learning_rate": 4.574543861903274e-05, - "loss": 88.2039, - "step": 67400 - }, - { - "epoch": 0.2723449298432027, - "grad_norm": 556.5941162109375, - "learning_rate": 4.5743490507883685e-05, - "loss": 128.176, - "step": 67410 - }, - { - "epoch": 0.2723853311085703, - "grad_norm": 730.61865234375, - "learning_rate": 4.574154199232959e-05, - "loss": 69.3492, - "step": 67420 - }, - { - "epoch": 0.27242573237393797, - "grad_norm": 887.3568725585938, - "learning_rate": 4.573959307240847e-05, - "loss": 69.9397, - "step": 67430 - }, - { - "epoch": 0.2724661336393056, - "grad_norm": 549.9888916015625, - "learning_rate": 4.5737643748158295e-05, - "loss": 76.4168, - "step": 67440 - }, - { - "epoch": 0.2725065349046732, - "grad_norm": 1629.8140869140625, - "learning_rate": 4.573569401961708e-05, - "loss": 59.1505, - "step": 67450 - }, - { - "epoch": 0.27254693617004083, - "grad_norm": 982.314453125, - "learning_rate": 4.573374388682283e-05, - "loss": 70.0269, - "step": 67460 - }, - { - "epoch": 0.27258733743540847, - "grad_norm": 499.3196716308594, - "learning_rate": 4.573179334981358e-05, - "loss": 41.5002, - "step": 67470 - }, - { - "epoch": 0.2726277387007761, - "grad_norm": 764.8738403320312, - "learning_rate": 4.5729842408627334e-05, - "loss": 65.5099, - "step": 67480 - }, - { - "epoch": 0.27266813996614375, - "grad_norm": 813.9560546875, - "learning_rate": 4.572789106330214e-05, - "loss": 67.2205, - "step": 67490 - }, - { - "epoch": 0.2727085412315114, - "grad_norm": 813.4544677734375, - "learning_rate": 4.572593931387604e-05, - "loss": 58.4749, - "step": 67500 - }, - { - "epoch": 0.272748942496879, - "grad_norm": 1321.0914306640625, - "learning_rate": 4.572398716038709e-05, - "loss": 76.9033, - "step": 67510 - }, - { - "epoch": 0.2727893437622466, - "grad_norm": 809.7509155273438, - "learning_rate": 4.572203460287333e-05, - "loss": 44.7534, - "step": 67520 - }, - { - "epoch": 0.27282974502761426, - "grad_norm": 502.35369873046875, - "learning_rate": 4.5720081641372844e-05, - "loss": 75.6196, - "step": 67530 - }, - { - "epoch": 0.2728701462929819, - "grad_norm": 285.2467956542969, - "learning_rate": 4.57181282759237e-05, - "loss": 49.662, - "step": 67540 - }, - { - "epoch": 0.27291054755834954, - "grad_norm": 1159.55419921875, - "learning_rate": 4.571617450656397e-05, - "loss": 68.6516, - "step": 67550 - }, - { - "epoch": 0.2729509488237172, - "grad_norm": 883.5384521484375, - "learning_rate": 4.5714220333331756e-05, - "loss": 48.4618, - "step": 67560 - }, - { - "epoch": 0.2729913500890848, - "grad_norm": 714.6936645507812, - "learning_rate": 4.571226575626516e-05, - "loss": 70.2103, - "step": 67570 - }, - { - "epoch": 0.2730317513544524, - "grad_norm": 803.3701782226562, - "learning_rate": 4.5710310775402274e-05, - "loss": 65.889, - "step": 67580 - }, - { - "epoch": 0.27307215261982004, - "grad_norm": 463.9601745605469, - "learning_rate": 4.570835539078121e-05, - "loss": 65.3356, - "step": 67590 - }, - { - "epoch": 0.2731125538851877, - "grad_norm": 5190.55419921875, - "learning_rate": 4.5706399602440106e-05, - "loss": 79.4295, - "step": 67600 - }, - { - "epoch": 0.2731529551505553, - "grad_norm": 1560.6190185546875, - "learning_rate": 4.5704443410417075e-05, - "loss": 60.054, - "step": 67610 - }, - { - "epoch": 0.27319335641592296, - "grad_norm": 566.1893310546875, - "learning_rate": 4.5702486814750265e-05, - "loss": 65.9659, - "step": 67620 - }, - { - "epoch": 0.2732337576812906, - "grad_norm": 497.7099914550781, - "learning_rate": 4.570052981547782e-05, - "loss": 60.542, - "step": 67630 - }, - { - "epoch": 0.2732741589466582, - "grad_norm": 595.028076171875, - "learning_rate": 4.569857241263788e-05, - "loss": 113.0741, - "step": 67640 - }, - { - "epoch": 0.2733145602120258, - "grad_norm": 562.2115478515625, - "learning_rate": 4.569661460626862e-05, - "loss": 73.3425, - "step": 67650 - }, - { - "epoch": 0.27335496147739347, - "grad_norm": 3808.25244140625, - "learning_rate": 4.5694656396408195e-05, - "loss": 101.9904, - "step": 67660 - }, - { - "epoch": 0.2733953627427611, - "grad_norm": 1519.3927001953125, - "learning_rate": 4.56926977830948e-05, - "loss": 89.2418, - "step": 67670 - }, - { - "epoch": 0.27343576400812875, - "grad_norm": 1025.804931640625, - "learning_rate": 4.56907387663666e-05, - "loss": 69.6141, - "step": 67680 - }, - { - "epoch": 0.2734761652734964, - "grad_norm": 852.5979614257812, - "learning_rate": 4.56887793462618e-05, - "loss": 88.1962, - "step": 67690 - }, - { - "epoch": 0.27351656653886397, - "grad_norm": 589.6854858398438, - "learning_rate": 4.5686819522818594e-05, - "loss": 73.4965, - "step": 67700 - }, - { - "epoch": 0.2735569678042316, - "grad_norm": 1321.8143310546875, - "learning_rate": 4.56848592960752e-05, - "loss": 79.9866, - "step": 67710 - }, - { - "epoch": 0.27359736906959925, - "grad_norm": 1166.6270751953125, - "learning_rate": 4.568289866606981e-05, - "loss": 55.3361, - "step": 67720 - }, - { - "epoch": 0.2736377703349669, - "grad_norm": 440.9592590332031, - "learning_rate": 4.568093763284067e-05, - "loss": 41.0626, - "step": 67730 - }, - { - "epoch": 0.27367817160033453, - "grad_norm": 598.6520385742188, - "learning_rate": 4.567897619642601e-05, - "loss": 80.7803, - "step": 67740 - }, - { - "epoch": 0.27371857286570217, - "grad_norm": 897.6016235351562, - "learning_rate": 4.567701435686404e-05, - "loss": 94.469, - "step": 67750 - }, - { - "epoch": 0.2737589741310698, - "grad_norm": 926.674560546875, - "learning_rate": 4.567505211419305e-05, - "loss": 68.1472, - "step": 67760 - }, - { - "epoch": 0.2737993753964374, - "grad_norm": 497.29339599609375, - "learning_rate": 4.567308946845127e-05, - "loss": 47.2474, - "step": 67770 - }, - { - "epoch": 0.27383977666180503, - "grad_norm": 891.9127807617188, - "learning_rate": 4.567112641967697e-05, - "loss": 88.2951, - "step": 67780 - }, - { - "epoch": 0.2738801779271727, - "grad_norm": 655.0907592773438, - "learning_rate": 4.566916296790842e-05, - "loss": 61.0948, - "step": 67790 - }, - { - "epoch": 0.2739205791925403, - "grad_norm": 577.0123901367188, - "learning_rate": 4.566719911318389e-05, - "loss": 89.3447, - "step": 67800 - }, - { - "epoch": 0.27396098045790795, - "grad_norm": 598.5209350585938, - "learning_rate": 4.5665234855541675e-05, - "loss": 42.0639, - "step": 67810 - }, - { - "epoch": 0.2740013817232756, - "grad_norm": 578.243408203125, - "learning_rate": 4.566327019502007e-05, - "loss": 50.7425, - "step": 67820 - }, - { - "epoch": 0.2740417829886432, - "grad_norm": 849.9849853515625, - "learning_rate": 4.566130513165737e-05, - "loss": 58.8878, - "step": 67830 - }, - { - "epoch": 0.2740821842540108, - "grad_norm": 585.877685546875, - "learning_rate": 4.565933966549189e-05, - "loss": 64.4063, - "step": 67840 - }, - { - "epoch": 0.27412258551937846, - "grad_norm": 600.2329711914062, - "learning_rate": 4.565737379656195e-05, - "loss": 62.7049, - "step": 67850 - }, - { - "epoch": 0.2741629867847461, - "grad_norm": 1287.9954833984375, - "learning_rate": 4.5655407524905866e-05, - "loss": 55.8217, - "step": 67860 - }, - { - "epoch": 0.27420338805011374, - "grad_norm": 640.1063842773438, - "learning_rate": 4.5653440850561986e-05, - "loss": 107.2745, - "step": 67870 - }, - { - "epoch": 0.2742437893154814, - "grad_norm": 1059.8924560546875, - "learning_rate": 4.565147377356864e-05, - "loss": 64.9931, - "step": 67880 - }, - { - "epoch": 0.274284190580849, - "grad_norm": 1559.3463134765625, - "learning_rate": 4.564950629396418e-05, - "loss": 117.1461, - "step": 67890 - }, - { - "epoch": 0.2743245918462166, - "grad_norm": 367.62322998046875, - "learning_rate": 4.564753841178697e-05, - "loss": 45.144, - "step": 67900 - }, - { - "epoch": 0.27436499311158424, - "grad_norm": 2093.43505859375, - "learning_rate": 4.564557012707536e-05, - "loss": 118.0313, - "step": 67910 - }, - { - "epoch": 0.2744053943769519, - "grad_norm": 1412.019287109375, - "learning_rate": 4.5643601439867734e-05, - "loss": 85.6378, - "step": 67920 - }, - { - "epoch": 0.2744457956423195, - "grad_norm": 1107.6322021484375, - "learning_rate": 4.564163235020247e-05, - "loss": 77.5563, - "step": 67930 - }, - { - "epoch": 0.27448619690768716, - "grad_norm": 938.7551879882812, - "learning_rate": 4.563966285811796e-05, - "loss": 53.3907, - "step": 67940 - }, - { - "epoch": 0.2745265981730548, - "grad_norm": 898.0419921875, - "learning_rate": 4.5637692963652596e-05, - "loss": 56.9066, - "step": 67950 - }, - { - "epoch": 0.2745669994384224, - "grad_norm": 654.0993041992188, - "learning_rate": 4.5635722666844775e-05, - "loss": 71.2898, - "step": 67960 - }, - { - "epoch": 0.27460740070379, - "grad_norm": 762.6791381835938, - "learning_rate": 4.563375196773293e-05, - "loss": 111.2552, - "step": 67970 - }, - { - "epoch": 0.27464780196915767, - "grad_norm": 683.8074951171875, - "learning_rate": 4.5631780866355454e-05, - "loss": 79.4953, - "step": 67980 - }, - { - "epoch": 0.2746882032345253, - "grad_norm": 0.0, - "learning_rate": 4.56298093627508e-05, - "loss": 79.2197, - "step": 67990 - }, - { - "epoch": 0.27472860449989295, - "grad_norm": 525.245361328125, - "learning_rate": 4.562783745695738e-05, - "loss": 53.7112, - "step": 68000 - }, - { - "epoch": 0.2747690057652606, - "grad_norm": 588.7833251953125, - "learning_rate": 4.562586514901366e-05, - "loss": 50.6445, - "step": 68010 - }, - { - "epoch": 0.27480940703062817, - "grad_norm": 515.099853515625, - "learning_rate": 4.5623892438958074e-05, - "loss": 48.6461, - "step": 68020 - }, - { - "epoch": 0.2748498082959958, - "grad_norm": 835.2371826171875, - "learning_rate": 4.562191932682908e-05, - "loss": 49.5645, - "step": 68030 - }, - { - "epoch": 0.27489020956136345, - "grad_norm": 1118.2117919921875, - "learning_rate": 4.561994581266516e-05, - "loss": 97.7809, - "step": 68040 - }, - { - "epoch": 0.2749306108267311, - "grad_norm": 514.0653076171875, - "learning_rate": 4.561797189650478e-05, - "loss": 76.3595, - "step": 68050 - }, - { - "epoch": 0.27497101209209873, - "grad_norm": 461.3988342285156, - "learning_rate": 4.561599757838643e-05, - "loss": 66.2146, - "step": 68060 - }, - { - "epoch": 0.27501141335746637, - "grad_norm": 710.6083374023438, - "learning_rate": 4.561402285834858e-05, - "loss": 65.2828, - "step": 68070 - }, - { - "epoch": 0.275051814622834, - "grad_norm": 1474.451416015625, - "learning_rate": 4.561204773642974e-05, - "loss": 54.9727, - "step": 68080 - }, - { - "epoch": 0.2750922158882016, - "grad_norm": 606.7708129882812, - "learning_rate": 4.5610072212668434e-05, - "loss": 57.3314, - "step": 68090 - }, - { - "epoch": 0.27513261715356924, - "grad_norm": 706.001953125, - "learning_rate": 4.560809628710315e-05, - "loss": 72.5477, - "step": 68100 - }, - { - "epoch": 0.2751730184189369, - "grad_norm": 805.1146850585938, - "learning_rate": 4.560611995977242e-05, - "loss": 62.7448, - "step": 68110 - }, - { - "epoch": 0.2752134196843045, - "grad_norm": 502.4281311035156, - "learning_rate": 4.560414323071477e-05, - "loss": 40.9372, - "step": 68120 - }, - { - "epoch": 0.27525382094967216, - "grad_norm": 468.7350158691406, - "learning_rate": 4.560216609996874e-05, - "loss": 71.8095, - "step": 68130 - }, - { - "epoch": 0.2752942222150398, - "grad_norm": 557.2088623046875, - "learning_rate": 4.5600188567572876e-05, - "loss": 64.4124, - "step": 68140 - }, - { - "epoch": 0.2753346234804074, - "grad_norm": 1210.1270751953125, - "learning_rate": 4.559821063356574e-05, - "loss": 85.0767, - "step": 68150 - }, - { - "epoch": 0.275375024745775, - "grad_norm": 1038.1043701171875, - "learning_rate": 4.559623229798587e-05, - "loss": 71.8373, - "step": 68160 - }, - { - "epoch": 0.27541542601114266, - "grad_norm": 929.40576171875, - "learning_rate": 4.5594253560871854e-05, - "loss": 72.4507, - "step": 68170 - }, - { - "epoch": 0.2754558272765103, - "grad_norm": 957.2470092773438, - "learning_rate": 4.559227442226226e-05, - "loss": 97.403, - "step": 68180 - }, - { - "epoch": 0.27549622854187794, - "grad_norm": 706.5764770507812, - "learning_rate": 4.559029488219567e-05, - "loss": 64.6209, - "step": 68190 - }, - { - "epoch": 0.2755366298072456, - "grad_norm": 434.8182373046875, - "learning_rate": 4.558831494071069e-05, - "loss": 87.0814, - "step": 68200 - }, - { - "epoch": 0.2755770310726132, - "grad_norm": 760.851318359375, - "learning_rate": 4.5586334597845904e-05, - "loss": 80.2775, - "step": 68210 - }, - { - "epoch": 0.2756174323379808, - "grad_norm": 256.12982177734375, - "learning_rate": 4.558435385363993e-05, - "loss": 53.6615, - "step": 68220 - }, - { - "epoch": 0.27565783360334845, - "grad_norm": 421.9570617675781, - "learning_rate": 4.5582372708131385e-05, - "loss": 75.5364, - "step": 68230 - }, - { - "epoch": 0.2756982348687161, - "grad_norm": 812.7416381835938, - "learning_rate": 4.558039116135887e-05, - "loss": 65.1539, - "step": 68240 - }, - { - "epoch": 0.2757386361340837, - "grad_norm": 639.4966430664062, - "learning_rate": 4.557840921336105e-05, - "loss": 54.6641, - "step": 68250 - }, - { - "epoch": 0.27577903739945137, - "grad_norm": 370.4179992675781, - "learning_rate": 4.557642686417654e-05, - "loss": 98.5805, - "step": 68260 - }, - { - "epoch": 0.275819438664819, - "grad_norm": 516.9441528320312, - "learning_rate": 4.5574444113844e-05, - "loss": 76.0973, - "step": 68270 - }, - { - "epoch": 0.2758598399301866, - "grad_norm": 434.20660400390625, - "learning_rate": 4.5572460962402075e-05, - "loss": 59.694, - "step": 68280 - }, - { - "epoch": 0.27590024119555423, - "grad_norm": 1096.5814208984375, - "learning_rate": 4.557047740988944e-05, - "loss": 83.0501, - "step": 68290 - }, - { - "epoch": 0.27594064246092187, - "grad_norm": 679.775634765625, - "learning_rate": 4.556849345634475e-05, - "loss": 71.1283, - "step": 68300 - }, - { - "epoch": 0.2759810437262895, - "grad_norm": 1577.58251953125, - "learning_rate": 4.5566509101806695e-05, - "loss": 99.9247, - "step": 68310 - }, - { - "epoch": 0.27602144499165715, - "grad_norm": 1189.281982421875, - "learning_rate": 4.556452434631395e-05, - "loss": 80.3163, - "step": 68320 - }, - { - "epoch": 0.2760618462570248, - "grad_norm": 809.91015625, - "learning_rate": 4.5562539189905223e-05, - "loss": 104.2898, - "step": 68330 - }, - { - "epoch": 0.2761022475223924, - "grad_norm": 1438.5238037109375, - "learning_rate": 4.5560553632619205e-05, - "loss": 99.692, - "step": 68340 - }, - { - "epoch": 0.27614264878776, - "grad_norm": 330.85479736328125, - "learning_rate": 4.555856767449461e-05, - "loss": 67.1174, - "step": 68350 - }, - { - "epoch": 0.27618305005312765, - "grad_norm": 803.5668334960938, - "learning_rate": 4.555658131557015e-05, - "loss": 99.5837, - "step": 68360 - }, - { - "epoch": 0.2762234513184953, - "grad_norm": 674.5006713867188, - "learning_rate": 4.555459455588456e-05, - "loss": 65.4563, - "step": 68370 - }, - { - "epoch": 0.27626385258386293, - "grad_norm": 773.7620849609375, - "learning_rate": 4.555260739547657e-05, - "loss": 43.1016, - "step": 68380 - }, - { - "epoch": 0.2763042538492306, - "grad_norm": 639.3969116210938, - "learning_rate": 4.55506198343849e-05, - "loss": 78.3155, - "step": 68390 - }, - { - "epoch": 0.2763446551145982, - "grad_norm": 373.80694580078125, - "learning_rate": 4.5548631872648326e-05, - "loss": 83.402, - "step": 68400 - }, - { - "epoch": 0.2763850563799658, - "grad_norm": 420.725830078125, - "learning_rate": 4.55466435103056e-05, - "loss": 61.3818, - "step": 68410 - }, - { - "epoch": 0.27642545764533344, - "grad_norm": 877.37890625, - "learning_rate": 4.554465474739548e-05, - "loss": 59.281, - "step": 68420 - }, - { - "epoch": 0.2764658589107011, - "grad_norm": 295.0977478027344, - "learning_rate": 4.5542665583956736e-05, - "loss": 36.8095, - "step": 68430 - }, - { - "epoch": 0.2765062601760687, - "grad_norm": 288.376953125, - "learning_rate": 4.5540676020028145e-05, - "loss": 68.784, - "step": 68440 - }, - { - "epoch": 0.27654666144143636, - "grad_norm": 1751.3602294921875, - "learning_rate": 4.5538686055648506e-05, - "loss": 93.4555, - "step": 68450 - }, - { - "epoch": 0.276587062706804, - "grad_norm": 857.876953125, - "learning_rate": 4.5536695690856606e-05, - "loss": 53.5522, - "step": 68460 - }, - { - "epoch": 0.2766274639721716, - "grad_norm": 1057.3465576171875, - "learning_rate": 4.553470492569125e-05, - "loss": 58.7135, - "step": 68470 - }, - { - "epoch": 0.2766678652375392, - "grad_norm": 602.7147216796875, - "learning_rate": 4.553271376019125e-05, - "loss": 73.2039, - "step": 68480 - }, - { - "epoch": 0.27670826650290686, - "grad_norm": 799.6278686523438, - "learning_rate": 4.5530722194395425e-05, - "loss": 66.5906, - "step": 68490 - }, - { - "epoch": 0.2767486677682745, - "grad_norm": 539.0422973632812, - "learning_rate": 4.5528730228342605e-05, - "loss": 79.3498, - "step": 68500 - }, - { - "epoch": 0.27678906903364214, - "grad_norm": 838.4744873046875, - "learning_rate": 4.552673786207161e-05, - "loss": 46.1416, - "step": 68510 - }, - { - "epoch": 0.2768294702990098, - "grad_norm": 1674.3841552734375, - "learning_rate": 4.55247450956213e-05, - "loss": 72.0151, - "step": 68520 - }, - { - "epoch": 0.2768698715643774, - "grad_norm": 5940.70263671875, - "learning_rate": 4.552275192903052e-05, - "loss": 75.9517, - "step": 68530 - }, - { - "epoch": 0.276910272829745, - "grad_norm": 1089.8138427734375, - "learning_rate": 4.552075836233812e-05, - "loss": 97.0178, - "step": 68540 - }, - { - "epoch": 0.27695067409511265, - "grad_norm": 608.082275390625, - "learning_rate": 4.551876439558298e-05, - "loss": 85.0739, - "step": 68550 - }, - { - "epoch": 0.2769910753604803, - "grad_norm": 1392.0042724609375, - "learning_rate": 4.5516770028803954e-05, - "loss": 60.7242, - "step": 68560 - }, - { - "epoch": 0.27703147662584793, - "grad_norm": 758.804931640625, - "learning_rate": 4.5514775262039934e-05, - "loss": 61.6169, - "step": 68570 - }, - { - "epoch": 0.27707187789121557, - "grad_norm": 3674.3056640625, - "learning_rate": 4.551278009532981e-05, - "loss": 77.6101, - "step": 68580 - }, - { - "epoch": 0.2771122791565832, - "grad_norm": 1095.73876953125, - "learning_rate": 4.551078452871248e-05, - "loss": 54.9264, - "step": 68590 - }, - { - "epoch": 0.2771526804219508, - "grad_norm": 1469.385009765625, - "learning_rate": 4.550878856222685e-05, - "loss": 70.6401, - "step": 68600 - }, - { - "epoch": 0.27719308168731843, - "grad_norm": 866.8749389648438, - "learning_rate": 4.5506792195911817e-05, - "loss": 50.9549, - "step": 68610 - }, - { - "epoch": 0.27723348295268607, - "grad_norm": 735.7921142578125, - "learning_rate": 4.550479542980632e-05, - "loss": 85.6192, - "step": 68620 - }, - { - "epoch": 0.2772738842180537, - "grad_norm": 400.9674377441406, - "learning_rate": 4.550279826394928e-05, - "loss": 97.7626, - "step": 68630 - }, - { - "epoch": 0.27731428548342135, - "grad_norm": 700.01318359375, - "learning_rate": 4.5500800698379624e-05, - "loss": 66.2745, - "step": 68640 - }, - { - "epoch": 0.277354686748789, - "grad_norm": 1081.5074462890625, - "learning_rate": 4.549880273313631e-05, - "loss": 73.2586, - "step": 68650 - }, - { - "epoch": 0.2773950880141566, - "grad_norm": 770.979736328125, - "learning_rate": 4.5496804368258286e-05, - "loss": 56.8278, - "step": 68660 - }, - { - "epoch": 0.2774354892795242, - "grad_norm": 733.8268432617188, - "learning_rate": 4.549480560378451e-05, - "loss": 57.6247, - "step": 68670 - }, - { - "epoch": 0.27747589054489186, - "grad_norm": 744.9702758789062, - "learning_rate": 4.5492806439753935e-05, - "loss": 59.9518, - "step": 68680 - }, - { - "epoch": 0.2775162918102595, - "grad_norm": 701.0172729492188, - "learning_rate": 4.549080687620555e-05, - "loss": 86.6771, - "step": 68690 - }, - { - "epoch": 0.27755669307562714, - "grad_norm": 387.11041259765625, - "learning_rate": 4.548880691317835e-05, - "loss": 79.3791, - "step": 68700 - }, - { - "epoch": 0.2775970943409948, - "grad_norm": 1299.3056640625, - "learning_rate": 4.54868065507113e-05, - "loss": 64.44, - "step": 68710 - }, - { - "epoch": 0.2776374956063624, - "grad_norm": 795.0128173828125, - "learning_rate": 4.548480578884341e-05, - "loss": 60.6718, - "step": 68720 - }, - { - "epoch": 0.27767789687173, - "grad_norm": 332.22100830078125, - "learning_rate": 4.5482804627613686e-05, - "loss": 61.0125, - "step": 68730 - }, - { - "epoch": 0.27771829813709764, - "grad_norm": 525.1163940429688, - "learning_rate": 4.548080306706114e-05, - "loss": 74.0871, - "step": 68740 - }, - { - "epoch": 0.2777586994024653, - "grad_norm": 1393.9156494140625, - "learning_rate": 4.54788011072248e-05, - "loss": 72.4176, - "step": 68750 - }, - { - "epoch": 0.2777991006678329, - "grad_norm": 950.6893920898438, - "learning_rate": 4.547679874814368e-05, - "loss": 62.7376, - "step": 68760 - }, - { - "epoch": 0.27783950193320056, - "grad_norm": 1314.624267578125, - "learning_rate": 4.547479598985683e-05, - "loss": 95.1351, - "step": 68770 - }, - { - "epoch": 0.2778799031985682, - "grad_norm": 360.00762939453125, - "learning_rate": 4.547279283240329e-05, - "loss": 52.8572, - "step": 68780 - }, - { - "epoch": 0.2779203044639358, - "grad_norm": 789.6798706054688, - "learning_rate": 4.547078927582212e-05, - "loss": 79.6554, - "step": 68790 - }, - { - "epoch": 0.2779607057293034, - "grad_norm": 973.8438110351562, - "learning_rate": 4.5468785320152365e-05, - "loss": 66.0115, - "step": 68800 - }, - { - "epoch": 0.27800110699467107, - "grad_norm": 351.57525634765625, - "learning_rate": 4.546678096543311e-05, - "loss": 62.568, - "step": 68810 - }, - { - "epoch": 0.2780415082600387, - "grad_norm": 1510.792724609375, - "learning_rate": 4.546477621170342e-05, - "loss": 82.8039, - "step": 68820 - }, - { - "epoch": 0.27808190952540635, - "grad_norm": 1050.9688720703125, - "learning_rate": 4.546277105900237e-05, - "loss": 71.6794, - "step": 68830 - }, - { - "epoch": 0.278122310790774, - "grad_norm": 1624.618408203125, - "learning_rate": 4.5460765507369084e-05, - "loss": 64.593, - "step": 68840 - }, - { - "epoch": 0.2781627120561416, - "grad_norm": 1015.446533203125, - "learning_rate": 4.5458759556842624e-05, - "loss": 73.7418, - "step": 68850 - }, - { - "epoch": 0.2782031133215092, - "grad_norm": 1055.7169189453125, - "learning_rate": 4.545675320746212e-05, - "loss": 89.4522, - "step": 68860 - }, - { - "epoch": 0.27824351458687685, - "grad_norm": 939.6693725585938, - "learning_rate": 4.545474645926668e-05, - "loss": 55.4124, - "step": 68870 - }, - { - "epoch": 0.2782839158522445, - "grad_norm": 524.6759033203125, - "learning_rate": 4.5452739312295436e-05, - "loss": 67.2065, - "step": 68880 - }, - { - "epoch": 0.27832431711761213, - "grad_norm": 636.6887817382812, - "learning_rate": 4.54507317665875e-05, - "loss": 54.9611, - "step": 68890 - }, - { - "epoch": 0.27836471838297977, - "grad_norm": 1205.8494873046875, - "learning_rate": 4.544872382218202e-05, - "loss": 55.5338, - "step": 68900 - }, - { - "epoch": 0.2784051196483474, - "grad_norm": 400.2951965332031, - "learning_rate": 4.544671547911814e-05, - "loss": 72.6024, - "step": 68910 - }, - { - "epoch": 0.278445520913715, - "grad_norm": 671.6885986328125, - "learning_rate": 4.5444706737435014e-05, - "loss": 88.0325, - "step": 68920 - }, - { - "epoch": 0.27848592217908263, - "grad_norm": 905.302490234375, - "learning_rate": 4.544269759717181e-05, - "loss": 90.4285, - "step": 68930 - }, - { - "epoch": 0.2785263234444503, - "grad_norm": 713.8814086914062, - "learning_rate": 4.5440688058367686e-05, - "loss": 70.4518, - "step": 68940 - }, - { - "epoch": 0.2785667247098179, - "grad_norm": 986.9356689453125, - "learning_rate": 4.543867812106183e-05, - "loss": 61.4856, - "step": 68950 - }, - { - "epoch": 0.27860712597518555, - "grad_norm": 852.2457885742188, - "learning_rate": 4.543666778529342e-05, - "loss": 67.6646, - "step": 68960 - }, - { - "epoch": 0.2786475272405532, - "grad_norm": 1037.552490234375, - "learning_rate": 4.543465705110165e-05, - "loss": 50.1012, - "step": 68970 - }, - { - "epoch": 0.2786879285059208, - "grad_norm": 327.4298095703125, - "learning_rate": 4.543264591852572e-05, - "loss": 61.3465, - "step": 68980 - }, - { - "epoch": 0.2787283297712884, - "grad_norm": 866.1886596679688, - "learning_rate": 4.543063438760483e-05, - "loss": 48.84, - "step": 68990 - }, - { - "epoch": 0.27876873103665606, - "grad_norm": 708.6840209960938, - "learning_rate": 4.542862245837821e-05, - "loss": 66.3793, - "step": 69000 - }, - { - "epoch": 0.2788091323020237, - "grad_norm": 1597.6319580078125, - "learning_rate": 4.5426610130885087e-05, - "loss": 80.3279, - "step": 69010 - }, - { - "epoch": 0.27884953356739134, - "grad_norm": 906.7168579101562, - "learning_rate": 4.542459740516467e-05, - "loss": 73.8522, - "step": 69020 - }, - { - "epoch": 0.278889934832759, - "grad_norm": 890.2028198242188, - "learning_rate": 4.542258428125622e-05, - "loss": 90.2994, - "step": 69030 - }, - { - "epoch": 0.2789303360981266, - "grad_norm": 612.5167846679688, - "learning_rate": 4.542057075919897e-05, - "loss": 87.7907, - "step": 69040 - }, - { - "epoch": 0.2789707373634942, - "grad_norm": 983.6898193359375, - "learning_rate": 4.541855683903219e-05, - "loss": 65.7596, - "step": 69050 - }, - { - "epoch": 0.27901113862886184, - "grad_norm": 366.17645263671875, - "learning_rate": 4.541654252079513e-05, - "loss": 63.9307, - "step": 69060 - }, - { - "epoch": 0.2790515398942295, - "grad_norm": 489.0401306152344, - "learning_rate": 4.541452780452705e-05, - "loss": 41.8797, - "step": 69070 - }, - { - "epoch": 0.2790919411595971, - "grad_norm": 983.3743896484375, - "learning_rate": 4.5412512690267246e-05, - "loss": 111.5006, - "step": 69080 - }, - { - "epoch": 0.27913234242496476, - "grad_norm": 1284.0035400390625, - "learning_rate": 4.5410497178055e-05, - "loss": 58.7547, - "step": 69090 - }, - { - "epoch": 0.2791727436903324, - "grad_norm": 731.1244506835938, - "learning_rate": 4.5408481267929605e-05, - "loss": 81.0387, - "step": 69100 - }, - { - "epoch": 0.2792131449557, - "grad_norm": 717.0902099609375, - "learning_rate": 4.540646495993036e-05, - "loss": 130.641, - "step": 69110 - }, - { - "epoch": 0.27925354622106763, - "grad_norm": 844.9999389648438, - "learning_rate": 4.540444825409657e-05, - "loss": 67.0038, - "step": 69120 - }, - { - "epoch": 0.27929394748643527, - "grad_norm": 399.6801452636719, - "learning_rate": 4.540243115046756e-05, - "loss": 55.7817, - "step": 69130 - }, - { - "epoch": 0.2793343487518029, - "grad_norm": 438.31207275390625, - "learning_rate": 4.540041364908265e-05, - "loss": 63.9536, - "step": 69140 - }, - { - "epoch": 0.27937475001717055, - "grad_norm": 1663.7740478515625, - "learning_rate": 4.539839574998117e-05, - "loss": 80.7982, - "step": 69150 - }, - { - "epoch": 0.2794151512825382, - "grad_norm": 1818.9991455078125, - "learning_rate": 4.5396377453202466e-05, - "loss": 113.5125, - "step": 69160 - }, - { - "epoch": 0.27945555254790583, - "grad_norm": 736.188720703125, - "learning_rate": 4.539435875878588e-05, - "loss": 81.4038, - "step": 69170 - }, - { - "epoch": 0.2794959538132734, - "grad_norm": 879.2432250976562, - "learning_rate": 4.539233966677078e-05, - "loss": 87.2281, - "step": 69180 - }, - { - "epoch": 0.27953635507864105, - "grad_norm": 560.682373046875, - "learning_rate": 4.539032017719651e-05, - "loss": 82.0745, - "step": 69190 - }, - { - "epoch": 0.2795767563440087, - "grad_norm": 943.4149780273438, - "learning_rate": 4.5388300290102456e-05, - "loss": 117.8961, - "step": 69200 - }, - { - "epoch": 0.27961715760937633, - "grad_norm": 854.2255249023438, - "learning_rate": 4.538628000552799e-05, - "loss": 46.8455, - "step": 69210 - }, - { - "epoch": 0.27965755887474397, - "grad_norm": 768.5675659179688, - "learning_rate": 4.5384259323512504e-05, - "loss": 77.2018, - "step": 69220 - }, - { - "epoch": 0.2796979601401116, - "grad_norm": 790.1107177734375, - "learning_rate": 4.538223824409538e-05, - "loss": 62.6797, - "step": 69230 - }, - { - "epoch": 0.2797383614054792, - "grad_norm": 0.0, - "learning_rate": 4.538021676731603e-05, - "loss": 50.2399, - "step": 69240 - }, - { - "epoch": 0.27977876267084684, - "grad_norm": 2715.3037109375, - "learning_rate": 4.537819489321386e-05, - "loss": 107.9521, - "step": 69250 - }, - { - "epoch": 0.2798191639362145, - "grad_norm": 2060.965576171875, - "learning_rate": 4.537617262182829e-05, - "loss": 65.6806, - "step": 69260 - }, - { - "epoch": 0.2798595652015821, - "grad_norm": 1369.305419921875, - "learning_rate": 4.5374149953198746e-05, - "loss": 92.2145, - "step": 69270 - }, - { - "epoch": 0.27989996646694976, - "grad_norm": 830.0508422851562, - "learning_rate": 4.5372126887364655e-05, - "loss": 106.5613, - "step": 69280 - }, - { - "epoch": 0.2799403677323174, - "grad_norm": 942.8829345703125, - "learning_rate": 4.5370103424365474e-05, - "loss": 76.8458, - "step": 69290 - }, - { - "epoch": 0.279980768997685, - "grad_norm": 842.5079345703125, - "learning_rate": 4.536807956424063e-05, - "loss": 53.2971, - "step": 69300 - }, - { - "epoch": 0.2800211702630526, - "grad_norm": 1059.9853515625, - "learning_rate": 4.5366055307029585e-05, - "loss": 80.9336, - "step": 69310 - }, - { - "epoch": 0.28006157152842026, - "grad_norm": 326.0080261230469, - "learning_rate": 4.536403065277182e-05, - "loss": 78.7748, - "step": 69320 - }, - { - "epoch": 0.2801019727937879, - "grad_norm": 275.553955078125, - "learning_rate": 4.536200560150678e-05, - "loss": 69.185, - "step": 69330 - }, - { - "epoch": 0.28014237405915554, - "grad_norm": 686.8462524414062, - "learning_rate": 4.5359980153273964e-05, - "loss": 68.1509, - "step": 69340 - }, - { - "epoch": 0.2801827753245232, - "grad_norm": 630.9837646484375, - "learning_rate": 4.535795430811285e-05, - "loss": 107.0847, - "step": 69350 - }, - { - "epoch": 0.2802231765898908, - "grad_norm": 2537.67138671875, - "learning_rate": 4.535592806606294e-05, - "loss": 64.2041, - "step": 69360 - }, - { - "epoch": 0.2802635778552584, - "grad_norm": 871.0073852539062, - "learning_rate": 4.5353901427163725e-05, - "loss": 56.6685, - "step": 69370 - }, - { - "epoch": 0.28030397912062605, - "grad_norm": 805.7468872070312, - "learning_rate": 4.535187439145473e-05, - "loss": 56.4696, - "step": 69380 - }, - { - "epoch": 0.2803443803859937, - "grad_norm": 638.688720703125, - "learning_rate": 4.534984695897546e-05, - "loss": 61.3559, - "step": 69390 - }, - { - "epoch": 0.2803847816513613, - "grad_norm": 916.9631958007812, - "learning_rate": 4.534781912976546e-05, - "loss": 81.307, - "step": 69400 - }, - { - "epoch": 0.28042518291672897, - "grad_norm": 695.0220947265625, - "learning_rate": 4.534579090386424e-05, - "loss": 69.4657, - "step": 69410 - }, - { - "epoch": 0.2804655841820966, - "grad_norm": 2208.393310546875, - "learning_rate": 4.5343762281311345e-05, - "loss": 78.4579, - "step": 69420 - }, - { - "epoch": 0.2805059854474642, - "grad_norm": 903.3010864257812, - "learning_rate": 4.534173326214634e-05, - "loss": 85.2893, - "step": 69430 - }, - { - "epoch": 0.28054638671283183, - "grad_norm": 1086.91455078125, - "learning_rate": 4.533970384640877e-05, - "loss": 56.4757, - "step": 69440 - }, - { - "epoch": 0.28058678797819947, - "grad_norm": 841.8659057617188, - "learning_rate": 4.53376740341382e-05, - "loss": 71.8159, - "step": 69450 - }, - { - "epoch": 0.2806271892435671, - "grad_norm": 1415.8238525390625, - "learning_rate": 4.533564382537421e-05, - "loss": 62.5434, - "step": 69460 - }, - { - "epoch": 0.28066759050893475, - "grad_norm": 974.2418212890625, - "learning_rate": 4.533361322015637e-05, - "loss": 69.978, - "step": 69470 - }, - { - "epoch": 0.2807079917743024, - "grad_norm": 2103.853515625, - "learning_rate": 4.533158221852427e-05, - "loss": 81.6764, - "step": 69480 - }, - { - "epoch": 0.28074839303967003, - "grad_norm": 644.2200317382812, - "learning_rate": 4.532955082051751e-05, - "loss": 49.908, - "step": 69490 - }, - { - "epoch": 0.2807887943050376, - "grad_norm": 603.8206176757812, - "learning_rate": 4.532751902617569e-05, - "loss": 112.3736, - "step": 69500 - }, - { - "epoch": 0.28082919557040525, - "grad_norm": 700.8045654296875, - "learning_rate": 4.532548683553842e-05, - "loss": 65.1945, - "step": 69510 - }, - { - "epoch": 0.2808695968357729, - "grad_norm": 565.0137329101562, - "learning_rate": 4.5323454248645324e-05, - "loss": 89.2239, - "step": 69520 - }, - { - "epoch": 0.28090999810114053, - "grad_norm": 550.55859375, - "learning_rate": 4.532142126553603e-05, - "loss": 62.9486, - "step": 69530 - }, - { - "epoch": 0.2809503993665082, - "grad_norm": 819.1644287109375, - "learning_rate": 4.5319387886250156e-05, - "loss": 79.9527, - "step": 69540 - }, - { - "epoch": 0.2809908006318758, - "grad_norm": 1997.134033203125, - "learning_rate": 4.531735411082735e-05, - "loss": 73.3801, - "step": 69550 - }, - { - "epoch": 0.2810312018972434, - "grad_norm": 233.41488647460938, - "learning_rate": 4.531531993930727e-05, - "loss": 79.5541, - "step": 69560 - }, - { - "epoch": 0.28107160316261104, - "grad_norm": 981.9132080078125, - "learning_rate": 4.5313285371729575e-05, - "loss": 66.0245, - "step": 69570 - }, - { - "epoch": 0.2811120044279787, - "grad_norm": 600.988037109375, - "learning_rate": 4.531125040813392e-05, - "loss": 71.3912, - "step": 69580 - }, - { - "epoch": 0.2811524056933463, - "grad_norm": 760.9716796875, - "learning_rate": 4.530921504855997e-05, - "loss": 52.4341, - "step": 69590 - }, - { - "epoch": 0.28119280695871396, - "grad_norm": 1831.3338623046875, - "learning_rate": 4.530717929304743e-05, - "loss": 109.2623, - "step": 69600 - }, - { - "epoch": 0.2812332082240816, - "grad_norm": 838.1921997070312, - "learning_rate": 4.5305143141635976e-05, - "loss": 68.4752, - "step": 69610 - }, - { - "epoch": 0.2812736094894492, - "grad_norm": 446.0285949707031, - "learning_rate": 4.5303106594365296e-05, - "loss": 53.7918, - "step": 69620 - }, - { - "epoch": 0.2813140107548168, - "grad_norm": 1072.6981201171875, - "learning_rate": 4.530106965127511e-05, - "loss": 71.5331, - "step": 69630 - }, - { - "epoch": 0.28135441202018446, - "grad_norm": 841.3654174804688, - "learning_rate": 4.529903231240511e-05, - "loss": 74.8359, - "step": 69640 - }, - { - "epoch": 0.2813948132855521, - "grad_norm": 685.6234741210938, - "learning_rate": 4.529699457779503e-05, - "loss": 77.722, - "step": 69650 - }, - { - "epoch": 0.28143521455091974, - "grad_norm": 1382.5294189453125, - "learning_rate": 4.5294956447484584e-05, - "loss": 72.9175, - "step": 69660 - }, - { - "epoch": 0.2814756158162874, - "grad_norm": 651.1674194335938, - "learning_rate": 4.529291792151351e-05, - "loss": 73.6339, - "step": 69670 - }, - { - "epoch": 0.281516017081655, - "grad_norm": 540.8804931640625, - "learning_rate": 4.529087899992156e-05, - "loss": 36.5959, - "step": 69680 - }, - { - "epoch": 0.2815564183470226, - "grad_norm": 648.30615234375, - "learning_rate": 4.528883968274848e-05, - "loss": 96.0729, - "step": 69690 - }, - { - "epoch": 0.28159681961239025, - "grad_norm": 1536.320556640625, - "learning_rate": 4.528679997003403e-05, - "loss": 62.9985, - "step": 69700 - }, - { - "epoch": 0.2816372208777579, - "grad_norm": 458.5219421386719, - "learning_rate": 4.528475986181796e-05, - "loss": 69.9463, - "step": 69710 - }, - { - "epoch": 0.28167762214312553, - "grad_norm": 342.8268127441406, - "learning_rate": 4.5282719358140056e-05, - "loss": 87.2414, - "step": 69720 - }, - { - "epoch": 0.28171802340849317, - "grad_norm": 564.786376953125, - "learning_rate": 4.5280678459040095e-05, - "loss": 61.6622, - "step": 69730 - }, - { - "epoch": 0.2817584246738608, - "grad_norm": 665.1903686523438, - "learning_rate": 4.5278637164557866e-05, - "loss": 116.5534, - "step": 69740 - }, - { - "epoch": 0.2817988259392284, - "grad_norm": 736.6189575195312, - "learning_rate": 4.527659547473317e-05, - "loss": 67.6608, - "step": 69750 - }, - { - "epoch": 0.28183922720459603, - "grad_norm": 660.351806640625, - "learning_rate": 4.52745533896058e-05, - "loss": 84.5275, - "step": 69760 - }, - { - "epoch": 0.2818796284699637, - "grad_norm": 641.83642578125, - "learning_rate": 4.527251090921558e-05, - "loss": 64.5504, - "step": 69770 - }, - { - "epoch": 0.2819200297353313, - "grad_norm": 966.1611328125, - "learning_rate": 4.527046803360232e-05, - "loss": 66.6436, - "step": 69780 - }, - { - "epoch": 0.28196043100069895, - "grad_norm": 1488.1563720703125, - "learning_rate": 4.526842476280585e-05, - "loss": 72.4152, - "step": 69790 - }, - { - "epoch": 0.2820008322660666, - "grad_norm": 809.4866333007812, - "learning_rate": 4.5266381096866e-05, - "loss": 56.8854, - "step": 69800 - }, - { - "epoch": 0.28204123353143423, - "grad_norm": 945.8799438476562, - "learning_rate": 4.526433703582262e-05, - "loss": 85.1573, - "step": 69810 - }, - { - "epoch": 0.2820816347968018, - "grad_norm": 500.84307861328125, - "learning_rate": 4.5262292579715556e-05, - "loss": 37.6372, - "step": 69820 - }, - { - "epoch": 0.28212203606216946, - "grad_norm": 909.1983642578125, - "learning_rate": 4.526024772858467e-05, - "loss": 69.4535, - "step": 69830 - }, - { - "epoch": 0.2821624373275371, - "grad_norm": 1079.6373291015625, - "learning_rate": 4.525820248246982e-05, - "loss": 111.5219, - "step": 69840 - }, - { - "epoch": 0.28220283859290474, - "grad_norm": 727.4838256835938, - "learning_rate": 4.5256156841410886e-05, - "loss": 66.2702, - "step": 69850 - }, - { - "epoch": 0.2822432398582724, - "grad_norm": 1162.87353515625, - "learning_rate": 4.525411080544775e-05, - "loss": 62.2597, - "step": 69860 - }, - { - "epoch": 0.28228364112364, - "grad_norm": 2795.06640625, - "learning_rate": 4.5252064374620285e-05, - "loss": 99.6256, - "step": 69870 - }, - { - "epoch": 0.2823240423890076, - "grad_norm": 672.3999633789062, - "learning_rate": 4.5250017548968404e-05, - "loss": 104.1018, - "step": 69880 - }, - { - "epoch": 0.28236444365437524, - "grad_norm": 1359.24609375, - "learning_rate": 4.524797032853201e-05, - "loss": 90.7683, - "step": 69890 - }, - { - "epoch": 0.2824048449197429, - "grad_norm": 744.1480102539062, - "learning_rate": 4.5245922713350996e-05, - "loss": 73.5604, - "step": 69900 - }, - { - "epoch": 0.2824452461851105, - "grad_norm": 629.0797729492188, - "learning_rate": 4.524387470346531e-05, - "loss": 79.3253, - "step": 69910 - }, - { - "epoch": 0.28248564745047816, - "grad_norm": 846.8230590820312, - "learning_rate": 4.524182629891486e-05, - "loss": 52.2285, - "step": 69920 - }, - { - "epoch": 0.2825260487158458, - "grad_norm": 916.1362915039062, - "learning_rate": 4.523977749973958e-05, - "loss": 47.4978, - "step": 69930 - }, - { - "epoch": 0.2825664499812134, - "grad_norm": 878.1392822265625, - "learning_rate": 4.523772830597942e-05, - "loss": 69.1207, - "step": 69940 - }, - { - "epoch": 0.282606851246581, - "grad_norm": 1516.745361328125, - "learning_rate": 4.523567871767434e-05, - "loss": 70.5018, - "step": 69950 - }, - { - "epoch": 0.28264725251194867, - "grad_norm": 1540.39501953125, - "learning_rate": 4.523362873486427e-05, - "loss": 61.0742, - "step": 69960 - }, - { - "epoch": 0.2826876537773163, - "grad_norm": 588.6622314453125, - "learning_rate": 4.52315783575892e-05, - "loss": 105.1381, - "step": 69970 - }, - { - "epoch": 0.28272805504268395, - "grad_norm": 792.640380859375, - "learning_rate": 4.522952758588909e-05, - "loss": 74.2773, - "step": 69980 - }, - { - "epoch": 0.2827684563080516, - "grad_norm": 381.4082336425781, - "learning_rate": 4.5227476419803916e-05, - "loss": 42.4223, - "step": 69990 - }, - { - "epoch": 0.2828088575734192, - "grad_norm": 2376.44775390625, - "learning_rate": 4.522542485937369e-05, - "loss": 73.0111, - "step": 70000 - }, - { - "epoch": 0.2828492588387868, - "grad_norm": 478.2873229980469, - "learning_rate": 4.5223372904638386e-05, - "loss": 89.4105, - "step": 70010 - }, - { - "epoch": 0.28288966010415445, - "grad_norm": 1394.88623046875, - "learning_rate": 4.5221320555638016e-05, - "loss": 57.0316, - "step": 70020 - }, - { - "epoch": 0.2829300613695221, - "grad_norm": 1728.24267578125, - "learning_rate": 4.521926781241259e-05, - "loss": 60.0436, - "step": 70030 - }, - { - "epoch": 0.28297046263488973, - "grad_norm": 313.620361328125, - "learning_rate": 4.521721467500213e-05, - "loss": 78.582, - "step": 70040 - }, - { - "epoch": 0.28301086390025737, - "grad_norm": 876.0878295898438, - "learning_rate": 4.521516114344666e-05, - "loss": 96.7629, - "step": 70050 - }, - { - "epoch": 0.283051265165625, - "grad_norm": 715.8422241210938, - "learning_rate": 4.521310721778622e-05, - "loss": 68.7341, - "step": 70060 - }, - { - "epoch": 0.2830916664309926, - "grad_norm": 984.6596069335938, - "learning_rate": 4.5211052898060855e-05, - "loss": 85.3589, - "step": 70070 - }, - { - "epoch": 0.28313206769636023, - "grad_norm": 440.2537536621094, - "learning_rate": 4.5208998184310596e-05, - "loss": 66.0765, - "step": 70080 - }, - { - "epoch": 0.2831724689617279, - "grad_norm": 958.81982421875, - "learning_rate": 4.520694307657551e-05, - "loss": 62.4521, - "step": 70090 - }, - { - "epoch": 0.2832128702270955, - "grad_norm": 592.9948120117188, - "learning_rate": 4.5204887574895684e-05, - "loss": 76.5842, - "step": 70100 - }, - { - "epoch": 0.28325327149246315, - "grad_norm": 810.19287109375, - "learning_rate": 4.520283167931115e-05, - "loss": 67.7459, - "step": 70110 - }, - { - "epoch": 0.2832936727578308, - "grad_norm": 486.7115173339844, - "learning_rate": 4.5200775389862026e-05, - "loss": 57.3887, - "step": 70120 - }, - { - "epoch": 0.2833340740231984, - "grad_norm": 1011.84033203125, - "learning_rate": 4.519871870658838e-05, - "loss": 77.9214, - "step": 70130 - }, - { - "epoch": 0.283374475288566, - "grad_norm": 1170.3331298828125, - "learning_rate": 4.519666162953032e-05, - "loss": 81.3161, - "step": 70140 - }, - { - "epoch": 0.28341487655393366, - "grad_norm": 885.1085815429688, - "learning_rate": 4.519460415872794e-05, - "loss": 73.1431, - "step": 70150 - }, - { - "epoch": 0.2834552778193013, - "grad_norm": 746.1101684570312, - "learning_rate": 4.519254629422136e-05, - "loss": 75.5062, - "step": 70160 - }, - { - "epoch": 0.28349567908466894, - "grad_norm": 805.8640747070312, - "learning_rate": 4.5190488036050685e-05, - "loss": 62.1489, - "step": 70170 - }, - { - "epoch": 0.2835360803500366, - "grad_norm": 1288.6954345703125, - "learning_rate": 4.518842938425605e-05, - "loss": 70.3223, - "step": 70180 - }, - { - "epoch": 0.2835764816154042, - "grad_norm": 820.5307006835938, - "learning_rate": 4.51863703388776e-05, - "loss": 49.1447, - "step": 70190 - }, - { - "epoch": 0.2836168828807718, - "grad_norm": 279.9125671386719, - "learning_rate": 4.5184310899955465e-05, - "loss": 41.5823, - "step": 70200 - }, - { - "epoch": 0.28365728414613944, - "grad_norm": 2208.78564453125, - "learning_rate": 4.518225106752979e-05, - "loss": 63.2972, - "step": 70210 - }, - { - "epoch": 0.2836976854115071, - "grad_norm": 609.0946655273438, - "learning_rate": 4.5180190841640747e-05, - "loss": 59.1739, - "step": 70220 - }, - { - "epoch": 0.2837380866768747, - "grad_norm": 761.7667236328125, - "learning_rate": 4.517813022232849e-05, - "loss": 76.7521, - "step": 70230 - }, - { - "epoch": 0.28377848794224236, - "grad_norm": 623.1316528320312, - "learning_rate": 4.51760692096332e-05, - "loss": 54.5164, - "step": 70240 - }, - { - "epoch": 0.28381888920761, - "grad_norm": 875.2012329101562, - "learning_rate": 4.5174007803595055e-05, - "loss": 69.7034, - "step": 70250 - }, - { - "epoch": 0.2838592904729776, - "grad_norm": 294.3750915527344, - "learning_rate": 4.517194600425423e-05, - "loss": 75.7382, - "step": 70260 - }, - { - "epoch": 0.28389969173834523, - "grad_norm": 793.992919921875, - "learning_rate": 4.516988381165095e-05, - "loss": 57.0891, - "step": 70270 - }, - { - "epoch": 0.28394009300371287, - "grad_norm": 265.2918395996094, - "learning_rate": 4.516782122582538e-05, - "loss": 53.4736, - "step": 70280 - }, - { - "epoch": 0.2839804942690805, - "grad_norm": 637.1311645507812, - "learning_rate": 4.516575824681777e-05, - "loss": 89.6522, - "step": 70290 - }, - { - "epoch": 0.28402089553444815, - "grad_norm": 2700.84521484375, - "learning_rate": 4.516369487466832e-05, - "loss": 78.3818, - "step": 70300 - }, - { - "epoch": 0.2840612967998158, - "grad_norm": 775.3411865234375, - "learning_rate": 4.5161631109417246e-05, - "loss": 75.4549, - "step": 70310 - }, - { - "epoch": 0.28410169806518343, - "grad_norm": 616.4786376953125, - "learning_rate": 4.5159566951104796e-05, - "loss": 78.6188, - "step": 70320 - }, - { - "epoch": 0.284142099330551, - "grad_norm": 265.2325744628906, - "learning_rate": 4.515750239977122e-05, - "loss": 58.4777, - "step": 70330 - }, - { - "epoch": 0.28418250059591865, - "grad_norm": 356.9336242675781, - "learning_rate": 4.5155437455456744e-05, - "loss": 88.5666, - "step": 70340 - }, - { - "epoch": 0.2842229018612863, - "grad_norm": 1054.8193359375, - "learning_rate": 4.515337211820165e-05, - "loss": 74.0532, - "step": 70350 - }, - { - "epoch": 0.28426330312665393, - "grad_norm": 1073.7655029296875, - "learning_rate": 4.5151306388046175e-05, - "loss": 57.4097, - "step": 70360 - }, - { - "epoch": 0.2843037043920216, - "grad_norm": 1117.8515625, - "learning_rate": 4.5149240265030627e-05, - "loss": 91.991, - "step": 70370 - }, - { - "epoch": 0.2843441056573892, - "grad_norm": 554.7203979492188, - "learning_rate": 4.5147173749195255e-05, - "loss": 51.3412, - "step": 70380 - }, - { - "epoch": 0.2843845069227568, - "grad_norm": 669.3203125, - "learning_rate": 4.514510684058036e-05, - "loss": 90.7202, - "step": 70390 - }, - { - "epoch": 0.28442490818812444, - "grad_norm": 861.2157592773438, - "learning_rate": 4.5143039539226234e-05, - "loss": 70.1871, - "step": 70400 - }, - { - "epoch": 0.2844653094534921, - "grad_norm": 345.44781494140625, - "learning_rate": 4.514097184517318e-05, - "loss": 49.2981, - "step": 70410 - }, - { - "epoch": 0.2845057107188597, - "grad_norm": 1595.496337890625, - "learning_rate": 4.5138903758461515e-05, - "loss": 47.9407, - "step": 70420 - }, - { - "epoch": 0.28454611198422736, - "grad_norm": 560.5866088867188, - "learning_rate": 4.5136835279131556e-05, - "loss": 67.7868, - "step": 70430 - }, - { - "epoch": 0.284586513249595, - "grad_norm": 215.32069396972656, - "learning_rate": 4.513476640722362e-05, - "loss": 59.5168, - "step": 70440 - }, - { - "epoch": 0.2846269145149626, - "grad_norm": 858.737548828125, - "learning_rate": 4.513269714277805e-05, - "loss": 53.7752, - "step": 70450 - }, - { - "epoch": 0.2846673157803302, - "grad_norm": 575.0614624023438, - "learning_rate": 4.5130627485835186e-05, - "loss": 95.9444, - "step": 70460 - }, - { - "epoch": 0.28470771704569786, - "grad_norm": 2945.451416015625, - "learning_rate": 4.512855743643537e-05, - "loss": 69.3364, - "step": 70470 - }, - { - "epoch": 0.2847481183110655, - "grad_norm": 578.4557495117188, - "learning_rate": 4.512648699461897e-05, - "loss": 53.4026, - "step": 70480 - }, - { - "epoch": 0.28478851957643314, - "grad_norm": 362.3124084472656, - "learning_rate": 4.512441616042634e-05, - "loss": 86.2023, - "step": 70490 - }, - { - "epoch": 0.2848289208418008, - "grad_norm": 521.9320068359375, - "learning_rate": 4.512234493389785e-05, - "loss": 69.5704, - "step": 70500 - }, - { - "epoch": 0.2848693221071684, - "grad_norm": 606.285888671875, - "learning_rate": 4.5120273315073897e-05, - "loss": 67.9938, - "step": 70510 - }, - { - "epoch": 0.284909723372536, - "grad_norm": 977.4638671875, - "learning_rate": 4.511820130399485e-05, - "loss": 90.854, - "step": 70520 - }, - { - "epoch": 0.28495012463790365, - "grad_norm": 402.9257507324219, - "learning_rate": 4.5116128900701114e-05, - "loss": 79.0524, - "step": 70530 - }, - { - "epoch": 0.2849905259032713, - "grad_norm": 1074.8834228515625, - "learning_rate": 4.511405610523309e-05, - "loss": 80.932, - "step": 70540 - }, - { - "epoch": 0.2850309271686389, - "grad_norm": 1405.5771484375, - "learning_rate": 4.5111982917631194e-05, - "loss": 103.0241, - "step": 70550 - }, - { - "epoch": 0.28507132843400657, - "grad_norm": 1106.503662109375, - "learning_rate": 4.510990933793583e-05, - "loss": 70.0878, - "step": 70560 - }, - { - "epoch": 0.2851117296993742, - "grad_norm": 788.6580810546875, - "learning_rate": 4.5107835366187425e-05, - "loss": 32.8911, - "step": 70570 - }, - { - "epoch": 0.2851521309647418, - "grad_norm": 1015.0027465820312, - "learning_rate": 4.5105761002426415e-05, - "loss": 55.8387, - "step": 70580 - }, - { - "epoch": 0.28519253223010943, - "grad_norm": 537.4610595703125, - "learning_rate": 4.510368624669325e-05, - "loss": 58.8079, - "step": 70590 - }, - { - "epoch": 0.28523293349547707, - "grad_norm": 1022.7601318359375, - "learning_rate": 4.510161109902837e-05, - "loss": 66.04, - "step": 70600 - }, - { - "epoch": 0.2852733347608447, - "grad_norm": 785.5269165039062, - "learning_rate": 4.5099535559472234e-05, - "loss": 87.9221, - "step": 70610 - }, - { - "epoch": 0.28531373602621235, - "grad_norm": 765.5206298828125, - "learning_rate": 4.50974596280653e-05, - "loss": 72.3749, - "step": 70620 - }, - { - "epoch": 0.28535413729158, - "grad_norm": 1824.7486572265625, - "learning_rate": 4.509538330484805e-05, - "loss": 70.8314, - "step": 70630 - }, - { - "epoch": 0.28539453855694763, - "grad_norm": 784.1112670898438, - "learning_rate": 4.509330658986095e-05, - "loss": 64.7922, - "step": 70640 - }, - { - "epoch": 0.2854349398223152, - "grad_norm": 621.5699462890625, - "learning_rate": 4.5091229483144495e-05, - "loss": 94.8581, - "step": 70650 - }, - { - "epoch": 0.28547534108768285, - "grad_norm": 724.716796875, - "learning_rate": 4.508915198473919e-05, - "loss": 54.7563, - "step": 70660 - }, - { - "epoch": 0.2855157423530505, - "grad_norm": 435.5616149902344, - "learning_rate": 4.50870740946855e-05, - "loss": 69.268, - "step": 70670 - }, - { - "epoch": 0.28555614361841813, - "grad_norm": 1086.2598876953125, - "learning_rate": 4.508499581302398e-05, - "loss": 56.1892, - "step": 70680 - }, - { - "epoch": 0.2855965448837858, - "grad_norm": 1214.41015625, - "learning_rate": 4.5082917139795125e-05, - "loss": 75.7297, - "step": 70690 - }, - { - "epoch": 0.2856369461491534, - "grad_norm": 665.9967041015625, - "learning_rate": 4.508083807503945e-05, - "loss": 56.6401, - "step": 70700 - }, - { - "epoch": 0.285677347414521, - "grad_norm": 1049.6688232421875, - "learning_rate": 4.50787586187975e-05, - "loss": 70.0839, - "step": 70710 - }, - { - "epoch": 0.28571774867988864, - "grad_norm": 680.3385009765625, - "learning_rate": 4.507667877110982e-05, - "loss": 65.669, - "step": 70720 - }, - { - "epoch": 0.2857581499452563, - "grad_norm": 351.40545654296875, - "learning_rate": 4.507459853201695e-05, - "loss": 52.2754, - "step": 70730 - }, - { - "epoch": 0.2857985512106239, - "grad_norm": 699.9556274414062, - "learning_rate": 4.507251790155944e-05, - "loss": 66.7143, - "step": 70740 - }, - { - "epoch": 0.28583895247599156, - "grad_norm": 483.2270202636719, - "learning_rate": 4.5070436879777865e-05, - "loss": 72.3885, - "step": 70750 - }, - { - "epoch": 0.2858793537413592, - "grad_norm": 13059.109375, - "learning_rate": 4.506835546671278e-05, - "loss": 100.1066, - "step": 70760 - }, - { - "epoch": 0.2859197550067268, - "grad_norm": 1270.271240234375, - "learning_rate": 4.506627366240479e-05, - "loss": 76.3044, - "step": 70770 - }, - { - "epoch": 0.2859601562720944, - "grad_norm": 373.1097106933594, - "learning_rate": 4.506419146689446e-05, - "loss": 138.1525, - "step": 70780 - }, - { - "epoch": 0.28600055753746206, - "grad_norm": 627.6198120117188, - "learning_rate": 4.506210888022239e-05, - "loss": 80.5968, - "step": 70790 - }, - { - "epoch": 0.2860409588028297, - "grad_norm": 396.57012939453125, - "learning_rate": 4.5060025902429174e-05, - "loss": 53.7545, - "step": 70800 - }, - { - "epoch": 0.28608136006819734, - "grad_norm": 534.5454711914062, - "learning_rate": 4.505794253355542e-05, - "loss": 87.9787, - "step": 70810 - }, - { - "epoch": 0.286121761333565, - "grad_norm": 1753.4989013671875, - "learning_rate": 4.505585877364175e-05, - "loss": 80.0071, - "step": 70820 - }, - { - "epoch": 0.2861621625989326, - "grad_norm": 940.0836791992188, - "learning_rate": 4.505377462272879e-05, - "loss": 64.5939, - "step": 70830 - }, - { - "epoch": 0.2862025638643002, - "grad_norm": 524.5255737304688, - "learning_rate": 4.5051690080857176e-05, - "loss": 54.3899, - "step": 70840 - }, - { - "epoch": 0.28624296512966785, - "grad_norm": 982.0265502929688, - "learning_rate": 4.504960514806753e-05, - "loss": 64.2541, - "step": 70850 - }, - { - "epoch": 0.2862833663950355, - "grad_norm": 1201.5341796875, - "learning_rate": 4.504751982440052e-05, - "loss": 88.8859, - "step": 70860 - }, - { - "epoch": 0.28632376766040313, - "grad_norm": 624.1221313476562, - "learning_rate": 4.5045434109896786e-05, - "loss": 59.5705, - "step": 70870 - }, - { - "epoch": 0.28636416892577077, - "grad_norm": 422.28778076171875, - "learning_rate": 4.504334800459699e-05, - "loss": 44.9323, - "step": 70880 - }, - { - "epoch": 0.2864045701911384, - "grad_norm": 665.96923828125, - "learning_rate": 4.504126150854181e-05, - "loss": 60.9275, - "step": 70890 - }, - { - "epoch": 0.286444971456506, - "grad_norm": 1277.694580078125, - "learning_rate": 4.503917462177192e-05, - "loss": 77.8568, - "step": 70900 - }, - { - "epoch": 0.28648537272187363, - "grad_norm": 1883.61962890625, - "learning_rate": 4.5037087344328e-05, - "loss": 62.4995, - "step": 70910 - }, - { - "epoch": 0.2865257739872413, - "grad_norm": 885.0050659179688, - "learning_rate": 4.5034999676250745e-05, - "loss": 62.4988, - "step": 70920 - }, - { - "epoch": 0.2865661752526089, - "grad_norm": 1359.623779296875, - "learning_rate": 4.503291161758087e-05, - "loss": 71.7273, - "step": 70930 - }, - { - "epoch": 0.28660657651797655, - "grad_norm": 497.2101745605469, - "learning_rate": 4.5030823168359046e-05, - "loss": 76.8819, - "step": 70940 - }, - { - "epoch": 0.2866469777833442, - "grad_norm": 891.3270874023438, - "learning_rate": 4.502873432862603e-05, - "loss": 69.6958, - "step": 70950 - }, - { - "epoch": 0.28668737904871183, - "grad_norm": 441.0372314453125, - "learning_rate": 4.5026645098422515e-05, - "loss": 102.5139, - "step": 70960 - }, - { - "epoch": 0.2867277803140794, - "grad_norm": 993.6549072265625, - "learning_rate": 4.5024555477789255e-05, - "loss": 79.794, - "step": 70970 - }, - { - "epoch": 0.28676818157944706, - "grad_norm": 3066.687255859375, - "learning_rate": 4.5022465466766974e-05, - "loss": 105.9858, - "step": 70980 - }, - { - "epoch": 0.2868085828448147, - "grad_norm": 1127.01025390625, - "learning_rate": 4.502037506539642e-05, - "loss": 69.0879, - "step": 70990 - }, - { - "epoch": 0.28684898411018234, - "grad_norm": 1199.5037841796875, - "learning_rate": 4.5018284273718336e-05, - "loss": 58.2869, - "step": 71000 - }, - { - "epoch": 0.28688938537555, - "grad_norm": 706.9274291992188, - "learning_rate": 4.5016193091773504e-05, - "loss": 71.1858, - "step": 71010 - }, - { - "epoch": 0.2869297866409176, - "grad_norm": 758.85498046875, - "learning_rate": 4.501410151960268e-05, - "loss": 75.0063, - "step": 71020 - }, - { - "epoch": 0.2869701879062852, - "grad_norm": 938.1792602539062, - "learning_rate": 4.5012009557246645e-05, - "loss": 56.8306, - "step": 71030 - }, - { - "epoch": 0.28701058917165284, - "grad_norm": 539.9905395507812, - "learning_rate": 4.5009917204746184e-05, - "loss": 49.6551, - "step": 71040 - }, - { - "epoch": 0.2870509904370205, - "grad_norm": 1365.8111572265625, - "learning_rate": 4.5007824462142076e-05, - "loss": 67.7948, - "step": 71050 - }, - { - "epoch": 0.2870913917023881, - "grad_norm": 1149.0391845703125, - "learning_rate": 4.500573132947514e-05, - "loss": 62.5273, - "step": 71060 - }, - { - "epoch": 0.28713179296775576, - "grad_norm": 833.6195678710938, - "learning_rate": 4.500363780678617e-05, - "loss": 46.2563, - "step": 71070 - }, - { - "epoch": 0.2871721942331234, - "grad_norm": 2005.719482421875, - "learning_rate": 4.5001543894115975e-05, - "loss": 65.34, - "step": 71080 - }, - { - "epoch": 0.287212595498491, - "grad_norm": 499.4972839355469, - "learning_rate": 4.4999449591505396e-05, - "loss": 46.8325, - "step": 71090 - }, - { - "epoch": 0.2872529967638586, - "grad_norm": 1662.39013671875, - "learning_rate": 4.499735489899524e-05, - "loss": 75.1804, - "step": 71100 - }, - { - "epoch": 0.28729339802922627, - "grad_norm": 297.105712890625, - "learning_rate": 4.4995259816626356e-05, - "loss": 59.1684, - "step": 71110 - }, - { - "epoch": 0.2873337992945939, - "grad_norm": 506.93670654296875, - "learning_rate": 4.499316434443959e-05, - "loss": 41.7648, - "step": 71120 - }, - { - "epoch": 0.28737420055996155, - "grad_norm": 666.0968627929688, - "learning_rate": 4.49910684824758e-05, - "loss": 110.1385, - "step": 71130 - }, - { - "epoch": 0.2874146018253292, - "grad_norm": 786.0115966796875, - "learning_rate": 4.498897223077582e-05, - "loss": 72.202, - "step": 71140 - }, - { - "epoch": 0.2874550030906968, - "grad_norm": 7043.43310546875, - "learning_rate": 4.498687558938055e-05, - "loss": 108.4283, - "step": 71150 - }, - { - "epoch": 0.2874954043560644, - "grad_norm": 857.16748046875, - "learning_rate": 4.4984778558330844e-05, - "loss": 96.0511, - "step": 71160 - }, - { - "epoch": 0.28753580562143205, - "grad_norm": 637.8214111328125, - "learning_rate": 4.4982681137667594e-05, - "loss": 44.1582, - "step": 71170 - }, - { - "epoch": 0.2875762068867997, - "grad_norm": 904.2376098632812, - "learning_rate": 4.498058332743168e-05, - "loss": 98.6093, - "step": 71180 - }, - { - "epoch": 0.28761660815216733, - "grad_norm": 1919.4237060546875, - "learning_rate": 4.4978485127664015e-05, - "loss": 103.2059, - "step": 71190 - }, - { - "epoch": 0.28765700941753497, - "grad_norm": 810.6397094726562, - "learning_rate": 4.4976386538405495e-05, - "loss": 67.5545, - "step": 71200 - }, - { - "epoch": 0.2876974106829026, - "grad_norm": 1610.69384765625, - "learning_rate": 4.4974287559697035e-05, - "loss": 60.4293, - "step": 71210 - }, - { - "epoch": 0.2877378119482702, - "grad_norm": 1313.538818359375, - "learning_rate": 4.497218819157956e-05, - "loss": 57.7007, - "step": 71220 - }, - { - "epoch": 0.28777821321363783, - "grad_norm": 872.7113037109375, - "learning_rate": 4.497008843409399e-05, - "loss": 51.1471, - "step": 71230 - }, - { - "epoch": 0.2878186144790055, - "grad_norm": 398.4093017578125, - "learning_rate": 4.496798828728126e-05, - "loss": 73.8484, - "step": 71240 - }, - { - "epoch": 0.2878590157443731, - "grad_norm": 3197.44970703125, - "learning_rate": 4.496588775118232e-05, - "loss": 97.3321, - "step": 71250 - }, - { - "epoch": 0.28789941700974075, - "grad_norm": 422.7282409667969, - "learning_rate": 4.496378682583813e-05, - "loss": 79.5314, - "step": 71260 - }, - { - "epoch": 0.2879398182751084, - "grad_norm": 409.5416564941406, - "learning_rate": 4.4961685511289625e-05, - "loss": 93.6449, - "step": 71270 - }, - { - "epoch": 0.28798021954047603, - "grad_norm": 714.4796752929688, - "learning_rate": 4.495958380757779e-05, - "loss": 105.323, - "step": 71280 - }, - { - "epoch": 0.2880206208058436, - "grad_norm": 1594.0919189453125, - "learning_rate": 4.4957481714743585e-05, - "loss": 68.0684, - "step": 71290 - }, - { - "epoch": 0.28806102207121126, - "grad_norm": 460.36334228515625, - "learning_rate": 4.495537923282801e-05, - "loss": 64.8978, - "step": 71300 - }, - { - "epoch": 0.2881014233365789, - "grad_norm": 1116.5792236328125, - "learning_rate": 4.4953276361872034e-05, - "loss": 61.4736, - "step": 71310 - }, - { - "epoch": 0.28814182460194654, - "grad_norm": 463.9264221191406, - "learning_rate": 4.4951173101916675e-05, - "loss": 59.6178, - "step": 71320 - }, - { - "epoch": 0.2881822258673142, - "grad_norm": 958.1243896484375, - "learning_rate": 4.494906945300291e-05, - "loss": 76.7924, - "step": 71330 - }, - { - "epoch": 0.2882226271326818, - "grad_norm": 479.6456604003906, - "learning_rate": 4.4946965415171775e-05, - "loss": 57.5769, - "step": 71340 - }, - { - "epoch": 0.2882630283980494, - "grad_norm": 595.579345703125, - "learning_rate": 4.4944860988464276e-05, - "loss": 72.5989, - "step": 71350 - }, - { - "epoch": 0.28830342966341704, - "grad_norm": 821.9955444335938, - "learning_rate": 4.494275617292144e-05, - "loss": 74.0442, - "step": 71360 - }, - { - "epoch": 0.2883438309287847, - "grad_norm": 890.1593627929688, - "learning_rate": 4.494065096858432e-05, - "loss": 67.1569, - "step": 71370 - }, - { - "epoch": 0.2883842321941523, - "grad_norm": 935.3182983398438, - "learning_rate": 4.4938545375493934e-05, - "loss": 57.2469, - "step": 71380 - }, - { - "epoch": 0.28842463345951996, - "grad_norm": 930.1670532226562, - "learning_rate": 4.493643939369134e-05, - "loss": 62.9051, - "step": 71390 - }, - { - "epoch": 0.2884650347248876, - "grad_norm": 923.1588745117188, - "learning_rate": 4.493433302321759e-05, - "loss": 72.7095, - "step": 71400 - }, - { - "epoch": 0.2885054359902552, - "grad_norm": 1056.5206298828125, - "learning_rate": 4.4932226264113764e-05, - "loss": 89.8381, - "step": 71410 - }, - { - "epoch": 0.28854583725562283, - "grad_norm": 556.2738647460938, - "learning_rate": 4.493011911642092e-05, - "loss": 64.1478, - "step": 71420 - }, - { - "epoch": 0.28858623852099047, - "grad_norm": 614.498046875, - "learning_rate": 4.4928011580180155e-05, - "loss": 71.601, - "step": 71430 - }, - { - "epoch": 0.2886266397863581, - "grad_norm": 1232.3363037109375, - "learning_rate": 4.492590365543253e-05, - "loss": 59.9572, - "step": 71440 - }, - { - "epoch": 0.28866704105172575, - "grad_norm": 1182.1708984375, - "learning_rate": 4.492379534221916e-05, - "loss": 87.4886, - "step": 71450 - }, - { - "epoch": 0.2887074423170934, - "grad_norm": 703.7147827148438, - "learning_rate": 4.492168664058114e-05, - "loss": 48.6093, - "step": 71460 - }, - { - "epoch": 0.28874784358246103, - "grad_norm": 1096.2550048828125, - "learning_rate": 4.491957755055959e-05, - "loss": 55.8847, - "step": 71470 - }, - { - "epoch": 0.2887882448478286, - "grad_norm": 824.9928588867188, - "learning_rate": 4.491746807219561e-05, - "loss": 69.6475, - "step": 71480 - }, - { - "epoch": 0.28882864611319625, - "grad_norm": 799.056640625, - "learning_rate": 4.491535820553034e-05, - "loss": 74.3196, - "step": 71490 - }, - { - "epoch": 0.2888690473785639, - "grad_norm": 1376.468017578125, - "learning_rate": 4.491324795060491e-05, - "loss": 56.1541, - "step": 71500 - }, - { - "epoch": 0.28890944864393153, - "grad_norm": 792.1198120117188, - "learning_rate": 4.491113730746046e-05, - "loss": 80.2355, - "step": 71510 - }, - { - "epoch": 0.2889498499092992, - "grad_norm": 1302.2103271484375, - "learning_rate": 4.490902627613813e-05, - "loss": 70.5781, - "step": 71520 - }, - { - "epoch": 0.2889902511746668, - "grad_norm": 1214.075927734375, - "learning_rate": 4.4906914856679094e-05, - "loss": 84.3187, - "step": 71530 - }, - { - "epoch": 0.2890306524400344, - "grad_norm": 401.3418273925781, - "learning_rate": 4.49048030491245e-05, - "loss": 53.576, - "step": 71540 - }, - { - "epoch": 0.28907105370540204, - "grad_norm": 885.4319458007812, - "learning_rate": 4.490269085351552e-05, - "loss": 46.2776, - "step": 71550 - }, - { - "epoch": 0.2891114549707697, - "grad_norm": 410.9359436035156, - "learning_rate": 4.4900578269893335e-05, - "loss": 75.6896, - "step": 71560 - }, - { - "epoch": 0.2891518562361373, - "grad_norm": 858.1013793945312, - "learning_rate": 4.4898465298299134e-05, - "loss": 78.359, - "step": 71570 - }, - { - "epoch": 0.28919225750150496, - "grad_norm": 1423.806640625, - "learning_rate": 4.489635193877411e-05, - "loss": 62.488, - "step": 71580 - }, - { - "epoch": 0.2892326587668726, - "grad_norm": 962.8115234375, - "learning_rate": 4.489423819135945e-05, - "loss": 68.6732, - "step": 71590 - }, - { - "epoch": 0.28927306003224024, - "grad_norm": 737.3809814453125, - "learning_rate": 4.4892124056096386e-05, - "loss": 85.0963, - "step": 71600 - }, - { - "epoch": 0.2893134612976078, - "grad_norm": 699.3038330078125, - "learning_rate": 4.489000953302612e-05, - "loss": 39.0792, - "step": 71610 - }, - { - "epoch": 0.28935386256297546, - "grad_norm": 526.4671020507812, - "learning_rate": 4.488789462218987e-05, - "loss": 73.1894, - "step": 71620 - }, - { - "epoch": 0.2893942638283431, - "grad_norm": 869.59130859375, - "learning_rate": 4.4885779323628886e-05, - "loss": 80.5256, - "step": 71630 - }, - { - "epoch": 0.28943466509371074, - "grad_norm": 2488.22216796875, - "learning_rate": 4.4883663637384396e-05, - "loss": 96.78, - "step": 71640 - }, - { - "epoch": 0.2894750663590784, - "grad_norm": 322.6717529296875, - "learning_rate": 4.488154756349764e-05, - "loss": 55.7131, - "step": 71650 - }, - { - "epoch": 0.289515467624446, - "grad_norm": 762.85498046875, - "learning_rate": 4.4879431102009886e-05, - "loss": 83.2775, - "step": 71660 - }, - { - "epoch": 0.2895558688898136, - "grad_norm": 312.4606018066406, - "learning_rate": 4.487731425296238e-05, - "loss": 73.5523, - "step": 71670 - }, - { - "epoch": 0.28959627015518125, - "grad_norm": 774.15478515625, - "learning_rate": 4.487519701639641e-05, - "loss": 58.399, - "step": 71680 - }, - { - "epoch": 0.2896366714205489, - "grad_norm": 939.1637573242188, - "learning_rate": 4.487307939235323e-05, - "loss": 103.4214, - "step": 71690 - }, - { - "epoch": 0.2896770726859165, - "grad_norm": 378.4061584472656, - "learning_rate": 4.487096138087414e-05, - "loss": 54.6242, - "step": 71700 - }, - { - "epoch": 0.28971747395128417, - "grad_norm": 841.2785034179688, - "learning_rate": 4.4868842982000425e-05, - "loss": 92.4388, - "step": 71710 - }, - { - "epoch": 0.2897578752166518, - "grad_norm": 1951.40673828125, - "learning_rate": 4.486672419577339e-05, - "loss": 99.056, - "step": 71720 - }, - { - "epoch": 0.2897982764820194, - "grad_norm": 870.7053833007812, - "learning_rate": 4.486460502223434e-05, - "loss": 82.6444, - "step": 71730 - }, - { - "epoch": 0.28983867774738703, - "grad_norm": 1036.0379638671875, - "learning_rate": 4.4862485461424585e-05, - "loss": 84.8484, - "step": 71740 - }, - { - "epoch": 0.28987907901275467, - "grad_norm": 1319.0260009765625, - "learning_rate": 4.4860365513385456e-05, - "loss": 65.2683, - "step": 71750 - }, - { - "epoch": 0.2899194802781223, - "grad_norm": 623.9202270507812, - "learning_rate": 4.4858245178158276e-05, - "loss": 51.1636, - "step": 71760 - }, - { - "epoch": 0.28995988154348995, - "grad_norm": 1197.3074951171875, - "learning_rate": 4.4856124455784375e-05, - "loss": 57.0653, - "step": 71770 - }, - { - "epoch": 0.2900002828088576, - "grad_norm": 951.6260986328125, - "learning_rate": 4.485400334630511e-05, - "loss": 79.302, - "step": 71780 - }, - { - "epoch": 0.29004068407422523, - "grad_norm": 714.1635131835938, - "learning_rate": 4.485188184976182e-05, - "loss": 64.435, - "step": 71790 - }, - { - "epoch": 0.2900810853395928, - "grad_norm": 1034.5103759765625, - "learning_rate": 4.484975996619589e-05, - "loss": 69.878, - "step": 71800 - }, - { - "epoch": 0.29012148660496045, - "grad_norm": 2745.012939453125, - "learning_rate": 4.484763769564866e-05, - "loss": 46.8337, - "step": 71810 - }, - { - "epoch": 0.2901618878703281, - "grad_norm": 905.9317626953125, - "learning_rate": 4.4845515038161515e-05, - "loss": 72.7523, - "step": 71820 - }, - { - "epoch": 0.29020228913569573, - "grad_norm": 596.106689453125, - "learning_rate": 4.484339199377583e-05, - "loss": 56.2039, - "step": 71830 - }, - { - "epoch": 0.2902426904010634, - "grad_norm": 770.546875, - "learning_rate": 4.484126856253301e-05, - "loss": 63.8412, - "step": 71840 - }, - { - "epoch": 0.290283091666431, - "grad_norm": 1505.4610595703125, - "learning_rate": 4.483914474447445e-05, - "loss": 84.4822, - "step": 71850 - }, - { - "epoch": 0.2903234929317986, - "grad_norm": 925.32421875, - "learning_rate": 4.483702053964154e-05, - "loss": 58.1121, - "step": 71860 - }, - { - "epoch": 0.29036389419716624, - "grad_norm": 754.6409912109375, - "learning_rate": 4.4834895948075704e-05, - "loss": 94.093, - "step": 71870 - }, - { - "epoch": 0.2904042954625339, - "grad_norm": 296.52349853515625, - "learning_rate": 4.483277096981836e-05, - "loss": 70.3271, - "step": 71880 - }, - { - "epoch": 0.2904446967279015, - "grad_norm": 387.88568115234375, - "learning_rate": 4.483064560491094e-05, - "loss": 46.6679, - "step": 71890 - }, - { - "epoch": 0.29048509799326916, - "grad_norm": 628.3577270507812, - "learning_rate": 4.482851985339487e-05, - "loss": 73.2846, - "step": 71900 - }, - { - "epoch": 0.2905254992586368, - "grad_norm": 429.7192077636719, - "learning_rate": 4.4826393715311595e-05, - "loss": 54.1423, - "step": 71910 - }, - { - "epoch": 0.29056590052400444, - "grad_norm": 933.8616943359375, - "learning_rate": 4.482426719070258e-05, - "loss": 82.952, - "step": 71920 - }, - { - "epoch": 0.290606301789372, - "grad_norm": 1550.8494873046875, - "learning_rate": 4.482214027960925e-05, - "loss": 60.8338, - "step": 71930 - }, - { - "epoch": 0.29064670305473966, - "grad_norm": 725.9008178710938, - "learning_rate": 4.48200129820731e-05, - "loss": 58.8863, - "step": 71940 - }, - { - "epoch": 0.2906871043201073, - "grad_norm": 426.5110168457031, - "learning_rate": 4.481788529813559e-05, - "loss": 84.5091, - "step": 71950 - }, - { - "epoch": 0.29072750558547494, - "grad_norm": 735.1624145507812, - "learning_rate": 4.481575722783821e-05, - "loss": 71.0047, - "step": 71960 - }, - { - "epoch": 0.2907679068508426, - "grad_norm": 1465.6466064453125, - "learning_rate": 4.481362877122243e-05, - "loss": 93.0289, - "step": 71970 - }, - { - "epoch": 0.2908083081162102, - "grad_norm": 448.85919189453125, - "learning_rate": 4.481149992832977e-05, - "loss": 49.5786, - "step": 71980 - }, - { - "epoch": 0.2908487093815778, - "grad_norm": 1719.1485595703125, - "learning_rate": 4.4809370699201706e-05, - "loss": 99.739, - "step": 71990 - }, - { - "epoch": 0.29088911064694545, - "grad_norm": 2338.972900390625, - "learning_rate": 4.480724108387977e-05, - "loss": 94.2316, - "step": 72000 - }, - { - "epoch": 0.2909295119123131, - "grad_norm": 495.1227722167969, - "learning_rate": 4.480511108240547e-05, - "loss": 57.7428, - "step": 72010 - }, - { - "epoch": 0.29096991317768073, - "grad_norm": 516.1085205078125, - "learning_rate": 4.480298069482033e-05, - "loss": 65.6927, - "step": 72020 - }, - { - "epoch": 0.29101031444304837, - "grad_norm": 592.4568481445312, - "learning_rate": 4.480084992116589e-05, - "loss": 54.2324, - "step": 72030 - }, - { - "epoch": 0.291050715708416, - "grad_norm": 661.2754516601562, - "learning_rate": 4.479871876148368e-05, - "loss": 101.6369, - "step": 72040 - }, - { - "epoch": 0.2910911169737836, - "grad_norm": 1018.503662109375, - "learning_rate": 4.479658721581527e-05, - "loss": 87.5494, - "step": 72050 - }, - { - "epoch": 0.29113151823915123, - "grad_norm": 1803.326416015625, - "learning_rate": 4.479445528420218e-05, - "loss": 121.1224, - "step": 72060 - }, - { - "epoch": 0.2911719195045189, - "grad_norm": 773.8688354492188, - "learning_rate": 4.479232296668601e-05, - "loss": 40.3872, - "step": 72070 - }, - { - "epoch": 0.2912123207698865, - "grad_norm": 598.6536865234375, - "learning_rate": 4.4790190263308306e-05, - "loss": 61.5503, - "step": 72080 - }, - { - "epoch": 0.29125272203525415, - "grad_norm": 1390.3765869140625, - "learning_rate": 4.478805717411066e-05, - "loss": 75.8073, - "step": 72090 - }, - { - "epoch": 0.2912931233006218, - "grad_norm": 1810.283447265625, - "learning_rate": 4.478592369913465e-05, - "loss": 66.2582, - "step": 72100 - }, - { - "epoch": 0.29133352456598943, - "grad_norm": 585.9959106445312, - "learning_rate": 4.478378983842186e-05, - "loss": 60.1694, - "step": 72110 - }, - { - "epoch": 0.291373925831357, - "grad_norm": 589.87109375, - "learning_rate": 4.4781655592013914e-05, - "loss": 69.5412, - "step": 72120 - }, - { - "epoch": 0.29141432709672466, - "grad_norm": 829.7304077148438, - "learning_rate": 4.47795209599524e-05, - "loss": 57.3538, - "step": 72130 - }, - { - "epoch": 0.2914547283620923, - "grad_norm": 331.1748962402344, - "learning_rate": 4.477738594227895e-05, - "loss": 54.7226, - "step": 72140 - }, - { - "epoch": 0.29149512962745994, - "grad_norm": 1150.1204833984375, - "learning_rate": 4.4775250539035174e-05, - "loss": 101.6825, - "step": 72150 - }, - { - "epoch": 0.2915355308928276, - "grad_norm": 1445.37255859375, - "learning_rate": 4.477311475026271e-05, - "loss": 73.3006, - "step": 72160 - }, - { - "epoch": 0.2915759321581952, - "grad_norm": 1057.3179931640625, - "learning_rate": 4.4770978576003196e-05, - "loss": 87.7836, - "step": 72170 - }, - { - "epoch": 0.2916163334235628, - "grad_norm": 258.7021484375, - "learning_rate": 4.4768842016298275e-05, - "loss": 47.9227, - "step": 72180 - }, - { - "epoch": 0.29165673468893044, - "grad_norm": 572.38623046875, - "learning_rate": 4.4766705071189595e-05, - "loss": 94.038, - "step": 72190 - }, - { - "epoch": 0.2916971359542981, - "grad_norm": 1339.6978759765625, - "learning_rate": 4.4764567740718825e-05, - "loss": 55.9968, - "step": 72200 - }, - { - "epoch": 0.2917375372196657, - "grad_norm": 671.230712890625, - "learning_rate": 4.4762430024927636e-05, - "loss": 83.76, - "step": 72210 - }, - { - "epoch": 0.29177793848503336, - "grad_norm": 459.9236145019531, - "learning_rate": 4.476029192385769e-05, - "loss": 52.4123, - "step": 72220 - }, - { - "epoch": 0.291818339750401, - "grad_norm": 1072.2008056640625, - "learning_rate": 4.4758153437550684e-05, - "loss": 61.8481, - "step": 72230 - }, - { - "epoch": 0.29185874101576864, - "grad_norm": 1238.638671875, - "learning_rate": 4.475601456604831e-05, - "loss": 101.6763, - "step": 72240 - }, - { - "epoch": 0.2918991422811362, - "grad_norm": 1013.4456787109375, - "learning_rate": 4.4753875309392266e-05, - "loss": 83.2611, - "step": 72250 - }, - { - "epoch": 0.29193954354650387, - "grad_norm": 609.5685424804688, - "learning_rate": 4.4751735667624237e-05, - "loss": 83.8193, - "step": 72260 - }, - { - "epoch": 0.2919799448118715, - "grad_norm": 683.1033325195312, - "learning_rate": 4.474959564078596e-05, - "loss": 51.5649, - "step": 72270 - }, - { - "epoch": 0.29202034607723915, - "grad_norm": 799.1218872070312, - "learning_rate": 4.4747455228919146e-05, - "loss": 48.9159, - "step": 72280 - }, - { - "epoch": 0.2920607473426068, - "grad_norm": 316.8408508300781, - "learning_rate": 4.4745314432065535e-05, - "loss": 80.174, - "step": 72290 - }, - { - "epoch": 0.2921011486079744, - "grad_norm": 549.0940551757812, - "learning_rate": 4.474317325026684e-05, - "loss": 56.1946, - "step": 72300 - }, - { - "epoch": 0.292141549873342, - "grad_norm": 591.0458984375, - "learning_rate": 4.474103168356483e-05, - "loss": 101.0572, - "step": 72310 - }, - { - "epoch": 0.29218195113870965, - "grad_norm": 3971.22802734375, - "learning_rate": 4.4738889732001234e-05, - "loss": 65.9609, - "step": 72320 - }, - { - "epoch": 0.2922223524040773, - "grad_norm": 947.2832641601562, - "learning_rate": 4.473674739561783e-05, - "loss": 108.9696, - "step": 72330 - }, - { - "epoch": 0.29226275366944493, - "grad_norm": 745.0115356445312, - "learning_rate": 4.473460467445637e-05, - "loss": 53.7305, - "step": 72340 - }, - { - "epoch": 0.29230315493481257, - "grad_norm": 1212.1033935546875, - "learning_rate": 4.473246156855863e-05, - "loss": 85.5423, - "step": 72350 - }, - { - "epoch": 0.2923435562001802, - "grad_norm": 1858.6473388671875, - "learning_rate": 4.473031807796639e-05, - "loss": 84.1671, - "step": 72360 - }, - { - "epoch": 0.2923839574655478, - "grad_norm": 1183.3797607421875, - "learning_rate": 4.4728174202721444e-05, - "loss": 85.8308, - "step": 72370 - }, - { - "epoch": 0.29242435873091543, - "grad_norm": 564.0057373046875, - "learning_rate": 4.472602994286559e-05, - "loss": 62.2768, - "step": 72380 - }, - { - "epoch": 0.2924647599962831, - "grad_norm": 740.6309204101562, - "learning_rate": 4.472388529844062e-05, - "loss": 46.9198, - "step": 72390 - }, - { - "epoch": 0.2925051612616507, - "grad_norm": 1595.9669189453125, - "learning_rate": 4.4721740269488355e-05, - "loss": 61.2028, - "step": 72400 - }, - { - "epoch": 0.29254556252701835, - "grad_norm": 1151.5087890625, - "learning_rate": 4.4719594856050604e-05, - "loss": 62.4049, - "step": 72410 - }, - { - "epoch": 0.292585963792386, - "grad_norm": 475.585205078125, - "learning_rate": 4.4717449058169216e-05, - "loss": 50.4637, - "step": 72420 - }, - { - "epoch": 0.29262636505775363, - "grad_norm": 519.3822631835938, - "learning_rate": 4.471530287588599e-05, - "loss": 77.0086, - "step": 72430 - }, - { - "epoch": 0.2926667663231212, - "grad_norm": 993.6683959960938, - "learning_rate": 4.471315630924279e-05, - "loss": 76.9779, - "step": 72440 - }, - { - "epoch": 0.29270716758848886, - "grad_norm": 1110.651611328125, - "learning_rate": 4.4711009358281456e-05, - "loss": 73.6586, - "step": 72450 - }, - { - "epoch": 0.2927475688538565, - "grad_norm": 703.5302124023438, - "learning_rate": 4.4708862023043854e-05, - "loss": 68.1201, - "step": 72460 - }, - { - "epoch": 0.29278797011922414, - "grad_norm": 773.6651611328125, - "learning_rate": 4.470671430357183e-05, - "loss": 62.8462, - "step": 72470 - }, - { - "epoch": 0.2928283713845918, - "grad_norm": 806.9835815429688, - "learning_rate": 4.470456619990727e-05, - "loss": 71.4623, - "step": 72480 - }, - { - "epoch": 0.2928687726499594, - "grad_norm": 805.1903686523438, - "learning_rate": 4.470241771209205e-05, - "loss": 75.0459, - "step": 72490 - }, - { - "epoch": 0.292909173915327, - "grad_norm": 408.85089111328125, - "learning_rate": 4.4700268840168045e-05, - "loss": 58.6624, - "step": 72500 - }, - { - "epoch": 0.29294957518069464, - "grad_norm": 701.9678344726562, - "learning_rate": 4.469811958417717e-05, - "loss": 55.822, - "step": 72510 - }, - { - "epoch": 0.2929899764460623, - "grad_norm": 563.7380981445312, - "learning_rate": 4.46959699441613e-05, - "loss": 49.1962, - "step": 72520 - }, - { - "epoch": 0.2930303777114299, - "grad_norm": 736.0975952148438, - "learning_rate": 4.469381992016236e-05, - "loss": 42.205, - "step": 72530 - }, - { - "epoch": 0.29307077897679756, - "grad_norm": 375.850830078125, - "learning_rate": 4.469166951222227e-05, - "loss": 58.5806, - "step": 72540 - }, - { - "epoch": 0.2931111802421652, - "grad_norm": 828.135498046875, - "learning_rate": 4.4689518720382937e-05, - "loss": 69.4176, - "step": 72550 - }, - { - "epoch": 0.29315158150753284, - "grad_norm": 955.2371215820312, - "learning_rate": 4.46873675446863e-05, - "loss": 100.2446, - "step": 72560 - }, - { - "epoch": 0.29319198277290043, - "grad_norm": 808.4039916992188, - "learning_rate": 4.468521598517429e-05, - "loss": 79.4082, - "step": 72570 - }, - { - "epoch": 0.29323238403826807, - "grad_norm": 563.0283813476562, - "learning_rate": 4.468306404188887e-05, - "loss": 33.0342, - "step": 72580 - }, - { - "epoch": 0.2932727853036357, - "grad_norm": 2005.7269287109375, - "learning_rate": 4.468091171487197e-05, - "loss": 87.8131, - "step": 72590 - }, - { - "epoch": 0.29331318656900335, - "grad_norm": 1091.245849609375, - "learning_rate": 4.4678759004165584e-05, - "loss": 60.274, - "step": 72600 - }, - { - "epoch": 0.293353587834371, - "grad_norm": 773.4925537109375, - "learning_rate": 4.467660590981165e-05, - "loss": 43.7714, - "step": 72610 - }, - { - "epoch": 0.29339398909973863, - "grad_norm": 414.9910583496094, - "learning_rate": 4.4674452431852155e-05, - "loss": 43.9692, - "step": 72620 - }, - { - "epoch": 0.2934343903651062, - "grad_norm": 974.5853881835938, - "learning_rate": 4.467229857032907e-05, - "loss": 73.6409, - "step": 72630 - }, - { - "epoch": 0.29347479163047385, - "grad_norm": 879.56494140625, - "learning_rate": 4.4670144325284414e-05, - "loss": 103.894, - "step": 72640 - }, - { - "epoch": 0.2935151928958415, - "grad_norm": 522.8005981445312, - "learning_rate": 4.466798969676015e-05, - "loss": 108.3693, - "step": 72650 - }, - { - "epoch": 0.29355559416120913, - "grad_norm": 680.3546142578125, - "learning_rate": 4.4665834684798316e-05, - "loss": 58.7339, - "step": 72660 - }, - { - "epoch": 0.2935959954265768, - "grad_norm": 639.8063354492188, - "learning_rate": 4.4663679289440895e-05, - "loss": 69.8376, - "step": 72670 - }, - { - "epoch": 0.2936363966919444, - "grad_norm": 279.01385498046875, - "learning_rate": 4.466152351072994e-05, - "loss": 60.8839, - "step": 72680 - }, - { - "epoch": 0.293676797957312, - "grad_norm": 744.683349609375, - "learning_rate": 4.465936734870745e-05, - "loss": 59.2348, - "step": 72690 - }, - { - "epoch": 0.29371719922267964, - "grad_norm": 884.4476928710938, - "learning_rate": 4.465721080341547e-05, - "loss": 55.9587, - "step": 72700 - }, - { - "epoch": 0.2937576004880473, - "grad_norm": 736.0975341796875, - "learning_rate": 4.465505387489606e-05, - "loss": 85.0535, - "step": 72710 - }, - { - "epoch": 0.2937980017534149, - "grad_norm": 1129.6541748046875, - "learning_rate": 4.465289656319124e-05, - "loss": 166.6917, - "step": 72720 - }, - { - "epoch": 0.29383840301878256, - "grad_norm": 918.02587890625, - "learning_rate": 4.465073886834309e-05, - "loss": 80.1093, - "step": 72730 - }, - { - "epoch": 0.2938788042841502, - "grad_norm": 741.1550903320312, - "learning_rate": 4.464858079039367e-05, - "loss": 62.3389, - "step": 72740 - }, - { - "epoch": 0.29391920554951784, - "grad_norm": 1366.1309814453125, - "learning_rate": 4.464642232938505e-05, - "loss": 127.8192, - "step": 72750 - }, - { - "epoch": 0.2939596068148854, - "grad_norm": 691.9977416992188, - "learning_rate": 4.464426348535931e-05, - "loss": 71.4388, - "step": 72760 - }, - { - "epoch": 0.29400000808025306, - "grad_norm": 743.5264282226562, - "learning_rate": 4.464210425835854e-05, - "loss": 77.9983, - "step": 72770 - }, - { - "epoch": 0.2940404093456207, - "grad_norm": 570.9989624023438, - "learning_rate": 4.463994464842484e-05, - "loss": 66.5061, - "step": 72780 - }, - { - "epoch": 0.29408081061098834, - "grad_norm": 419.94793701171875, - "learning_rate": 4.46377846556003e-05, - "loss": 80.6933, - "step": 72790 - }, - { - "epoch": 0.294121211876356, - "grad_norm": 399.4372253417969, - "learning_rate": 4.4635624279927044e-05, - "loss": 82.7092, - "step": 72800 - }, - { - "epoch": 0.2941616131417236, - "grad_norm": 1092.372802734375, - "learning_rate": 4.463346352144718e-05, - "loss": 61.0069, - "step": 72810 - }, - { - "epoch": 0.2942020144070912, - "grad_norm": 1244.2633056640625, - "learning_rate": 4.463130238020285e-05, - "loss": 69.0925, - "step": 72820 - }, - { - "epoch": 0.29424241567245885, - "grad_norm": 3014.16796875, - "learning_rate": 4.4629140856236155e-05, - "loss": 95.4817, - "step": 72830 - }, - { - "epoch": 0.2942828169378265, - "grad_norm": 855.9567260742188, - "learning_rate": 4.462697894958926e-05, - "loss": 79.4187, - "step": 72840 - }, - { - "epoch": 0.2943232182031941, - "grad_norm": 588.0103149414062, - "learning_rate": 4.4624816660304314e-05, - "loss": 46.0367, - "step": 72850 - }, - { - "epoch": 0.29436361946856177, - "grad_norm": 2154.02734375, - "learning_rate": 4.4622653988423455e-05, - "loss": 116.9557, - "step": 72860 - }, - { - "epoch": 0.2944040207339294, - "grad_norm": 743.9976196289062, - "learning_rate": 4.462049093398885e-05, - "loss": 52.3771, - "step": 72870 - }, - { - "epoch": 0.29444442199929705, - "grad_norm": 1056.8822021484375, - "learning_rate": 4.461832749704268e-05, - "loss": 70.0739, - "step": 72880 - }, - { - "epoch": 0.29448482326466463, - "grad_norm": 860.94580078125, - "learning_rate": 4.461616367762711e-05, - "loss": 53.0582, - "step": 72890 - }, - { - "epoch": 0.29452522453003227, - "grad_norm": 319.7754821777344, - "learning_rate": 4.4613999475784336e-05, - "loss": 50.5095, - "step": 72900 - }, - { - "epoch": 0.2945656257953999, - "grad_norm": 1478.31982421875, - "learning_rate": 4.4611834891556534e-05, - "loss": 94.7775, - "step": 72910 - }, - { - "epoch": 0.29460602706076755, - "grad_norm": 548.7947998046875, - "learning_rate": 4.460966992498593e-05, - "loss": 96.3625, - "step": 72920 - }, - { - "epoch": 0.2946464283261352, - "grad_norm": 567.1472778320312, - "learning_rate": 4.46075045761147e-05, - "loss": 66.8417, - "step": 72930 - }, - { - "epoch": 0.29468682959150283, - "grad_norm": 510.6759033203125, - "learning_rate": 4.460533884498509e-05, - "loss": 43.7635, - "step": 72940 - }, - { - "epoch": 0.2947272308568704, - "grad_norm": 1200.787353515625, - "learning_rate": 4.460317273163929e-05, - "loss": 79.7278, - "step": 72950 - }, - { - "epoch": 0.29476763212223805, - "grad_norm": 1466.562255859375, - "learning_rate": 4.460100623611955e-05, - "loss": 80.256, - "step": 72960 - }, - { - "epoch": 0.2948080333876057, - "grad_norm": 1230.124267578125, - "learning_rate": 4.45988393584681e-05, - "loss": 94.8147, - "step": 72970 - }, - { - "epoch": 0.29484843465297333, - "grad_norm": 1354.7569580078125, - "learning_rate": 4.4596672098727195e-05, - "loss": 79.9409, - "step": 72980 - }, - { - "epoch": 0.294888835918341, - "grad_norm": 486.84246826171875, - "learning_rate": 4.459450445693907e-05, - "loss": 48.9059, - "step": 72990 - }, - { - "epoch": 0.2949292371837086, - "grad_norm": 606.8703002929688, - "learning_rate": 4.4592336433146e-05, - "loss": 56.8745, - "step": 73000 - }, - { - "epoch": 0.2949696384490762, - "grad_norm": 1210.473388671875, - "learning_rate": 4.459016802739023e-05, - "loss": 81.2079, - "step": 73010 - }, - { - "epoch": 0.29501003971444384, - "grad_norm": 467.654296875, - "learning_rate": 4.458799923971406e-05, - "loss": 71.5637, - "step": 73020 - }, - { - "epoch": 0.2950504409798115, - "grad_norm": 1085.084228515625, - "learning_rate": 4.4585830070159764e-05, - "loss": 59.4551, - "step": 73030 - }, - { - "epoch": 0.2950908422451791, - "grad_norm": 1020.708984375, - "learning_rate": 4.458366051876962e-05, - "loss": 67.9901, - "step": 73040 - }, - { - "epoch": 0.29513124351054676, - "grad_norm": 413.11859130859375, - "learning_rate": 4.458149058558594e-05, - "loss": 94.3448, - "step": 73050 - }, - { - "epoch": 0.2951716447759144, - "grad_norm": 344.1728210449219, - "learning_rate": 4.457932027065102e-05, - "loss": 42.2916, - "step": 73060 - }, - { - "epoch": 0.29521204604128204, - "grad_norm": 0.0, - "learning_rate": 4.457714957400716e-05, - "loss": 72.4169, - "step": 73070 - }, - { - "epoch": 0.2952524473066496, - "grad_norm": 1693.0472412109375, - "learning_rate": 4.45749784956967e-05, - "loss": 81.7885, - "step": 73080 - }, - { - "epoch": 0.29529284857201726, - "grad_norm": 676.3005981445312, - "learning_rate": 4.457280703576194e-05, - "loss": 69.0328, - "step": 73090 - }, - { - "epoch": 0.2953332498373849, - "grad_norm": 723.7576293945312, - "learning_rate": 4.457063519424525e-05, - "loss": 74.3711, - "step": 73100 - }, - { - "epoch": 0.29537365110275254, - "grad_norm": 397.1194152832031, - "learning_rate": 4.456846297118894e-05, - "loss": 65.241, - "step": 73110 - }, - { - "epoch": 0.2954140523681202, - "grad_norm": 686.8461303710938, - "learning_rate": 4.456629036663537e-05, - "loss": 66.4102, - "step": 73120 - }, - { - "epoch": 0.2954544536334878, - "grad_norm": 991.1904296875, - "learning_rate": 4.45641173806269e-05, - "loss": 79.4375, - "step": 73130 - }, - { - "epoch": 0.2954948548988554, - "grad_norm": 1777.7161865234375, - "learning_rate": 4.4561944013205885e-05, - "loss": 74.0132, - "step": 73140 - }, - { - "epoch": 0.29553525616422305, - "grad_norm": 1228.3016357421875, - "learning_rate": 4.45597702644147e-05, - "loss": 55.363, - "step": 73150 - }, - { - "epoch": 0.2955756574295907, - "grad_norm": 846.4840087890625, - "learning_rate": 4.455759613429573e-05, - "loss": 76.3519, - "step": 73160 - }, - { - "epoch": 0.29561605869495833, - "grad_norm": 639.7553100585938, - "learning_rate": 4.455542162289136e-05, - "loss": 83.1235, - "step": 73170 - }, - { - "epoch": 0.29565645996032597, - "grad_norm": 485.4346618652344, - "learning_rate": 4.455324673024396e-05, - "loss": 106.5092, - "step": 73180 - }, - { - "epoch": 0.2956968612256936, - "grad_norm": 1039.0308837890625, - "learning_rate": 4.4551071456395957e-05, - "loss": 55.8722, - "step": 73190 - }, - { - "epoch": 0.2957372624910612, - "grad_norm": 4481.669921875, - "learning_rate": 4.454889580138975e-05, - "loss": 92.0912, - "step": 73200 - }, - { - "epoch": 0.29577766375642883, - "grad_norm": 1369.180419921875, - "learning_rate": 4.454671976526776e-05, - "loss": 81.6197, - "step": 73210 - }, - { - "epoch": 0.2958180650217965, - "grad_norm": 1476.4033203125, - "learning_rate": 4.45445433480724e-05, - "loss": 60.5057, - "step": 73220 - }, - { - "epoch": 0.2958584662871641, - "grad_norm": 817.6678466796875, - "learning_rate": 4.45423665498461e-05, - "loss": 107.7844, - "step": 73230 - }, - { - "epoch": 0.29589886755253175, - "grad_norm": 1333.8826904296875, - "learning_rate": 4.4540189370631315e-05, - "loss": 100.9942, - "step": 73240 - }, - { - "epoch": 0.2959392688178994, - "grad_norm": 465.54510498046875, - "learning_rate": 4.453801181047047e-05, - "loss": 77.737, - "step": 73250 - }, - { - "epoch": 0.29597967008326703, - "grad_norm": 1762.44287109375, - "learning_rate": 4.4535833869406027e-05, - "loss": 58.7377, - "step": 73260 - }, - { - "epoch": 0.2960200713486346, - "grad_norm": 568.57958984375, - "learning_rate": 4.4533655547480444e-05, - "loss": 56.8368, - "step": 73270 - }, - { - "epoch": 0.29606047261400226, - "grad_norm": 1187.254638671875, - "learning_rate": 4.45314768447362e-05, - "loss": 88.9359, - "step": 73280 - }, - { - "epoch": 0.2961008738793699, - "grad_norm": 902.0029296875, - "learning_rate": 4.452929776121575e-05, - "loss": 69.4659, - "step": 73290 - }, - { - "epoch": 0.29614127514473754, - "grad_norm": 0.0, - "learning_rate": 4.452711829696158e-05, - "loss": 47.5744, - "step": 73300 - }, - { - "epoch": 0.2961816764101052, - "grad_norm": 407.5782775878906, - "learning_rate": 4.452493845201619e-05, - "loss": 63.7359, - "step": 73310 - }, - { - "epoch": 0.2962220776754728, - "grad_norm": 674.528076171875, - "learning_rate": 4.4522758226422076e-05, - "loss": 43.8548, - "step": 73320 - }, - { - "epoch": 0.2962624789408404, - "grad_norm": 679.5468139648438, - "learning_rate": 4.452057762022174e-05, - "loss": 56.5893, - "step": 73330 - }, - { - "epoch": 0.29630288020620804, - "grad_norm": 982.80517578125, - "learning_rate": 4.4518396633457696e-05, - "loss": 103.2029, - "step": 73340 - }, - { - "epoch": 0.2963432814715757, - "grad_norm": 529.539794921875, - "learning_rate": 4.4516215266172453e-05, - "loss": 36.5715, - "step": 73350 - }, - { - "epoch": 0.2963836827369433, - "grad_norm": 703.641357421875, - "learning_rate": 4.451403351840855e-05, - "loss": 68.3904, - "step": 73360 - }, - { - "epoch": 0.29642408400231096, - "grad_norm": 865.1505126953125, - "learning_rate": 4.451185139020852e-05, - "loss": 64.2055, - "step": 73370 - }, - { - "epoch": 0.2964644852676786, - "grad_norm": 718.0322265625, - "learning_rate": 4.4509668881614894e-05, - "loss": 102.1239, - "step": 73380 - }, - { - "epoch": 0.29650488653304624, - "grad_norm": 1524.7041015625, - "learning_rate": 4.450748599267024e-05, - "loss": 81.8768, - "step": 73390 - }, - { - "epoch": 0.2965452877984138, - "grad_norm": 378.5061950683594, - "learning_rate": 4.450530272341709e-05, - "loss": 56.3704, - "step": 73400 - }, - { - "epoch": 0.29658568906378147, - "grad_norm": 1193.3980712890625, - "learning_rate": 4.4503119073898024e-05, - "loss": 62.8788, - "step": 73410 - }, - { - "epoch": 0.2966260903291491, - "grad_norm": 972.7481079101562, - "learning_rate": 4.4500935044155626e-05, - "loss": 55.7444, - "step": 73420 - }, - { - "epoch": 0.29666649159451675, - "grad_norm": 496.94207763671875, - "learning_rate": 4.4498750634232445e-05, - "loss": 55.0117, - "step": 73430 - }, - { - "epoch": 0.2967068928598844, - "grad_norm": 690.003173828125, - "learning_rate": 4.449656584417108e-05, - "loss": 63.6232, - "step": 73440 - }, - { - "epoch": 0.296747294125252, - "grad_norm": 1239.76123046875, - "learning_rate": 4.449438067401413e-05, - "loss": 56.0802, - "step": 73450 - }, - { - "epoch": 0.2967876953906196, - "grad_norm": 1073.9417724609375, - "learning_rate": 4.44921951238042e-05, - "loss": 74.7659, - "step": 73460 - }, - { - "epoch": 0.29682809665598725, - "grad_norm": 612.8600463867188, - "learning_rate": 4.449000919358388e-05, - "loss": 49.4253, - "step": 73470 - }, - { - "epoch": 0.2968684979213549, - "grad_norm": 803.32763671875, - "learning_rate": 4.4487822883395805e-05, - "loss": 70.9369, - "step": 73480 - }, - { - "epoch": 0.29690889918672253, - "grad_norm": 999.6846313476562, - "learning_rate": 4.448563619328259e-05, - "loss": 77.0513, - "step": 73490 - }, - { - "epoch": 0.29694930045209017, - "grad_norm": 260.62261962890625, - "learning_rate": 4.448344912328686e-05, - "loss": 40.874, - "step": 73500 - }, - { - "epoch": 0.2969897017174578, - "grad_norm": 1337.423828125, - "learning_rate": 4.4481261673451255e-05, - "loss": 63.2176, - "step": 73510 - }, - { - "epoch": 0.2970301029828254, - "grad_norm": 328.9664306640625, - "learning_rate": 4.447907384381843e-05, - "loss": 58.7723, - "step": 73520 - }, - { - "epoch": 0.29707050424819303, - "grad_norm": 767.3320922851562, - "learning_rate": 4.447688563443103e-05, - "loss": 100.0262, - "step": 73530 - }, - { - "epoch": 0.2971109055135607, - "grad_norm": 251.83306884765625, - "learning_rate": 4.447469704533172e-05, - "loss": 58.1344, - "step": 73540 - }, - { - "epoch": 0.2971513067789283, - "grad_norm": 696.3643798828125, - "learning_rate": 4.4472508076563166e-05, - "loss": 80.2865, - "step": 73550 - }, - { - "epoch": 0.29719170804429595, - "grad_norm": 578.1966552734375, - "learning_rate": 4.447031872816804e-05, - "loss": 63.5288, - "step": 73560 - }, - { - "epoch": 0.2972321093096636, - "grad_norm": 1685.81982421875, - "learning_rate": 4.446812900018902e-05, - "loss": 98.5778, - "step": 73570 - }, - { - "epoch": 0.29727251057503123, - "grad_norm": 605.77978515625, - "learning_rate": 4.4465938892668814e-05, - "loss": 108.6232, - "step": 73580 - }, - { - "epoch": 0.2973129118403988, - "grad_norm": 425.3101501464844, - "learning_rate": 4.44637484056501e-05, - "loss": 81.9932, - "step": 73590 - }, - { - "epoch": 0.29735331310576646, - "grad_norm": 664.3129272460938, - "learning_rate": 4.4461557539175594e-05, - "loss": 80.7212, - "step": 73600 - }, - { - "epoch": 0.2973937143711341, - "grad_norm": 932.316650390625, - "learning_rate": 4.4459366293287994e-05, - "loss": 83.0597, - "step": 73610 - }, - { - "epoch": 0.29743411563650174, - "grad_norm": 1423.0401611328125, - "learning_rate": 4.445717466803004e-05, - "loss": 64.9128, - "step": 73620 - }, - { - "epoch": 0.2974745169018694, - "grad_norm": 572.2749633789062, - "learning_rate": 4.445498266344444e-05, - "loss": 49.8087, - "step": 73630 - }, - { - "epoch": 0.297514918167237, - "grad_norm": 1016.2572021484375, - "learning_rate": 4.445279027957395e-05, - "loss": 64.3123, - "step": 73640 - }, - { - "epoch": 0.2975553194326046, - "grad_norm": 1676.9608154296875, - "learning_rate": 4.4450597516461287e-05, - "loss": 79.8256, - "step": 73650 - }, - { - "epoch": 0.29759572069797224, - "grad_norm": 501.0017395019531, - "learning_rate": 4.444840437414922e-05, - "loss": 63.8892, - "step": 73660 - }, - { - "epoch": 0.2976361219633399, - "grad_norm": 1168.860107421875, - "learning_rate": 4.444621085268049e-05, - "loss": 59.1573, - "step": 73670 - }, - { - "epoch": 0.2976765232287075, - "grad_norm": 664.9098510742188, - "learning_rate": 4.444401695209788e-05, - "loss": 57.3835, - "step": 73680 - }, - { - "epoch": 0.29771692449407516, - "grad_norm": 1014.5247192382812, - "learning_rate": 4.4441822672444134e-05, - "loss": 89.5824, - "step": 73690 - }, - { - "epoch": 0.2977573257594428, - "grad_norm": 765.1936645507812, - "learning_rate": 4.443962801376205e-05, - "loss": 38.8063, - "step": 73700 - }, - { - "epoch": 0.29779772702481044, - "grad_norm": 928.1898193359375, - "learning_rate": 4.443743297609442e-05, - "loss": 62.8045, - "step": 73710 - }, - { - "epoch": 0.29783812829017803, - "grad_norm": 685.7420654296875, - "learning_rate": 4.443523755948401e-05, - "loss": 66.0851, - "step": 73720 - }, - { - "epoch": 0.29787852955554567, - "grad_norm": 881.26416015625, - "learning_rate": 4.443304176397365e-05, - "loss": 71.4241, - "step": 73730 - }, - { - "epoch": 0.2979189308209133, - "grad_norm": 1430.6566162109375, - "learning_rate": 4.443084558960613e-05, - "loss": 110.0562, - "step": 73740 - }, - { - "epoch": 0.29795933208628095, - "grad_norm": 476.3144226074219, - "learning_rate": 4.442864903642428e-05, - "loss": 63.0536, - "step": 73750 - }, - { - "epoch": 0.2979997333516486, - "grad_norm": 597.589111328125, - "learning_rate": 4.4426452104470903e-05, - "loss": 49.1393, - "step": 73760 - }, - { - "epoch": 0.29804013461701623, - "grad_norm": 673.9664306640625, - "learning_rate": 4.4424254793788844e-05, - "loss": 49.0798, - "step": 73770 - }, - { - "epoch": 0.2980805358823838, - "grad_norm": 1329.530029296875, - "learning_rate": 4.4422057104420946e-05, - "loss": 71.3935, - "step": 73780 - }, - { - "epoch": 0.29812093714775145, - "grad_norm": 690.584228515625, - "learning_rate": 4.4419859036410036e-05, - "loss": 73.6381, - "step": 73790 - }, - { - "epoch": 0.2981613384131191, - "grad_norm": 627.472900390625, - "learning_rate": 4.441766058979898e-05, - "loss": 82.9257, - "step": 73800 - }, - { - "epoch": 0.29820173967848673, - "grad_norm": 638.86279296875, - "learning_rate": 4.441546176463063e-05, - "loss": 57.4086, - "step": 73810 - }, - { - "epoch": 0.2982421409438544, - "grad_norm": 844.6746215820312, - "learning_rate": 4.441326256094787e-05, - "loss": 74.4755, - "step": 73820 - }, - { - "epoch": 0.298282542209222, - "grad_norm": 1176.2183837890625, - "learning_rate": 4.4411062978793545e-05, - "loss": 48.4431, - "step": 73830 - }, - { - "epoch": 0.2983229434745896, - "grad_norm": 762.2763671875, - "learning_rate": 4.4408863018210564e-05, - "loss": 54.5466, - "step": 73840 - }, - { - "epoch": 0.29836334473995724, - "grad_norm": 2829.393310546875, - "learning_rate": 4.44066626792418e-05, - "loss": 88.412, - "step": 73850 - }, - { - "epoch": 0.2984037460053249, - "grad_norm": 2878.249267578125, - "learning_rate": 4.440446196193016e-05, - "loss": 129.003, - "step": 73860 - }, - { - "epoch": 0.2984441472706925, - "grad_norm": 631.333984375, - "learning_rate": 4.440226086631854e-05, - "loss": 59.6416, - "step": 73870 - }, - { - "epoch": 0.29848454853606016, - "grad_norm": 858.2770385742188, - "learning_rate": 4.440005939244986e-05, - "loss": 54.7916, - "step": 73880 - }, - { - "epoch": 0.2985249498014278, - "grad_norm": 945.3146362304688, - "learning_rate": 4.439785754036703e-05, - "loss": 70.1292, - "step": 73890 - }, - { - "epoch": 0.29856535106679544, - "grad_norm": 915.803955078125, - "learning_rate": 4.439565531011299e-05, - "loss": 66.0254, - "step": 73900 - }, - { - "epoch": 0.298605752332163, - "grad_norm": 683.1613159179688, - "learning_rate": 4.4393452701730655e-05, - "loss": 55.0487, - "step": 73910 - }, - { - "epoch": 0.29864615359753066, - "grad_norm": 1310.735107421875, - "learning_rate": 4.439124971526297e-05, - "loss": 99.916, - "step": 73920 - }, - { - "epoch": 0.2986865548628983, - "grad_norm": 276.0152893066406, - "learning_rate": 4.4389046350752905e-05, - "loss": 46.5066, - "step": 73930 - }, - { - "epoch": 0.29872695612826594, - "grad_norm": 1337.242919921875, - "learning_rate": 4.438684260824339e-05, - "loss": 112.974, - "step": 73940 - }, - { - "epoch": 0.2987673573936336, - "grad_norm": 583.646728515625, - "learning_rate": 4.43846384877774e-05, - "loss": 49.2991, - "step": 73950 - }, - { - "epoch": 0.2988077586590012, - "grad_norm": 1229.034423828125, - "learning_rate": 4.4382433989397895e-05, - "loss": 56.1411, - "step": 73960 - }, - { - "epoch": 0.2988481599243688, - "grad_norm": 556.5376586914062, - "learning_rate": 4.4380229113147866e-05, - "loss": 69.2976, - "step": 73970 - }, - { - "epoch": 0.29888856118973645, - "grad_norm": 448.5850524902344, - "learning_rate": 4.437802385907029e-05, - "loss": 65.7849, - "step": 73980 - }, - { - "epoch": 0.2989289624551041, - "grad_norm": 412.0335388183594, - "learning_rate": 4.4375818227208164e-05, - "loss": 47.8312, - "step": 73990 - }, - { - "epoch": 0.2989693637204717, - "grad_norm": 549.283935546875, - "learning_rate": 4.4373612217604496e-05, - "loss": 60.2066, - "step": 74000 - }, - { - "epoch": 0.29900976498583937, - "grad_norm": 641.086181640625, - "learning_rate": 4.437140583030227e-05, - "loss": 81.0014, - "step": 74010 - }, - { - "epoch": 0.299050166251207, - "grad_norm": 470.0632629394531, - "learning_rate": 4.4369199065344525e-05, - "loss": 48.1158, - "step": 74020 - }, - { - "epoch": 0.29909056751657465, - "grad_norm": 479.7843933105469, - "learning_rate": 4.436699192277426e-05, - "loss": 62.3095, - "step": 74030 - }, - { - "epoch": 0.29913096878194223, - "grad_norm": 953.147705078125, - "learning_rate": 4.436478440263453e-05, - "loss": 69.0217, - "step": 74040 - }, - { - "epoch": 0.29917137004730987, - "grad_norm": 609.3707885742188, - "learning_rate": 4.436257650496834e-05, - "loss": 71.6211, - "step": 74050 - }, - { - "epoch": 0.2992117713126775, - "grad_norm": 1487.8802490234375, - "learning_rate": 4.436036822981877e-05, - "loss": 84.6675, - "step": 74060 - }, - { - "epoch": 0.29925217257804515, - "grad_norm": 894.4046630859375, - "learning_rate": 4.435815957722885e-05, - "loss": 51.4052, - "step": 74070 - }, - { - "epoch": 0.2992925738434128, - "grad_norm": 1185.888671875, - "learning_rate": 4.4355950547241645e-05, - "loss": 79.9474, - "step": 74080 - }, - { - "epoch": 0.29933297510878043, - "grad_norm": 2298.0419921875, - "learning_rate": 4.435374113990021e-05, - "loss": 62.4108, - "step": 74090 - }, - { - "epoch": 0.299373376374148, - "grad_norm": 1640.901611328125, - "learning_rate": 4.435153135524763e-05, - "loss": 108.3077, - "step": 74100 - }, - { - "epoch": 0.29941377763951565, - "grad_norm": 509.5463562011719, - "learning_rate": 4.434932119332699e-05, - "loss": 74.0899, - "step": 74110 - }, - { - "epoch": 0.2994541789048833, - "grad_norm": 1251.0087890625, - "learning_rate": 4.434711065418137e-05, - "loss": 117.1789, - "step": 74120 - }, - { - "epoch": 0.29949458017025093, - "grad_norm": 838.3406982421875, - "learning_rate": 4.434489973785386e-05, - "loss": 99.4823, - "step": 74130 - }, - { - "epoch": 0.2995349814356186, - "grad_norm": 924.2326049804688, - "learning_rate": 4.434268844438758e-05, - "loss": 60.6183, - "step": 74140 - }, - { - "epoch": 0.2995753827009862, - "grad_norm": 602.224365234375, - "learning_rate": 4.4340476773825625e-05, - "loss": 65.4386, - "step": 74150 - }, - { - "epoch": 0.2996157839663538, - "grad_norm": 1039.2386474609375, - "learning_rate": 4.433826472621112e-05, - "loss": 72.3676, - "step": 74160 - }, - { - "epoch": 0.29965618523172144, - "grad_norm": 1209.8310546875, - "learning_rate": 4.4336052301587185e-05, - "loss": 86.5389, - "step": 74170 - }, - { - "epoch": 0.2996965864970891, - "grad_norm": 664.8455200195312, - "learning_rate": 4.4333839499996954e-05, - "loss": 77.3693, - "step": 74180 - }, - { - "epoch": 0.2997369877624567, - "grad_norm": 373.7446594238281, - "learning_rate": 4.4331626321483575e-05, - "loss": 37.4625, - "step": 74190 - }, - { - "epoch": 0.29977738902782436, - "grad_norm": 587.56396484375, - "learning_rate": 4.432941276609018e-05, - "loss": 53.5302, - "step": 74200 - }, - { - "epoch": 0.299817790293192, - "grad_norm": 399.6770935058594, - "learning_rate": 4.432719883385994e-05, - "loss": 43.0968, - "step": 74210 - }, - { - "epoch": 0.29985819155855964, - "grad_norm": 455.2350158691406, - "learning_rate": 4.4324984524836e-05, - "loss": 65.6205, - "step": 74220 - }, - { - "epoch": 0.2998985928239272, - "grad_norm": 1229.9195556640625, - "learning_rate": 4.432276983906155e-05, - "loss": 59.1207, - "step": 74230 - }, - { - "epoch": 0.29993899408929486, - "grad_norm": 1179.552734375, - "learning_rate": 4.4320554776579747e-05, - "loss": 68.4554, - "step": 74240 - }, - { - "epoch": 0.2999793953546625, - "grad_norm": 563.2706298828125, - "learning_rate": 4.431833933743378e-05, - "loss": 51.8571, - "step": 74250 - }, - { - "epoch": 0.30001979662003014, - "grad_norm": 779.345947265625, - "learning_rate": 4.431612352166684e-05, - "loss": 81.9162, - "step": 74260 - }, - { - "epoch": 0.3000601978853978, - "grad_norm": 363.64324951171875, - "learning_rate": 4.431390732932213e-05, - "loss": 55.0468, - "step": 74270 - }, - { - "epoch": 0.3001005991507654, - "grad_norm": 1637.340087890625, - "learning_rate": 4.431169076044286e-05, - "loss": 81.3846, - "step": 74280 - }, - { - "epoch": 0.300141000416133, - "grad_norm": 563.9585571289062, - "learning_rate": 4.4309473815072225e-05, - "loss": 60.6945, - "step": 74290 - }, - { - "epoch": 0.30018140168150065, - "grad_norm": 749.6682739257812, - "learning_rate": 4.4307256493253457e-05, - "loss": 68.1788, - "step": 74300 - }, - { - "epoch": 0.3002218029468683, - "grad_norm": 5840.30810546875, - "learning_rate": 4.4305038795029794e-05, - "loss": 86.4264, - "step": 74310 - }, - { - "epoch": 0.30026220421223593, - "grad_norm": 910.1226806640625, - "learning_rate": 4.4302820720444456e-05, - "loss": 77.6066, - "step": 74320 - }, - { - "epoch": 0.30030260547760357, - "grad_norm": 709.5213012695312, - "learning_rate": 4.430060226954069e-05, - "loss": 120.0873, - "step": 74330 - }, - { - "epoch": 0.3003430067429712, - "grad_norm": 907.42431640625, - "learning_rate": 4.429838344236174e-05, - "loss": 82.4217, - "step": 74340 - }, - { - "epoch": 0.30038340800833885, - "grad_norm": 595.138671875, - "learning_rate": 4.4296164238950874e-05, - "loss": 77.8745, - "step": 74350 - }, - { - "epoch": 0.30042380927370643, - "grad_norm": 1740.2891845703125, - "learning_rate": 4.429394465935136e-05, - "loss": 70.9948, - "step": 74360 - }, - { - "epoch": 0.3004642105390741, - "grad_norm": 624.7880249023438, - "learning_rate": 4.429172470360645e-05, - "loss": 84.4048, - "step": 74370 - }, - { - "epoch": 0.3005046118044417, - "grad_norm": 632.6150512695312, - "learning_rate": 4.428950437175944e-05, - "loss": 65.9942, - "step": 74380 - }, - { - "epoch": 0.30054501306980935, - "grad_norm": 217.38211059570312, - "learning_rate": 4.428728366385361e-05, - "loss": 73.9153, - "step": 74390 - }, - { - "epoch": 0.300585414335177, - "grad_norm": 1739.6029052734375, - "learning_rate": 4.428506257993226e-05, - "loss": 78.2645, - "step": 74400 - }, - { - "epoch": 0.30062581560054463, - "grad_norm": 1436.2427978515625, - "learning_rate": 4.428284112003868e-05, - "loss": 71.5589, - "step": 74410 - }, - { - "epoch": 0.3006662168659122, - "grad_norm": 316.6112060546875, - "learning_rate": 4.428061928421618e-05, - "loss": 77.6914, - "step": 74420 - }, - { - "epoch": 0.30070661813127986, - "grad_norm": 711.2620849609375, - "learning_rate": 4.427839707250809e-05, - "loss": 77.8412, - "step": 74430 - }, - { - "epoch": 0.3007470193966475, - "grad_norm": 1620.897705078125, - "learning_rate": 4.427617448495772e-05, - "loss": 78.4815, - "step": 74440 - }, - { - "epoch": 0.30078742066201514, - "grad_norm": 519.2609252929688, - "learning_rate": 4.427395152160841e-05, - "loss": 41.6989, - "step": 74450 - }, - { - "epoch": 0.3008278219273828, - "grad_norm": 666.103515625, - "learning_rate": 4.427172818250349e-05, - "loss": 61.7915, - "step": 74460 - }, - { - "epoch": 0.3008682231927504, - "grad_norm": 611.0293579101562, - "learning_rate": 4.42695044676863e-05, - "loss": 63.6666, - "step": 74470 - }, - { - "epoch": 0.300908624458118, - "grad_norm": 789.2578125, - "learning_rate": 4.4267280377200205e-05, - "loss": 82.4072, - "step": 74480 - }, - { - "epoch": 0.30094902572348564, - "grad_norm": 1049.16650390625, - "learning_rate": 4.426505591108856e-05, - "loss": 67.3771, - "step": 74490 - }, - { - "epoch": 0.3009894269888533, - "grad_norm": 565.5245361328125, - "learning_rate": 4.426283106939474e-05, - "loss": 52.4517, - "step": 74500 - }, - { - "epoch": 0.3010298282542209, - "grad_norm": 1094.810546875, - "learning_rate": 4.42606058521621e-05, - "loss": 89.7887, - "step": 74510 - }, - { - "epoch": 0.30107022951958856, - "grad_norm": 938.249267578125, - "learning_rate": 4.425838025943403e-05, - "loss": 67.3641, - "step": 74520 - }, - { - "epoch": 0.3011106307849562, - "grad_norm": 857.2614135742188, - "learning_rate": 4.4256154291253925e-05, - "loss": 102.7043, - "step": 74530 - }, - { - "epoch": 0.30115103205032384, - "grad_norm": 520.708251953125, - "learning_rate": 4.4253927947665185e-05, - "loss": 66.7792, - "step": 74540 - }, - { - "epoch": 0.3011914333156914, - "grad_norm": 764.5960693359375, - "learning_rate": 4.42517012287112e-05, - "loss": 69.1683, - "step": 74550 - }, - { - "epoch": 0.30123183458105907, - "grad_norm": 1161.567138671875, - "learning_rate": 4.424947413443539e-05, - "loss": 75.7145, - "step": 74560 - }, - { - "epoch": 0.3012722358464267, - "grad_norm": 810.30126953125, - "learning_rate": 4.424724666488117e-05, - "loss": 60.572, - "step": 74570 - }, - { - "epoch": 0.30131263711179435, - "grad_norm": 1023.4842529296875, - "learning_rate": 4.424501882009198e-05, - "loss": 60.1247, - "step": 74580 - }, - { - "epoch": 0.301353038377162, - "grad_norm": 572.3932495117188, - "learning_rate": 4.424279060011123e-05, - "loss": 58.7828, - "step": 74590 - }, - { - "epoch": 0.3013934396425296, - "grad_norm": 1139.3538818359375, - "learning_rate": 4.4240562004982364e-05, - "loss": 89.5676, - "step": 74600 - }, - { - "epoch": 0.3014338409078972, - "grad_norm": 685.8850708007812, - "learning_rate": 4.423833303474884e-05, - "loss": 74.0729, - "step": 74610 - }, - { - "epoch": 0.30147424217326485, - "grad_norm": 358.7518005371094, - "learning_rate": 4.423610368945411e-05, - "loss": 64.4605, - "step": 74620 - }, - { - "epoch": 0.3015146434386325, - "grad_norm": 2760.231201171875, - "learning_rate": 4.423387396914164e-05, - "loss": 63.1559, - "step": 74630 - }, - { - "epoch": 0.30155504470400013, - "grad_norm": 1156.1490478515625, - "learning_rate": 4.423164387385489e-05, - "loss": 68.2249, - "step": 74640 - }, - { - "epoch": 0.30159544596936777, - "grad_norm": 397.64569091796875, - "learning_rate": 4.4229413403637345e-05, - "loss": 70.5398, - "step": 74650 - }, - { - "epoch": 0.3016358472347354, - "grad_norm": 488.1241760253906, - "learning_rate": 4.422718255853248e-05, - "loss": 63.8304, - "step": 74660 - }, - { - "epoch": 0.30167624850010305, - "grad_norm": 623.3941650390625, - "learning_rate": 4.42249513385838e-05, - "loss": 93.5955, - "step": 74670 - }, - { - "epoch": 0.30171664976547063, - "grad_norm": 584.9109497070312, - "learning_rate": 4.422271974383479e-05, - "loss": 63.7987, - "step": 74680 - }, - { - "epoch": 0.3017570510308383, - "grad_norm": 1291.9105224609375, - "learning_rate": 4.4220487774328964e-05, - "loss": 80.7659, - "step": 74690 - }, - { - "epoch": 0.3017974522962059, - "grad_norm": 921.5263671875, - "learning_rate": 4.421825543010983e-05, - "loss": 57.1126, - "step": 74700 - }, - { - "epoch": 0.30183785356157355, - "grad_norm": 840.3859252929688, - "learning_rate": 4.4216022711220916e-05, - "loss": 47.0774, - "step": 74710 - }, - { - "epoch": 0.3018782548269412, - "grad_norm": 1399.228759765625, - "learning_rate": 4.4213789617705746e-05, - "loss": 60.6638, - "step": 74720 - }, - { - "epoch": 0.30191865609230883, - "grad_norm": 574.983642578125, - "learning_rate": 4.421155614960785e-05, - "loss": 64.1311, - "step": 74730 - }, - { - "epoch": 0.3019590573576764, - "grad_norm": 1133.6746826171875, - "learning_rate": 4.420932230697079e-05, - "loss": 66.0676, - "step": 74740 - }, - { - "epoch": 0.30199945862304406, - "grad_norm": 2249.599853515625, - "learning_rate": 4.420708808983809e-05, - "loss": 72.025, - "step": 74750 - }, - { - "epoch": 0.3020398598884117, - "grad_norm": 837.6932983398438, - "learning_rate": 4.420485349825332e-05, - "loss": 68.1134, - "step": 74760 - }, - { - "epoch": 0.30208026115377934, - "grad_norm": 984.0842895507812, - "learning_rate": 4.4202618532260046e-05, - "loss": 91.3187, - "step": 74770 - }, - { - "epoch": 0.302120662419147, - "grad_norm": 727.6422729492188, - "learning_rate": 4.420038319190184e-05, - "loss": 63.3439, - "step": 74780 - }, - { - "epoch": 0.3021610636845146, - "grad_norm": 412.24908447265625, - "learning_rate": 4.4198147477222274e-05, - "loss": 60.6722, - "step": 74790 - }, - { - "epoch": 0.3022014649498822, - "grad_norm": 1043.6190185546875, - "learning_rate": 4.4195911388264946e-05, - "loss": 50.6668, - "step": 74800 - }, - { - "epoch": 0.30224186621524984, - "grad_norm": 432.61956787109375, - "learning_rate": 4.419367492507343e-05, - "loss": 53.3677, - "step": 74810 - }, - { - "epoch": 0.3022822674806175, - "grad_norm": 554.3751831054688, - "learning_rate": 4.419143808769135e-05, - "loss": 45.9557, - "step": 74820 - }, - { - "epoch": 0.3023226687459851, - "grad_norm": 880.563720703125, - "learning_rate": 4.4189200876162295e-05, - "loss": 79.8755, - "step": 74830 - }, - { - "epoch": 0.30236307001135276, - "grad_norm": 766.0863647460938, - "learning_rate": 4.41869632905299e-05, - "loss": 81.7201, - "step": 74840 - }, - { - "epoch": 0.3024034712767204, - "grad_norm": 512.21240234375, - "learning_rate": 4.418472533083777e-05, - "loss": 64.7566, - "step": 74850 - }, - { - "epoch": 0.30244387254208804, - "grad_norm": 1144.3148193359375, - "learning_rate": 4.418248699712955e-05, - "loss": 119.2652, - "step": 74860 - }, - { - "epoch": 0.30248427380745563, - "grad_norm": 535.4666137695312, - "learning_rate": 4.418024828944886e-05, - "loss": 60.0249, - "step": 74870 - }, - { - "epoch": 0.30252467507282327, - "grad_norm": 3031.205322265625, - "learning_rate": 4.417800920783937e-05, - "loss": 56.3927, - "step": 74880 - }, - { - "epoch": 0.3025650763381909, - "grad_norm": 1405.0643310546875, - "learning_rate": 4.4175769752344706e-05, - "loss": 78.4172, - "step": 74890 - }, - { - "epoch": 0.30260547760355855, - "grad_norm": 779.4258422851562, - "learning_rate": 4.417352992300854e-05, - "loss": 59.0052, - "step": 74900 - }, - { - "epoch": 0.3026458788689262, - "grad_norm": 1063.1361083984375, - "learning_rate": 4.4171289719874543e-05, - "loss": 66.4332, - "step": 74910 - }, - { - "epoch": 0.30268628013429383, - "grad_norm": 479.256103515625, - "learning_rate": 4.4169049142986376e-05, - "loss": 51.7122, - "step": 74920 - }, - { - "epoch": 0.3027266813996614, - "grad_norm": 482.245849609375, - "learning_rate": 4.416680819238773e-05, - "loss": 71.5557, - "step": 74930 - }, - { - "epoch": 0.30276708266502905, - "grad_norm": 1661.60791015625, - "learning_rate": 4.4164566868122286e-05, - "loss": 110.3273, - "step": 74940 - }, - { - "epoch": 0.3028074839303967, - "grad_norm": 763.4310913085938, - "learning_rate": 4.4162325170233745e-05, - "loss": 63.9054, - "step": 74950 - }, - { - "epoch": 0.30284788519576433, - "grad_norm": 581.7908325195312, - "learning_rate": 4.4160083098765815e-05, - "loss": 73.6799, - "step": 74960 - }, - { - "epoch": 0.302888286461132, - "grad_norm": 522.9912109375, - "learning_rate": 4.4157840653762196e-05, - "loss": 63.8081, - "step": 74970 - }, - { - "epoch": 0.3029286877264996, - "grad_norm": 671.7311401367188, - "learning_rate": 4.4155597835266616e-05, - "loss": 43.7649, - "step": 74980 - }, - { - "epoch": 0.30296908899186725, - "grad_norm": 655.3804931640625, - "learning_rate": 4.415335464332279e-05, - "loss": 68.5104, - "step": 74990 - }, - { - "epoch": 0.30300949025723484, - "grad_norm": 705.6524047851562, - "learning_rate": 4.415111107797445e-05, - "loss": 55.7934, - "step": 75000 - }, - { - "epoch": 0.3030498915226025, - "grad_norm": 2905.262939453125, - "learning_rate": 4.4148867139265345e-05, - "loss": 111.0617, - "step": 75010 - }, - { - "epoch": 0.3030902927879701, - "grad_norm": 972.8043823242188, - "learning_rate": 4.414662282723922e-05, - "loss": 63.5983, - "step": 75020 - }, - { - "epoch": 0.30313069405333776, - "grad_norm": 1290.5452880859375, - "learning_rate": 4.414437814193982e-05, - "loss": 58.9312, - "step": 75030 - }, - { - "epoch": 0.3031710953187054, - "grad_norm": 1295.46044921875, - "learning_rate": 4.414213308341092e-05, - "loss": 60.9342, - "step": 75040 - }, - { - "epoch": 0.30321149658407304, - "grad_norm": 778.5823364257812, - "learning_rate": 4.4139887651696265e-05, - "loss": 65.9136, - "step": 75050 - }, - { - "epoch": 0.3032518978494406, - "grad_norm": 515.2728271484375, - "learning_rate": 4.413764184683966e-05, - "loss": 73.2117, - "step": 75060 - }, - { - "epoch": 0.30329229911480826, - "grad_norm": 1737.145751953125, - "learning_rate": 4.413539566888487e-05, - "loss": 122.1578, - "step": 75070 - }, - { - "epoch": 0.3033327003801759, - "grad_norm": 606.1090698242188, - "learning_rate": 4.413314911787569e-05, - "loss": 62.4606, - "step": 75080 - }, - { - "epoch": 0.30337310164554354, - "grad_norm": 760.4569702148438, - "learning_rate": 4.413090219385592e-05, - "loss": 42.6394, - "step": 75090 - }, - { - "epoch": 0.3034135029109112, - "grad_norm": 809.2393188476562, - "learning_rate": 4.412865489686936e-05, - "loss": 61.619, - "step": 75100 - }, - { - "epoch": 0.3034539041762788, - "grad_norm": 555.9096069335938, - "learning_rate": 4.412640722695982e-05, - "loss": 79.0981, - "step": 75110 - }, - { - "epoch": 0.3034943054416464, - "grad_norm": 745.3788452148438, - "learning_rate": 4.4124159184171134e-05, - "loss": 80.1419, - "step": 75120 - }, - { - "epoch": 0.30353470670701405, - "grad_norm": 915.6812744140625, - "learning_rate": 4.412191076854711e-05, - "loss": 80.7349, - "step": 75130 - }, - { - "epoch": 0.3035751079723817, - "grad_norm": 857.411865234375, - "learning_rate": 4.41196619801316e-05, - "loss": 43.2552, - "step": 75140 - }, - { - "epoch": 0.3036155092377493, - "grad_norm": 1478.0841064453125, - "learning_rate": 4.4117412818968426e-05, - "loss": 71.5397, - "step": 75150 - }, - { - "epoch": 0.30365591050311697, - "grad_norm": 818.4799194335938, - "learning_rate": 4.411516328510145e-05, - "loss": 42.5693, - "step": 75160 - }, - { - "epoch": 0.3036963117684846, - "grad_norm": 1053.192626953125, - "learning_rate": 4.411291337857453e-05, - "loss": 67.0812, - "step": 75170 - }, - { - "epoch": 0.30373671303385225, - "grad_norm": 399.8765563964844, - "learning_rate": 4.4110663099431514e-05, - "loss": 65.8789, - "step": 75180 - }, - { - "epoch": 0.30377711429921983, - "grad_norm": 869.1029052734375, - "learning_rate": 4.41084124477163e-05, - "loss": 86.4271, - "step": 75190 - }, - { - "epoch": 0.30381751556458747, - "grad_norm": 561.1309814453125, - "learning_rate": 4.410616142347273e-05, - "loss": 65.8079, - "step": 75200 - }, - { - "epoch": 0.3038579168299551, - "grad_norm": 1289.060302734375, - "learning_rate": 4.410391002674471e-05, - "loss": 74.7418, - "step": 75210 - }, - { - "epoch": 0.30389831809532275, - "grad_norm": 566.0087280273438, - "learning_rate": 4.410165825757613e-05, - "loss": 74.0141, - "step": 75220 - }, - { - "epoch": 0.3039387193606904, - "grad_norm": 0.0, - "learning_rate": 4.409940611601089e-05, - "loss": 52.371, - "step": 75230 - }, - { - "epoch": 0.30397912062605803, - "grad_norm": 628.7798461914062, - "learning_rate": 4.409715360209289e-05, - "loss": 70.8489, - "step": 75240 - }, - { - "epoch": 0.3040195218914256, - "grad_norm": 644.141845703125, - "learning_rate": 4.4094900715866064e-05, - "loss": 41.9273, - "step": 75250 - }, - { - "epoch": 0.30405992315679325, - "grad_norm": 727.8847045898438, - "learning_rate": 4.40926474573743e-05, - "loss": 67.9712, - "step": 75260 - }, - { - "epoch": 0.3041003244221609, - "grad_norm": 0.0, - "learning_rate": 4.409039382666155e-05, - "loss": 52.6979, - "step": 75270 - }, - { - "epoch": 0.30414072568752853, - "grad_norm": 521.0494384765625, - "learning_rate": 4.4088139823771744e-05, - "loss": 48.4199, - "step": 75280 - }, - { - "epoch": 0.3041811269528962, - "grad_norm": 494.1226501464844, - "learning_rate": 4.408588544874882e-05, - "loss": 65.0988, - "step": 75290 - }, - { - "epoch": 0.3042215282182638, - "grad_norm": 513.5343627929688, - "learning_rate": 4.408363070163675e-05, - "loss": 42.8742, - "step": 75300 - }, - { - "epoch": 0.30426192948363145, - "grad_norm": 654.8699951171875, - "learning_rate": 4.408137558247946e-05, - "loss": 64.6675, - "step": 75310 - }, - { - "epoch": 0.30430233074899904, - "grad_norm": 1542.4345703125, - "learning_rate": 4.407912009132093e-05, - "loss": 58.0917, - "step": 75320 - }, - { - "epoch": 0.3043427320143667, - "grad_norm": 1139.7677001953125, - "learning_rate": 4.4076864228205136e-05, - "loss": 62.5426, - "step": 75330 - }, - { - "epoch": 0.3043831332797343, - "grad_norm": 480.1134033203125, - "learning_rate": 4.407460799317604e-05, - "loss": 64.8383, - "step": 75340 - }, - { - "epoch": 0.30442353454510196, - "grad_norm": 1814.2696533203125, - "learning_rate": 4.4072351386277654e-05, - "loss": 78.7744, - "step": 75350 - }, - { - "epoch": 0.3044639358104696, - "grad_norm": 1329.923095703125, - "learning_rate": 4.407009440755396e-05, - "loss": 72.4044, - "step": 75360 - }, - { - "epoch": 0.30450433707583724, - "grad_norm": 3605.839599609375, - "learning_rate": 4.4067837057048956e-05, - "loss": 79.2061, - "step": 75370 - }, - { - "epoch": 0.3045447383412048, - "grad_norm": 1073.45166015625, - "learning_rate": 4.406557933480664e-05, - "loss": 81.1469, - "step": 75380 - }, - { - "epoch": 0.30458513960657246, - "grad_norm": 751.8543090820312, - "learning_rate": 4.406332124087105e-05, - "loss": 77.547, - "step": 75390 - }, - { - "epoch": 0.3046255408719401, - "grad_norm": 531.0945434570312, - "learning_rate": 4.40610627752862e-05, - "loss": 66.1161, - "step": 75400 - }, - { - "epoch": 0.30466594213730774, - "grad_norm": 738.7662963867188, - "learning_rate": 4.405880393809612e-05, - "loss": 64.749, - "step": 75410 - }, - { - "epoch": 0.3047063434026754, - "grad_norm": 877.9468383789062, - "learning_rate": 4.405654472934483e-05, - "loss": 51.235, - "step": 75420 - }, - { - "epoch": 0.304746744668043, - "grad_norm": 653.334716796875, - "learning_rate": 4.4054285149076404e-05, - "loss": 55.1535, - "step": 75430 - }, - { - "epoch": 0.3047871459334106, - "grad_norm": 1065.5577392578125, - "learning_rate": 4.4052025197334864e-05, - "loss": 75.8509, - "step": 75440 - }, - { - "epoch": 0.30482754719877825, - "grad_norm": 1329.488525390625, - "learning_rate": 4.40497648741643e-05, - "loss": 78.9796, - "step": 75450 - }, - { - "epoch": 0.3048679484641459, - "grad_norm": 1260.999755859375, - "learning_rate": 4.4047504179608755e-05, - "loss": 91.8325, - "step": 75460 - }, - { - "epoch": 0.30490834972951353, - "grad_norm": 656.1160888671875, - "learning_rate": 4.404524311371231e-05, - "loss": 35.623, - "step": 75470 - }, - { - "epoch": 0.30494875099488117, - "grad_norm": 882.13623046875, - "learning_rate": 4.404298167651905e-05, - "loss": 58.8389, - "step": 75480 - }, - { - "epoch": 0.3049891522602488, - "grad_norm": 1063.432861328125, - "learning_rate": 4.4040719868073055e-05, - "loss": 55.5148, - "step": 75490 - }, - { - "epoch": 0.30502955352561645, - "grad_norm": 1446.3270263671875, - "learning_rate": 4.403845768841842e-05, - "loss": 93.3359, - "step": 75500 - }, - { - "epoch": 0.30506995479098403, - "grad_norm": 652.8603515625, - "learning_rate": 4.403619513759926e-05, - "loss": 88.8308, - "step": 75510 - }, - { - "epoch": 0.3051103560563517, - "grad_norm": 848.1201782226562, - "learning_rate": 4.403393221565966e-05, - "loss": 51.726, - "step": 75520 - }, - { - "epoch": 0.3051507573217193, - "grad_norm": 823.25390625, - "learning_rate": 4.403166892264376e-05, - "loss": 60.7063, - "step": 75530 - }, - { - "epoch": 0.30519115858708695, - "grad_norm": 837.02978515625, - "learning_rate": 4.402940525859568e-05, - "loss": 62.3497, - "step": 75540 - }, - { - "epoch": 0.3052315598524546, - "grad_norm": 1130.3653564453125, - "learning_rate": 4.402714122355955e-05, - "loss": 86.2569, - "step": 75550 - }, - { - "epoch": 0.30527196111782223, - "grad_norm": 313.84063720703125, - "learning_rate": 4.40248768175795e-05, - "loss": 85.372, - "step": 75560 - }, - { - "epoch": 0.3053123623831898, - "grad_norm": 1272.7034912109375, - "learning_rate": 4.4022612040699676e-05, - "loss": 59.0401, - "step": 75570 - }, - { - "epoch": 0.30535276364855746, - "grad_norm": 259.1897888183594, - "learning_rate": 4.4020346892964246e-05, - "loss": 42.5053, - "step": 75580 - }, - { - "epoch": 0.3053931649139251, - "grad_norm": 541.4120483398438, - "learning_rate": 4.401808137441736e-05, - "loss": 59.4345, - "step": 75590 - }, - { - "epoch": 0.30543356617929274, - "grad_norm": 923.3859252929688, - "learning_rate": 4.401581548510318e-05, - "loss": 47.9736, - "step": 75600 - }, - { - "epoch": 0.3054739674446604, - "grad_norm": 1377.25, - "learning_rate": 4.40135492250659e-05, - "loss": 78.1078, - "step": 75610 - }, - { - "epoch": 0.305514368710028, - "grad_norm": 1074.262939453125, - "learning_rate": 4.401128259434968e-05, - "loss": 57.5512, - "step": 75620 - }, - { - "epoch": 0.30555476997539566, - "grad_norm": 616.6300048828125, - "learning_rate": 4.400901559299871e-05, - "loss": 38.326, - "step": 75630 - }, - { - "epoch": 0.30559517124076324, - "grad_norm": 1104.368408203125, - "learning_rate": 4.4006748221057206e-05, - "loss": 94.1468, - "step": 75640 - }, - { - "epoch": 0.3056355725061309, - "grad_norm": 581.0299072265625, - "learning_rate": 4.4004480478569353e-05, - "loss": 87.8687, - "step": 75650 - }, - { - "epoch": 0.3056759737714985, - "grad_norm": 1138.6673583984375, - "learning_rate": 4.400221236557938e-05, - "loss": 66.9354, - "step": 75660 - }, - { - "epoch": 0.30571637503686616, - "grad_norm": 568.2470703125, - "learning_rate": 4.399994388213149e-05, - "loss": 88.6248, - "step": 75670 - }, - { - "epoch": 0.3057567763022338, - "grad_norm": 598.5287475585938, - "learning_rate": 4.3997675028269906e-05, - "loss": 55.7092, - "step": 75680 - }, - { - "epoch": 0.30579717756760144, - "grad_norm": 971.5711669921875, - "learning_rate": 4.399540580403887e-05, - "loss": 67.1653, - "step": 75690 - }, - { - "epoch": 0.305837578832969, - "grad_norm": 1012.9927368164062, - "learning_rate": 4.399313620948262e-05, - "loss": 51.6907, - "step": 75700 - }, - { - "epoch": 0.30587798009833667, - "grad_norm": 1266.41748046875, - "learning_rate": 4.39908662446454e-05, - "loss": 59.8522, - "step": 75710 - }, - { - "epoch": 0.3059183813637043, - "grad_norm": 580.0430297851562, - "learning_rate": 4.3988595909571464e-05, - "loss": 51.2898, - "step": 75720 - }, - { - "epoch": 0.30595878262907195, - "grad_norm": 503.0343322753906, - "learning_rate": 4.3986325204305076e-05, - "loss": 69.8676, - "step": 75730 - }, - { - "epoch": 0.3059991838944396, - "grad_norm": 246.7170867919922, - "learning_rate": 4.398405412889051e-05, - "loss": 60.3378, - "step": 75740 - }, - { - "epoch": 0.3060395851598072, - "grad_norm": 440.064208984375, - "learning_rate": 4.3981782683372016e-05, - "loss": 52.1341, - "step": 75750 - }, - { - "epoch": 0.3060799864251748, - "grad_norm": 684.9658203125, - "learning_rate": 4.3979510867793917e-05, - "loss": 83.72, - "step": 75760 - }, - { - "epoch": 0.30612038769054245, - "grad_norm": 0.0, - "learning_rate": 4.397723868220047e-05, - "loss": 64.8489, - "step": 75770 - }, - { - "epoch": 0.3061607889559101, - "grad_norm": 971.6740112304688, - "learning_rate": 4.397496612663599e-05, - "loss": 59.74, - "step": 75780 - }, - { - "epoch": 0.30620119022127773, - "grad_norm": 1435.0618896484375, - "learning_rate": 4.397269320114478e-05, - "loss": 92.3261, - "step": 75790 - }, - { - "epoch": 0.30624159148664537, - "grad_norm": 1401.4566650390625, - "learning_rate": 4.3970419905771145e-05, - "loss": 86.528, - "step": 75800 - }, - { - "epoch": 0.306281992752013, - "grad_norm": 423.3729553222656, - "learning_rate": 4.39681462405594e-05, - "loss": 106.0067, - "step": 75810 - }, - { - "epoch": 0.30632239401738065, - "grad_norm": 0.0, - "learning_rate": 4.3965872205553885e-05, - "loss": 70.0686, - "step": 75820 - }, - { - "epoch": 0.30636279528274823, - "grad_norm": 1514.8172607421875, - "learning_rate": 4.3963597800798927e-05, - "loss": 85.976, - "step": 75830 - }, - { - "epoch": 0.3064031965481159, - "grad_norm": 286.7713928222656, - "learning_rate": 4.396132302633886e-05, - "loss": 47.0414, - "step": 75840 - }, - { - "epoch": 0.3064435978134835, - "grad_norm": 1182.9954833984375, - "learning_rate": 4.395904788221805e-05, - "loss": 59.4661, - "step": 75850 - }, - { - "epoch": 0.30648399907885115, - "grad_norm": 1122.1630859375, - "learning_rate": 4.3956772368480836e-05, - "loss": 94.7752, - "step": 75860 - }, - { - "epoch": 0.3065244003442188, - "grad_norm": 555.4927368164062, - "learning_rate": 4.395449648517158e-05, - "loss": 52.1087, - "step": 75870 - }, - { - "epoch": 0.30656480160958643, - "grad_norm": 698.0592041015625, - "learning_rate": 4.395222023233466e-05, - "loss": 52.4741, - "step": 75880 - }, - { - "epoch": 0.306605202874954, - "grad_norm": 628.822509765625, - "learning_rate": 4.3949943610014455e-05, - "loss": 86.0263, - "step": 75890 - }, - { - "epoch": 0.30664560414032166, - "grad_norm": 1240.1202392578125, - "learning_rate": 4.394766661825533e-05, - "loss": 46.8542, - "step": 75900 - }, - { - "epoch": 0.3066860054056893, - "grad_norm": 816.1203002929688, - "learning_rate": 4.3945389257101704e-05, - "loss": 38.3735, - "step": 75910 - }, - { - "epoch": 0.30672640667105694, - "grad_norm": 889.33349609375, - "learning_rate": 4.394311152659796e-05, - "loss": 73.3283, - "step": 75920 - }, - { - "epoch": 0.3067668079364246, - "grad_norm": 1453.927978515625, - "learning_rate": 4.3940833426788496e-05, - "loss": 62.5195, - "step": 75930 - }, - { - "epoch": 0.3068072092017922, - "grad_norm": 987.9056396484375, - "learning_rate": 4.393855495771774e-05, - "loss": 77.4361, - "step": 75940 - }, - { - "epoch": 0.3068476104671598, - "grad_norm": 1086.7833251953125, - "learning_rate": 4.3936276119430096e-05, - "loss": 87.7577, - "step": 75950 - }, - { - "epoch": 0.30688801173252744, - "grad_norm": 491.1762390136719, - "learning_rate": 4.393399691197e-05, - "loss": 71.9018, - "step": 75960 - }, - { - "epoch": 0.3069284129978951, - "grad_norm": 834.448974609375, - "learning_rate": 4.3931717335381894e-05, - "loss": 56.691, - "step": 75970 - }, - { - "epoch": 0.3069688142632627, - "grad_norm": 1463.418212890625, - "learning_rate": 4.392943738971021e-05, - "loss": 66.625, - "step": 75980 - }, - { - "epoch": 0.30700921552863036, - "grad_norm": 709.5184936523438, - "learning_rate": 4.39271570749994e-05, - "loss": 72.5582, - "step": 75990 - }, - { - "epoch": 0.307049616793998, - "grad_norm": 802.2734985351562, - "learning_rate": 4.3924876391293915e-05, - "loss": 80.8964, - "step": 76000 - }, - { - "epoch": 0.30709001805936564, - "grad_norm": 1081.5029296875, - "learning_rate": 4.3922595338638214e-05, - "loss": 64.753, - "step": 76010 - }, - { - "epoch": 0.30713041932473323, - "grad_norm": 747.075439453125, - "learning_rate": 4.3920313917076794e-05, - "loss": 48.6337, - "step": 76020 - }, - { - "epoch": 0.30717082059010087, - "grad_norm": 371.2689208984375, - "learning_rate": 4.3918032126654095e-05, - "loss": 56.0596, - "step": 76030 - }, - { - "epoch": 0.3072112218554685, - "grad_norm": 852.907958984375, - "learning_rate": 4.391574996741463e-05, - "loss": 66.6618, - "step": 76040 - }, - { - "epoch": 0.30725162312083615, - "grad_norm": 1084.6485595703125, - "learning_rate": 4.391346743940288e-05, - "loss": 66.4222, - "step": 76050 - }, - { - "epoch": 0.3072920243862038, - "grad_norm": 1998.0355224609375, - "learning_rate": 4.3911184542663344e-05, - "loss": 77.1086, - "step": 76060 - }, - { - "epoch": 0.30733242565157143, - "grad_norm": 789.2811889648438, - "learning_rate": 4.390890127724053e-05, - "loss": 68.8511, - "step": 76070 - }, - { - "epoch": 0.307372826916939, - "grad_norm": 666.281494140625, - "learning_rate": 4.390661764317895e-05, - "loss": 68.8774, - "step": 76080 - }, - { - "epoch": 0.30741322818230665, - "grad_norm": 593.8773803710938, - "learning_rate": 4.390433364052312e-05, - "loss": 45.1297, - "step": 76090 - }, - { - "epoch": 0.3074536294476743, - "grad_norm": 1460.0093994140625, - "learning_rate": 4.390204926931758e-05, - "loss": 79.1225, - "step": 76100 - }, - { - "epoch": 0.30749403071304193, - "grad_norm": 653.6718139648438, - "learning_rate": 4.389976452960686e-05, - "loss": 72.7947, - "step": 76110 - }, - { - "epoch": 0.3075344319784096, - "grad_norm": 906.63134765625, - "learning_rate": 4.38974794214355e-05, - "loss": 72.8858, - "step": 76120 - }, - { - "epoch": 0.3075748332437772, - "grad_norm": 1931.9991455078125, - "learning_rate": 4.3895193944848034e-05, - "loss": 64.3772, - "step": 76130 - }, - { - "epoch": 0.30761523450914485, - "grad_norm": 798.5737915039062, - "learning_rate": 4.3892908099889054e-05, - "loss": 55.5147, - "step": 76140 - }, - { - "epoch": 0.30765563577451244, - "grad_norm": 696.617919921875, - "learning_rate": 4.389062188660309e-05, - "loss": 62.2774, - "step": 76150 - }, - { - "epoch": 0.3076960370398801, - "grad_norm": 840.3499145507812, - "learning_rate": 4.388833530503473e-05, - "loss": 98.8143, - "step": 76160 - }, - { - "epoch": 0.3077364383052477, - "grad_norm": 764.2418212890625, - "learning_rate": 4.388604835522855e-05, - "loss": 69.4767, - "step": 76170 - }, - { - "epoch": 0.30777683957061536, - "grad_norm": 663.6424560546875, - "learning_rate": 4.3883761037229146e-05, - "loss": 69.4592, - "step": 76180 - }, - { - "epoch": 0.307817240835983, - "grad_norm": 624.4500732421875, - "learning_rate": 4.388147335108108e-05, - "loss": 53.6257, - "step": 76190 - }, - { - "epoch": 0.30785764210135064, - "grad_norm": 1174.481201171875, - "learning_rate": 4.387918529682898e-05, - "loss": 70.0419, - "step": 76200 - }, - { - "epoch": 0.3078980433667182, - "grad_norm": 466.0067443847656, - "learning_rate": 4.3876896874517434e-05, - "loss": 112.7296, - "step": 76210 - }, - { - "epoch": 0.30793844463208586, - "grad_norm": 576.7889404296875, - "learning_rate": 4.387460808419108e-05, - "loss": 56.6612, - "step": 76220 - }, - { - "epoch": 0.3079788458974535, - "grad_norm": 809.4537353515625, - "learning_rate": 4.387231892589452e-05, - "loss": 84.8902, - "step": 76230 - }, - { - "epoch": 0.30801924716282114, - "grad_norm": 1000.173095703125, - "learning_rate": 4.387002939967237e-05, - "loss": 69.7403, - "step": 76240 - }, - { - "epoch": 0.3080596484281888, - "grad_norm": 756.7673950195312, - "learning_rate": 4.386773950556931e-05, - "loss": 51.9468, - "step": 76250 - }, - { - "epoch": 0.3081000496935564, - "grad_norm": 922.7078247070312, - "learning_rate": 4.386544924362993e-05, - "loss": 80.4618, - "step": 76260 - }, - { - "epoch": 0.308140450958924, - "grad_norm": 928.8509521484375, - "learning_rate": 4.3863158613898915e-05, - "loss": 90.6427, - "step": 76270 - }, - { - "epoch": 0.30818085222429165, - "grad_norm": 990.8674926757812, - "learning_rate": 4.386086761642091e-05, - "loss": 54.2147, - "step": 76280 - }, - { - "epoch": 0.3082212534896593, - "grad_norm": 973.0326538085938, - "learning_rate": 4.385857625124058e-05, - "loss": 86.8008, - "step": 76290 - }, - { - "epoch": 0.3082616547550269, - "grad_norm": 831.694580078125, - "learning_rate": 4.3856284518402594e-05, - "loss": 77.8919, - "step": 76300 - }, - { - "epoch": 0.30830205602039457, - "grad_norm": 1294.062255859375, - "learning_rate": 4.385399241795164e-05, - "loss": 89.092, - "step": 76310 - }, - { - "epoch": 0.3083424572857622, - "grad_norm": 899.0340576171875, - "learning_rate": 4.3851699949932396e-05, - "loss": 66.3644, - "step": 76320 - }, - { - "epoch": 0.30838285855112985, - "grad_norm": 2219.54296875, - "learning_rate": 4.384940711438955e-05, - "loss": 43.2072, - "step": 76330 - }, - { - "epoch": 0.30842325981649743, - "grad_norm": 1805.721923828125, - "learning_rate": 4.384711391136781e-05, - "loss": 68.598, - "step": 76340 - }, - { - "epoch": 0.30846366108186507, - "grad_norm": 701.5916137695312, - "learning_rate": 4.384482034091189e-05, - "loss": 49.2661, - "step": 76350 - }, - { - "epoch": 0.3085040623472327, - "grad_norm": 435.8236389160156, - "learning_rate": 4.3842526403066486e-05, - "loss": 52.0933, - "step": 76360 - }, - { - "epoch": 0.30854446361260035, - "grad_norm": 2196.045166015625, - "learning_rate": 4.384023209787633e-05, - "loss": 93.9429, - "step": 76370 - }, - { - "epoch": 0.308584864877968, - "grad_norm": 780.8671264648438, - "learning_rate": 4.383793742538616e-05, - "loss": 60.4108, - "step": 76380 - }, - { - "epoch": 0.30862526614333563, - "grad_norm": 1023.3828125, - "learning_rate": 4.383564238564068e-05, - "loss": 65.5329, - "step": 76390 - }, - { - "epoch": 0.3086656674087032, - "grad_norm": 677.0743408203125, - "learning_rate": 4.3833346978684675e-05, - "loss": 89.5438, - "step": 76400 - }, - { - "epoch": 0.30870606867407085, - "grad_norm": 650.9645385742188, - "learning_rate": 4.383105120456287e-05, - "loss": 50.4317, - "step": 76410 - }, - { - "epoch": 0.3087464699394385, - "grad_norm": 543.072021484375, - "learning_rate": 4.3828755063320016e-05, - "loss": 84.4733, - "step": 76420 - }, - { - "epoch": 0.30878687120480613, - "grad_norm": 773.248046875, - "learning_rate": 4.38264585550009e-05, - "loss": 64.6917, - "step": 76430 - }, - { - "epoch": 0.3088272724701738, - "grad_norm": 365.4747009277344, - "learning_rate": 4.382416167965028e-05, - "loss": 62.2631, - "step": 76440 - }, - { - "epoch": 0.3088676737355414, - "grad_norm": 674.434326171875, - "learning_rate": 4.382186443731293e-05, - "loss": 54.153, - "step": 76450 - }, - { - "epoch": 0.30890807500090905, - "grad_norm": 1216.2164306640625, - "learning_rate": 4.381956682803365e-05, - "loss": 77.9781, - "step": 76460 - }, - { - "epoch": 0.30894847626627664, - "grad_norm": 615.4111938476562, - "learning_rate": 4.381726885185722e-05, - "loss": 66.789, - "step": 76470 - }, - { - "epoch": 0.3089888775316443, - "grad_norm": 1128.7919921875, - "learning_rate": 4.381497050882845e-05, - "loss": 61.4229, - "step": 76480 - }, - { - "epoch": 0.3090292787970119, - "grad_norm": 645.344482421875, - "learning_rate": 4.381267179899214e-05, - "loss": 47.0934, - "step": 76490 - }, - { - "epoch": 0.30906968006237956, - "grad_norm": 397.85980224609375, - "learning_rate": 4.381037272239311e-05, - "loss": 63.7962, - "step": 76500 - }, - { - "epoch": 0.3091100813277472, - "grad_norm": 463.2906188964844, - "learning_rate": 4.380807327907618e-05, - "loss": 60.8565, - "step": 76510 - }, - { - "epoch": 0.30915048259311484, - "grad_norm": 470.76104736328125, - "learning_rate": 4.380577346908618e-05, - "loss": 71.5916, - "step": 76520 - }, - { - "epoch": 0.3091908838584824, - "grad_norm": 1406.014404296875, - "learning_rate": 4.380347329246794e-05, - "loss": 76.5488, - "step": 76530 - }, - { - "epoch": 0.30923128512385006, - "grad_norm": 898.1220703125, - "learning_rate": 4.380117274926631e-05, - "loss": 75.5578, - "step": 76540 - }, - { - "epoch": 0.3092716863892177, - "grad_norm": 1027.6002197265625, - "learning_rate": 4.379887183952614e-05, - "loss": 73.7126, - "step": 76550 - }, - { - "epoch": 0.30931208765458534, - "grad_norm": 2206.342529296875, - "learning_rate": 4.379657056329228e-05, - "loss": 121.1146, - "step": 76560 - }, - { - "epoch": 0.309352488919953, - "grad_norm": 986.2785034179688, - "learning_rate": 4.3794268920609605e-05, - "loss": 59.564, - "step": 76570 - }, - { - "epoch": 0.3093928901853206, - "grad_norm": 847.2274169921875, - "learning_rate": 4.379196691152298e-05, - "loss": 69.7664, - "step": 76580 - }, - { - "epoch": 0.3094332914506882, - "grad_norm": 938.61767578125, - "learning_rate": 4.3789664536077286e-05, - "loss": 54.9465, - "step": 76590 - }, - { - "epoch": 0.30947369271605585, - "grad_norm": 879.3148193359375, - "learning_rate": 4.3787361794317405e-05, - "loss": 82.9992, - "step": 76600 - }, - { - "epoch": 0.3095140939814235, - "grad_norm": 394.82000732421875, - "learning_rate": 4.378505868628823e-05, - "loss": 79.9965, - "step": 76610 - }, - { - "epoch": 0.30955449524679113, - "grad_norm": 458.2685241699219, - "learning_rate": 4.3782755212034675e-05, - "loss": 90.1063, - "step": 76620 - }, - { - "epoch": 0.30959489651215877, - "grad_norm": 1523.7799072265625, - "learning_rate": 4.3780451371601626e-05, - "loss": 63.1249, - "step": 76630 - }, - { - "epoch": 0.3096352977775264, - "grad_norm": 656.118896484375, - "learning_rate": 4.3778147165034025e-05, - "loss": 75.1157, - "step": 76640 - }, - { - "epoch": 0.30967569904289405, - "grad_norm": 1332.3353271484375, - "learning_rate": 4.377584259237676e-05, - "loss": 98.9753, - "step": 76650 - }, - { - "epoch": 0.30971610030826163, - "grad_norm": 1163.6890869140625, - "learning_rate": 4.377353765367479e-05, - "loss": 82.146, - "step": 76660 - }, - { - "epoch": 0.3097565015736293, - "grad_norm": 994.2089233398438, - "learning_rate": 4.377123234897303e-05, - "loss": 47.6127, - "step": 76670 - }, - { - "epoch": 0.3097969028389969, - "grad_norm": 427.42059326171875, - "learning_rate": 4.376892667831644e-05, - "loss": 61.8331, - "step": 76680 - }, - { - "epoch": 0.30983730410436455, - "grad_norm": 1147.4427490234375, - "learning_rate": 4.376662064174994e-05, - "loss": 65.2715, - "step": 76690 - }, - { - "epoch": 0.3098777053697322, - "grad_norm": 1077.2244873046875, - "learning_rate": 4.376431423931853e-05, - "loss": 111.3696, - "step": 76700 - }, - { - "epoch": 0.30991810663509983, - "grad_norm": 384.9374084472656, - "learning_rate": 4.3762007471067146e-05, - "loss": 72.2117, - "step": 76710 - }, - { - "epoch": 0.3099585079004674, - "grad_norm": 0.0, - "learning_rate": 4.375970033704077e-05, - "loss": 53.84, - "step": 76720 - }, - { - "epoch": 0.30999890916583506, - "grad_norm": 514.8012084960938, - "learning_rate": 4.375739283728437e-05, - "loss": 43.7522, - "step": 76730 - }, - { - "epoch": 0.3100393104312027, - "grad_norm": 1148.413818359375, - "learning_rate": 4.3755084971842954e-05, - "loss": 73.6758, - "step": 76740 - }, - { - "epoch": 0.31007971169657034, - "grad_norm": 1197.4542236328125, - "learning_rate": 4.375277674076149e-05, - "loss": 51.1232, - "step": 76750 - }, - { - "epoch": 0.310120112961938, - "grad_norm": 1706.7191162109375, - "learning_rate": 4.375046814408499e-05, - "loss": 102.9833, - "step": 76760 - }, - { - "epoch": 0.3101605142273056, - "grad_norm": 696.5614624023438, - "learning_rate": 4.374815918185846e-05, - "loss": 58.2234, - "step": 76770 - }, - { - "epoch": 0.31020091549267326, - "grad_norm": 1848.7935791015625, - "learning_rate": 4.374584985412692e-05, - "loss": 67.9964, - "step": 76780 - }, - { - "epoch": 0.31024131675804084, - "grad_norm": 469.37255859375, - "learning_rate": 4.374354016093538e-05, - "loss": 72.5402, - "step": 76790 - }, - { - "epoch": 0.3102817180234085, - "grad_norm": 1170.890380859375, - "learning_rate": 4.374123010232888e-05, - "loss": 67.4718, - "step": 76800 - }, - { - "epoch": 0.3103221192887761, - "grad_norm": 468.8921813964844, - "learning_rate": 4.373891967835245e-05, - "loss": 50.772, - "step": 76810 - }, - { - "epoch": 0.31036252055414376, - "grad_norm": 1059.63623046875, - "learning_rate": 4.373660888905113e-05, - "loss": 74.093, - "step": 76820 - }, - { - "epoch": 0.3104029218195114, - "grad_norm": 1103.974609375, - "learning_rate": 4.373429773446998e-05, - "loss": 65.5093, - "step": 76830 - }, - { - "epoch": 0.31044332308487904, - "grad_norm": 885.3768920898438, - "learning_rate": 4.373198621465404e-05, - "loss": 77.8017, - "step": 76840 - }, - { - "epoch": 0.3104837243502466, - "grad_norm": 968.3536376953125, - "learning_rate": 4.372967432964838e-05, - "loss": 53.5939, - "step": 76850 - }, - { - "epoch": 0.31052412561561427, - "grad_norm": 365.57904052734375, - "learning_rate": 4.372736207949809e-05, - "loss": 94.1086, - "step": 76860 - }, - { - "epoch": 0.3105645268809819, - "grad_norm": 974.5148315429688, - "learning_rate": 4.3725049464248235e-05, - "loss": 62.9441, - "step": 76870 - }, - { - "epoch": 0.31060492814634955, - "grad_norm": 191.68748474121094, - "learning_rate": 4.372273648394389e-05, - "loss": 61.9872, - "step": 76880 - }, - { - "epoch": 0.3106453294117172, - "grad_norm": 661.6585693359375, - "learning_rate": 4.372042313863017e-05, - "loss": 50.0335, - "step": 76890 - }, - { - "epoch": 0.3106857306770848, - "grad_norm": 774.6177978515625, - "learning_rate": 4.371810942835215e-05, - "loss": 48.9739, - "step": 76900 - }, - { - "epoch": 0.3107261319424524, - "grad_norm": 813.4871826171875, - "learning_rate": 4.371579535315496e-05, - "loss": 69.586, - "step": 76910 - }, - { - "epoch": 0.31076653320782005, - "grad_norm": 1050.9583740234375, - "learning_rate": 4.37134809130837e-05, - "loss": 67.1586, - "step": 76920 - }, - { - "epoch": 0.3108069344731877, - "grad_norm": 747.37939453125, - "learning_rate": 4.37111661081835e-05, - "loss": 56.411, - "step": 76930 - }, - { - "epoch": 0.31084733573855533, - "grad_norm": 499.1912536621094, - "learning_rate": 4.370885093849948e-05, - "loss": 36.8895, - "step": 76940 - }, - { - "epoch": 0.31088773700392297, - "grad_norm": 1127.0404052734375, - "learning_rate": 4.3706535404076784e-05, - "loss": 100.2089, - "step": 76950 - }, - { - "epoch": 0.3109281382692906, - "grad_norm": 696.9407958984375, - "learning_rate": 4.370421950496054e-05, - "loss": 55.8497, - "step": 76960 - }, - { - "epoch": 0.31096853953465825, - "grad_norm": 867.03173828125, - "learning_rate": 4.3701903241195916e-05, - "loss": 67.1302, - "step": 76970 - }, - { - "epoch": 0.31100894080002583, - "grad_norm": 972.6212768554688, - "learning_rate": 4.369958661282805e-05, - "loss": 54.7901, - "step": 76980 - }, - { - "epoch": 0.3110493420653935, - "grad_norm": 979.4427490234375, - "learning_rate": 4.369726961990213e-05, - "loss": 63.9195, - "step": 76990 - }, - { - "epoch": 0.3110897433307611, - "grad_norm": 1254.9720458984375, - "learning_rate": 4.36949522624633e-05, - "loss": 110.9768, - "step": 77000 - }, - { - "epoch": 0.31113014459612875, - "grad_norm": 1459.2310791015625, - "learning_rate": 4.369263454055675e-05, - "loss": 81.1262, - "step": 77010 - }, - { - "epoch": 0.3111705458614964, - "grad_norm": 968.3046875, - "learning_rate": 4.3690316454227674e-05, - "loss": 64.1656, - "step": 77020 - }, - { - "epoch": 0.31121094712686403, - "grad_norm": 818.0725708007812, - "learning_rate": 4.368799800352126e-05, - "loss": 93.2603, - "step": 77030 - }, - { - "epoch": 0.3112513483922316, - "grad_norm": 521.28125, - "learning_rate": 4.368567918848269e-05, - "loss": 45.2758, - "step": 77040 - }, - { - "epoch": 0.31129174965759926, - "grad_norm": 273.4820556640625, - "learning_rate": 4.368336000915719e-05, - "loss": 69.712, - "step": 77050 - }, - { - "epoch": 0.3113321509229669, - "grad_norm": 905.5136108398438, - "learning_rate": 4.3681040465589976e-05, - "loss": 88.944, - "step": 77060 - }, - { - "epoch": 0.31137255218833454, - "grad_norm": 809.4381713867188, - "learning_rate": 4.3678720557826247e-05, - "loss": 72.0091, - "step": 77070 - }, - { - "epoch": 0.3114129534537022, - "grad_norm": 439.21533203125, - "learning_rate": 4.3676400285911256e-05, - "loss": 68.7604, - "step": 77080 - }, - { - "epoch": 0.3114533547190698, - "grad_norm": 4371.494140625, - "learning_rate": 4.367407964989022e-05, - "loss": 73.8835, - "step": 77090 - }, - { - "epoch": 0.31149375598443746, - "grad_norm": 1301.020751953125, - "learning_rate": 4.367175864980839e-05, - "loss": 71.925, - "step": 77100 - }, - { - "epoch": 0.31153415724980504, - "grad_norm": 608.1272583007812, - "learning_rate": 4.366943728571101e-05, - "loss": 61.2593, - "step": 77110 - }, - { - "epoch": 0.3115745585151727, - "grad_norm": 529.8942260742188, - "learning_rate": 4.3667115557643336e-05, - "loss": 68.4521, - "step": 77120 - }, - { - "epoch": 0.3116149597805403, - "grad_norm": 499.2430725097656, - "learning_rate": 4.366479346565064e-05, - "loss": 47.7916, - "step": 77130 - }, - { - "epoch": 0.31165536104590796, - "grad_norm": 1138.603759765625, - "learning_rate": 4.366247100977818e-05, - "loss": 58.4289, - "step": 77140 - }, - { - "epoch": 0.3116957623112756, - "grad_norm": 558.7338256835938, - "learning_rate": 4.366014819007124e-05, - "loss": 40.2173, - "step": 77150 - }, - { - "epoch": 0.31173616357664324, - "grad_norm": 0.0, - "learning_rate": 4.3657825006575106e-05, - "loss": 76.302, - "step": 77160 - }, - { - "epoch": 0.31177656484201083, - "grad_norm": 1142.35986328125, - "learning_rate": 4.365550145933507e-05, - "loss": 59.8843, - "step": 77170 - }, - { - "epoch": 0.31181696610737847, - "grad_norm": 437.07196044921875, - "learning_rate": 4.3653177548396426e-05, - "loss": 84.331, - "step": 77180 - }, - { - "epoch": 0.3118573673727461, - "grad_norm": 1512.7645263671875, - "learning_rate": 4.365085327380448e-05, - "loss": 68.1001, - "step": 77190 - }, - { - "epoch": 0.31189776863811375, - "grad_norm": 595.816650390625, - "learning_rate": 4.3648528635604556e-05, - "loss": 61.2616, - "step": 77200 - }, - { - "epoch": 0.3119381699034814, - "grad_norm": 307.5999450683594, - "learning_rate": 4.364620363384196e-05, - "loss": 66.6272, - "step": 77210 - }, - { - "epoch": 0.31197857116884903, - "grad_norm": 1286.98095703125, - "learning_rate": 4.364387826856202e-05, - "loss": 78.2909, - "step": 77220 - }, - { - "epoch": 0.3120189724342166, - "grad_norm": 1373.5860595703125, - "learning_rate": 4.364155253981008e-05, - "loss": 72.0954, - "step": 77230 - }, - { - "epoch": 0.31205937369958425, - "grad_norm": 392.8745422363281, - "learning_rate": 4.363922644763147e-05, - "loss": 71.9362, - "step": 77240 - }, - { - "epoch": 0.3120997749649519, - "grad_norm": 891.7858276367188, - "learning_rate": 4.363689999207156e-05, - "loss": 62.0088, - "step": 77250 - }, - { - "epoch": 0.31214017623031953, - "grad_norm": 861.945556640625, - "learning_rate": 4.363457317317567e-05, - "loss": 105.5122, - "step": 77260 - }, - { - "epoch": 0.3121805774956872, - "grad_norm": 843.31689453125, - "learning_rate": 4.3632245990989194e-05, - "loss": 52.6803, - "step": 77270 - }, - { - "epoch": 0.3122209787610548, - "grad_norm": 649.4727172851562, - "learning_rate": 4.362991844555749e-05, - "loss": 44.0785, - "step": 77280 - }, - { - "epoch": 0.31226138002642245, - "grad_norm": 961.2431640625, - "learning_rate": 4.362759053692593e-05, - "loss": 61.1215, - "step": 77290 - }, - { - "epoch": 0.31230178129179004, - "grad_norm": 832.6133422851562, - "learning_rate": 4.3625262265139906e-05, - "loss": 71.7721, - "step": 77300 - }, - { - "epoch": 0.3123421825571577, - "grad_norm": 1031.405517578125, - "learning_rate": 4.36229336302448e-05, - "loss": 61.8297, - "step": 77310 - }, - { - "epoch": 0.3123825838225253, - "grad_norm": 821.444580078125, - "learning_rate": 4.3620604632286024e-05, - "loss": 92.3522, - "step": 77320 - }, - { - "epoch": 0.31242298508789296, - "grad_norm": 809.0938720703125, - "learning_rate": 4.361827527130896e-05, - "loss": 75.1257, - "step": 77330 - }, - { - "epoch": 0.3124633863532606, - "grad_norm": 725.1190795898438, - "learning_rate": 4.361594554735905e-05, - "loss": 77.7145, - "step": 77340 - }, - { - "epoch": 0.31250378761862824, - "grad_norm": 888.7479858398438, - "learning_rate": 4.361361546048169e-05, - "loss": 95.2185, - "step": 77350 - }, - { - "epoch": 0.3125441888839958, - "grad_norm": 2857.910400390625, - "learning_rate": 4.361128501072231e-05, - "loss": 68.9192, - "step": 77360 - }, - { - "epoch": 0.31258459014936346, - "grad_norm": 506.87982177734375, - "learning_rate": 4.360895419812635e-05, - "loss": 58.4488, - "step": 77370 - }, - { - "epoch": 0.3126249914147311, - "grad_norm": 749.842041015625, - "learning_rate": 4.360662302273925e-05, - "loss": 71.4121, - "step": 77380 - }, - { - "epoch": 0.31266539268009874, - "grad_norm": 1351.6341552734375, - "learning_rate": 4.360429148460645e-05, - "loss": 75.3694, - "step": 77390 - }, - { - "epoch": 0.3127057939454664, - "grad_norm": 460.1432189941406, - "learning_rate": 4.3601959583773415e-05, - "loss": 89.7496, - "step": 77400 - }, - { - "epoch": 0.312746195210834, - "grad_norm": 1023.0693969726562, - "learning_rate": 4.3599627320285596e-05, - "loss": 110.523, - "step": 77410 - }, - { - "epoch": 0.31278659647620166, - "grad_norm": 385.4729919433594, - "learning_rate": 4.3597294694188475e-05, - "loss": 37.7279, - "step": 77420 - }, - { - "epoch": 0.31282699774156925, - "grad_norm": 1083.031005859375, - "learning_rate": 4.359496170552751e-05, - "loss": 65.8572, - "step": 77430 - }, - { - "epoch": 0.3128673990069369, - "grad_norm": 571.319580078125, - "learning_rate": 4.35926283543482e-05, - "loss": 54.1978, - "step": 77440 - }, - { - "epoch": 0.3129078002723045, - "grad_norm": 535.0450439453125, - "learning_rate": 4.3590294640696025e-05, - "loss": 88.7871, - "step": 77450 - }, - { - "epoch": 0.31294820153767217, - "grad_norm": 625.9801025390625, - "learning_rate": 4.358796056461648e-05, - "loss": 73.7608, - "step": 77460 - }, - { - "epoch": 0.3129886028030398, - "grad_norm": 569.495361328125, - "learning_rate": 4.3585626126155084e-05, - "loss": 78.8239, - "step": 77470 - }, - { - "epoch": 0.31302900406840745, - "grad_norm": 632.259521484375, - "learning_rate": 4.358329132535733e-05, - "loss": 119.5843, - "step": 77480 - }, - { - "epoch": 0.31306940533377503, - "grad_norm": 752.5601806640625, - "learning_rate": 4.3580956162268746e-05, - "loss": 41.869, - "step": 77490 - }, - { - "epoch": 0.31310980659914267, - "grad_norm": 1639.3475341796875, - "learning_rate": 4.357862063693486e-05, - "loss": 104.7077, - "step": 77500 - }, - { - "epoch": 0.3131502078645103, - "grad_norm": 1603.3714599609375, - "learning_rate": 4.35762847494012e-05, - "loss": 133.0148, - "step": 77510 - }, - { - "epoch": 0.31319060912987795, - "grad_norm": 921.978515625, - "learning_rate": 4.35739484997133e-05, - "loss": 54.5854, - "step": 77520 - }, - { - "epoch": 0.3132310103952456, - "grad_norm": 567.6099853515625, - "learning_rate": 4.3571611887916705e-05, - "loss": 45.054, - "step": 77530 - }, - { - "epoch": 0.31327141166061323, - "grad_norm": 1245.0203857421875, - "learning_rate": 4.356927491405699e-05, - "loss": 54.2814, - "step": 77540 - }, - { - "epoch": 0.3133118129259808, - "grad_norm": 798.68359375, - "learning_rate": 4.356693757817969e-05, - "loss": 68.1666, - "step": 77550 - }, - { - "epoch": 0.31335221419134845, - "grad_norm": 838.5280151367188, - "learning_rate": 4.356459988033039e-05, - "loss": 55.8854, - "step": 77560 - }, - { - "epoch": 0.3133926154567161, - "grad_norm": 497.4013366699219, - "learning_rate": 4.356226182055465e-05, - "loss": 43.9287, - "step": 77570 - }, - { - "epoch": 0.31343301672208373, - "grad_norm": 2377.817626953125, - "learning_rate": 4.355992339889806e-05, - "loss": 89.1516, - "step": 77580 - }, - { - "epoch": 0.3134734179874514, - "grad_norm": 1571.2337646484375, - "learning_rate": 4.355758461540622e-05, - "loss": 64.8556, - "step": 77590 - }, - { - "epoch": 0.313513819252819, - "grad_norm": 2129.327880859375, - "learning_rate": 4.355524547012471e-05, - "loss": 88.4741, - "step": 77600 - }, - { - "epoch": 0.31355422051818665, - "grad_norm": 445.593505859375, - "learning_rate": 4.355290596309912e-05, - "loss": 59.0199, - "step": 77610 - }, - { - "epoch": 0.31359462178355424, - "grad_norm": 660.9097290039062, - "learning_rate": 4.3550566094375086e-05, - "loss": 65.1326, - "step": 77620 - }, - { - "epoch": 0.3136350230489219, - "grad_norm": 670.1815185546875, - "learning_rate": 4.3548225863998224e-05, - "loss": 76.792, - "step": 77630 - }, - { - "epoch": 0.3136754243142895, - "grad_norm": 882.048828125, - "learning_rate": 4.354588527201414e-05, - "loss": 70.3437, - "step": 77640 - }, - { - "epoch": 0.31371582557965716, - "grad_norm": 831.5132446289062, - "learning_rate": 4.3543544318468485e-05, - "loss": 60.2682, - "step": 77650 - }, - { - "epoch": 0.3137562268450248, - "grad_norm": 1422.9718017578125, - "learning_rate": 4.354120300340688e-05, - "loss": 69.6125, - "step": 77660 - }, - { - "epoch": 0.31379662811039244, - "grad_norm": 703.7569580078125, - "learning_rate": 4.353886132687497e-05, - "loss": 46.6462, - "step": 77670 - }, - { - "epoch": 0.31383702937576, - "grad_norm": 620.9691772460938, - "learning_rate": 4.353651928891842e-05, - "loss": 86.715, - "step": 77680 - }, - { - "epoch": 0.31387743064112766, - "grad_norm": 586.15625, - "learning_rate": 4.353417688958289e-05, - "loss": 58.7201, - "step": 77690 - }, - { - "epoch": 0.3139178319064953, - "grad_norm": 1025.6397705078125, - "learning_rate": 4.3531834128914025e-05, - "loss": 46.0605, - "step": 77700 - }, - { - "epoch": 0.31395823317186294, - "grad_norm": 1030.073974609375, - "learning_rate": 4.352949100695752e-05, - "loss": 79.4799, - "step": 77710 - }, - { - "epoch": 0.3139986344372306, - "grad_norm": 862.0059204101562, - "learning_rate": 4.352714752375906e-05, - "loss": 94.2031, - "step": 77720 - }, - { - "epoch": 0.3140390357025982, - "grad_norm": 968.3876342773438, - "learning_rate": 4.352480367936431e-05, - "loss": 63.7685, - "step": 77730 - }, - { - "epoch": 0.31407943696796586, - "grad_norm": 819.8076782226562, - "learning_rate": 4.352245947381897e-05, - "loss": 77.5755, - "step": 77740 - }, - { - "epoch": 0.31411983823333345, - "grad_norm": 1108.0177001953125, - "learning_rate": 4.352011490716875e-05, - "loss": 70.4065, - "step": 77750 - }, - { - "epoch": 0.3141602394987011, - "grad_norm": 698.9972534179688, - "learning_rate": 4.351776997945936e-05, - "loss": 72.4767, - "step": 77760 - }, - { - "epoch": 0.31420064076406873, - "grad_norm": 1234.1883544921875, - "learning_rate": 4.351542469073651e-05, - "loss": 68.6958, - "step": 77770 - }, - { - "epoch": 0.31424104202943637, - "grad_norm": 493.3515625, - "learning_rate": 4.351307904104592e-05, - "loss": 51.1366, - "step": 77780 - }, - { - "epoch": 0.314281443294804, - "grad_norm": 1404.8785400390625, - "learning_rate": 4.351073303043332e-05, - "loss": 85.1898, - "step": 77790 - }, - { - "epoch": 0.31432184456017165, - "grad_norm": 2445.363037109375, - "learning_rate": 4.350838665894446e-05, - "loss": 67.7399, - "step": 77800 - }, - { - "epoch": 0.31436224582553923, - "grad_norm": 1352.8099365234375, - "learning_rate": 4.350603992662506e-05, - "loss": 52.9771, - "step": 77810 - }, - { - "epoch": 0.3144026470909069, - "grad_norm": 969.1926879882812, - "learning_rate": 4.3503692833520894e-05, - "loss": 62.1888, - "step": 77820 - }, - { - "epoch": 0.3144430483562745, - "grad_norm": 486.71185302734375, - "learning_rate": 4.350134537967771e-05, - "loss": 54.4507, - "step": 77830 - }, - { - "epoch": 0.31448344962164215, - "grad_norm": 817.6029663085938, - "learning_rate": 4.3498997565141267e-05, - "loss": 42.0353, - "step": 77840 - }, - { - "epoch": 0.3145238508870098, - "grad_norm": 1096.86669921875, - "learning_rate": 4.349664938995734e-05, - "loss": 79.652, - "step": 77850 - }, - { - "epoch": 0.31456425215237743, - "grad_norm": 1449.87744140625, - "learning_rate": 4.3494300854171715e-05, - "loss": 93.2444, - "step": 77860 - }, - { - "epoch": 0.314604653417745, - "grad_norm": 821.5718994140625, - "learning_rate": 4.349195195783017e-05, - "loss": 53.875, - "step": 77870 - }, - { - "epoch": 0.31464505468311266, - "grad_norm": 1154.4615478515625, - "learning_rate": 4.348960270097851e-05, - "loss": 62.7061, - "step": 77880 - }, - { - "epoch": 0.3146854559484803, - "grad_norm": 792.2052001953125, - "learning_rate": 4.348725308366252e-05, - "loss": 83.3605, - "step": 77890 - }, - { - "epoch": 0.31472585721384794, - "grad_norm": 3039.34228515625, - "learning_rate": 4.348490310592801e-05, - "loss": 87.3251, - "step": 77900 - }, - { - "epoch": 0.3147662584792156, - "grad_norm": 886.7009887695312, - "learning_rate": 4.34825527678208e-05, - "loss": 57.0282, - "step": 77910 - }, - { - "epoch": 0.3148066597445832, - "grad_norm": 786.5516967773438, - "learning_rate": 4.348020206938672e-05, - "loss": 87.8153, - "step": 77920 - }, - { - "epoch": 0.31484706100995086, - "grad_norm": 739.66015625, - "learning_rate": 4.347785101067157e-05, - "loss": 58.632, - "step": 77930 - }, - { - "epoch": 0.31488746227531844, - "grad_norm": 578.0338134765625, - "learning_rate": 4.347549959172121e-05, - "loss": 80.8651, - "step": 77940 - }, - { - "epoch": 0.3149278635406861, - "grad_norm": 1119.5306396484375, - "learning_rate": 4.347314781258147e-05, - "loss": 68.4437, - "step": 77950 - }, - { - "epoch": 0.3149682648060537, - "grad_norm": 0.0, - "learning_rate": 4.3470795673298206e-05, - "loss": 57.3587, - "step": 77960 - }, - { - "epoch": 0.31500866607142136, - "grad_norm": 681.5150146484375, - "learning_rate": 4.3468443173917267e-05, - "loss": 52.9524, - "step": 77970 - }, - { - "epoch": 0.315049067336789, - "grad_norm": 412.8719787597656, - "learning_rate": 4.346609031448452e-05, - "loss": 36.4723, - "step": 77980 - }, - { - "epoch": 0.31508946860215664, - "grad_norm": 945.4547119140625, - "learning_rate": 4.346373709504584e-05, - "loss": 67.0522, - "step": 77990 - }, - { - "epoch": 0.3151298698675242, - "grad_norm": 992.1616821289062, - "learning_rate": 4.3461383515647106e-05, - "loss": 78.1435, - "step": 78000 - }, - { - "epoch": 0.31517027113289187, - "grad_norm": 503.8204650878906, - "learning_rate": 4.345902957633418e-05, - "loss": 48.7498, - "step": 78010 - }, - { - "epoch": 0.3152106723982595, - "grad_norm": 780.5662841796875, - "learning_rate": 4.3456675277152973e-05, - "loss": 56.2306, - "step": 78020 - }, - { - "epoch": 0.31525107366362715, - "grad_norm": 454.04132080078125, - "learning_rate": 4.345432061814938e-05, - "loss": 85.0091, - "step": 78030 - }, - { - "epoch": 0.3152914749289948, - "grad_norm": 1036.3134765625, - "learning_rate": 4.345196559936932e-05, - "loss": 53.9506, - "step": 78040 - }, - { - "epoch": 0.3153318761943624, - "grad_norm": 929.426513671875, - "learning_rate": 4.344961022085867e-05, - "loss": 70.4333, - "step": 78050 - }, - { - "epoch": 0.31537227745973007, - "grad_norm": 597.806640625, - "learning_rate": 4.344725448266338e-05, - "loss": 41.9415, - "step": 78060 - }, - { - "epoch": 0.31541267872509765, - "grad_norm": 391.077392578125, - "learning_rate": 4.3444898384829364e-05, - "loss": 54.7683, - "step": 78070 - }, - { - "epoch": 0.3154530799904653, - "grad_norm": 873.1908569335938, - "learning_rate": 4.3442541927402566e-05, - "loss": 60.2897, - "step": 78080 - }, - { - "epoch": 0.31549348125583293, - "grad_norm": 464.908447265625, - "learning_rate": 4.344018511042891e-05, - "loss": 57.7049, - "step": 78090 - }, - { - "epoch": 0.31553388252120057, - "grad_norm": 481.4813537597656, - "learning_rate": 4.343782793395435e-05, - "loss": 67.3648, - "step": 78100 - }, - { - "epoch": 0.3155742837865682, - "grad_norm": 1816.134521484375, - "learning_rate": 4.343547039802485e-05, - "loss": 79.2945, - "step": 78110 - }, - { - "epoch": 0.31561468505193585, - "grad_norm": 561.038330078125, - "learning_rate": 4.3433112502686355e-05, - "loss": 50.8454, - "step": 78120 - }, - { - "epoch": 0.31565508631730343, - "grad_norm": 685.545654296875, - "learning_rate": 4.3430754247984845e-05, - "loss": 54.5997, - "step": 78130 - }, - { - "epoch": 0.3156954875826711, - "grad_norm": 618.0608520507812, - "learning_rate": 4.342839563396629e-05, - "loss": 76.7737, - "step": 78140 - }, - { - "epoch": 0.3157358888480387, - "grad_norm": 1280.976806640625, - "learning_rate": 4.3426036660676686e-05, - "loss": 54.933, - "step": 78150 - }, - { - "epoch": 0.31577629011340635, - "grad_norm": 650.5822143554688, - "learning_rate": 4.3423677328161996e-05, - "loss": 71.7501, - "step": 78160 - }, - { - "epoch": 0.315816691378774, - "grad_norm": 597.2694091796875, - "learning_rate": 4.342131763646824e-05, - "loss": 68.88, - "step": 78170 - }, - { - "epoch": 0.31585709264414163, - "grad_norm": 1208.90576171875, - "learning_rate": 4.341895758564141e-05, - "loss": 67.8843, - "step": 78180 - }, - { - "epoch": 0.3158974939095092, - "grad_norm": 830.9584350585938, - "learning_rate": 4.3416597175727514e-05, - "loss": 89.2304, - "step": 78190 - }, - { - "epoch": 0.31593789517487686, - "grad_norm": 1917.3353271484375, - "learning_rate": 4.3414236406772584e-05, - "loss": 111.667, - "step": 78200 - }, - { - "epoch": 0.3159782964402445, - "grad_norm": 924.7677612304688, - "learning_rate": 4.3411875278822635e-05, - "loss": 49.8145, - "step": 78210 - }, - { - "epoch": 0.31601869770561214, - "grad_norm": 469.1351318359375, - "learning_rate": 4.340951379192369e-05, - "loss": 116.5447, - "step": 78220 - }, - { - "epoch": 0.3160590989709798, - "grad_norm": 495.249755859375, - "learning_rate": 4.34071519461218e-05, - "loss": 63.0928, - "step": 78230 - }, - { - "epoch": 0.3160995002363474, - "grad_norm": 1259.7349853515625, - "learning_rate": 4.3404789741463e-05, - "loss": 54.0104, - "step": 78240 - }, - { - "epoch": 0.31613990150171506, - "grad_norm": 512.0302124023438, - "learning_rate": 4.3402427177993366e-05, - "loss": 48.6828, - "step": 78250 - }, - { - "epoch": 0.31618030276708264, - "grad_norm": 1092.3133544921875, - "learning_rate": 4.340006425575892e-05, - "loss": 51.555, - "step": 78260 - }, - { - "epoch": 0.3162207040324503, - "grad_norm": 668.2493896484375, - "learning_rate": 4.339770097480576e-05, - "loss": 67.2536, - "step": 78270 - }, - { - "epoch": 0.3162611052978179, - "grad_norm": 832.0966796875, - "learning_rate": 4.3395337335179945e-05, - "loss": 78.7102, - "step": 78280 - }, - { - "epoch": 0.31630150656318556, - "grad_norm": 931.7671508789062, - "learning_rate": 4.339297333692756e-05, - "loss": 57.0442, - "step": 78290 - }, - { - "epoch": 0.3163419078285532, - "grad_norm": 1212.2379150390625, - "learning_rate": 4.339060898009469e-05, - "loss": 83.3376, - "step": 78300 - }, - { - "epoch": 0.31638230909392084, - "grad_norm": 393.2463073730469, - "learning_rate": 4.338824426472743e-05, - "loss": 48.1462, - "step": 78310 - }, - { - "epoch": 0.31642271035928843, - "grad_norm": 422.79998779296875, - "learning_rate": 4.338587919087187e-05, - "loss": 57.0146, - "step": 78320 - }, - { - "epoch": 0.31646311162465607, - "grad_norm": 1542.5203857421875, - "learning_rate": 4.3383513758574143e-05, - "loss": 69.819, - "step": 78330 - }, - { - "epoch": 0.3165035128900237, - "grad_norm": 858.1940307617188, - "learning_rate": 4.338114796788035e-05, - "loss": 95.8173, - "step": 78340 - }, - { - "epoch": 0.31654391415539135, - "grad_norm": 393.74505615234375, - "learning_rate": 4.337878181883661e-05, - "loss": 46.5559, - "step": 78350 - }, - { - "epoch": 0.316584315420759, - "grad_norm": 786.9148559570312, - "learning_rate": 4.3376415311489056e-05, - "loss": 68.8837, - "step": 78360 - }, - { - "epoch": 0.31662471668612663, - "grad_norm": 1402.4737548828125, - "learning_rate": 4.337404844588382e-05, - "loss": 89.8514, - "step": 78370 - }, - { - "epoch": 0.31666511795149427, - "grad_norm": 773.1746826171875, - "learning_rate": 4.337168122206706e-05, - "loss": 74.1317, - "step": 78380 - }, - { - "epoch": 0.31670551921686185, - "grad_norm": 653.9239501953125, - "learning_rate": 4.3369313640084916e-05, - "loss": 51.6219, - "step": 78390 - }, - { - "epoch": 0.3167459204822295, - "grad_norm": 664.2848510742188, - "learning_rate": 4.336694569998354e-05, - "loss": 74.1221, - "step": 78400 - }, - { - "epoch": 0.31678632174759713, - "grad_norm": 609.3822021484375, - "learning_rate": 4.3364577401809105e-05, - "loss": 71.547, - "step": 78410 - }, - { - "epoch": 0.3168267230129648, - "grad_norm": 1231.6363525390625, - "learning_rate": 4.336220874560778e-05, - "loss": 77.0097, - "step": 78420 - }, - { - "epoch": 0.3168671242783324, - "grad_norm": 205.0146484375, - "learning_rate": 4.3359839731425735e-05, - "loss": 50.0911, - "step": 78430 - }, - { - "epoch": 0.31690752554370005, - "grad_norm": 389.177978515625, - "learning_rate": 4.335747035930916e-05, - "loss": 59.2829, - "step": 78440 - }, - { - "epoch": 0.31694792680906764, - "grad_norm": 600.1491088867188, - "learning_rate": 4.3355100629304254e-05, - "loss": 79.876, - "step": 78450 - }, - { - "epoch": 0.3169883280744353, - "grad_norm": 544.903564453125, - "learning_rate": 4.335273054145722e-05, - "loss": 82.2558, - "step": 78460 - }, - { - "epoch": 0.3170287293398029, - "grad_norm": 909.1273803710938, - "learning_rate": 4.335036009581425e-05, - "loss": 70.1611, - "step": 78470 - }, - { - "epoch": 0.31706913060517056, - "grad_norm": 639.6483764648438, - "learning_rate": 4.334798929242155e-05, - "loss": 69.5685, - "step": 78480 - }, - { - "epoch": 0.3171095318705382, - "grad_norm": 615.6561889648438, - "learning_rate": 4.3345618131325374e-05, - "loss": 76.2855, - "step": 78490 - }, - { - "epoch": 0.31714993313590584, - "grad_norm": 3859.427490234375, - "learning_rate": 4.334324661257191e-05, - "loss": 76.5629, - "step": 78500 - }, - { - "epoch": 0.3171903344012734, - "grad_norm": 1610.2098388671875, - "learning_rate": 4.334087473620742e-05, - "loss": 103.2507, - "step": 78510 - }, - { - "epoch": 0.31723073566664106, - "grad_norm": 588.2677612304688, - "learning_rate": 4.3338502502278134e-05, - "loss": 51.7838, - "step": 78520 - }, - { - "epoch": 0.3172711369320087, - "grad_norm": 192.1394500732422, - "learning_rate": 4.333612991083029e-05, - "loss": 68.0169, - "step": 78530 - }, - { - "epoch": 0.31731153819737634, - "grad_norm": 960.3283081054688, - "learning_rate": 4.3333756961910166e-05, - "loss": 47.2101, - "step": 78540 - }, - { - "epoch": 0.317351939462744, - "grad_norm": 1771.0201416015625, - "learning_rate": 4.3331383655564006e-05, - "loss": 60.5402, - "step": 78550 - }, - { - "epoch": 0.3173923407281116, - "grad_norm": 1430.1416015625, - "learning_rate": 4.3329009991838084e-05, - "loss": 53.3842, - "step": 78560 - }, - { - "epoch": 0.31743274199347926, - "grad_norm": 429.119873046875, - "learning_rate": 4.3326635970778676e-05, - "loss": 48.8891, - "step": 78570 - }, - { - "epoch": 0.31747314325884685, - "grad_norm": 798.0321655273438, - "learning_rate": 4.3324261592432056e-05, - "loss": 68.9384, - "step": 78580 - }, - { - "epoch": 0.3175135445242145, - "grad_norm": 999.2288208007812, - "learning_rate": 4.3321886856844534e-05, - "loss": 57.1692, - "step": 78590 - }, - { - "epoch": 0.3175539457895821, - "grad_norm": 923.92724609375, - "learning_rate": 4.331951176406239e-05, - "loss": 43.5955, - "step": 78600 - }, - { - "epoch": 0.31759434705494977, - "grad_norm": 967.3873901367188, - "learning_rate": 4.331713631413194e-05, - "loss": 47.511, - "step": 78610 - }, - { - "epoch": 0.3176347483203174, - "grad_norm": 884.6947021484375, - "learning_rate": 4.331476050709948e-05, - "loss": 57.999, - "step": 78620 - }, - { - "epoch": 0.31767514958568505, - "grad_norm": 611.1873168945312, - "learning_rate": 4.331238434301134e-05, - "loss": 77.5249, - "step": 78630 - }, - { - "epoch": 0.31771555085105263, - "grad_norm": 913.7862548828125, - "learning_rate": 4.3310007821913836e-05, - "loss": 54.6169, - "step": 78640 - }, - { - "epoch": 0.31775595211642027, - "grad_norm": 1171.2972412109375, - "learning_rate": 4.330763094385329e-05, - "loss": 80.6045, - "step": 78650 - }, - { - "epoch": 0.3177963533817879, - "grad_norm": 507.2369384765625, - "learning_rate": 4.330525370887607e-05, - "loss": 46.0367, - "step": 78660 - }, - { - "epoch": 0.31783675464715555, - "grad_norm": 564.16259765625, - "learning_rate": 4.33028761170285e-05, - "loss": 73.1167, - "step": 78670 - }, - { - "epoch": 0.3178771559125232, - "grad_norm": 897.42919921875, - "learning_rate": 4.330049816835694e-05, - "loss": 92.9207, - "step": 78680 - }, - { - "epoch": 0.31791755717789083, - "grad_norm": 3515.646240234375, - "learning_rate": 4.3298119862907744e-05, - "loss": 119.0089, - "step": 78690 - }, - { - "epoch": 0.31795795844325847, - "grad_norm": 1024.9483642578125, - "learning_rate": 4.329574120072728e-05, - "loss": 58.4423, - "step": 78700 - }, - { - "epoch": 0.31799835970862605, - "grad_norm": 969.68115234375, - "learning_rate": 4.329336218186192e-05, - "loss": 55.7534, - "step": 78710 - }, - { - "epoch": 0.3180387609739937, - "grad_norm": 602.466552734375, - "learning_rate": 4.3290982806358046e-05, - "loss": 75.1919, - "step": 78720 - }, - { - "epoch": 0.31807916223936133, - "grad_norm": 599.2578735351562, - "learning_rate": 4.3288603074262054e-05, - "loss": 105.2766, - "step": 78730 - }, - { - "epoch": 0.318119563504729, - "grad_norm": 790.10693359375, - "learning_rate": 4.328622298562033e-05, - "loss": 53.7875, - "step": 78740 - }, - { - "epoch": 0.3181599647700966, - "grad_norm": 551.5433349609375, - "learning_rate": 4.3283842540479264e-05, - "loss": 63.0458, - "step": 78750 - }, - { - "epoch": 0.31820036603546425, - "grad_norm": 0.0, - "learning_rate": 4.3281461738885274e-05, - "loss": 61.5794, - "step": 78760 - }, - { - "epoch": 0.31824076730083184, - "grad_norm": 681.5363159179688, - "learning_rate": 4.327908058088479e-05, - "loss": 67.191, - "step": 78770 - }, - { - "epoch": 0.3182811685661995, - "grad_norm": 1063.3834228515625, - "learning_rate": 4.327669906652421e-05, - "loss": 82.7017, - "step": 78780 - }, - { - "epoch": 0.3183215698315671, - "grad_norm": 500.39361572265625, - "learning_rate": 4.327431719584997e-05, - "loss": 85.2169, - "step": 78790 - }, - { - "epoch": 0.31836197109693476, - "grad_norm": 1306.898681640625, - "learning_rate": 4.3271934968908514e-05, - "loss": 72.2316, - "step": 78800 - }, - { - "epoch": 0.3184023723623024, - "grad_norm": 815.1703491210938, - "learning_rate": 4.326955238574627e-05, - "loss": 50.3086, - "step": 78810 - }, - { - "epoch": 0.31844277362767004, - "grad_norm": 460.31005859375, - "learning_rate": 4.32671694464097e-05, - "loss": 64.6161, - "step": 78820 - }, - { - "epoch": 0.3184831748930376, - "grad_norm": 2179.723388671875, - "learning_rate": 4.326478615094526e-05, - "loss": 49.6086, - "step": 78830 - }, - { - "epoch": 0.31852357615840526, - "grad_norm": 1665.5799560546875, - "learning_rate": 4.3262402499399404e-05, - "loss": 97.2382, - "step": 78840 - }, - { - "epoch": 0.3185639774237729, - "grad_norm": 1162.87841796875, - "learning_rate": 4.326001849181862e-05, - "loss": 57.7263, - "step": 78850 - }, - { - "epoch": 0.31860437868914054, - "grad_norm": 333.8135681152344, - "learning_rate": 4.325763412824937e-05, - "loss": 49.9047, - "step": 78860 - }, - { - "epoch": 0.3186447799545082, - "grad_norm": 510.8147888183594, - "learning_rate": 4.325524940873814e-05, - "loss": 82.3427, - "step": 78870 - }, - { - "epoch": 0.3186851812198758, - "grad_norm": 434.4716491699219, - "learning_rate": 4.325286433333142e-05, - "loss": 71.3787, - "step": 78880 - }, - { - "epoch": 0.31872558248524346, - "grad_norm": 725.61279296875, - "learning_rate": 4.325047890207572e-05, - "loss": 63.1463, - "step": 78890 - }, - { - "epoch": 0.31876598375061105, - "grad_norm": 366.7152404785156, - "learning_rate": 4.324809311501754e-05, - "loss": 66.2424, - "step": 78900 - }, - { - "epoch": 0.3188063850159787, - "grad_norm": 1318.6060791015625, - "learning_rate": 4.3245706972203385e-05, - "loss": 67.3569, - "step": 78910 - }, - { - "epoch": 0.31884678628134633, - "grad_norm": 727.7313842773438, - "learning_rate": 4.3243320473679785e-05, - "loss": 63.7311, - "step": 78920 - }, - { - "epoch": 0.31888718754671397, - "grad_norm": 1180.8690185546875, - "learning_rate": 4.324093361949325e-05, - "loss": 54.1359, - "step": 78930 - }, - { - "epoch": 0.3189275888120816, - "grad_norm": 652.950439453125, - "learning_rate": 4.323854640969033e-05, - "loss": 66.1783, - "step": 78940 - }, - { - "epoch": 0.31896799007744925, - "grad_norm": 554.9948120117188, - "learning_rate": 4.323615884431756e-05, - "loss": 77.2896, - "step": 78950 - }, - { - "epoch": 0.31900839134281683, - "grad_norm": 657.7294921875, - "learning_rate": 4.323377092342148e-05, - "loss": 56.7781, - "step": 78960 - }, - { - "epoch": 0.3190487926081845, - "grad_norm": 1056.711181640625, - "learning_rate": 4.323138264704864e-05, - "loss": 51.1476, - "step": 78970 - }, - { - "epoch": 0.3190891938735521, - "grad_norm": 1820.9912109375, - "learning_rate": 4.322899401524563e-05, - "loss": 90.834, - "step": 78980 - }, - { - "epoch": 0.31912959513891975, - "grad_norm": 1768.5189208984375, - "learning_rate": 4.322660502805899e-05, - "loss": 79.8078, - "step": 78990 - }, - { - "epoch": 0.3191699964042874, - "grad_norm": 601.8983154296875, - "learning_rate": 4.3224215685535294e-05, - "loss": 58.5694, - "step": 79000 - }, - { - "epoch": 0.31921039766965503, - "grad_norm": 1303.4908447265625, - "learning_rate": 4.322182598772113e-05, - "loss": 45.729, - "step": 79010 - }, - { - "epoch": 0.3192507989350226, - "grad_norm": 480.8334655761719, - "learning_rate": 4.321943593466309e-05, - "loss": 84.9618, - "step": 79020 - }, - { - "epoch": 0.31929120020039026, - "grad_norm": 932.1970825195312, - "learning_rate": 4.321704552640777e-05, - "loss": 67.6896, - "step": 79030 - }, - { - "epoch": 0.3193316014657579, - "grad_norm": 1057.0179443359375, - "learning_rate": 4.321465476300177e-05, - "loss": 100.6307, - "step": 79040 - }, - { - "epoch": 0.31937200273112554, - "grad_norm": 996.9810791015625, - "learning_rate": 4.321226364449169e-05, - "loss": 71.2474, - "step": 79050 - }, - { - "epoch": 0.3194124039964932, - "grad_norm": 568.8705444335938, - "learning_rate": 4.320987217092416e-05, - "loss": 43.2181, - "step": 79060 - }, - { - "epoch": 0.3194528052618608, - "grad_norm": 420.3672180175781, - "learning_rate": 4.320748034234579e-05, - "loss": 57.1254, - "step": 79070 - }, - { - "epoch": 0.31949320652722846, - "grad_norm": 1757.289306640625, - "learning_rate": 4.3205088158803226e-05, - "loss": 68.5235, - "step": 79080 - }, - { - "epoch": 0.31953360779259604, - "grad_norm": 660.7200927734375, - "learning_rate": 4.3202695620343083e-05, - "loss": 50.2219, - "step": 79090 - }, - { - "epoch": 0.3195740090579637, - "grad_norm": 1262.357421875, - "learning_rate": 4.320030272701203e-05, - "loss": 58.4075, - "step": 79100 - }, - { - "epoch": 0.3196144103233313, - "grad_norm": 898.780029296875, - "learning_rate": 4.31979094788567e-05, - "loss": 88.9766, - "step": 79110 - }, - { - "epoch": 0.31965481158869896, - "grad_norm": 535.6188354492188, - "learning_rate": 4.319551587592376e-05, - "loss": 30.2677, - "step": 79120 - }, - { - "epoch": 0.3196952128540666, - "grad_norm": 559.2113647460938, - "learning_rate": 4.319312191825987e-05, - "loss": 78.3262, - "step": 79130 - }, - { - "epoch": 0.31973561411943424, - "grad_norm": 952.9019775390625, - "learning_rate": 4.31907276059117e-05, - "loss": 67.9382, - "step": 79140 - }, - { - "epoch": 0.3197760153848018, - "grad_norm": 959.8912353515625, - "learning_rate": 4.318833293892592e-05, - "loss": 68.8999, - "step": 79150 - }, - { - "epoch": 0.31981641665016947, - "grad_norm": 985.2177124023438, - "learning_rate": 4.318593791734924e-05, - "loss": 73.2616, - "step": 79160 - }, - { - "epoch": 0.3198568179155371, - "grad_norm": 1765.3321533203125, - "learning_rate": 4.318354254122833e-05, - "loss": 101.728, - "step": 79170 - }, - { - "epoch": 0.31989721918090475, - "grad_norm": 2424.275390625, - "learning_rate": 4.31811468106099e-05, - "loss": 40.6484, - "step": 79180 - }, - { - "epoch": 0.3199376204462724, - "grad_norm": 529.9296264648438, - "learning_rate": 4.317875072554065e-05, - "loss": 56.8446, - "step": 79190 - }, - { - "epoch": 0.31997802171164, - "grad_norm": 988.1986083984375, - "learning_rate": 4.31763542860673e-05, - "loss": 89.1671, - "step": 79200 - }, - { - "epoch": 0.32001842297700767, - "grad_norm": 954.2215576171875, - "learning_rate": 4.317395749223656e-05, - "loss": 62.5503, - "step": 79210 - }, - { - "epoch": 0.32005882424237525, - "grad_norm": 1343.7386474609375, - "learning_rate": 4.3171560344095164e-05, - "loss": 85.5978, - "step": 79220 - }, - { - "epoch": 0.3200992255077429, - "grad_norm": 992.0386962890625, - "learning_rate": 4.3169162841689846e-05, - "loss": 77.3141, - "step": 79230 - }, - { - "epoch": 0.32013962677311053, - "grad_norm": 684.0596313476562, - "learning_rate": 4.3166764985067343e-05, - "loss": 68.0206, - "step": 79240 - }, - { - "epoch": 0.32018002803847817, - "grad_norm": 544.3250732421875, - "learning_rate": 4.31643667742744e-05, - "loss": 64.4232, - "step": 79250 - }, - { - "epoch": 0.3202204293038458, - "grad_norm": 485.05413818359375, - "learning_rate": 4.3161968209357776e-05, - "loss": 72.3071, - "step": 79260 - }, - { - "epoch": 0.32026083056921345, - "grad_norm": 1109.7979736328125, - "learning_rate": 4.315956929036423e-05, - "loss": 87.8763, - "step": 79270 - }, - { - "epoch": 0.32030123183458103, - "grad_norm": 735.7939453125, - "learning_rate": 4.3157170017340545e-05, - "loss": 60.9775, - "step": 79280 - }, - { - "epoch": 0.3203416330999487, - "grad_norm": 515.8373413085938, - "learning_rate": 4.3154770390333463e-05, - "loss": 57.4723, - "step": 79290 - }, - { - "epoch": 0.3203820343653163, - "grad_norm": 11893.99609375, - "learning_rate": 4.3152370409389795e-05, - "loss": 97.6898, - "step": 79300 - }, - { - "epoch": 0.32042243563068395, - "grad_norm": 583.2001953125, - "learning_rate": 4.3149970074556324e-05, - "loss": 104.1178, - "step": 79310 - }, - { - "epoch": 0.3204628368960516, - "grad_norm": 1154.815185546875, - "learning_rate": 4.314756938587984e-05, - "loss": 47.7879, - "step": 79320 - }, - { - "epoch": 0.32050323816141923, - "grad_norm": 1132.3023681640625, - "learning_rate": 4.314516834340715e-05, - "loss": 59.4125, - "step": 79330 - }, - { - "epoch": 0.3205436394267868, - "grad_norm": 530.7276611328125, - "learning_rate": 4.3142766947185056e-05, - "loss": 76.4587, - "step": 79340 - }, - { - "epoch": 0.32058404069215446, - "grad_norm": 943.7814331054688, - "learning_rate": 4.314036519726038e-05, - "loss": 83.0291, - "step": 79350 - }, - { - "epoch": 0.3206244419575221, - "grad_norm": 1113.9444580078125, - "learning_rate": 4.3137963093679945e-05, - "loss": 62.54, - "step": 79360 - }, - { - "epoch": 0.32066484322288974, - "grad_norm": 839.0965576171875, - "learning_rate": 4.313556063649059e-05, - "loss": 65.0847, - "step": 79370 - }, - { - "epoch": 0.3207052444882574, - "grad_norm": 645.9111938476562, - "learning_rate": 4.313315782573913e-05, - "loss": 66.1717, - "step": 79380 - }, - { - "epoch": 0.320745645753625, - "grad_norm": 1160.1456298828125, - "learning_rate": 4.3130754661472435e-05, - "loss": 72.6717, - "step": 79390 - }, - { - "epoch": 0.32078604701899266, - "grad_norm": 861.6610107421875, - "learning_rate": 4.3128351143737335e-05, - "loss": 94.8734, - "step": 79400 - }, - { - "epoch": 0.32082644828436024, - "grad_norm": 1239.6209716796875, - "learning_rate": 4.31259472725807e-05, - "loss": 65.5956, - "step": 79410 - }, - { - "epoch": 0.3208668495497279, - "grad_norm": 1918.2100830078125, - "learning_rate": 4.312354304804939e-05, - "loss": 98.4281, - "step": 79420 - }, - { - "epoch": 0.3209072508150955, - "grad_norm": 496.2858581542969, - "learning_rate": 4.312113847019028e-05, - "loss": 64.9097, - "step": 79430 - }, - { - "epoch": 0.32094765208046316, - "grad_norm": 568.856689453125, - "learning_rate": 4.3118733539050244e-05, - "loss": 80.5664, - "step": 79440 - }, - { - "epoch": 0.3209880533458308, - "grad_norm": 647.578369140625, - "learning_rate": 4.311632825467617e-05, - "loss": 81.0642, - "step": 79450 - }, - { - "epoch": 0.32102845461119844, - "grad_norm": 606.3375854492188, - "learning_rate": 4.311392261711495e-05, - "loss": 87.3034, - "step": 79460 - }, - { - "epoch": 0.32106885587656603, - "grad_norm": 486.86273193359375, - "learning_rate": 4.3111516626413485e-05, - "loss": 36.4118, - "step": 79470 - }, - { - "epoch": 0.32110925714193367, - "grad_norm": 527.7968139648438, - "learning_rate": 4.310911028261867e-05, - "loss": 75.8701, - "step": 79480 - }, - { - "epoch": 0.3211496584073013, - "grad_norm": 899.1015014648438, - "learning_rate": 4.310670358577744e-05, - "loss": 64.9072, - "step": 79490 - }, - { - "epoch": 0.32119005967266895, - "grad_norm": 491.0596923828125, - "learning_rate": 4.3104296535936695e-05, - "loss": 62.814, - "step": 79500 - }, - { - "epoch": 0.3212304609380366, - "grad_norm": 643.3750610351562, - "learning_rate": 4.3101889133143365e-05, - "loss": 62.7745, - "step": 79510 - }, - { - "epoch": 0.32127086220340423, - "grad_norm": 584.9881591796875, - "learning_rate": 4.3099481377444384e-05, - "loss": 70.7787, - "step": 79520 - }, - { - "epoch": 0.32131126346877187, - "grad_norm": 0.0, - "learning_rate": 4.30970732688867e-05, - "loss": 72.2548, - "step": 79530 - }, - { - "epoch": 0.32135166473413945, - "grad_norm": 551.8460693359375, - "learning_rate": 4.309466480751726e-05, - "loss": 57.843, - "step": 79540 - }, - { - "epoch": 0.3213920659995071, - "grad_norm": 1137.9078369140625, - "learning_rate": 4.309225599338301e-05, - "loss": 67.0156, - "step": 79550 - }, - { - "epoch": 0.32143246726487473, - "grad_norm": 1040.0809326171875, - "learning_rate": 4.308984682653092e-05, - "loss": 51.4768, - "step": 79560 - }, - { - "epoch": 0.3214728685302424, - "grad_norm": 496.1695556640625, - "learning_rate": 4.308743730700795e-05, - "loss": 49.8229, - "step": 79570 - }, - { - "epoch": 0.32151326979561, - "grad_norm": 928.9419555664062, - "learning_rate": 4.308502743486107e-05, - "loss": 68.5493, - "step": 79580 - }, - { - "epoch": 0.32155367106097765, - "grad_norm": 554.7418212890625, - "learning_rate": 4.308261721013728e-05, - "loss": 58.4424, - "step": 79590 - }, - { - "epoch": 0.32159407232634524, - "grad_norm": 530.4016723632812, - "learning_rate": 4.3080206632883554e-05, - "loss": 71.472, - "step": 79600 - }, - { - "epoch": 0.3216344735917129, - "grad_norm": 823.0756225585938, - "learning_rate": 4.307779570314689e-05, - "loss": 54.4288, - "step": 79610 - }, - { - "epoch": 0.3216748748570805, - "grad_norm": 523.12744140625, - "learning_rate": 4.307538442097429e-05, - "loss": 52.3814, - "step": 79620 - }, - { - "epoch": 0.32171527612244816, - "grad_norm": 1036.486083984375, - "learning_rate": 4.307297278641277e-05, - "loss": 56.4284, - "step": 79630 - }, - { - "epoch": 0.3217556773878158, - "grad_norm": 886.7937622070312, - "learning_rate": 4.307056079950934e-05, - "loss": 61.4247, - "step": 79640 - }, - { - "epoch": 0.32179607865318344, - "grad_norm": 1947.759033203125, - "learning_rate": 4.306814846031102e-05, - "loss": 69.1207, - "step": 79650 - }, - { - "epoch": 0.321836479918551, - "grad_norm": 1469.478759765625, - "learning_rate": 4.306573576886484e-05, - "loss": 76.3007, - "step": 79660 - }, - { - "epoch": 0.32187688118391866, - "grad_norm": 883.7667846679688, - "learning_rate": 4.306332272521785e-05, - "loss": 45.0061, - "step": 79670 - }, - { - "epoch": 0.3219172824492863, - "grad_norm": 418.3061828613281, - "learning_rate": 4.306090932941708e-05, - "loss": 45.161, - "step": 79680 - }, - { - "epoch": 0.32195768371465394, - "grad_norm": 504.8469543457031, - "learning_rate": 4.3058495581509586e-05, - "loss": 89.2983, - "step": 79690 - }, - { - "epoch": 0.3219980849800216, - "grad_norm": 510.06805419921875, - "learning_rate": 4.305608148154242e-05, - "loss": 57.7559, - "step": 79700 - }, - { - "epoch": 0.3220384862453892, - "grad_norm": 1332.9613037109375, - "learning_rate": 4.305366702956265e-05, - "loss": 54.3312, - "step": 79710 - }, - { - "epoch": 0.32207888751075686, - "grad_norm": 608.1598510742188, - "learning_rate": 4.305125222561736e-05, - "loss": 63.2103, - "step": 79720 - }, - { - "epoch": 0.32211928877612445, - "grad_norm": 504.0339050292969, - "learning_rate": 4.304883706975359e-05, - "loss": 45.3614, - "step": 79730 - }, - { - "epoch": 0.3221596900414921, - "grad_norm": 865.779052734375, - "learning_rate": 4.304642156201847e-05, - "loss": 77.652, - "step": 79740 - }, - { - "epoch": 0.3222000913068597, - "grad_norm": 1158.892333984375, - "learning_rate": 4.304400570245906e-05, - "loss": 46.2974, - "step": 79750 - }, - { - "epoch": 0.32224049257222737, - "grad_norm": 1002.193603515625, - "learning_rate": 4.304158949112247e-05, - "loss": 78.2484, - "step": 79760 - }, - { - "epoch": 0.322280893837595, - "grad_norm": 1321.477294921875, - "learning_rate": 4.3039172928055805e-05, - "loss": 72.8844, - "step": 79770 - }, - { - "epoch": 0.32232129510296265, - "grad_norm": 188.91424560546875, - "learning_rate": 4.303675601330618e-05, - "loss": 47.1185, - "step": 79780 - }, - { - "epoch": 0.32236169636833023, - "grad_norm": 535.0440673828125, - "learning_rate": 4.3034338746920707e-05, - "loss": 63.9909, - "step": 79790 - }, - { - "epoch": 0.32240209763369787, - "grad_norm": 474.7372741699219, - "learning_rate": 4.303192112894652e-05, - "loss": 73.8586, - "step": 79800 - }, - { - "epoch": 0.3224424988990655, - "grad_norm": 813.8701782226562, - "learning_rate": 4.302950315943074e-05, - "loss": 90.6143, - "step": 79810 - }, - { - "epoch": 0.32248290016443315, - "grad_norm": 344.40325927734375, - "learning_rate": 4.3027084838420516e-05, - "loss": 36.8543, - "step": 79820 - }, - { - "epoch": 0.3225233014298008, - "grad_norm": 300.6861267089844, - "learning_rate": 4.302466616596299e-05, - "loss": 48.8195, - "step": 79830 - }, - { - "epoch": 0.32256370269516843, - "grad_norm": 782.69970703125, - "learning_rate": 4.302224714210532e-05, - "loss": 59.1578, - "step": 79840 - }, - { - "epoch": 0.32260410396053607, - "grad_norm": 801.529541015625, - "learning_rate": 4.301982776689467e-05, - "loss": 44.7871, - "step": 79850 - }, - { - "epoch": 0.32264450522590365, - "grad_norm": 926.8488159179688, - "learning_rate": 4.301740804037819e-05, - "loss": 65.3786, - "step": 79860 - }, - { - "epoch": 0.3226849064912713, - "grad_norm": 917.0177001953125, - "learning_rate": 4.301498796260307e-05, - "loss": 55.9066, - "step": 79870 - }, - { - "epoch": 0.32272530775663893, - "grad_norm": 0.0, - "learning_rate": 4.301256753361649e-05, - "loss": 50.5934, - "step": 79880 - }, - { - "epoch": 0.3227657090220066, - "grad_norm": 771.5294799804688, - "learning_rate": 4.301014675346562e-05, - "loss": 109.5298, - "step": 79890 - }, - { - "epoch": 0.3228061102873742, - "grad_norm": 562.3530883789062, - "learning_rate": 4.3007725622197674e-05, - "loss": 43.964, - "step": 79900 - }, - { - "epoch": 0.32284651155274185, - "grad_norm": 468.8666076660156, - "learning_rate": 4.300530413985985e-05, - "loss": 49.0986, - "step": 79910 - }, - { - "epoch": 0.32288691281810944, - "grad_norm": 678.0904541015625, - "learning_rate": 4.3002882306499345e-05, - "loss": 91.667, - "step": 79920 - }, - { - "epoch": 0.3229273140834771, - "grad_norm": 601.5751342773438, - "learning_rate": 4.300046012216338e-05, - "loss": 52.2733, - "step": 79930 - }, - { - "epoch": 0.3229677153488447, - "grad_norm": 1330.5103759765625, - "learning_rate": 4.299803758689919e-05, - "loss": 82.389, - "step": 79940 - }, - { - "epoch": 0.32300811661421236, - "grad_norm": 509.53277587890625, - "learning_rate": 4.299561470075397e-05, - "loss": 78.0836, - "step": 79950 - }, - { - "epoch": 0.32304851787958, - "grad_norm": 589.202392578125, - "learning_rate": 4.2993191463774997e-05, - "loss": 60.7983, - "step": 79960 - }, - { - "epoch": 0.32308891914494764, - "grad_norm": 738.194580078125, - "learning_rate": 4.299076787600948e-05, - "loss": 77.6573, - "step": 79970 - }, - { - "epoch": 0.3231293204103152, - "grad_norm": 1307.095947265625, - "learning_rate": 4.2988343937504686e-05, - "loss": 61.8344, - "step": 79980 - }, - { - "epoch": 0.32316972167568286, - "grad_norm": 656.2578125, - "learning_rate": 4.298591964830787e-05, - "loss": 95.2794, - "step": 79990 - }, - { - "epoch": 0.3232101229410505, - "grad_norm": 1574.1217041015625, - "learning_rate": 4.2983495008466276e-05, - "loss": 46.9904, - "step": 80000 - }, - { - "epoch": 0.32325052420641814, - "grad_norm": 678.7640380859375, - "learning_rate": 4.2981070018027204e-05, - "loss": 83.523, - "step": 80010 - }, - { - "epoch": 0.3232909254717858, - "grad_norm": 802.5941772460938, - "learning_rate": 4.29786446770379e-05, - "loss": 47.3945, - "step": 80020 - }, - { - "epoch": 0.3233313267371534, - "grad_norm": 880.6734008789062, - "learning_rate": 4.297621898554568e-05, - "loss": 69.5324, - "step": 80030 - }, - { - "epoch": 0.32337172800252106, - "grad_norm": 1087.5975341796875, - "learning_rate": 4.297379294359781e-05, - "loss": 68.7424, - "step": 80040 - }, - { - "epoch": 0.32341212926788865, - "grad_norm": 1480.21826171875, - "learning_rate": 4.297136655124159e-05, - "loss": 68.1467, - "step": 80050 - }, - { - "epoch": 0.3234525305332563, - "grad_norm": 375.3006286621094, - "learning_rate": 4.2968939808524323e-05, - "loss": 58.7787, - "step": 80060 - }, - { - "epoch": 0.32349293179862393, - "grad_norm": 811.9970092773438, - "learning_rate": 4.296651271549333e-05, - "loss": 72.5431, - "step": 80070 - }, - { - "epoch": 0.32353333306399157, - "grad_norm": 533.86474609375, - "learning_rate": 4.296408527219592e-05, - "loss": 87.0758, - "step": 80080 - }, - { - "epoch": 0.3235737343293592, - "grad_norm": 850.876220703125, - "learning_rate": 4.296165747867942e-05, - "loss": 41.6743, - "step": 80090 - }, - { - "epoch": 0.32361413559472685, - "grad_norm": 924.040283203125, - "learning_rate": 4.2959229334991156e-05, - "loss": 69.6225, - "step": 80100 - }, - { - "epoch": 0.32365453686009443, - "grad_norm": 351.706298828125, - "learning_rate": 4.295680084117847e-05, - "loss": 60.5032, - "step": 80110 - }, - { - "epoch": 0.3236949381254621, - "grad_norm": 893.626708984375, - "learning_rate": 4.295437199728871e-05, - "loss": 77.7679, - "step": 80120 - }, - { - "epoch": 0.3237353393908297, - "grad_norm": 652.7449951171875, - "learning_rate": 4.2951942803369225e-05, - "loss": 54.3826, - "step": 80130 - }, - { - "epoch": 0.32377574065619735, - "grad_norm": 1125.824462890625, - "learning_rate": 4.294951325946737e-05, - "loss": 93.5955, - "step": 80140 - }, - { - "epoch": 0.323816141921565, - "grad_norm": 1012.917236328125, - "learning_rate": 4.2947083365630514e-05, - "loss": 61.9179, - "step": 80150 - }, - { - "epoch": 0.32385654318693263, - "grad_norm": 807.2591552734375, - "learning_rate": 4.294465312190603e-05, - "loss": 46.5419, - "step": 80160 - }, - { - "epoch": 0.3238969444523003, - "grad_norm": 317.6124572753906, - "learning_rate": 4.294222252834129e-05, - "loss": 68.4578, - "step": 80170 - }, - { - "epoch": 0.32393734571766786, - "grad_norm": 2099.4423828125, - "learning_rate": 4.293979158498369e-05, - "loss": 87.771, - "step": 80180 - }, - { - "epoch": 0.3239777469830355, - "grad_norm": 482.209228515625, - "learning_rate": 4.293736029188061e-05, - "loss": 73.7765, - "step": 80190 - }, - { - "epoch": 0.32401814824840314, - "grad_norm": 1821.595947265625, - "learning_rate": 4.293492864907947e-05, - "loss": 84.6861, - "step": 80200 - }, - { - "epoch": 0.3240585495137708, - "grad_norm": 640.3442993164062, - "learning_rate": 4.293249665662765e-05, - "loss": 55.7724, - "step": 80210 - }, - { - "epoch": 0.3240989507791384, - "grad_norm": 974.443115234375, - "learning_rate": 4.293006431457258e-05, - "loss": 89.1751, - "step": 80220 - }, - { - "epoch": 0.32413935204450606, - "grad_norm": 625.10888671875, - "learning_rate": 4.2927631622961674e-05, - "loss": 77.496, - "step": 80230 - }, - { - "epoch": 0.32417975330987364, - "grad_norm": 267.2085266113281, - "learning_rate": 4.292519858184236e-05, - "loss": 54.031, - "step": 80240 - }, - { - "epoch": 0.3242201545752413, - "grad_norm": 543.6681518554688, - "learning_rate": 4.292276519126207e-05, - "loss": 61.7213, - "step": 80250 - }, - { - "epoch": 0.3242605558406089, - "grad_norm": 1130.407958984375, - "learning_rate": 4.292033145126825e-05, - "loss": 61.5954, - "step": 80260 - }, - { - "epoch": 0.32430095710597656, - "grad_norm": 590.1688842773438, - "learning_rate": 4.2917897361908335e-05, - "loss": 54.5805, - "step": 80270 - }, - { - "epoch": 0.3243413583713442, - "grad_norm": 1286.2869873046875, - "learning_rate": 4.291546292322979e-05, - "loss": 94.4061, - "step": 80280 - }, - { - "epoch": 0.32438175963671184, - "grad_norm": 520.0972290039062, - "learning_rate": 4.2913028135280076e-05, - "loss": 49.1347, - "step": 80290 - }, - { - "epoch": 0.3244221609020794, - "grad_norm": 775.9698486328125, - "learning_rate": 4.291059299810665e-05, - "loss": 58.7011, - "step": 80300 - }, - { - "epoch": 0.32446256216744707, - "grad_norm": 276.6414794921875, - "learning_rate": 4.2908157511757e-05, - "loss": 70.6107, - "step": 80310 - }, - { - "epoch": 0.3245029634328147, - "grad_norm": 446.62158203125, - "learning_rate": 4.290572167627859e-05, - "loss": 59.438, - "step": 80320 - }, - { - "epoch": 0.32454336469818235, - "grad_norm": 692.0556030273438, - "learning_rate": 4.290328549171893e-05, - "loss": 62.021, - "step": 80330 - }, - { - "epoch": 0.32458376596355, - "grad_norm": 446.0535583496094, - "learning_rate": 4.2900848958125485e-05, - "loss": 56.7572, - "step": 80340 - }, - { - "epoch": 0.3246241672289176, - "grad_norm": 477.32568359375, - "learning_rate": 4.289841207554578e-05, - "loss": 110.483, - "step": 80350 - }, - { - "epoch": 0.32466456849428527, - "grad_norm": 700.8925170898438, - "learning_rate": 4.289597484402732e-05, - "loss": 65.4878, - "step": 80360 - }, - { - "epoch": 0.32470496975965285, - "grad_norm": 1176.870849609375, - "learning_rate": 4.289353726361762e-05, - "loss": 61.7405, - "step": 80370 - }, - { - "epoch": 0.3247453710250205, - "grad_norm": 574.3339233398438, - "learning_rate": 4.289109933436419e-05, - "loss": 58.5538, - "step": 80380 - }, - { - "epoch": 0.32478577229038813, - "grad_norm": 928.0294799804688, - "learning_rate": 4.2888661056314574e-05, - "loss": 62.1923, - "step": 80390 - }, - { - "epoch": 0.32482617355575577, - "grad_norm": 1341.2864990234375, - "learning_rate": 4.2886222429516296e-05, - "loss": 67.9961, - "step": 80400 - }, - { - "epoch": 0.3248665748211234, - "grad_norm": 575.213134765625, - "learning_rate": 4.2883783454016915e-05, - "loss": 43.7556, - "step": 80410 - }, - { - "epoch": 0.32490697608649105, - "grad_norm": 637.3678588867188, - "learning_rate": 4.288134412986395e-05, - "loss": 72.743, - "step": 80420 - }, - { - "epoch": 0.32494737735185864, - "grad_norm": 936.275146484375, - "learning_rate": 4.287890445710499e-05, - "loss": 89.083, - "step": 80430 - }, - { - "epoch": 0.3249877786172263, - "grad_norm": 587.6963500976562, - "learning_rate": 4.287646443578758e-05, - "loss": 40.0728, - "step": 80440 - }, - { - "epoch": 0.3250281798825939, - "grad_norm": 465.3719177246094, - "learning_rate": 4.287402406595929e-05, - "loss": 53.9417, - "step": 80450 - }, - { - "epoch": 0.32506858114796156, - "grad_norm": 862.9938354492188, - "learning_rate": 4.28715833476677e-05, - "loss": 59.9648, - "step": 80460 - }, - { - "epoch": 0.3251089824133292, - "grad_norm": 902.8772583007812, - "learning_rate": 4.2869142280960396e-05, - "loss": 53.2347, - "step": 80470 - }, - { - "epoch": 0.32514938367869683, - "grad_norm": 827.6386108398438, - "learning_rate": 4.2866700865884954e-05, - "loss": 64.4303, - "step": 80480 - }, - { - "epoch": 0.3251897849440645, - "grad_norm": 1176.3564453125, - "learning_rate": 4.2864259102488984e-05, - "loss": 70.9066, - "step": 80490 - }, - { - "epoch": 0.32523018620943206, - "grad_norm": 2174.75244140625, - "learning_rate": 4.2861816990820084e-05, - "loss": 82.248, - "step": 80500 - }, - { - "epoch": 0.3252705874747997, - "grad_norm": 1319.863037109375, - "learning_rate": 4.285937453092587e-05, - "loss": 73.5724, - "step": 80510 - }, - { - "epoch": 0.32531098874016734, - "grad_norm": 802.91845703125, - "learning_rate": 4.285693172285396e-05, - "loss": 62.4872, - "step": 80520 - }, - { - "epoch": 0.325351390005535, - "grad_norm": 735.5004272460938, - "learning_rate": 4.2854488566651965e-05, - "loss": 80.5372, - "step": 80530 - }, - { - "epoch": 0.3253917912709026, - "grad_norm": 813.9871215820312, - "learning_rate": 4.2852045062367516e-05, - "loss": 59.1823, - "step": 80540 - }, - { - "epoch": 0.32543219253627026, - "grad_norm": 911.4254150390625, - "learning_rate": 4.2849601210048274e-05, - "loss": 73.9285, - "step": 80550 - }, - { - "epoch": 0.32547259380163784, - "grad_norm": 1268.9906005859375, - "learning_rate": 4.2847157009741856e-05, - "loss": 63.6592, - "step": 80560 - }, - { - "epoch": 0.3255129950670055, - "grad_norm": 514.0250244140625, - "learning_rate": 4.2844712461495926e-05, - "loss": 66.8111, - "step": 80570 - }, - { - "epoch": 0.3255533963323731, - "grad_norm": 1467.4144287109375, - "learning_rate": 4.284226756535814e-05, - "loss": 66.5214, - "step": 80580 - }, - { - "epoch": 0.32559379759774076, - "grad_norm": 416.23828125, - "learning_rate": 4.283982232137617e-05, - "loss": 47.4183, - "step": 80590 - }, - { - "epoch": 0.3256341988631084, - "grad_norm": 1595.3319091796875, - "learning_rate": 4.283737672959766e-05, - "loss": 72.357, - "step": 80600 - }, - { - "epoch": 0.32567460012847604, - "grad_norm": 1119.335693359375, - "learning_rate": 4.283493079007032e-05, - "loss": 51.9857, - "step": 80610 - }, - { - "epoch": 0.32571500139384363, - "grad_norm": 838.667236328125, - "learning_rate": 4.283248450284182e-05, - "loss": 55.8164, - "step": 80620 - }, - { - "epoch": 0.32575540265921127, - "grad_norm": 394.1136779785156, - "learning_rate": 4.283003786795986e-05, - "loss": 49.545, - "step": 80630 - }, - { - "epoch": 0.3257958039245789, - "grad_norm": 874.3743286132812, - "learning_rate": 4.2827590885472125e-05, - "loss": 64.6963, - "step": 80640 - }, - { - "epoch": 0.32583620518994655, - "grad_norm": 1160.2918701171875, - "learning_rate": 4.2825143555426326e-05, - "loss": 61.9271, - "step": 80650 - }, - { - "epoch": 0.3258766064553142, - "grad_norm": 1602.0166015625, - "learning_rate": 4.2822695877870177e-05, - "loss": 51.2635, - "step": 80660 - }, - { - "epoch": 0.32591700772068183, - "grad_norm": 382.5502014160156, - "learning_rate": 4.28202478528514e-05, - "loss": 54.0187, - "step": 80670 - }, - { - "epoch": 0.32595740898604947, - "grad_norm": 1229.9798583984375, - "learning_rate": 4.281779948041772e-05, - "loss": 60.0021, - "step": 80680 - }, - { - "epoch": 0.32599781025141705, - "grad_norm": 565.9646606445312, - "learning_rate": 4.2815350760616864e-05, - "loss": 61.7397, - "step": 80690 - }, - { - "epoch": 0.3260382115167847, - "grad_norm": 769.6617431640625, - "learning_rate": 4.2812901693496564e-05, - "loss": 60.3849, - "step": 80700 - }, - { - "epoch": 0.32607861278215233, - "grad_norm": 648.3060913085938, - "learning_rate": 4.281045227910459e-05, - "loss": 75.0846, - "step": 80710 - }, - { - "epoch": 0.32611901404752, - "grad_norm": 3067.703857421875, - "learning_rate": 4.2808002517488667e-05, - "loss": 90.7922, - "step": 80720 - }, - { - "epoch": 0.3261594153128876, - "grad_norm": 109.4122085571289, - "learning_rate": 4.280555240869657e-05, - "loss": 69.8678, - "step": 80730 - }, - { - "epoch": 0.32619981657825525, - "grad_norm": 733.5319213867188, - "learning_rate": 4.280310195277606e-05, - "loss": 81.5929, - "step": 80740 - }, - { - "epoch": 0.32624021784362284, - "grad_norm": 610.775634765625, - "learning_rate": 4.280065114977492e-05, - "loss": 57.4617, - "step": 80750 - }, - { - "epoch": 0.3262806191089905, - "grad_norm": 551.3561401367188, - "learning_rate": 4.279819999974091e-05, - "loss": 61.8333, - "step": 80760 - }, - { - "epoch": 0.3263210203743581, - "grad_norm": 685.814697265625, - "learning_rate": 4.279574850272183e-05, - "loss": 67.4173, - "step": 80770 - }, - { - "epoch": 0.32636142163972576, - "grad_norm": 1033.493896484375, - "learning_rate": 4.279329665876548e-05, - "loss": 53.4654, - "step": 80780 - }, - { - "epoch": 0.3264018229050934, - "grad_norm": 485.32745361328125, - "learning_rate": 4.2790844467919646e-05, - "loss": 54.8732, - "step": 80790 - }, - { - "epoch": 0.32644222417046104, - "grad_norm": 0.0, - "learning_rate": 4.278839193023214e-05, - "loss": 75.1927, - "step": 80800 - }, - { - "epoch": 0.3264826254358287, - "grad_norm": 743.0775756835938, - "learning_rate": 4.278593904575077e-05, - "loss": 44.6109, - "step": 80810 - }, - { - "epoch": 0.32652302670119626, - "grad_norm": 1801.4681396484375, - "learning_rate": 4.278348581452337e-05, - "loss": 76.2488, - "step": 80820 - }, - { - "epoch": 0.3265634279665639, - "grad_norm": 1143.9945068359375, - "learning_rate": 4.278103223659775e-05, - "loss": 68.8127, - "step": 80830 - }, - { - "epoch": 0.32660382923193154, - "grad_norm": 568.1923217773438, - "learning_rate": 4.2778578312021754e-05, - "loss": 46.9349, - "step": 80840 - }, - { - "epoch": 0.3266442304972992, - "grad_norm": 758.5953979492188, - "learning_rate": 4.277612404084322e-05, - "loss": 67.1757, - "step": 80850 - }, - { - "epoch": 0.3266846317626668, - "grad_norm": 696.4761352539062, - "learning_rate": 4.277366942311001e-05, - "loss": 62.2708, - "step": 80860 - }, - { - "epoch": 0.32672503302803446, - "grad_norm": 664.263671875, - "learning_rate": 4.277121445886995e-05, - "loss": 70.6147, - "step": 80870 - }, - { - "epoch": 0.32676543429340205, - "grad_norm": 184.5323944091797, - "learning_rate": 4.2768759148170915e-05, - "loss": 42.6514, - "step": 80880 - }, - { - "epoch": 0.3268058355587697, - "grad_norm": 586.1389770507812, - "learning_rate": 4.276630349106078e-05, - "loss": 38.8002, - "step": 80890 - }, - { - "epoch": 0.3268462368241373, - "grad_norm": 1424.904541015625, - "learning_rate": 4.276384748758741e-05, - "loss": 77.3322, - "step": 80900 - }, - { - "epoch": 0.32688663808950497, - "grad_norm": 859.6981811523438, - "learning_rate": 4.2761391137798676e-05, - "loss": 86.453, - "step": 80910 - }, - { - "epoch": 0.3269270393548726, - "grad_norm": 1004.2837524414062, - "learning_rate": 4.2758934441742496e-05, - "loss": 56.4649, - "step": 80920 - }, - { - "epoch": 0.32696744062024025, - "grad_norm": 1162.806396484375, - "learning_rate": 4.2756477399466735e-05, - "loss": 97.9132, - "step": 80930 - }, - { - "epoch": 0.32700784188560783, - "grad_norm": 425.3879699707031, - "learning_rate": 4.275402001101931e-05, - "loss": 68.8998, - "step": 80940 - }, - { - "epoch": 0.32704824315097547, - "grad_norm": 645.8928833007812, - "learning_rate": 4.2751562276448124e-05, - "loss": 51.0803, - "step": 80950 - }, - { - "epoch": 0.3270886444163431, - "grad_norm": 2057.58837890625, - "learning_rate": 4.274910419580108e-05, - "loss": 60.8407, - "step": 80960 - }, - { - "epoch": 0.32712904568171075, - "grad_norm": 1685.50048828125, - "learning_rate": 4.274664576912613e-05, - "loss": 66.7414, - "step": 80970 - }, - { - "epoch": 0.3271694469470784, - "grad_norm": 1081.0294189453125, - "learning_rate": 4.2744186996471174e-05, - "loss": 87.3173, - "step": 80980 - }, - { - "epoch": 0.32720984821244603, - "grad_norm": 1577.8040771484375, - "learning_rate": 4.2741727877884155e-05, - "loss": 85.7177, - "step": 80990 - }, - { - "epoch": 0.32725024947781367, - "grad_norm": 547.41259765625, - "learning_rate": 4.273926841341302e-05, - "loss": 47.8796, - "step": 81000 - }, - { - "epoch": 0.32729065074318126, - "grad_norm": 990.9943237304688, - "learning_rate": 4.273680860310572e-05, - "loss": 71.2299, - "step": 81010 - }, - { - "epoch": 0.3273310520085489, - "grad_norm": 379.1767883300781, - "learning_rate": 4.2734348447010206e-05, - "loss": 53.3839, - "step": 81020 - }, - { - "epoch": 0.32737145327391654, - "grad_norm": 280.44287109375, - "learning_rate": 4.2731887945174434e-05, - "loss": 66.233, - "step": 81030 - }, - { - "epoch": 0.3274118545392842, - "grad_norm": 659.9508056640625, - "learning_rate": 4.272942709764638e-05, - "loss": 80.5297, - "step": 81040 - }, - { - "epoch": 0.3274522558046518, - "grad_norm": 688.0112915039062, - "learning_rate": 4.2726965904474e-05, - "loss": 58.7139, - "step": 81050 - }, - { - "epoch": 0.32749265707001946, - "grad_norm": 742.99169921875, - "learning_rate": 4.2724504365705314e-05, - "loss": 46.189, - "step": 81060 - }, - { - "epoch": 0.32753305833538704, - "grad_norm": 1215.4967041015625, - "learning_rate": 4.272204248138828e-05, - "loss": 48.7161, - "step": 81070 - }, - { - "epoch": 0.3275734596007547, - "grad_norm": 575.4824829101562, - "learning_rate": 4.2719580251570915e-05, - "loss": 78.7691, - "step": 81080 - }, - { - "epoch": 0.3276138608661223, - "grad_norm": 547.03759765625, - "learning_rate": 4.2717117676301196e-05, - "loss": 42.8608, - "step": 81090 - }, - { - "epoch": 0.32765426213148996, - "grad_norm": 605.3917846679688, - "learning_rate": 4.271465475562716e-05, - "loss": 52.3641, - "step": 81100 - }, - { - "epoch": 0.3276946633968576, - "grad_norm": 402.33050537109375, - "learning_rate": 4.2712191489596796e-05, - "loss": 81.6275, - "step": 81110 - }, - { - "epoch": 0.32773506466222524, - "grad_norm": 944.0283813476562, - "learning_rate": 4.270972787825815e-05, - "loss": 44.0819, - "step": 81120 - }, - { - "epoch": 0.3277754659275929, - "grad_norm": 2794.85986328125, - "learning_rate": 4.2707263921659236e-05, - "loss": 77.4065, - "step": 81130 - }, - { - "epoch": 0.32781586719296046, - "grad_norm": 706.5042724609375, - "learning_rate": 4.27047996198481e-05, - "loss": 84.9347, - "step": 81140 - }, - { - "epoch": 0.3278562684583281, - "grad_norm": 1184.057861328125, - "learning_rate": 4.2702334972872776e-05, - "loss": 70.5365, - "step": 81150 - }, - { - "epoch": 0.32789666972369574, - "grad_norm": 548.59716796875, - "learning_rate": 4.269986998078132e-05, - "loss": 57.2449, - "step": 81160 - }, - { - "epoch": 0.3279370709890634, - "grad_norm": 712.474609375, - "learning_rate": 4.2697404643621786e-05, - "loss": 68.5629, - "step": 81170 - }, - { - "epoch": 0.327977472254431, - "grad_norm": 727.8631591796875, - "learning_rate": 4.269493896144224e-05, - "loss": 63.5557, - "step": 81180 - }, - { - "epoch": 0.32801787351979866, - "grad_norm": 1237.4130859375, - "learning_rate": 4.2692472934290746e-05, - "loss": 70.6992, - "step": 81190 - }, - { - "epoch": 0.32805827478516625, - "grad_norm": 625.5675048828125, - "learning_rate": 4.2690006562215384e-05, - "loss": 80.8208, - "step": 81200 - }, - { - "epoch": 0.3280986760505339, - "grad_norm": 561.9657592773438, - "learning_rate": 4.2687539845264235e-05, - "loss": 62.1039, - "step": 81210 - }, - { - "epoch": 0.32813907731590153, - "grad_norm": 1042.281494140625, - "learning_rate": 4.268507278348539e-05, - "loss": 39.331, - "step": 81220 - }, - { - "epoch": 0.32817947858126917, - "grad_norm": 401.18792724609375, - "learning_rate": 4.2682605376926955e-05, - "loss": 61.7103, - "step": 81230 - }, - { - "epoch": 0.3282198798466368, - "grad_norm": 1227.1231689453125, - "learning_rate": 4.268013762563702e-05, - "loss": 72.3686, - "step": 81240 - }, - { - "epoch": 0.32826028111200445, - "grad_norm": 2529.718994140625, - "learning_rate": 4.267766952966369e-05, - "loss": 111.4047, - "step": 81250 - }, - { - "epoch": 0.32830068237737203, - "grad_norm": 762.4780883789062, - "learning_rate": 4.2675201089055096e-05, - "loss": 44.7507, - "step": 81260 - }, - { - "epoch": 0.3283410836427397, - "grad_norm": 917.88720703125, - "learning_rate": 4.2672732303859365e-05, - "loss": 74.2715, - "step": 81270 - }, - { - "epoch": 0.3283814849081073, - "grad_norm": 520.3221435546875, - "learning_rate": 4.267026317412461e-05, - "loss": 54.3115, - "step": 81280 - }, - { - "epoch": 0.32842188617347495, - "grad_norm": 185.31436157226562, - "learning_rate": 4.266779369989899e-05, - "loss": 50.5796, - "step": 81290 - }, - { - "epoch": 0.3284622874388426, - "grad_norm": 1354.3519287109375, - "learning_rate": 4.2665323881230624e-05, - "loss": 101.209, - "step": 81300 - }, - { - "epoch": 0.32850268870421023, - "grad_norm": 387.4610900878906, - "learning_rate": 4.266285371816767e-05, - "loss": 80.6324, - "step": 81310 - }, - { - "epoch": 0.3285430899695779, - "grad_norm": 1534.5985107421875, - "learning_rate": 4.266038321075831e-05, - "loss": 55.0623, - "step": 81320 - }, - { - "epoch": 0.32858349123494546, - "grad_norm": 750.3333129882812, - "learning_rate": 4.265791235905067e-05, - "loss": 65.2265, - "step": 81330 - }, - { - "epoch": 0.3286238925003131, - "grad_norm": 438.5273742675781, - "learning_rate": 4.265544116309294e-05, - "loss": 82.4092, - "step": 81340 - }, - { - "epoch": 0.32866429376568074, - "grad_norm": 1133.52099609375, - "learning_rate": 4.265296962293329e-05, - "loss": 58.1583, - "step": 81350 - }, - { - "epoch": 0.3287046950310484, - "grad_norm": 632.3345336914062, - "learning_rate": 4.265049773861991e-05, - "loss": 79.6419, - "step": 81360 - }, - { - "epoch": 0.328745096296416, - "grad_norm": 778.0379028320312, - "learning_rate": 4.2648025510201e-05, - "loss": 49.8477, - "step": 81370 - }, - { - "epoch": 0.32878549756178366, - "grad_norm": 662.5733642578125, - "learning_rate": 4.2645552937724744e-05, - "loss": 58.1828, - "step": 81380 - }, - { - "epoch": 0.32882589882715124, - "grad_norm": 1248.02001953125, - "learning_rate": 4.264308002123935e-05, - "loss": 57.9085, - "step": 81390 - }, - { - "epoch": 0.3288663000925189, - "grad_norm": 645.2049560546875, - "learning_rate": 4.264060676079302e-05, - "loss": 56.5214, - "step": 81400 - }, - { - "epoch": 0.3289067013578865, - "grad_norm": 2653.940673828125, - "learning_rate": 4.2638133156433986e-05, - "loss": 68.7137, - "step": 81410 - }, - { - "epoch": 0.32894710262325416, - "grad_norm": 756.0985717773438, - "learning_rate": 4.263565920821046e-05, - "loss": 53.9748, - "step": 81420 - }, - { - "epoch": 0.3289875038886218, - "grad_norm": 449.9473571777344, - "learning_rate": 4.2633184916170677e-05, - "loss": 69.4224, - "step": 81430 - }, - { - "epoch": 0.32902790515398944, - "grad_norm": 1535.21923828125, - "learning_rate": 4.263071028036288e-05, - "loss": 66.2684, - "step": 81440 - }, - { - "epoch": 0.3290683064193571, - "grad_norm": 899.3699951171875, - "learning_rate": 4.2628235300835314e-05, - "loss": 65.152, - "step": 81450 - }, - { - "epoch": 0.32910870768472467, - "grad_norm": 956.332275390625, - "learning_rate": 4.2625759977636214e-05, - "loss": 68.6298, - "step": 81460 - }, - { - "epoch": 0.3291491089500923, - "grad_norm": 815.9563598632812, - "learning_rate": 4.262328431081386e-05, - "loss": 80.0543, - "step": 81470 - }, - { - "epoch": 0.32918951021545995, - "grad_norm": 536.765625, - "learning_rate": 4.26208083004165e-05, - "loss": 47.5851, - "step": 81480 - }, - { - "epoch": 0.3292299114808276, - "grad_norm": 564.234375, - "learning_rate": 4.261833194649241e-05, - "loss": 73.5911, - "step": 81490 - }, - { - "epoch": 0.3292703127461952, - "grad_norm": 999.4091186523438, - "learning_rate": 4.261585524908987e-05, - "loss": 68.8592, - "step": 81500 - }, - { - "epoch": 0.32931071401156287, - "grad_norm": 1350.5775146484375, - "learning_rate": 4.261337820825716e-05, - "loss": 70.5121, - "step": 81510 - }, - { - "epoch": 0.32935111527693045, - "grad_norm": 735.5888061523438, - "learning_rate": 4.261090082404258e-05, - "loss": 75.4246, - "step": 81520 - }, - { - "epoch": 0.3293915165422981, - "grad_norm": 365.2147521972656, - "learning_rate": 4.2608423096494406e-05, - "loss": 61.5511, - "step": 81530 - }, - { - "epoch": 0.32943191780766573, - "grad_norm": 1004.5228881835938, - "learning_rate": 4.260594502566097e-05, - "loss": 66.0472, - "step": 81540 - }, - { - "epoch": 0.32947231907303337, - "grad_norm": 725.521484375, - "learning_rate": 4.260346661159058e-05, - "loss": 61.7147, - "step": 81550 - }, - { - "epoch": 0.329512720338401, - "grad_norm": 1222.5792236328125, - "learning_rate": 4.260098785433154e-05, - "loss": 60.5714, - "step": 81560 - }, - { - "epoch": 0.32955312160376865, - "grad_norm": 1317.46728515625, - "learning_rate": 4.259850875393217e-05, - "loss": 75.1795, - "step": 81570 - }, - { - "epoch": 0.32959352286913624, - "grad_norm": 1155.3607177734375, - "learning_rate": 4.2596029310440824e-05, - "loss": 65.329, - "step": 81580 - }, - { - "epoch": 0.3296339241345039, - "grad_norm": 549.3975219726562, - "learning_rate": 4.259354952390582e-05, - "loss": 65.329, - "step": 81590 - }, - { - "epoch": 0.3296743253998715, - "grad_norm": 1814.1624755859375, - "learning_rate": 4.259106939437551e-05, - "loss": 66.1722, - "step": 81600 - }, - { - "epoch": 0.32971472666523916, - "grad_norm": 355.4609680175781, - "learning_rate": 4.258858892189825e-05, - "loss": 80.2931, - "step": 81610 - }, - { - "epoch": 0.3297551279306068, - "grad_norm": 570.4844360351562, - "learning_rate": 4.258610810652239e-05, - "loss": 44.5537, - "step": 81620 - }, - { - "epoch": 0.32979552919597444, - "grad_norm": 719.2767333984375, - "learning_rate": 4.258362694829629e-05, - "loss": 63.4508, - "step": 81630 - }, - { - "epoch": 0.3298359304613421, - "grad_norm": 494.90911865234375, - "learning_rate": 4.258114544726835e-05, - "loss": 67.4852, - "step": 81640 - }, - { - "epoch": 0.32987633172670966, - "grad_norm": 730.2485961914062, - "learning_rate": 4.257866360348692e-05, - "loss": 58.3942, - "step": 81650 - }, - { - "epoch": 0.3299167329920773, - "grad_norm": 528.159423828125, - "learning_rate": 4.257618141700039e-05, - "loss": 55.3444, - "step": 81660 - }, - { - "epoch": 0.32995713425744494, - "grad_norm": 911.97998046875, - "learning_rate": 4.257369888785715e-05, - "loss": 69.4855, - "step": 81670 - }, - { - "epoch": 0.3299975355228126, - "grad_norm": 1036.72021484375, - "learning_rate": 4.2571216016105614e-05, - "loss": 90.7197, - "step": 81680 - }, - { - "epoch": 0.3300379367881802, - "grad_norm": 1021.642333984375, - "learning_rate": 4.256873280179416e-05, - "loss": 96.8164, - "step": 81690 - }, - { - "epoch": 0.33007833805354786, - "grad_norm": 1377.108642578125, - "learning_rate": 4.256624924497123e-05, - "loss": 73.541, - "step": 81700 - }, - { - "epoch": 0.33011873931891544, - "grad_norm": 702.5912475585938, - "learning_rate": 4.256376534568522e-05, - "loss": 98.5258, - "step": 81710 - }, - { - "epoch": 0.3301591405842831, - "grad_norm": 3800.063720703125, - "learning_rate": 4.256128110398457e-05, - "loss": 64.3711, - "step": 81720 - }, - { - "epoch": 0.3301995418496507, - "grad_norm": 806.7174072265625, - "learning_rate": 4.25587965199177e-05, - "loss": 77.5143, - "step": 81730 - }, - { - "epoch": 0.33023994311501836, - "grad_norm": 753.8306274414062, - "learning_rate": 4.255631159353305e-05, - "loss": 85.3019, - "step": 81740 - }, - { - "epoch": 0.330280344380386, - "grad_norm": 1084.2166748046875, - "learning_rate": 4.2553826324879064e-05, - "loss": 70.9181, - "step": 81750 - }, - { - "epoch": 0.33032074564575364, - "grad_norm": 1052.1856689453125, - "learning_rate": 4.2551340714004203e-05, - "loss": 71.8444, - "step": 81760 - }, - { - "epoch": 0.3303611469111213, - "grad_norm": 1021.4315185546875, - "learning_rate": 4.254885476095691e-05, - "loss": 72.4072, - "step": 81770 - }, - { - "epoch": 0.33040154817648887, - "grad_norm": 433.3638000488281, - "learning_rate": 4.254636846578566e-05, - "loss": 95.435, - "step": 81780 - }, - { - "epoch": 0.3304419494418565, - "grad_norm": 778.073974609375, - "learning_rate": 4.254388182853894e-05, - "loss": 72.969, - "step": 81790 - }, - { - "epoch": 0.33048235070722415, - "grad_norm": 1745.2950439453125, - "learning_rate": 4.254139484926519e-05, - "loss": 81.5817, - "step": 81800 - }, - { - "epoch": 0.3305227519725918, - "grad_norm": 273.6972961425781, - "learning_rate": 4.253890752801293e-05, - "loss": 73.0842, - "step": 81810 - }, - { - "epoch": 0.33056315323795943, - "grad_norm": 711.11328125, - "learning_rate": 4.253641986483062e-05, - "loss": 66.8901, - "step": 81820 - }, - { - "epoch": 0.33060355450332707, - "grad_norm": 810.6575927734375, - "learning_rate": 4.2533931859766794e-05, - "loss": 80.1247, - "step": 81830 - }, - { - "epoch": 0.33064395576869465, - "grad_norm": 0.0, - "learning_rate": 4.253144351286994e-05, - "loss": 72.4615, - "step": 81840 - }, - { - "epoch": 0.3306843570340623, - "grad_norm": 2913.29443359375, - "learning_rate": 4.252895482418856e-05, - "loss": 76.5862, - "step": 81850 - }, - { - "epoch": 0.33072475829942993, - "grad_norm": 1155.0936279296875, - "learning_rate": 4.252646579377119e-05, - "loss": 84.624, - "step": 81860 - }, - { - "epoch": 0.3307651595647976, - "grad_norm": 587.8722534179688, - "learning_rate": 4.252397642166633e-05, - "loss": 72.8901, - "step": 81870 - }, - { - "epoch": 0.3308055608301652, - "grad_norm": 840.6516723632812, - "learning_rate": 4.252148670792254e-05, - "loss": 92.0446, - "step": 81880 - }, - { - "epoch": 0.33084596209553285, - "grad_norm": 701.7250366210938, - "learning_rate": 4.251899665258835e-05, - "loss": 30.5136, - "step": 81890 - }, - { - "epoch": 0.33088636336090044, - "grad_norm": 0.0, - "learning_rate": 4.2516506255712296e-05, - "loss": 71.1064, - "step": 81900 - }, - { - "epoch": 0.3309267646262681, - "grad_norm": 485.8135986328125, - "learning_rate": 4.251401551734293e-05, - "loss": 77.7925, - "step": 81910 - }, - { - "epoch": 0.3309671658916357, - "grad_norm": 918.968994140625, - "learning_rate": 4.2511524437528825e-05, - "loss": 79.0183, - "step": 81920 - }, - { - "epoch": 0.33100756715700336, - "grad_norm": 808.7003173828125, - "learning_rate": 4.250903301631853e-05, - "loss": 45.6422, - "step": 81930 - }, - { - "epoch": 0.331047968422371, - "grad_norm": 1006.4502563476562, - "learning_rate": 4.250654125376062e-05, - "loss": 81.9783, - "step": 81940 - }, - { - "epoch": 0.33108836968773864, - "grad_norm": 926.7799682617188, - "learning_rate": 4.250404914990367e-05, - "loss": 51.5458, - "step": 81950 - }, - { - "epoch": 0.3311287709531063, - "grad_norm": 422.4704284667969, - "learning_rate": 4.250155670479628e-05, - "loss": 68.1897, - "step": 81960 - }, - { - "epoch": 0.33116917221847386, - "grad_norm": 661.1273193359375, - "learning_rate": 4.2499063918487034e-05, - "loss": 57.1841, - "step": 81970 - }, - { - "epoch": 0.3312095734838415, - "grad_norm": 1525.314208984375, - "learning_rate": 4.2496570791024513e-05, - "loss": 85.9705, - "step": 81980 - }, - { - "epoch": 0.33124997474920914, - "grad_norm": 677.7116088867188, - "learning_rate": 4.2494077322457346e-05, - "loss": 49.9874, - "step": 81990 - }, - { - "epoch": 0.3312903760145768, - "grad_norm": 854.898193359375, - "learning_rate": 4.249158351283414e-05, - "loss": 67.6385, - "step": 82000 - }, - { - "epoch": 0.3313307772799444, - "grad_norm": 836.3336791992188, - "learning_rate": 4.24890893622035e-05, - "loss": 79.6089, - "step": 82010 - }, - { - "epoch": 0.33137117854531206, - "grad_norm": 509.12847900390625, - "learning_rate": 4.248659487061406e-05, - "loss": 30.2002, - "step": 82020 - }, - { - "epoch": 0.33141157981067965, - "grad_norm": 623.1883544921875, - "learning_rate": 4.248410003811445e-05, - "loss": 54.6345, - "step": 82030 - }, - { - "epoch": 0.3314519810760473, - "grad_norm": 1047.8133544921875, - "learning_rate": 4.248160486475331e-05, - "loss": 69.4941, - "step": 82040 - }, - { - "epoch": 0.3314923823414149, - "grad_norm": 2248.522216796875, - "learning_rate": 4.247910935057929e-05, - "loss": 75.4848, - "step": 82050 - }, - { - "epoch": 0.33153278360678257, - "grad_norm": 834.0875854492188, - "learning_rate": 4.2476613495641026e-05, - "loss": 56.126, - "step": 82060 - }, - { - "epoch": 0.3315731848721502, - "grad_norm": 629.9591064453125, - "learning_rate": 4.247411729998718e-05, - "loss": 64.6742, - "step": 82070 - }, - { - "epoch": 0.33161358613751785, - "grad_norm": 738.5386352539062, - "learning_rate": 4.247162076366643e-05, - "loss": 56.9134, - "step": 82080 - }, - { - "epoch": 0.33165398740288543, - "grad_norm": 734.9909057617188, - "learning_rate": 4.246912388672744e-05, - "loss": 58.0093, - "step": 82090 - }, - { - "epoch": 0.33169438866825307, - "grad_norm": 315.8343505859375, - "learning_rate": 4.246662666921888e-05, - "loss": 68.2145, - "step": 82100 - }, - { - "epoch": 0.3317347899336207, - "grad_norm": 471.6864318847656, - "learning_rate": 4.2464129111189444e-05, - "loss": 98.6437, - "step": 82110 - }, - { - "epoch": 0.33177519119898835, - "grad_norm": 913.5321044921875, - "learning_rate": 4.2461631212687816e-05, - "loss": 50.3258, - "step": 82120 - }, - { - "epoch": 0.331815592464356, - "grad_norm": 837.1478271484375, - "learning_rate": 4.24591329737627e-05, - "loss": 79.771, - "step": 82130 - }, - { - "epoch": 0.33185599372972363, - "grad_norm": 681.1831665039062, - "learning_rate": 4.24566343944628e-05, - "loss": 54.0234, - "step": 82140 - }, - { - "epoch": 0.33189639499509127, - "grad_norm": 497.15582275390625, - "learning_rate": 4.245413547483682e-05, - "loss": 79.7911, - "step": 82150 - }, - { - "epoch": 0.33193679626045886, - "grad_norm": 463.25885009765625, - "learning_rate": 4.245163621493349e-05, - "loss": 60.4234, - "step": 82160 - }, - { - "epoch": 0.3319771975258265, - "grad_norm": 566.7998046875, - "learning_rate": 4.244913661480152e-05, - "loss": 47.7919, - "step": 82170 - }, - { - "epoch": 0.33201759879119414, - "grad_norm": 1045.619384765625, - "learning_rate": 4.2446636674489645e-05, - "loss": 67.3056, - "step": 82180 - }, - { - "epoch": 0.3320580000565618, - "grad_norm": 605.4349975585938, - "learning_rate": 4.244413639404662e-05, - "loss": 68.3609, - "step": 82190 - }, - { - "epoch": 0.3320984013219294, - "grad_norm": 562.8687133789062, - "learning_rate": 4.244163577352116e-05, - "loss": 64.8594, - "step": 82200 - }, - { - "epoch": 0.33213880258729706, - "grad_norm": 830.25927734375, - "learning_rate": 4.243913481296205e-05, - "loss": 42.151, - "step": 82210 - }, - { - "epoch": 0.33217920385266464, - "grad_norm": 570.14453125, - "learning_rate": 4.243663351241801e-05, - "loss": 91.821, - "step": 82220 - }, - { - "epoch": 0.3322196051180323, - "grad_norm": 499.4716796875, - "learning_rate": 4.243413187193783e-05, - "loss": 56.719, - "step": 82230 - }, - { - "epoch": 0.3322600063833999, - "grad_norm": 536.1835327148438, - "learning_rate": 4.2431629891570266e-05, - "loss": 71.9568, - "step": 82240 - }, - { - "epoch": 0.33230040764876756, - "grad_norm": 907.7794799804688, - "learning_rate": 4.242912757136412e-05, - "loss": 80.2039, - "step": 82250 - }, - { - "epoch": 0.3323408089141352, - "grad_norm": 896.99267578125, - "learning_rate": 4.2426624911368146e-05, - "loss": 61.2477, - "step": 82260 - }, - { - "epoch": 0.33238121017950284, - "grad_norm": 3104.49169921875, - "learning_rate": 4.242412191163115e-05, - "loss": 64.1988, - "step": 82270 - }, - { - "epoch": 0.3324216114448705, - "grad_norm": 739.8251342773438, - "learning_rate": 4.242161857220193e-05, - "loss": 99.7195, - "step": 82280 - }, - { - "epoch": 0.33246201271023806, - "grad_norm": 717.78662109375, - "learning_rate": 4.241911489312927e-05, - "loss": 69.5272, - "step": 82290 - }, - { - "epoch": 0.3325024139756057, - "grad_norm": 494.52197265625, - "learning_rate": 4.241661087446202e-05, - "loss": 46.656, - "step": 82300 - }, - { - "epoch": 0.33254281524097334, - "grad_norm": 355.43536376953125, - "learning_rate": 4.2414106516248964e-05, - "loss": 56.7551, - "step": 82310 - }, - { - "epoch": 0.332583216506341, - "grad_norm": 1103.3970947265625, - "learning_rate": 4.241160181853894e-05, - "loss": 68.9124, - "step": 82320 - }, - { - "epoch": 0.3326236177717086, - "grad_norm": 742.4725952148438, - "learning_rate": 4.240909678138077e-05, - "loss": 83.4965, - "step": 82330 - }, - { - "epoch": 0.33266401903707626, - "grad_norm": 1117.4141845703125, - "learning_rate": 4.24065914048233e-05, - "loss": 71.4723, - "step": 82340 - }, - { - "epoch": 0.33270442030244385, - "grad_norm": 539.0693969726562, - "learning_rate": 4.2404085688915364e-05, - "loss": 53.2493, - "step": 82350 - }, - { - "epoch": 0.3327448215678115, - "grad_norm": 4054.252197265625, - "learning_rate": 4.240157963370582e-05, - "loss": 99.1258, - "step": 82360 - }, - { - "epoch": 0.33278522283317913, - "grad_norm": 826.72509765625, - "learning_rate": 4.2399073239243526e-05, - "loss": 68.9222, - "step": 82370 - }, - { - "epoch": 0.33282562409854677, - "grad_norm": 831.2509155273438, - "learning_rate": 4.239656650557734e-05, - "loss": 73.0231, - "step": 82380 - }, - { - "epoch": 0.3328660253639144, - "grad_norm": 409.9931335449219, - "learning_rate": 4.239405943275613e-05, - "loss": 45.4566, - "step": 82390 - }, - { - "epoch": 0.33290642662928205, - "grad_norm": 926.7503051757812, - "learning_rate": 4.2391552020828775e-05, - "loss": 77.0996, - "step": 82400 - }, - { - "epoch": 0.33294682789464963, - "grad_norm": 1059.4488525390625, - "learning_rate": 4.2389044269844155e-05, - "loss": 95.9397, - "step": 82410 - }, - { - "epoch": 0.3329872291600173, - "grad_norm": 0.0, - "learning_rate": 4.238653617985118e-05, - "loss": 49.413, - "step": 82420 - }, - { - "epoch": 0.3330276304253849, - "grad_norm": 1252.39453125, - "learning_rate": 4.238402775089871e-05, - "loss": 66.4441, - "step": 82430 - }, - { - "epoch": 0.33306803169075255, - "grad_norm": 1263.342529296875, - "learning_rate": 4.238151898303569e-05, - "loss": 76.9728, - "step": 82440 - }, - { - "epoch": 0.3331084329561202, - "grad_norm": 473.049072265625, - "learning_rate": 4.2379009876311e-05, - "loss": 63.0306, - "step": 82450 - }, - { - "epoch": 0.33314883422148783, - "grad_norm": 1100.3658447265625, - "learning_rate": 4.237650043077357e-05, - "loss": 107.0959, - "step": 82460 - }, - { - "epoch": 0.3331892354868555, - "grad_norm": 731.7568359375, - "learning_rate": 4.237399064647231e-05, - "loss": 105.6519, - "step": 82470 - }, - { - "epoch": 0.33322963675222306, - "grad_norm": 569.7168579101562, - "learning_rate": 4.237148052345616e-05, - "loss": 89.5767, - "step": 82480 - }, - { - "epoch": 0.3332700380175907, - "grad_norm": 650.82958984375, - "learning_rate": 4.236897006177405e-05, - "loss": 84.1082, - "step": 82490 - }, - { - "epoch": 0.33331043928295834, - "grad_norm": 2010.1734619140625, - "learning_rate": 4.2366459261474933e-05, - "loss": 60.609, - "step": 82500 - }, - { - "epoch": 0.333350840548326, - "grad_norm": 1313.4893798828125, - "learning_rate": 4.2363948122607756e-05, - "loss": 72.3615, - "step": 82510 - }, - { - "epoch": 0.3333912418136936, - "grad_norm": 538.3892211914062, - "learning_rate": 4.236143664522146e-05, - "loss": 78.7254, - "step": 82520 - }, - { - "epoch": 0.33343164307906126, - "grad_norm": 678.4175415039062, - "learning_rate": 4.235892482936502e-05, - "loss": 67.0122, - "step": 82530 - }, - { - "epoch": 0.33347204434442884, - "grad_norm": 1527.4241943359375, - "learning_rate": 4.2356412675087406e-05, - "loss": 46.1357, - "step": 82540 - }, - { - "epoch": 0.3335124456097965, - "grad_norm": 889.2577514648438, - "learning_rate": 4.23539001824376e-05, - "loss": 85.7127, - "step": 82550 - }, - { - "epoch": 0.3335528468751641, - "grad_norm": 621.6854858398438, - "learning_rate": 4.2351387351464565e-05, - "loss": 62.1339, - "step": 82560 - }, - { - "epoch": 0.33359324814053176, - "grad_norm": 606.4068603515625, - "learning_rate": 4.2348874182217305e-05, - "loss": 56.7306, - "step": 82570 - }, - { - "epoch": 0.3336336494058994, - "grad_norm": 785.7623901367188, - "learning_rate": 4.2346360674744815e-05, - "loss": 49.6323, - "step": 82580 - }, - { - "epoch": 0.33367405067126704, - "grad_norm": 1377.6890869140625, - "learning_rate": 4.234384682909608e-05, - "loss": 84.6193, - "step": 82590 - }, - { - "epoch": 0.3337144519366347, - "grad_norm": 2395.7802734375, - "learning_rate": 4.234133264532012e-05, - "loss": 70.3222, - "step": 82600 - }, - { - "epoch": 0.33375485320200227, - "grad_norm": 746.2875366210938, - "learning_rate": 4.2338818123465966e-05, - "loss": 41.8322, - "step": 82610 - }, - { - "epoch": 0.3337952544673699, - "grad_norm": 461.2134094238281, - "learning_rate": 4.2336303263582624e-05, - "loss": 40.6642, - "step": 82620 - }, - { - "epoch": 0.33383565573273755, - "grad_norm": 469.310546875, - "learning_rate": 4.233378806571912e-05, - "loss": 67.8555, - "step": 82630 - }, - { - "epoch": 0.3338760569981052, - "grad_norm": 827.2962036132812, - "learning_rate": 4.2331272529924495e-05, - "loss": 74.3388, - "step": 82640 - }, - { - "epoch": 0.3339164582634728, - "grad_norm": 544.0149536132812, - "learning_rate": 4.2328756656247795e-05, - "loss": 79.4444, - "step": 82650 - }, - { - "epoch": 0.33395685952884047, - "grad_norm": 383.1783142089844, - "learning_rate": 4.2326240444738055e-05, - "loss": 51.7502, - "step": 82660 - }, - { - "epoch": 0.33399726079420805, - "grad_norm": 1237.60009765625, - "learning_rate": 4.232372389544434e-05, - "loss": 73.6413, - "step": 82670 - }, - { - "epoch": 0.3340376620595757, - "grad_norm": 309.4773864746094, - "learning_rate": 4.232120700841571e-05, - "loss": 59.2111, - "step": 82680 - }, - { - "epoch": 0.33407806332494333, - "grad_norm": 1590.2628173828125, - "learning_rate": 4.2318689783701224e-05, - "loss": 59.3465, - "step": 82690 - }, - { - "epoch": 0.33411846459031097, - "grad_norm": 464.6656799316406, - "learning_rate": 4.2316172221349973e-05, - "loss": 38.0717, - "step": 82700 - }, - { - "epoch": 0.3341588658556786, - "grad_norm": 2365.112060546875, - "learning_rate": 4.231365432141103e-05, - "loss": 52.5256, - "step": 82710 - }, - { - "epoch": 0.33419926712104625, - "grad_norm": 905.1858520507812, - "learning_rate": 4.231113608393348e-05, - "loss": 78.6967, - "step": 82720 - }, - { - "epoch": 0.33423966838641384, - "grad_norm": 399.5769348144531, - "learning_rate": 4.2308617508966414e-05, - "loss": 79.4581, - "step": 82730 - }, - { - "epoch": 0.3342800696517815, - "grad_norm": 2698.52685546875, - "learning_rate": 4.230609859655895e-05, - "loss": 78.356, - "step": 82740 - }, - { - "epoch": 0.3343204709171491, - "grad_norm": 1030.6182861328125, - "learning_rate": 4.230357934676017e-05, - "loss": 57.3243, - "step": 82750 - }, - { - "epoch": 0.33436087218251676, - "grad_norm": 1275.8216552734375, - "learning_rate": 4.230105975961921e-05, - "loss": 77.0945, - "step": 82760 - }, - { - "epoch": 0.3344012734478844, - "grad_norm": 846.1837768554688, - "learning_rate": 4.229853983518518e-05, - "loss": 58.413, - "step": 82770 - }, - { - "epoch": 0.33444167471325204, - "grad_norm": 502.4288330078125, - "learning_rate": 4.229601957350722e-05, - "loss": 55.0714, - "step": 82780 - }, - { - "epoch": 0.3344820759786197, - "grad_norm": 1429.5887451171875, - "learning_rate": 4.229349897463445e-05, - "loss": 74.8495, - "step": 82790 - }, - { - "epoch": 0.33452247724398726, - "grad_norm": 534.1192016601562, - "learning_rate": 4.2290978038616e-05, - "loss": 69.4769, - "step": 82800 - }, - { - "epoch": 0.3345628785093549, - "grad_norm": 625.2440185546875, - "learning_rate": 4.228845676550105e-05, - "loss": 68.7517, - "step": 82810 - }, - { - "epoch": 0.33460327977472254, - "grad_norm": 529.8661499023438, - "learning_rate": 4.2285935155338724e-05, - "loss": 77.1349, - "step": 82820 - }, - { - "epoch": 0.3346436810400902, - "grad_norm": 1316.5428466796875, - "learning_rate": 4.22834132081782e-05, - "loss": 61.7906, - "step": 82830 - }, - { - "epoch": 0.3346840823054578, - "grad_norm": 483.9060974121094, - "learning_rate": 4.2280890924068625e-05, - "loss": 62.0083, - "step": 82840 - }, - { - "epoch": 0.33472448357082546, - "grad_norm": 882.261962890625, - "learning_rate": 4.22783683030592e-05, - "loss": 62.3824, - "step": 82850 - }, - { - "epoch": 0.33476488483619304, - "grad_norm": 733.6650390625, - "learning_rate": 4.227584534519907e-05, - "loss": 81.4946, - "step": 82860 - }, - { - "epoch": 0.3348052861015607, - "grad_norm": 604.4740600585938, - "learning_rate": 4.227332205053746e-05, - "loss": 72.0598, - "step": 82870 - }, - { - "epoch": 0.3348456873669283, - "grad_norm": 4985.6357421875, - "learning_rate": 4.2270798419123534e-05, - "loss": 66.5251, - "step": 82880 - }, - { - "epoch": 0.33488608863229596, - "grad_norm": 477.78289794921875, - "learning_rate": 4.2268274451006506e-05, - "loss": 116.5259, - "step": 82890 - }, - { - "epoch": 0.3349264898976636, - "grad_norm": 1549.6126708984375, - "learning_rate": 4.226575014623557e-05, - "loss": 49.6759, - "step": 82900 - }, - { - "epoch": 0.33496689116303124, - "grad_norm": 416.32879638671875, - "learning_rate": 4.2263225504859955e-05, - "loss": 69.3102, - "step": 82910 - }, - { - "epoch": 0.3350072924283989, - "grad_norm": 575.8262939453125, - "learning_rate": 4.226070052692886e-05, - "loss": 54.391, - "step": 82920 - }, - { - "epoch": 0.33504769369376647, - "grad_norm": 318.236572265625, - "learning_rate": 4.2258175212491537e-05, - "loss": 58.843, - "step": 82930 - }, - { - "epoch": 0.3350880949591341, - "grad_norm": 439.44488525390625, - "learning_rate": 4.2255649561597186e-05, - "loss": 62.2686, - "step": 82940 - }, - { - "epoch": 0.33512849622450175, - "grad_norm": 857.3777465820312, - "learning_rate": 4.225312357429508e-05, - "loss": 68.3515, - "step": 82950 - }, - { - "epoch": 0.3351688974898694, - "grad_norm": 1923.6248779296875, - "learning_rate": 4.225059725063444e-05, - "loss": 75.4591, - "step": 82960 - }, - { - "epoch": 0.33520929875523703, - "grad_norm": 1054.3583984375, - "learning_rate": 4.2248070590664525e-05, - "loss": 57.7459, - "step": 82970 - }, - { - "epoch": 0.33524970002060467, - "grad_norm": 974.0528564453125, - "learning_rate": 4.224554359443459e-05, - "loss": 52.0075, - "step": 82980 - }, - { - "epoch": 0.33529010128597225, - "grad_norm": 1275.845947265625, - "learning_rate": 4.22430162619939e-05, - "loss": 94.6089, - "step": 82990 - }, - { - "epoch": 0.3353305025513399, - "grad_norm": 1231.898681640625, - "learning_rate": 4.224048859339175e-05, - "loss": 65.2636, - "step": 83000 - }, - { - "epoch": 0.33537090381670753, - "grad_norm": 592.545166015625, - "learning_rate": 4.223796058867738e-05, - "loss": 74.3102, - "step": 83010 - }, - { - "epoch": 0.3354113050820752, - "grad_norm": 570.7125244140625, - "learning_rate": 4.22354322479001e-05, - "loss": 77.6862, - "step": 83020 - }, - { - "epoch": 0.3354517063474428, - "grad_norm": 814.4917602539062, - "learning_rate": 4.22329035711092e-05, - "loss": 78.4505, - "step": 83030 - }, - { - "epoch": 0.33549210761281045, - "grad_norm": 1354.454345703125, - "learning_rate": 4.223037455835397e-05, - "loss": 56.5575, - "step": 83040 - }, - { - "epoch": 0.33553250887817804, - "grad_norm": 1030.1226806640625, - "learning_rate": 4.2227845209683716e-05, - "loss": 53.6785, - "step": 83050 - }, - { - "epoch": 0.3355729101435457, - "grad_norm": 1466.65966796875, - "learning_rate": 4.222531552514775e-05, - "loss": 68.2507, - "step": 83060 - }, - { - "epoch": 0.3356133114089133, - "grad_norm": 705.6240844726562, - "learning_rate": 4.2222785504795394e-05, - "loss": 92.5086, - "step": 83070 - }, - { - "epoch": 0.33565371267428096, - "grad_norm": 703.1721801757812, - "learning_rate": 4.2220255148675956e-05, - "loss": 59.856, - "step": 83080 - }, - { - "epoch": 0.3356941139396486, - "grad_norm": 523.6635131835938, - "learning_rate": 4.221772445683878e-05, - "loss": 93.9165, - "step": 83090 - }, - { - "epoch": 0.33573451520501624, - "grad_norm": 915.0527954101562, - "learning_rate": 4.221519342933321e-05, - "loss": 62.3113, - "step": 83100 - }, - { - "epoch": 0.3357749164703839, - "grad_norm": 5522.94091796875, - "learning_rate": 4.221266206620859e-05, - "loss": 62.9414, - "step": 83110 - }, - { - "epoch": 0.33581531773575146, - "grad_norm": 619.2235717773438, - "learning_rate": 4.221013036751424e-05, - "loss": 51.0544, - "step": 83120 - }, - { - "epoch": 0.3358557190011191, - "grad_norm": 860.1320190429688, - "learning_rate": 4.220759833329955e-05, - "loss": 57.2342, - "step": 83130 - }, - { - "epoch": 0.33589612026648674, - "grad_norm": 1275.2235107421875, - "learning_rate": 4.2205065963613864e-05, - "loss": 72.2249, - "step": 83140 - }, - { - "epoch": 0.3359365215318544, - "grad_norm": 1409.1829833984375, - "learning_rate": 4.220253325850657e-05, - "loss": 75.4809, - "step": 83150 - }, - { - "epoch": 0.335976922797222, - "grad_norm": 155.7539825439453, - "learning_rate": 4.220000021802702e-05, - "loss": 66.316, - "step": 83160 - }, - { - "epoch": 0.33601732406258966, - "grad_norm": 519.6751098632812, - "learning_rate": 4.219746684222462e-05, - "loss": 46.7393, - "step": 83170 - }, - { - "epoch": 0.33605772532795725, - "grad_norm": 3975.830078125, - "learning_rate": 4.219493313114875e-05, - "loss": 113.0992, - "step": 83180 - }, - { - "epoch": 0.3360981265933249, - "grad_norm": 741.3836059570312, - "learning_rate": 4.219239908484881e-05, - "loss": 62.3929, - "step": 83190 - }, - { - "epoch": 0.3361385278586925, - "grad_norm": 1000.0194091796875, - "learning_rate": 4.218986470337419e-05, - "loss": 63.8103, - "step": 83200 - }, - { - "epoch": 0.33617892912406017, - "grad_norm": 915.8265380859375, - "learning_rate": 4.21873299867743e-05, - "loss": 64.819, - "step": 83210 - }, - { - "epoch": 0.3362193303894278, - "grad_norm": 1064.971923828125, - "learning_rate": 4.218479493509858e-05, - "loss": 75.7704, - "step": 83220 - }, - { - "epoch": 0.33625973165479545, - "grad_norm": 381.1853942871094, - "learning_rate": 4.218225954839643e-05, - "loss": 44.5471, - "step": 83230 - }, - { - "epoch": 0.3363001329201631, - "grad_norm": 2388.5380859375, - "learning_rate": 4.217972382671729e-05, - "loss": 56.8206, - "step": 83240 - }, - { - "epoch": 0.33634053418553067, - "grad_norm": 2224.6982421875, - "learning_rate": 4.2177187770110576e-05, - "loss": 75.5993, - "step": 83250 - }, - { - "epoch": 0.3363809354508983, - "grad_norm": 584.9492797851562, - "learning_rate": 4.2174651378625754e-05, - "loss": 76.4168, - "step": 83260 - }, - { - "epoch": 0.33642133671626595, - "grad_norm": 576.220947265625, - "learning_rate": 4.217211465231226e-05, - "loss": 69.955, - "step": 83270 - }, - { - "epoch": 0.3364617379816336, - "grad_norm": 419.0504455566406, - "learning_rate": 4.2169577591219545e-05, - "loss": 61.2648, - "step": 83280 - }, - { - "epoch": 0.33650213924700123, - "grad_norm": 649.897705078125, - "learning_rate": 4.216704019539707e-05, - "loss": 50.4646, - "step": 83290 - }, - { - "epoch": 0.33654254051236887, - "grad_norm": 1460.0035400390625, - "learning_rate": 4.2164502464894316e-05, - "loss": 66.0741, - "step": 83300 - }, - { - "epoch": 0.33658294177773646, - "grad_norm": 618.8856201171875, - "learning_rate": 4.216196439976076e-05, - "loss": 57.7886, - "step": 83310 - }, - { - "epoch": 0.3366233430431041, - "grad_norm": 540.6898193359375, - "learning_rate": 4.2159426000045854e-05, - "loss": 62.811, - "step": 83320 - }, - { - "epoch": 0.33666374430847174, - "grad_norm": 629.9652099609375, - "learning_rate": 4.215688726579911e-05, - "loss": 61.0531, - "step": 83330 - }, - { - "epoch": 0.3367041455738394, - "grad_norm": 3191.65185546875, - "learning_rate": 4.2154348197070017e-05, - "loss": 70.6365, - "step": 83340 - }, - { - "epoch": 0.336744546839207, - "grad_norm": 483.31396484375, - "learning_rate": 4.215180879390808e-05, - "loss": 71.8596, - "step": 83350 - }, - { - "epoch": 0.33678494810457466, - "grad_norm": 633.4132080078125, - "learning_rate": 4.2149269056362794e-05, - "loss": 65.0201, - "step": 83360 - }, - { - "epoch": 0.33682534936994224, - "grad_norm": 1588.8582763671875, - "learning_rate": 4.214672898448367e-05, - "loss": 81.9684, - "step": 83370 - }, - { - "epoch": 0.3368657506353099, - "grad_norm": 1885.4788818359375, - "learning_rate": 4.214418857832025e-05, - "loss": 81.2159, - "step": 83380 - }, - { - "epoch": 0.3369061519006775, - "grad_norm": 2098.720947265625, - "learning_rate": 4.214164783792205e-05, - "loss": 78.4835, - "step": 83390 - }, - { - "epoch": 0.33694655316604516, - "grad_norm": 934.3864135742188, - "learning_rate": 4.213910676333859e-05, - "loss": 51.1806, - "step": 83400 - }, - { - "epoch": 0.3369869544314128, - "grad_norm": 561.3240356445312, - "learning_rate": 4.213656535461942e-05, - "loss": 83.0985, - "step": 83410 - }, - { - "epoch": 0.33702735569678044, - "grad_norm": 517.56982421875, - "learning_rate": 4.213402361181409e-05, - "loss": 52.2328, - "step": 83420 - }, - { - "epoch": 0.3370677569621481, - "grad_norm": 501.65325927734375, - "learning_rate": 4.213148153497215e-05, - "loss": 45.5168, - "step": 83430 - }, - { - "epoch": 0.33710815822751566, - "grad_norm": 619.2743530273438, - "learning_rate": 4.212893912414316e-05, - "loss": 54.3897, - "step": 83440 - }, - { - "epoch": 0.3371485594928833, - "grad_norm": 281.7502746582031, - "learning_rate": 4.212639637937668e-05, - "loss": 48.6456, - "step": 83450 - }, - { - "epoch": 0.33718896075825094, - "grad_norm": 1214.2874755859375, - "learning_rate": 4.212385330072228e-05, - "loss": 80.0758, - "step": 83460 - }, - { - "epoch": 0.3372293620236186, - "grad_norm": 639.830322265625, - "learning_rate": 4.2121309888229544e-05, - "loss": 64.5304, - "step": 83470 - }, - { - "epoch": 0.3372697632889862, - "grad_norm": 823.2178955078125, - "learning_rate": 4.2118766141948066e-05, - "loss": 52.8296, - "step": 83480 - }, - { - "epoch": 0.33731016455435386, - "grad_norm": 429.5492858886719, - "learning_rate": 4.211622206192742e-05, - "loss": 63.3038, - "step": 83490 - }, - { - "epoch": 0.33735056581972145, - "grad_norm": 0.0, - "learning_rate": 4.211367764821722e-05, - "loss": 82.3349, - "step": 83500 - }, - { - "epoch": 0.3373909670850891, - "grad_norm": 974.5023803710938, - "learning_rate": 4.211113290086706e-05, - "loss": 43.6207, - "step": 83510 - }, - { - "epoch": 0.33743136835045673, - "grad_norm": 804.0608520507812, - "learning_rate": 4.2108587819926554e-05, - "loss": 66.1226, - "step": 83520 - }, - { - "epoch": 0.33747176961582437, - "grad_norm": 269.8977355957031, - "learning_rate": 4.2106042405445325e-05, - "loss": 82.0022, - "step": 83530 - }, - { - "epoch": 0.337512170881192, - "grad_norm": 1011.4495849609375, - "learning_rate": 4.210349665747299e-05, - "loss": 62.9397, - "step": 83540 - }, - { - "epoch": 0.33755257214655965, - "grad_norm": 568.0720825195312, - "learning_rate": 4.210095057605917e-05, - "loss": 80.0488, - "step": 83550 - }, - { - "epoch": 0.3375929734119273, - "grad_norm": 3479.609375, - "learning_rate": 4.209840416125353e-05, - "loss": 88.0878, - "step": 83560 - }, - { - "epoch": 0.3376333746772949, - "grad_norm": 0.0, - "learning_rate": 4.20958574131057e-05, - "loss": 100.4558, - "step": 83570 - }, - { - "epoch": 0.3376737759426625, - "grad_norm": 2387.772216796875, - "learning_rate": 4.209331033166531e-05, - "loss": 77.3918, - "step": 83580 - }, - { - "epoch": 0.33771417720803015, - "grad_norm": 710.8726806640625, - "learning_rate": 4.209076291698205e-05, - "loss": 53.3156, - "step": 83590 - }, - { - "epoch": 0.3377545784733978, - "grad_norm": 847.5494384765625, - "learning_rate": 4.208821516910557e-05, - "loss": 64.4423, - "step": 83600 - }, - { - "epoch": 0.33779497973876543, - "grad_norm": 540.9591674804688, - "learning_rate": 4.208566708808554e-05, - "loss": 54.3956, - "step": 83610 - }, - { - "epoch": 0.3378353810041331, - "grad_norm": 426.9322814941406, - "learning_rate": 4.2083118673971613e-05, - "loss": 80.7343, - "step": 83620 - }, - { - "epoch": 0.33787578226950066, - "grad_norm": 530.9072875976562, - "learning_rate": 4.2080569926813503e-05, - "loss": 62.1767, - "step": 83630 - }, - { - "epoch": 0.3379161835348683, - "grad_norm": 3503.007080078125, - "learning_rate": 4.20780208466609e-05, - "loss": 95.5601, - "step": 83640 - }, - { - "epoch": 0.33795658480023594, - "grad_norm": 764.01708984375, - "learning_rate": 4.207547143356347e-05, - "loss": 44.9169, - "step": 83650 - }, - { - "epoch": 0.3379969860656036, - "grad_norm": 898.1088256835938, - "learning_rate": 4.207292168757095e-05, - "loss": 42.9797, - "step": 83660 - }, - { - "epoch": 0.3380373873309712, - "grad_norm": 616.1493530273438, - "learning_rate": 4.2070371608733025e-05, - "loss": 55.619, - "step": 83670 - }, - { - "epoch": 0.33807778859633886, - "grad_norm": 1211.4007568359375, - "learning_rate": 4.206782119709942e-05, - "loss": 92.8507, - "step": 83680 - }, - { - "epoch": 0.33811818986170644, - "grad_norm": 1338.9266357421875, - "learning_rate": 4.206527045271985e-05, - "loss": 60.5956, - "step": 83690 - }, - { - "epoch": 0.3381585911270741, - "grad_norm": 5582.28515625, - "learning_rate": 4.206271937564405e-05, - "loss": 60.9006, - "step": 83700 - }, - { - "epoch": 0.3381989923924417, - "grad_norm": 1023.3121337890625, - "learning_rate": 4.206016796592174e-05, - "loss": 69.0761, - "step": 83710 - }, - { - "epoch": 0.33823939365780936, - "grad_norm": 665.3640747070312, - "learning_rate": 4.2057616223602684e-05, - "loss": 63.9492, - "step": 83720 - }, - { - "epoch": 0.338279794923177, - "grad_norm": 360.83563232421875, - "learning_rate": 4.205506414873661e-05, - "loss": 40.3897, - "step": 83730 - }, - { - "epoch": 0.33832019618854464, - "grad_norm": 1596.3583984375, - "learning_rate": 4.205251174137329e-05, - "loss": 72.7595, - "step": 83740 - }, - { - "epoch": 0.3383605974539123, - "grad_norm": 361.8247985839844, - "learning_rate": 4.2049959001562464e-05, - "loss": 98.5569, - "step": 83750 - }, - { - "epoch": 0.33840099871927987, - "grad_norm": 818.7006225585938, - "learning_rate": 4.204740592935392e-05, - "loss": 88.2147, - "step": 83760 - }, - { - "epoch": 0.3384413999846475, - "grad_norm": 434.5075378417969, - "learning_rate": 4.2044852524797406e-05, - "loss": 104.2029, - "step": 83770 - }, - { - "epoch": 0.33848180125001515, - "grad_norm": 722.4409790039062, - "learning_rate": 4.204229878794273e-05, - "loss": 48.2412, - "step": 83780 - }, - { - "epoch": 0.3385222025153828, - "grad_norm": 328.6600036621094, - "learning_rate": 4.203974471883966e-05, - "loss": 50.057, - "step": 83790 - }, - { - "epoch": 0.3385626037807504, - "grad_norm": 381.5543518066406, - "learning_rate": 4.2037190317538e-05, - "loss": 62.9453, - "step": 83800 - }, - { - "epoch": 0.33860300504611807, - "grad_norm": 1003.5863037109375, - "learning_rate": 4.2034635584087535e-05, - "loss": 97.3556, - "step": 83810 - }, - { - "epoch": 0.33864340631148565, - "grad_norm": 257.1349182128906, - "learning_rate": 4.203208051853808e-05, - "loss": 65.2049, - "step": 83820 - }, - { - "epoch": 0.3386838075768533, - "grad_norm": 542.9520263671875, - "learning_rate": 4.202952512093945e-05, - "loss": 60.1434, - "step": 83830 - }, - { - "epoch": 0.33872420884222093, - "grad_norm": 742.0493774414062, - "learning_rate": 4.202696939134146e-05, - "loss": 75.3315, - "step": 83840 - }, - { - "epoch": 0.33876461010758857, - "grad_norm": 839.355712890625, - "learning_rate": 4.202441332979394e-05, - "loss": 53.2438, - "step": 83850 - }, - { - "epoch": 0.3388050113729562, - "grad_norm": 616.1432495117188, - "learning_rate": 4.20218569363467e-05, - "loss": 64.6932, - "step": 83860 - }, - { - "epoch": 0.33884541263832385, - "grad_norm": 956.6621704101562, - "learning_rate": 4.2019300211049615e-05, - "loss": 66.2041, - "step": 83870 - }, - { - "epoch": 0.3388858139036915, - "grad_norm": 1587.0950927734375, - "learning_rate": 4.2016743153952505e-05, - "loss": 72.81, - "step": 83880 - }, - { - "epoch": 0.3389262151690591, - "grad_norm": 1762.2025146484375, - "learning_rate": 4.201418576510523e-05, - "loss": 55.4421, - "step": 83890 - }, - { - "epoch": 0.3389666164344267, - "grad_norm": 874.0090942382812, - "learning_rate": 4.201162804455763e-05, - "loss": 62.957, - "step": 83900 - }, - { - "epoch": 0.33900701769979436, - "grad_norm": 457.5177307128906, - "learning_rate": 4.2009069992359595e-05, - "loss": 100.793, - "step": 83910 - }, - { - "epoch": 0.339047418965162, - "grad_norm": 469.1245422363281, - "learning_rate": 4.200651160856098e-05, - "loss": 66.5053, - "step": 83920 - }, - { - "epoch": 0.33908782023052964, - "grad_norm": 394.78167724609375, - "learning_rate": 4.200395289321167e-05, - "loss": 35.0095, - "step": 83930 - }, - { - "epoch": 0.3391282214958973, - "grad_norm": 1044.5858154296875, - "learning_rate": 4.2001393846361536e-05, - "loss": 99.2867, - "step": 83940 - }, - { - "epoch": 0.33916862276126486, - "grad_norm": 793.2132568359375, - "learning_rate": 4.199883446806048e-05, - "loss": 31.189, - "step": 83950 - }, - { - "epoch": 0.3392090240266325, - "grad_norm": 441.0299072265625, - "learning_rate": 4.19962747583584e-05, - "loss": 78.0899, - "step": 83960 - }, - { - "epoch": 0.33924942529200014, - "grad_norm": 1687.0687255859375, - "learning_rate": 4.1993714717305185e-05, - "loss": 110.2709, - "step": 83970 - }, - { - "epoch": 0.3392898265573678, - "grad_norm": 645.5243530273438, - "learning_rate": 4.199115434495076e-05, - "loss": 70.5179, - "step": 83980 - }, - { - "epoch": 0.3393302278227354, - "grad_norm": 889.2882690429688, - "learning_rate": 4.1988593641345024e-05, - "loss": 59.327, - "step": 83990 - }, - { - "epoch": 0.33937062908810306, - "grad_norm": 545.3060913085938, - "learning_rate": 4.198603260653792e-05, - "loss": 62.0888, - "step": 84000 - }, - { - "epoch": 0.33941103035347064, - "grad_norm": 437.35064697265625, - "learning_rate": 4.1983471240579356e-05, - "loss": 48.4227, - "step": 84010 - }, - { - "epoch": 0.3394514316188383, - "grad_norm": 662.9463500976562, - "learning_rate": 4.198090954351928e-05, - "loss": 80.6067, - "step": 84020 - }, - { - "epoch": 0.3394918328842059, - "grad_norm": 952.4712524414062, - "learning_rate": 4.197834751540762e-05, - "loss": 84.5905, - "step": 84030 - }, - { - "epoch": 0.33953223414957356, - "grad_norm": 781.6859130859375, - "learning_rate": 4.197578515629435e-05, - "loss": 60.8523, - "step": 84040 - }, - { - "epoch": 0.3395726354149412, - "grad_norm": 975.0667724609375, - "learning_rate": 4.1973222466229404e-05, - "loss": 54.1651, - "step": 84050 - }, - { - "epoch": 0.33961303668030884, - "grad_norm": 2085.77392578125, - "learning_rate": 4.197065944526275e-05, - "loss": 60.0403, - "step": 84060 - }, - { - "epoch": 0.3396534379456765, - "grad_norm": 599.7440185546875, - "learning_rate": 4.196809609344434e-05, - "loss": 56.4642, - "step": 84070 - }, - { - "epoch": 0.33969383921104407, - "grad_norm": 1469.0364990234375, - "learning_rate": 4.196553241082418e-05, - "loss": 57.0351, - "step": 84080 - }, - { - "epoch": 0.3397342404764117, - "grad_norm": 579.5526123046875, - "learning_rate": 4.1962968397452216e-05, - "loss": 58.5323, - "step": 84090 - }, - { - "epoch": 0.33977464174177935, - "grad_norm": 639.9765625, - "learning_rate": 4.1960404053378454e-05, - "loss": 65.8241, - "step": 84100 - }, - { - "epoch": 0.339815043007147, - "grad_norm": 1099.93017578125, - "learning_rate": 4.1957839378652886e-05, - "loss": 81.618, - "step": 84110 - }, - { - "epoch": 0.33985544427251463, - "grad_norm": 490.1031494140625, - "learning_rate": 4.1955274373325506e-05, - "loss": 71.3405, - "step": 84120 - }, - { - "epoch": 0.33989584553788227, - "grad_norm": 2551.330322265625, - "learning_rate": 4.1952709037446324e-05, - "loss": 63.7435, - "step": 84130 - }, - { - "epoch": 0.33993624680324985, - "grad_norm": 930.4701538085938, - "learning_rate": 4.1950143371065355e-05, - "loss": 58.0226, - "step": 84140 - }, - { - "epoch": 0.3399766480686175, - "grad_norm": 856.0897216796875, - "learning_rate": 4.194757737423261e-05, - "loss": 58.936, - "step": 84150 - }, - { - "epoch": 0.34001704933398513, - "grad_norm": 592.7817993164062, - "learning_rate": 4.194501104699812e-05, - "loss": 55.8844, - "step": 84160 - }, - { - "epoch": 0.3400574505993528, - "grad_norm": 1155.2713623046875, - "learning_rate": 4.194244438941192e-05, - "loss": 62.0951, - "step": 84170 - }, - { - "epoch": 0.3400978518647204, - "grad_norm": 353.23583984375, - "learning_rate": 4.193987740152404e-05, - "loss": 59.3539, - "step": 84180 - }, - { - "epoch": 0.34013825313008805, - "grad_norm": 1153.6064453125, - "learning_rate": 4.193731008338453e-05, - "loss": 73.7305, - "step": 84190 - }, - { - "epoch": 0.3401786543954557, - "grad_norm": 524.4332885742188, - "learning_rate": 4.193474243504343e-05, - "loss": 77.0145, - "step": 84200 - }, - { - "epoch": 0.3402190556608233, - "grad_norm": 718.3153686523438, - "learning_rate": 4.193217445655082e-05, - "loss": 61.5601, - "step": 84210 - }, - { - "epoch": 0.3402594569261909, - "grad_norm": 1005.5006713867188, - "learning_rate": 4.192960614795675e-05, - "loss": 53.4194, - "step": 84220 - }, - { - "epoch": 0.34029985819155856, - "grad_norm": 1067.0313720703125, - "learning_rate": 4.192703750931129e-05, - "loss": 62.9559, - "step": 84230 - }, - { - "epoch": 0.3403402594569262, - "grad_norm": 794.4739990234375, - "learning_rate": 4.192446854066452e-05, - "loss": 46.9849, - "step": 84240 - }, - { - "epoch": 0.34038066072229384, - "grad_norm": 821.4888305664062, - "learning_rate": 4.192189924206652e-05, - "loss": 54.5941, - "step": 84250 - }, - { - "epoch": 0.3404210619876615, - "grad_norm": 876.5982666015625, - "learning_rate": 4.191932961356739e-05, - "loss": 91.2899, - "step": 84260 - }, - { - "epoch": 0.34046146325302906, - "grad_norm": 780.8380737304688, - "learning_rate": 4.1916759655217206e-05, - "loss": 53.8854, - "step": 84270 - }, - { - "epoch": 0.3405018645183967, - "grad_norm": 2006.7110595703125, - "learning_rate": 4.1914189367066094e-05, - "loss": 64.6178, - "step": 84280 - }, - { - "epoch": 0.34054226578376434, - "grad_norm": 663.4932861328125, - "learning_rate": 4.191161874916415e-05, - "loss": 71.5971, - "step": 84290 - }, - { - "epoch": 0.340582667049132, - "grad_norm": 1891.99658203125, - "learning_rate": 4.1909047801561484e-05, - "loss": 87.5498, - "step": 84300 - }, - { - "epoch": 0.3406230683144996, - "grad_norm": 483.8343811035156, - "learning_rate": 4.1906476524308235e-05, - "loss": 87.025, - "step": 84310 - }, - { - "epoch": 0.34066346957986726, - "grad_norm": 1536.576171875, - "learning_rate": 4.1903904917454516e-05, - "loss": 88.3288, - "step": 84320 - }, - { - "epoch": 0.34070387084523485, - "grad_norm": 708.4118041992188, - "learning_rate": 4.190133298105047e-05, - "loss": 59.6797, - "step": 84330 - }, - { - "epoch": 0.3407442721106025, - "grad_norm": 837.2510986328125, - "learning_rate": 4.189876071514624e-05, - "loss": 60.1374, - "step": 84340 - }, - { - "epoch": 0.3407846733759701, - "grad_norm": 2082.153564453125, - "learning_rate": 4.189618811979197e-05, - "loss": 65.0576, - "step": 84350 - }, - { - "epoch": 0.34082507464133777, - "grad_norm": 1336.408447265625, - "learning_rate": 4.18936151950378e-05, - "loss": 76.66, - "step": 84360 - }, - { - "epoch": 0.3408654759067054, - "grad_norm": 2176.240234375, - "learning_rate": 4.189104194093392e-05, - "loss": 84.4399, - "step": 84370 - }, - { - "epoch": 0.34090587717207305, - "grad_norm": 602.0006103515625, - "learning_rate": 4.1888468357530476e-05, - "loss": 68.7765, - "step": 84380 - }, - { - "epoch": 0.3409462784374407, - "grad_norm": 614.801513671875, - "learning_rate": 4.188589444487765e-05, - "loss": 63.4444, - "step": 84390 - }, - { - "epoch": 0.34098667970280827, - "grad_norm": 1069.0560302734375, - "learning_rate": 4.188332020302561e-05, - "loss": 65.1551, - "step": 84400 - }, - { - "epoch": 0.3410270809681759, - "grad_norm": 667.7435913085938, - "learning_rate": 4.1880745632024554e-05, - "loss": 62.9196, - "step": 84410 - }, - { - "epoch": 0.34106748223354355, - "grad_norm": 668.0552368164062, - "learning_rate": 4.187817073192468e-05, - "loss": 40.3692, - "step": 84420 - }, - { - "epoch": 0.3411078834989112, - "grad_norm": 943.7819213867188, - "learning_rate": 4.187559550277617e-05, - "loss": 57.6345, - "step": 84430 - }, - { - "epoch": 0.34114828476427883, - "grad_norm": 314.5076599121094, - "learning_rate": 4.187301994462924e-05, - "loss": 59.5329, - "step": 84440 - }, - { - "epoch": 0.34118868602964647, - "grad_norm": 759.6746826171875, - "learning_rate": 4.1870444057534095e-05, - "loss": 58.5424, - "step": 84450 - }, - { - "epoch": 0.34122908729501406, - "grad_norm": 802.6051635742188, - "learning_rate": 4.1867867841540964e-05, - "loss": 58.658, - "step": 84460 - }, - { - "epoch": 0.3412694885603817, - "grad_norm": 1856.4595947265625, - "learning_rate": 4.186529129670006e-05, - "loss": 88.8393, - "step": 84470 - }, - { - "epoch": 0.34130988982574934, - "grad_norm": 921.8723754882812, - "learning_rate": 4.1862714423061624e-05, - "loss": 62.6543, - "step": 84480 - }, - { - "epoch": 0.341350291091117, - "grad_norm": 756.6406860351562, - "learning_rate": 4.186013722067588e-05, - "loss": 70.8543, - "step": 84490 - }, - { - "epoch": 0.3413906923564846, - "grad_norm": 2013.621826171875, - "learning_rate": 4.185755968959308e-05, - "loss": 62.2787, - "step": 84500 - }, - { - "epoch": 0.34143109362185226, - "grad_norm": 831.4722900390625, - "learning_rate": 4.185498182986349e-05, - "loss": 42.0386, - "step": 84510 - }, - { - "epoch": 0.3414714948872199, - "grad_norm": 723.7086181640625, - "learning_rate": 4.185240364153734e-05, - "loss": 67.1348, - "step": 84520 - }, - { - "epoch": 0.3415118961525875, - "grad_norm": 754.1615600585938, - "learning_rate": 4.184982512466491e-05, - "loss": 80.2152, - "step": 84530 - }, - { - "epoch": 0.3415522974179551, - "grad_norm": 326.5327453613281, - "learning_rate": 4.1847246279296464e-05, - "loss": 81.1519, - "step": 84540 - }, - { - "epoch": 0.34159269868332276, - "grad_norm": 528.33935546875, - "learning_rate": 4.184466710548227e-05, - "loss": 64.1323, - "step": 84550 - }, - { - "epoch": 0.3416330999486904, - "grad_norm": 682.2901611328125, - "learning_rate": 4.184208760327263e-05, - "loss": 64.297, - "step": 84560 - }, - { - "epoch": 0.34167350121405804, - "grad_norm": 516.5701904296875, - "learning_rate": 4.183950777271781e-05, - "loss": 54.8298, - "step": 84570 - }, - { - "epoch": 0.3417139024794257, - "grad_norm": 674.0258178710938, - "learning_rate": 4.183692761386813e-05, - "loss": 66.7959, - "step": 84580 - }, - { - "epoch": 0.34175430374479326, - "grad_norm": 317.5458984375, - "learning_rate": 4.183434712677387e-05, - "loss": 43.3866, - "step": 84590 - }, - { - "epoch": 0.3417947050101609, - "grad_norm": 1114.966064453125, - "learning_rate": 4.183176631148534e-05, - "loss": 63.7891, - "step": 84600 - }, - { - "epoch": 0.34183510627552854, - "grad_norm": 887.722412109375, - "learning_rate": 4.1829185168052877e-05, - "loss": 87.129, - "step": 84610 - }, - { - "epoch": 0.3418755075408962, - "grad_norm": 735.17822265625, - "learning_rate": 4.182660369652677e-05, - "loss": 52.123, - "step": 84620 - }, - { - "epoch": 0.3419159088062638, - "grad_norm": 470.5625305175781, - "learning_rate": 4.182402189695736e-05, - "loss": 82.246, - "step": 84630 - }, - { - "epoch": 0.34195631007163146, - "grad_norm": 1080.039794921875, - "learning_rate": 4.1821439769395e-05, - "loss": 85.6906, - "step": 84640 - }, - { - "epoch": 0.34199671133699905, - "grad_norm": 529.3787841796875, - "learning_rate": 4.181885731388999e-05, - "loss": 66.6203, - "step": 84650 - }, - { - "epoch": 0.3420371126023667, - "grad_norm": 710.7547607421875, - "learning_rate": 4.1816274530492713e-05, - "loss": 64.3684, - "step": 84660 - }, - { - "epoch": 0.34207751386773433, - "grad_norm": 1438.0509033203125, - "learning_rate": 4.18136914192535e-05, - "loss": 71.3715, - "step": 84670 - }, - { - "epoch": 0.34211791513310197, - "grad_norm": 779.0742797851562, - "learning_rate": 4.181110798022271e-05, - "loss": 64.4421, - "step": 84680 - }, - { - "epoch": 0.3421583163984696, - "grad_norm": 415.0228576660156, - "learning_rate": 4.180852421345072e-05, - "loss": 58.738, - "step": 84690 - }, - { - "epoch": 0.34219871766383725, - "grad_norm": 530.8126220703125, - "learning_rate": 4.180594011898791e-05, - "loss": 64.9684, - "step": 84700 - }, - { - "epoch": 0.3422391189292049, - "grad_norm": 553.169677734375, - "learning_rate": 4.1803355696884625e-05, - "loss": 58.3446, - "step": 84710 - }, - { - "epoch": 0.3422795201945725, - "grad_norm": 766.584228515625, - "learning_rate": 4.180077094719128e-05, - "loss": 66.1009, - "step": 84720 - }, - { - "epoch": 0.3423199214599401, - "grad_norm": 801.9248657226562, - "learning_rate": 4.179818586995825e-05, - "loss": 49.8386, - "step": 84730 - }, - { - "epoch": 0.34236032272530775, - "grad_norm": 1037.901611328125, - "learning_rate": 4.1795600465235947e-05, - "loss": 79.0902, - "step": 84740 - }, - { - "epoch": 0.3424007239906754, - "grad_norm": 871.1192626953125, - "learning_rate": 4.179301473307476e-05, - "loss": 89.6048, - "step": 84750 - }, - { - "epoch": 0.34244112525604303, - "grad_norm": 257.207763671875, - "learning_rate": 4.179042867352511e-05, - "loss": 53.3181, - "step": 84760 - }, - { - "epoch": 0.3424815265214107, - "grad_norm": 1137.479736328125, - "learning_rate": 4.17878422866374e-05, - "loss": 64.5483, - "step": 84770 - }, - { - "epoch": 0.34252192778677826, - "grad_norm": 1104.8631591796875, - "learning_rate": 4.1785255572462066e-05, - "loss": 50.5376, - "step": 84780 - }, - { - "epoch": 0.3425623290521459, - "grad_norm": 674.0195922851562, - "learning_rate": 4.178266853104954e-05, - "loss": 61.1695, - "step": 84790 - }, - { - "epoch": 0.34260273031751354, - "grad_norm": 647.3651733398438, - "learning_rate": 4.178008116245024e-05, - "loss": 73.4516, - "step": 84800 - }, - { - "epoch": 0.3426431315828812, - "grad_norm": 1168.34228515625, - "learning_rate": 4.1777493466714624e-05, - "loss": 62.1275, - "step": 84810 - }, - { - "epoch": 0.3426835328482488, - "grad_norm": 2343.4892578125, - "learning_rate": 4.177490544389313e-05, - "loss": 64.6482, - "step": 84820 - }, - { - "epoch": 0.34272393411361646, - "grad_norm": 817.9552001953125, - "learning_rate": 4.177231709403622e-05, - "loss": 64.0418, - "step": 84830 - }, - { - "epoch": 0.3427643353789841, - "grad_norm": 753.8648681640625, - "learning_rate": 4.176972841719435e-05, - "loss": 57.1917, - "step": 84840 - }, - { - "epoch": 0.3428047366443517, - "grad_norm": 510.9037780761719, - "learning_rate": 4.1767139413418e-05, - "loss": 54.1073, - "step": 84850 - }, - { - "epoch": 0.3428451379097193, - "grad_norm": 641.147705078125, - "learning_rate": 4.176455008275764e-05, - "loss": 81.8066, - "step": 84860 - }, - { - "epoch": 0.34288553917508696, - "grad_norm": 1563.3798828125, - "learning_rate": 4.1761960425263735e-05, - "loss": 71.83, - "step": 84870 - }, - { - "epoch": 0.3429259404404546, - "grad_norm": 1007.089111328125, - "learning_rate": 4.1759370440986775e-05, - "loss": 65.9258, - "step": 84880 - }, - { - "epoch": 0.34296634170582224, - "grad_norm": 694.405517578125, - "learning_rate": 4.175678012997727e-05, - "loss": 53.0368, - "step": 84890 - }, - { - "epoch": 0.3430067429711899, - "grad_norm": 787.7047119140625, - "learning_rate": 4.1754189492285714e-05, - "loss": 78.8049, - "step": 84900 - }, - { - "epoch": 0.34304714423655747, - "grad_norm": 1134.1265869140625, - "learning_rate": 4.17515985279626e-05, - "loss": 52.2765, - "step": 84910 - }, - { - "epoch": 0.3430875455019251, - "grad_norm": 984.3347778320312, - "learning_rate": 4.174900723705845e-05, - "loss": 79.6157, - "step": 84920 - }, - { - "epoch": 0.34312794676729275, - "grad_norm": 671.3541259765625, - "learning_rate": 4.174641561962378e-05, - "loss": 68.477, - "step": 84930 - }, - { - "epoch": 0.3431683480326604, - "grad_norm": 870.8594970703125, - "learning_rate": 4.174382367570912e-05, - "loss": 60.4037, - "step": 84940 - }, - { - "epoch": 0.343208749298028, - "grad_norm": 11791.9111328125, - "learning_rate": 4.174123140536499e-05, - "loss": 129.9077, - "step": 84950 - }, - { - "epoch": 0.34324915056339567, - "grad_norm": 656.0787963867188, - "learning_rate": 4.1738638808641936e-05, - "loss": 68.1256, - "step": 84960 - }, - { - "epoch": 0.34328955182876325, - "grad_norm": 549.08935546875, - "learning_rate": 4.17360458855905e-05, - "loss": 46.7033, - "step": 84970 - }, - { - "epoch": 0.3433299530941309, - "grad_norm": 748.6014404296875, - "learning_rate": 4.1733452636261244e-05, - "loss": 37.716, - "step": 84980 - }, - { - "epoch": 0.34337035435949853, - "grad_norm": 870.7546997070312, - "learning_rate": 4.173085906070471e-05, - "loss": 46.5251, - "step": 84990 - }, - { - "epoch": 0.34341075562486617, - "grad_norm": 525.0897216796875, - "learning_rate": 4.172826515897146e-05, - "loss": 88.4015, - "step": 85000 - }, - { - "epoch": 0.3434511568902338, - "grad_norm": 1008.6449584960938, - "learning_rate": 4.172567093111207e-05, - "loss": 69.9759, - "step": 85010 - }, - { - "epoch": 0.34349155815560145, - "grad_norm": 694.2141723632812, - "learning_rate": 4.172307637717711e-05, - "loss": 64.9771, - "step": 85020 - }, - { - "epoch": 0.3435319594209691, - "grad_norm": 601.18505859375, - "learning_rate": 4.172048149721717e-05, - "loss": 72.7544, - "step": 85030 - }, - { - "epoch": 0.3435723606863367, - "grad_norm": 1000.3665771484375, - "learning_rate": 4.171788629128284e-05, - "loss": 79.2617, - "step": 85040 - }, - { - "epoch": 0.3436127619517043, - "grad_norm": 380.0011901855469, - "learning_rate": 4.1715290759424705e-05, - "loss": 77.5077, - "step": 85050 - }, - { - "epoch": 0.34365316321707196, - "grad_norm": 674.123779296875, - "learning_rate": 4.1712694901693374e-05, - "loss": 104.6444, - "step": 85060 - }, - { - "epoch": 0.3436935644824396, - "grad_norm": 2173.708251953125, - "learning_rate": 4.171009871813944e-05, - "loss": 66.0904, - "step": 85070 - }, - { - "epoch": 0.34373396574780724, - "grad_norm": 1144.228759765625, - "learning_rate": 4.170750220881354e-05, - "loss": 66.5176, - "step": 85080 - }, - { - "epoch": 0.3437743670131749, - "grad_norm": 648.3857421875, - "learning_rate": 4.1704905373766286e-05, - "loss": 58.1772, - "step": 85090 - }, - { - "epoch": 0.34381476827854246, - "grad_norm": 1063.3370361328125, - "learning_rate": 4.170230821304829e-05, - "loss": 56.7559, - "step": 85100 - }, - { - "epoch": 0.3438551695439101, - "grad_norm": 704.5231323242188, - "learning_rate": 4.169971072671021e-05, - "loss": 68.8396, - "step": 85110 - }, - { - "epoch": 0.34389557080927774, - "grad_norm": 857.602294921875, - "learning_rate": 4.169711291480266e-05, - "loss": 49.2389, - "step": 85120 - }, - { - "epoch": 0.3439359720746454, - "grad_norm": 1297.68701171875, - "learning_rate": 4.16945147773763e-05, - "loss": 61.9332, - "step": 85130 - }, - { - "epoch": 0.343976373340013, - "grad_norm": 603.5067138671875, - "learning_rate": 4.169191631448178e-05, - "loss": 83.4856, - "step": 85140 - }, - { - "epoch": 0.34401677460538066, - "grad_norm": 541.5191040039062, - "learning_rate": 4.1689317526169766e-05, - "loss": 44.5568, - "step": 85150 - }, - { - "epoch": 0.34405717587074824, - "grad_norm": 589.892578125, - "learning_rate": 4.168671841249091e-05, - "loss": 56.9382, - "step": 85160 - }, - { - "epoch": 0.3440975771361159, - "grad_norm": 555.4111328125, - "learning_rate": 4.168411897349588e-05, - "loss": 83.5068, - "step": 85170 - }, - { - "epoch": 0.3441379784014835, - "grad_norm": 547.9937133789062, - "learning_rate": 4.168151920923536e-05, - "loss": 34.5336, - "step": 85180 - }, - { - "epoch": 0.34417837966685116, - "grad_norm": 1771.55078125, - "learning_rate": 4.1678919119760054e-05, - "loss": 90.6765, - "step": 85190 - }, - { - "epoch": 0.3442187809322188, - "grad_norm": 696.42236328125, - "learning_rate": 4.1676318705120616e-05, - "loss": 59.9467, - "step": 85200 - }, - { - "epoch": 0.34425918219758644, - "grad_norm": 877.6763916015625, - "learning_rate": 4.167371796536777e-05, - "loss": 95.2035, - "step": 85210 - }, - { - "epoch": 0.3442995834629541, - "grad_norm": 1338.104248046875, - "learning_rate": 4.1671116900552194e-05, - "loss": 66.9077, - "step": 85220 - }, - { - "epoch": 0.34433998472832167, - "grad_norm": 992.7899169921875, - "learning_rate": 4.166851551072462e-05, - "loss": 60.782, - "step": 85230 - }, - { - "epoch": 0.3443803859936893, - "grad_norm": 1082.4932861328125, - "learning_rate": 4.166591379593575e-05, - "loss": 61.3283, - "step": 85240 - }, - { - "epoch": 0.34442078725905695, - "grad_norm": 1248.29052734375, - "learning_rate": 4.166331175623631e-05, - "loss": 65.6705, - "step": 85250 - }, - { - "epoch": 0.3444611885244246, - "grad_norm": 651.8319091796875, - "learning_rate": 4.166070939167703e-05, - "loss": 107.7256, - "step": 85260 - }, - { - "epoch": 0.34450158978979223, - "grad_norm": 950.817138671875, - "learning_rate": 4.165810670230865e-05, - "loss": 53.1864, - "step": 85270 - }, - { - "epoch": 0.34454199105515987, - "grad_norm": 871.7650756835938, - "learning_rate": 4.16555036881819e-05, - "loss": 97.4988, - "step": 85280 - }, - { - "epoch": 0.34458239232052745, - "grad_norm": 581.1294555664062, - "learning_rate": 4.1652900349347533e-05, - "loss": 52.556, - "step": 85290 - }, - { - "epoch": 0.3446227935858951, - "grad_norm": 919.3056640625, - "learning_rate": 4.165029668585629e-05, - "loss": 55.3191, - "step": 85300 - }, - { - "epoch": 0.34466319485126273, - "grad_norm": 622.2424926757812, - "learning_rate": 4.164769269775896e-05, - "loss": 107.1292, - "step": 85310 - }, - { - "epoch": 0.3447035961166304, - "grad_norm": 0.0, - "learning_rate": 4.1645088385106266e-05, - "loss": 51.1484, - "step": 85320 - }, - { - "epoch": 0.344743997381998, - "grad_norm": 2131.662353515625, - "learning_rate": 4.164248374794902e-05, - "loss": 83.7406, - "step": 85330 - }, - { - "epoch": 0.34478439864736565, - "grad_norm": 527.97509765625, - "learning_rate": 4.163987878633798e-05, - "loss": 57.3865, - "step": 85340 - }, - { - "epoch": 0.3448247999127333, - "grad_norm": 460.5814208984375, - "learning_rate": 4.163727350032394e-05, - "loss": 35.4961, - "step": 85350 - }, - { - "epoch": 0.3448652011781009, - "grad_norm": 1813.7447509765625, - "learning_rate": 4.1634667889957676e-05, - "loss": 93.811, - "step": 85360 - }, - { - "epoch": 0.3449056024434685, - "grad_norm": 516.6041259765625, - "learning_rate": 4.1632061955290017e-05, - "loss": 66.2047, - "step": 85370 - }, - { - "epoch": 0.34494600370883616, - "grad_norm": 1379.9407958984375, - "learning_rate": 4.1629455696371734e-05, - "loss": 62.7511, - "step": 85380 - }, - { - "epoch": 0.3449864049742038, - "grad_norm": 573.064453125, - "learning_rate": 4.162684911325365e-05, - "loss": 68.7898, - "step": 85390 - }, - { - "epoch": 0.34502680623957144, - "grad_norm": 620.5324096679688, - "learning_rate": 4.162424220598658e-05, - "loss": 62.7597, - "step": 85400 - }, - { - "epoch": 0.3450672075049391, - "grad_norm": 522.501708984375, - "learning_rate": 4.162163497462136e-05, - "loss": 63.0242, - "step": 85410 - }, - { - "epoch": 0.34510760877030666, - "grad_norm": 687.868896484375, - "learning_rate": 4.161902741920881e-05, - "loss": 68.5159, - "step": 85420 - }, - { - "epoch": 0.3451480100356743, - "grad_norm": 842.2858276367188, - "learning_rate": 4.1616419539799754e-05, - "loss": 46.8133, - "step": 85430 - }, - { - "epoch": 0.34518841130104194, - "grad_norm": 808.839111328125, - "learning_rate": 4.161381133644505e-05, - "loss": 58.4971, - "step": 85440 - }, - { - "epoch": 0.3452288125664096, - "grad_norm": 2333.049072265625, - "learning_rate": 4.161120280919554e-05, - "loss": 99.8404, - "step": 85450 - }, - { - "epoch": 0.3452692138317772, - "grad_norm": 667.53125, - "learning_rate": 4.160859395810208e-05, - "loss": 74.1404, - "step": 85460 - }, - { - "epoch": 0.34530961509714486, - "grad_norm": 886.0628051757812, - "learning_rate": 4.160598478321553e-05, - "loss": 62.568, - "step": 85470 - }, - { - "epoch": 0.34535001636251245, - "grad_norm": 421.406005859375, - "learning_rate": 4.160337528458676e-05, - "loss": 75.3051, - "step": 85480 - }, - { - "epoch": 0.3453904176278801, - "grad_norm": 852.6929931640625, - "learning_rate": 4.160076546226663e-05, - "loss": 67.8079, - "step": 85490 - }, - { - "epoch": 0.3454308188932477, - "grad_norm": 1610.6650390625, - "learning_rate": 4.1598155316306044e-05, - "loss": 67.7026, - "step": 85500 - }, - { - "epoch": 0.34547122015861537, - "grad_norm": 740.6041259765625, - "learning_rate": 4.1595544846755865e-05, - "loss": 69.3296, - "step": 85510 - }, - { - "epoch": 0.345511621423983, - "grad_norm": 713.525390625, - "learning_rate": 4.1592934053667004e-05, - "loss": 66.8102, - "step": 85520 - }, - { - "epoch": 0.34555202268935065, - "grad_norm": 1433.9876708984375, - "learning_rate": 4.1590322937090345e-05, - "loss": 49.2965, - "step": 85530 - }, - { - "epoch": 0.3455924239547183, - "grad_norm": 974.6792602539062, - "learning_rate": 4.15877114970768e-05, - "loss": 71.5766, - "step": 85540 - }, - { - "epoch": 0.34563282522008587, - "grad_norm": 996.3238525390625, - "learning_rate": 4.158509973367728e-05, - "loss": 82.5995, - "step": 85550 - }, - { - "epoch": 0.3456732264854535, - "grad_norm": 3350.908935546875, - "learning_rate": 4.1582487646942706e-05, - "loss": 86.8919, - "step": 85560 - }, - { - "epoch": 0.34571362775082115, - "grad_norm": 1187.7080078125, - "learning_rate": 4.157987523692399e-05, - "loss": 59.8579, - "step": 85570 - }, - { - "epoch": 0.3457540290161888, - "grad_norm": 713.9551391601562, - "learning_rate": 4.157726250367207e-05, - "loss": 61.2336, - "step": 85580 - }, - { - "epoch": 0.34579443028155643, - "grad_norm": 932.2216186523438, - "learning_rate": 4.157464944723789e-05, - "loss": 72.413, - "step": 85590 - }, - { - "epoch": 0.34583483154692407, - "grad_norm": 813.9993286132812, - "learning_rate": 4.157203606767238e-05, - "loss": 65.8375, - "step": 85600 - }, - { - "epoch": 0.34587523281229166, - "grad_norm": 648.2496948242188, - "learning_rate": 4.15694223650265e-05, - "loss": 56.1922, - "step": 85610 - }, - { - "epoch": 0.3459156340776593, - "grad_norm": 1841.92236328125, - "learning_rate": 4.156680833935119e-05, - "loss": 73.8934, - "step": 85620 - }, - { - "epoch": 0.34595603534302694, - "grad_norm": 1668.5313720703125, - "learning_rate": 4.156419399069744e-05, - "loss": 98.3084, - "step": 85630 - }, - { - "epoch": 0.3459964366083946, - "grad_norm": 451.40338134765625, - "learning_rate": 4.156157931911619e-05, - "loss": 41.3341, - "step": 85640 - }, - { - "epoch": 0.3460368378737622, - "grad_norm": 740.665771484375, - "learning_rate": 4.155896432465842e-05, - "loss": 93.8652, - "step": 85650 - }, - { - "epoch": 0.34607723913912986, - "grad_norm": 716.4408569335938, - "learning_rate": 4.155634900737513e-05, - "loss": 57.4022, - "step": 85660 - }, - { - "epoch": 0.3461176404044975, - "grad_norm": 543.2172241210938, - "learning_rate": 4.155373336731728e-05, - "loss": 49.6871, - "step": 85670 - }, - { - "epoch": 0.3461580416698651, - "grad_norm": 1185.981689453125, - "learning_rate": 4.155111740453588e-05, - "loss": 69.9553, - "step": 85680 - }, - { - "epoch": 0.3461984429352327, - "grad_norm": 817.763671875, - "learning_rate": 4.154850111908192e-05, - "loss": 54.951, - "step": 85690 - }, - { - "epoch": 0.34623884420060036, - "grad_norm": 793.4094848632812, - "learning_rate": 4.154588451100642e-05, - "loss": 98.9682, - "step": 85700 - }, - { - "epoch": 0.346279245465968, - "grad_norm": 643.4264526367188, - "learning_rate": 4.1543267580360374e-05, - "loss": 52.4739, - "step": 85710 - }, - { - "epoch": 0.34631964673133564, - "grad_norm": 1268.8812255859375, - "learning_rate": 4.154065032719481e-05, - "loss": 83.5729, - "step": 85720 - }, - { - "epoch": 0.3463600479967033, - "grad_norm": 532.7398681640625, - "learning_rate": 4.153803275156076e-05, - "loss": 52.5248, - "step": 85730 - }, - { - "epoch": 0.34640044926207086, - "grad_norm": 2376.361083984375, - "learning_rate": 4.153541485350924e-05, - "loss": 97.7747, - "step": 85740 - }, - { - "epoch": 0.3464408505274385, - "grad_norm": 2388.05908203125, - "learning_rate": 4.1532796633091296e-05, - "loss": 77.3507, - "step": 85750 - }, - { - "epoch": 0.34648125179280614, - "grad_norm": 625.3255004882812, - "learning_rate": 4.1530178090357976e-05, - "loss": 56.9691, - "step": 85760 - }, - { - "epoch": 0.3465216530581738, - "grad_norm": 792.126953125, - "learning_rate": 4.152755922536032e-05, - "loss": 51.1804, - "step": 85770 - }, - { - "epoch": 0.3465620543235414, - "grad_norm": 379.2236328125, - "learning_rate": 4.1524940038149384e-05, - "loss": 53.4376, - "step": 85780 - }, - { - "epoch": 0.34660245558890906, - "grad_norm": 1024.7281494140625, - "learning_rate": 4.152232052877624e-05, - "loss": 101.2286, - "step": 85790 - }, - { - "epoch": 0.34664285685427665, - "grad_norm": 1685.01611328125, - "learning_rate": 4.1519700697291944e-05, - "loss": 58.5797, - "step": 85800 - }, - { - "epoch": 0.3466832581196443, - "grad_norm": 642.328857421875, - "learning_rate": 4.1517080543747584e-05, - "loss": 80.0744, - "step": 85810 - }, - { - "epoch": 0.34672365938501193, - "grad_norm": 760.1344604492188, - "learning_rate": 4.151446006819423e-05, - "loss": 65.5687, - "step": 85820 - }, - { - "epoch": 0.34676406065037957, - "grad_norm": 499.570556640625, - "learning_rate": 4.151183927068298e-05, - "loss": 57.8297, - "step": 85830 - }, - { - "epoch": 0.3468044619157472, - "grad_norm": 1389.1029052734375, - "learning_rate": 4.150921815126493e-05, - "loss": 74.6479, - "step": 85840 - }, - { - "epoch": 0.34684486318111485, - "grad_norm": 1510.54150390625, - "learning_rate": 4.150659670999116e-05, - "loss": 82.4179, - "step": 85850 - }, - { - "epoch": 0.3468852644464825, - "grad_norm": 728.5758056640625, - "learning_rate": 4.150397494691279e-05, - "loss": 70.3151, - "step": 85860 - }, - { - "epoch": 0.3469256657118501, - "grad_norm": 1455.7420654296875, - "learning_rate": 4.150135286208093e-05, - "loss": 73.2886, - "step": 85870 - }, - { - "epoch": 0.3469660669772177, - "grad_norm": 328.35015869140625, - "learning_rate": 4.149873045554671e-05, - "loss": 42.4879, - "step": 85880 - }, - { - "epoch": 0.34700646824258535, - "grad_norm": 434.59527587890625, - "learning_rate": 4.1496107727361235e-05, - "loss": 45.5354, - "step": 85890 - }, - { - "epoch": 0.347046869507953, - "grad_norm": 3333.067138671875, - "learning_rate": 4.149348467757566e-05, - "loss": 98.8698, - "step": 85900 - }, - { - "epoch": 0.34708727077332063, - "grad_norm": 605.552978515625, - "learning_rate": 4.1490861306241096e-05, - "loss": 74.4398, - "step": 85910 - }, - { - "epoch": 0.3471276720386883, - "grad_norm": 1759.8612060546875, - "learning_rate": 4.148823761340871e-05, - "loss": 76.9145, - "step": 85920 - }, - { - "epoch": 0.34716807330405586, - "grad_norm": 567.1038818359375, - "learning_rate": 4.1485613599129636e-05, - "loss": 40.8438, - "step": 85930 - }, - { - "epoch": 0.3472084745694235, - "grad_norm": 3881.6103515625, - "learning_rate": 4.148298926345504e-05, - "loss": 66.3889, - "step": 85940 - }, - { - "epoch": 0.34724887583479114, - "grad_norm": 621.2008666992188, - "learning_rate": 4.148036460643608e-05, - "loss": 59.3511, - "step": 85950 - }, - { - "epoch": 0.3472892771001588, - "grad_norm": 468.0349426269531, - "learning_rate": 4.1477739628123934e-05, - "loss": 101.2036, - "step": 85960 - }, - { - "epoch": 0.3473296783655264, - "grad_norm": 1429.98095703125, - "learning_rate": 4.1475114328569776e-05, - "loss": 56.234, - "step": 85970 - }, - { - "epoch": 0.34737007963089406, - "grad_norm": 695.315673828125, - "learning_rate": 4.147248870782477e-05, - "loss": 54.0909, - "step": 85980 - }, - { - "epoch": 0.3474104808962617, - "grad_norm": 1032.339111328125, - "learning_rate": 4.146986276594012e-05, - "loss": 73.7455, - "step": 85990 - }, - { - "epoch": 0.3474508821616293, - "grad_norm": 353.8186950683594, - "learning_rate": 4.146723650296701e-05, - "loss": 55.2589, - "step": 86000 - }, - { - "epoch": 0.3474912834269969, - "grad_norm": 1116.111572265625, - "learning_rate": 4.146460991895666e-05, - "loss": 58.2723, - "step": 86010 - }, - { - "epoch": 0.34753168469236456, - "grad_norm": 416.8829040527344, - "learning_rate": 4.1461983013960245e-05, - "loss": 47.4193, - "step": 86020 - }, - { - "epoch": 0.3475720859577322, - "grad_norm": 794.2473754882812, - "learning_rate": 4.1459355788029013e-05, - "loss": 67.6769, - "step": 86030 - }, - { - "epoch": 0.34761248722309984, - "grad_norm": 727.6878051757812, - "learning_rate": 4.145672824121416e-05, - "loss": 84.4826, - "step": 86040 - }, - { - "epoch": 0.3476528884884675, - "grad_norm": 2156.322265625, - "learning_rate": 4.145410037356692e-05, - "loss": 82.6664, - "step": 86050 - }, - { - "epoch": 0.34769328975383507, - "grad_norm": 511.3225402832031, - "learning_rate": 4.145147218513852e-05, - "loss": 43.9569, - "step": 86060 - }, - { - "epoch": 0.3477336910192027, - "grad_norm": 298.62005615234375, - "learning_rate": 4.14488436759802e-05, - "loss": 45.4608, - "step": 86070 - }, - { - "epoch": 0.34777409228457035, - "grad_norm": 1355.5355224609375, - "learning_rate": 4.144621484614319e-05, - "loss": 124.826, - "step": 86080 - }, - { - "epoch": 0.347814493549938, - "grad_norm": 1087.8583984375, - "learning_rate": 4.1443585695678774e-05, - "loss": 54.7751, - "step": 86090 - }, - { - "epoch": 0.3478548948153056, - "grad_norm": 431.3752746582031, - "learning_rate": 4.1440956224638184e-05, - "loss": 63.882, - "step": 86100 - }, - { - "epoch": 0.34789529608067327, - "grad_norm": 1415.1689453125, - "learning_rate": 4.143832643307269e-05, - "loss": 55.6841, - "step": 86110 - }, - { - "epoch": 0.34793569734604085, - "grad_norm": 500.28759765625, - "learning_rate": 4.1435696321033554e-05, - "loss": 75.8191, - "step": 86120 - }, - { - "epoch": 0.3479760986114085, - "grad_norm": 725.256591796875, - "learning_rate": 4.143306588857206e-05, - "loss": 68.6684, - "step": 86130 - }, - { - "epoch": 0.34801649987677613, - "grad_norm": 1193.91357421875, - "learning_rate": 4.143043513573949e-05, - "loss": 68.4111, - "step": 86140 - }, - { - "epoch": 0.34805690114214377, - "grad_norm": 311.3026428222656, - "learning_rate": 4.1427804062587116e-05, - "loss": 60.3588, - "step": 86150 - }, - { - "epoch": 0.3480973024075114, - "grad_norm": 2180.667236328125, - "learning_rate": 4.142517266916625e-05, - "loss": 71.0806, - "step": 86160 - }, - { - "epoch": 0.34813770367287905, - "grad_norm": 561.2362060546875, - "learning_rate": 4.1422540955528186e-05, - "loss": 65.3, - "step": 86170 - }, - { - "epoch": 0.3481781049382467, - "grad_norm": 674.5610961914062, - "learning_rate": 4.141990892172424e-05, - "loss": 77.8949, - "step": 86180 - }, - { - "epoch": 0.3482185062036143, - "grad_norm": 1787.229248046875, - "learning_rate": 4.14172765678057e-05, - "loss": 88.2457, - "step": 86190 - }, - { - "epoch": 0.3482589074689819, - "grad_norm": 1689.93505859375, - "learning_rate": 4.1414643893823914e-05, - "loss": 63.5292, - "step": 86200 - }, - { - "epoch": 0.34829930873434956, - "grad_norm": 1198.643310546875, - "learning_rate": 4.141201089983019e-05, - "loss": 57.5542, - "step": 86210 - }, - { - "epoch": 0.3483397099997172, - "grad_norm": 301.3227233886719, - "learning_rate": 4.1409377585875865e-05, - "loss": 59.0172, - "step": 86220 - }, - { - "epoch": 0.34838011126508484, - "grad_norm": 4092.246826171875, - "learning_rate": 4.1406743952012275e-05, - "loss": 72.4229, - "step": 86230 - }, - { - "epoch": 0.3484205125304525, - "grad_norm": 2412.6689453125, - "learning_rate": 4.140410999829076e-05, - "loss": 118.2052, - "step": 86240 - }, - { - "epoch": 0.34846091379582006, - "grad_norm": 649.2863159179688, - "learning_rate": 4.140147572476268e-05, - "loss": 78.7629, - "step": 86250 - }, - { - "epoch": 0.3485013150611877, - "grad_norm": 634.6868286132812, - "learning_rate": 4.1398841131479395e-05, - "loss": 59.1645, - "step": 86260 - }, - { - "epoch": 0.34854171632655534, - "grad_norm": 908.8325805664062, - "learning_rate": 4.139620621849225e-05, - "loss": 60.6888, - "step": 86270 - }, - { - "epoch": 0.348582117591923, - "grad_norm": 1681.2099609375, - "learning_rate": 4.139357098585262e-05, - "loss": 82.6059, - "step": 86280 - }, - { - "epoch": 0.3486225188572906, - "grad_norm": 743.29443359375, - "learning_rate": 4.1390935433611886e-05, - "loss": 86.1345, - "step": 86290 - }, - { - "epoch": 0.34866292012265826, - "grad_norm": 906.7951049804688, - "learning_rate": 4.138829956182144e-05, - "loss": 63.9069, - "step": 86300 - }, - { - "epoch": 0.3487033213880259, - "grad_norm": 821.8445434570312, - "learning_rate": 4.138566337053264e-05, - "loss": 64.0225, - "step": 86310 - }, - { - "epoch": 0.3487437226533935, - "grad_norm": 1009.7755737304688, - "learning_rate": 4.1383026859796905e-05, - "loss": 71.5133, - "step": 86320 - }, - { - "epoch": 0.3487841239187611, - "grad_norm": 260.2582702636719, - "learning_rate": 4.138039002966563e-05, - "loss": 52.0272, - "step": 86330 - }, - { - "epoch": 0.34882452518412876, - "grad_norm": 847.3204956054688, - "learning_rate": 4.137775288019021e-05, - "loss": 59.5128, - "step": 86340 - }, - { - "epoch": 0.3488649264494964, - "grad_norm": 414.3377380371094, - "learning_rate": 4.137511541142207e-05, - "loss": 55.5041, - "step": 86350 - }, - { - "epoch": 0.34890532771486404, - "grad_norm": 2023.2427978515625, - "learning_rate": 4.137247762341262e-05, - "loss": 67.8408, - "step": 86360 - }, - { - "epoch": 0.3489457289802317, - "grad_norm": 1327.11767578125, - "learning_rate": 4.136983951621329e-05, - "loss": 50.12, - "step": 86370 - }, - { - "epoch": 0.34898613024559927, - "grad_norm": 608.749267578125, - "learning_rate": 4.136720108987552e-05, - "loss": 61.7877, - "step": 86380 - }, - { - "epoch": 0.3490265315109669, - "grad_norm": 864.80322265625, - "learning_rate": 4.136456234445073e-05, - "loss": 72.582, - "step": 86390 - }, - { - "epoch": 0.34906693277633455, - "grad_norm": 749.7403564453125, - "learning_rate": 4.136192327999037e-05, - "loss": 68.246, - "step": 86400 - }, - { - "epoch": 0.3491073340417022, - "grad_norm": 1620.177734375, - "learning_rate": 4.1359283896545895e-05, - "loss": 94.8168, - "step": 86410 - }, - { - "epoch": 0.34914773530706983, - "grad_norm": 2137.778564453125, - "learning_rate": 4.135664419416877e-05, - "loss": 54.6499, - "step": 86420 - }, - { - "epoch": 0.34918813657243747, - "grad_norm": 355.5881042480469, - "learning_rate": 4.1354004172910434e-05, - "loss": 71.6114, - "step": 86430 - }, - { - "epoch": 0.34922853783780505, - "grad_norm": 579.06005859375, - "learning_rate": 4.135136383282237e-05, - "loss": 83.3694, - "step": 86440 - }, - { - "epoch": 0.3492689391031727, - "grad_norm": 363.8780822753906, - "learning_rate": 4.134872317395604e-05, - "loss": 71.6161, - "step": 86450 - }, - { - "epoch": 0.34930934036854033, - "grad_norm": 734.9833984375, - "learning_rate": 4.134608219636294e-05, - "loss": 63.2569, - "step": 86460 - }, - { - "epoch": 0.349349741633908, - "grad_norm": 659.5712890625, - "learning_rate": 4.134344090009455e-05, - "loss": 59.2688, - "step": 86470 - }, - { - "epoch": 0.3493901428992756, - "grad_norm": 658.69677734375, - "learning_rate": 4.1340799285202376e-05, - "loss": 49.0295, - "step": 86480 - }, - { - "epoch": 0.34943054416464325, - "grad_norm": 687.8778076171875, - "learning_rate": 4.13381573517379e-05, - "loss": 84.1034, - "step": 86490 - }, - { - "epoch": 0.3494709454300109, - "grad_norm": 576.1513061523438, - "learning_rate": 4.133551509975264e-05, - "loss": 46.6568, - "step": 86500 - }, - { - "epoch": 0.3495113466953785, - "grad_norm": 435.50640869140625, - "learning_rate": 4.13328725292981e-05, - "loss": 51.9577, - "step": 86510 - }, - { - "epoch": 0.3495517479607461, - "grad_norm": 538.1187744140625, - "learning_rate": 4.13302296404258e-05, - "loss": 159.9702, - "step": 86520 - }, - { - "epoch": 0.34959214922611376, - "grad_norm": 989.9334716796875, - "learning_rate": 4.132758643318726e-05, - "loss": 90.6167, - "step": 86530 - }, - { - "epoch": 0.3496325504914814, - "grad_norm": 538.1937255859375, - "learning_rate": 4.132494290763403e-05, - "loss": 52.8154, - "step": 86540 - }, - { - "epoch": 0.34967295175684904, - "grad_norm": 2396.126708984375, - "learning_rate": 4.1322299063817624e-05, - "loss": 60.857, - "step": 86550 - }, - { - "epoch": 0.3497133530222167, - "grad_norm": 909.927978515625, - "learning_rate": 4.131965490178959e-05, - "loss": 127.2578, - "step": 86560 - }, - { - "epoch": 0.34975375428758426, - "grad_norm": 535.4166259765625, - "learning_rate": 4.131701042160149e-05, - "loss": 77.5207, - "step": 86570 - }, - { - "epoch": 0.3497941555529519, - "grad_norm": 947.1220703125, - "learning_rate": 4.131436562330487e-05, - "loss": 85.3897, - "step": 86580 - }, - { - "epoch": 0.34983455681831954, - "grad_norm": 1153.6351318359375, - "learning_rate": 4.13117205069513e-05, - "loss": 85.3414, - "step": 86590 - }, - { - "epoch": 0.3498749580836872, - "grad_norm": 1040.203857421875, - "learning_rate": 4.130907507259233e-05, - "loss": 79.24, - "step": 86600 - }, - { - "epoch": 0.3499153593490548, - "grad_norm": 737.9971313476562, - "learning_rate": 4.130642932027955e-05, - "loss": 47.7125, - "step": 86610 - }, - { - "epoch": 0.34995576061442246, - "grad_norm": 1519.4359130859375, - "learning_rate": 4.130378325006453e-05, - "loss": 59.9955, - "step": 86620 - }, - { - "epoch": 0.3499961618797901, - "grad_norm": 734.98828125, - "learning_rate": 4.130113686199887e-05, - "loss": 44.1955, - "step": 86630 - }, - { - "epoch": 0.3500365631451577, - "grad_norm": 1075.6939697265625, - "learning_rate": 4.129849015613415e-05, - "loss": 73.9399, - "step": 86640 - }, - { - "epoch": 0.3500769644105253, - "grad_norm": 3161.4248046875, - "learning_rate": 4.1295843132521973e-05, - "loss": 81.6022, - "step": 86650 - }, - { - "epoch": 0.35011736567589297, - "grad_norm": 1227.13134765625, - "learning_rate": 4.129319579121394e-05, - "loss": 67.1966, - "step": 86660 - }, - { - "epoch": 0.3501577669412606, - "grad_norm": 779.573974609375, - "learning_rate": 4.129054813226167e-05, - "loss": 72.8276, - "step": 86670 - }, - { - "epoch": 0.35019816820662825, - "grad_norm": 378.6111755371094, - "learning_rate": 4.1287900155716784e-05, - "loss": 43.1125, - "step": 86680 - }, - { - "epoch": 0.3502385694719959, - "grad_norm": 865.598876953125, - "learning_rate": 4.128525186163089e-05, - "loss": 75.8998, - "step": 86690 - }, - { - "epoch": 0.35027897073736347, - "grad_norm": 1067.35693359375, - "learning_rate": 4.128260325005564e-05, - "loss": 66.426, - "step": 86700 - }, - { - "epoch": 0.3503193720027311, - "grad_norm": 1469.167236328125, - "learning_rate": 4.127995432104264e-05, - "loss": 95.7886, - "step": 86710 - }, - { - "epoch": 0.35035977326809875, - "grad_norm": 554.118408203125, - "learning_rate": 4.127730507464356e-05, - "loss": 83.7477, - "step": 86720 - }, - { - "epoch": 0.3504001745334664, - "grad_norm": 1134.07470703125, - "learning_rate": 4.127465551091003e-05, - "loss": 123.3934, - "step": 86730 - }, - { - "epoch": 0.35044057579883403, - "grad_norm": 1663.503173828125, - "learning_rate": 4.1272005629893714e-05, - "loss": 51.4834, - "step": 86740 - }, - { - "epoch": 0.35048097706420167, - "grad_norm": 464.7097473144531, - "learning_rate": 4.1269355431646274e-05, - "loss": 47.998, - "step": 86750 - }, - { - "epoch": 0.35052137832956926, - "grad_norm": 533.934814453125, - "learning_rate": 4.126670491621938e-05, - "loss": 89.6148, - "step": 86760 - }, - { - "epoch": 0.3505617795949369, - "grad_norm": 1182.53466796875, - "learning_rate": 4.126405408366468e-05, - "loss": 58.9152, - "step": 86770 - }, - { - "epoch": 0.35060218086030454, - "grad_norm": 1748.3175048828125, - "learning_rate": 4.1261402934033886e-05, - "loss": 76.4899, - "step": 86780 - }, - { - "epoch": 0.3506425821256722, - "grad_norm": 503.2514343261719, - "learning_rate": 4.125875146737868e-05, - "loss": 94.3811, - "step": 86790 - }, - { - "epoch": 0.3506829833910398, - "grad_norm": 602.4920043945312, - "learning_rate": 4.125609968375072e-05, - "loss": 41.265, - "step": 86800 - }, - { - "epoch": 0.35072338465640746, - "grad_norm": 1253.5115966796875, - "learning_rate": 4.125344758320174e-05, - "loss": 56.0734, - "step": 86810 - }, - { - "epoch": 0.3507637859217751, - "grad_norm": 1205.50634765625, - "learning_rate": 4.125079516578344e-05, - "loss": 90.1057, - "step": 86820 - }, - { - "epoch": 0.3508041871871427, - "grad_norm": 422.2942810058594, - "learning_rate": 4.12481424315475e-05, - "loss": 60.9675, - "step": 86830 - }, - { - "epoch": 0.3508445884525103, - "grad_norm": 702.0094604492188, - "learning_rate": 4.124548938054568e-05, - "loss": 53.5536, - "step": 86840 - }, - { - "epoch": 0.35088498971787796, - "grad_norm": 604.8543090820312, - "learning_rate": 4.1242836012829665e-05, - "loss": 64.9087, - "step": 86850 - }, - { - "epoch": 0.3509253909832456, - "grad_norm": 307.5919494628906, - "learning_rate": 4.1240182328451204e-05, - "loss": 72.9601, - "step": 86860 - }, - { - "epoch": 0.35096579224861324, - "grad_norm": 1943.6920166015625, - "learning_rate": 4.123752832746203e-05, - "loss": 87.0177, - "step": 86870 - }, - { - "epoch": 0.3510061935139809, - "grad_norm": 1072.779296875, - "learning_rate": 4.123487400991388e-05, - "loss": 73.2874, - "step": 86880 - }, - { - "epoch": 0.35104659477934846, - "grad_norm": 644.7705078125, - "learning_rate": 4.1232219375858504e-05, - "loss": 82.4604, - "step": 86890 - }, - { - "epoch": 0.3510869960447161, - "grad_norm": 214.5071563720703, - "learning_rate": 4.1229564425347654e-05, - "loss": 50.0135, - "step": 86900 - }, - { - "epoch": 0.35112739731008374, - "grad_norm": 662.0516357421875, - "learning_rate": 4.122690915843309e-05, - "loss": 45.6385, - "step": 86910 - }, - { - "epoch": 0.3511677985754514, - "grad_norm": 1001.0264282226562, - "learning_rate": 4.122425357516658e-05, - "loss": 63.8673, - "step": 86920 - }, - { - "epoch": 0.351208199840819, - "grad_norm": 1112.534912109375, - "learning_rate": 4.1221597675599886e-05, - "loss": 54.2994, - "step": 86930 - }, - { - "epoch": 0.35124860110618666, - "grad_norm": 565.2496948242188, - "learning_rate": 4.1218941459784796e-05, - "loss": 50.4045, - "step": 86940 - }, - { - "epoch": 0.3512890023715543, - "grad_norm": 823.8052978515625, - "learning_rate": 4.121628492777311e-05, - "loss": 66.9651, - "step": 86950 - }, - { - "epoch": 0.3513294036369219, - "grad_norm": 875.3696899414062, - "learning_rate": 4.121362807961658e-05, - "loss": 84.8828, - "step": 86960 - }, - { - "epoch": 0.35136980490228953, - "grad_norm": 412.0173034667969, - "learning_rate": 4.1210970915367026e-05, - "loss": 78.3683, - "step": 86970 - }, - { - "epoch": 0.35141020616765717, - "grad_norm": 1158.7080078125, - "learning_rate": 4.120831343507625e-05, - "loss": 77.7848, - "step": 86980 - }, - { - "epoch": 0.3514506074330248, - "grad_norm": 261.9429016113281, - "learning_rate": 4.1205655638796065e-05, - "loss": 43.2981, - "step": 86990 - }, - { - "epoch": 0.35149100869839245, - "grad_norm": 779.2062377929688, - "learning_rate": 4.1202997526578276e-05, - "loss": 57.5505, - "step": 87000 - }, - { - "epoch": 0.3515314099637601, - "grad_norm": 742.50048828125, - "learning_rate": 4.120033909847471e-05, - "loss": 68.5733, - "step": 87010 - }, - { - "epoch": 0.3515718112291277, - "grad_norm": 610.2131958007812, - "learning_rate": 4.1197680354537186e-05, - "loss": 64.115, - "step": 87020 - }, - { - "epoch": 0.3516122124944953, - "grad_norm": 994.4957275390625, - "learning_rate": 4.119502129481755e-05, - "loss": 42.9862, - "step": 87030 - }, - { - "epoch": 0.35165261375986295, - "grad_norm": 1622.78076171875, - "learning_rate": 4.119236191936764e-05, - "loss": 55.3836, - "step": 87040 - }, - { - "epoch": 0.3516930150252306, - "grad_norm": 1582.70458984375, - "learning_rate": 4.118970222823929e-05, - "loss": 71.9621, - "step": 87050 - }, - { - "epoch": 0.35173341629059823, - "grad_norm": 1007.8423461914062, - "learning_rate": 4.118704222148436e-05, - "loss": 66.6885, - "step": 87060 - }, - { - "epoch": 0.3517738175559659, - "grad_norm": 431.42828369140625, - "learning_rate": 4.118438189915471e-05, - "loss": 83.4902, - "step": 87070 - }, - { - "epoch": 0.35181421882133346, - "grad_norm": 1616.8265380859375, - "learning_rate": 4.118172126130221e-05, - "loss": 49.96, - "step": 87080 - }, - { - "epoch": 0.3518546200867011, - "grad_norm": 358.4612121582031, - "learning_rate": 4.117906030797871e-05, - "loss": 85.6933, - "step": 87090 - }, - { - "epoch": 0.35189502135206874, - "grad_norm": 595.7027587890625, - "learning_rate": 4.1176399039236116e-05, - "loss": 67.9241, - "step": 87100 - }, - { - "epoch": 0.3519354226174364, - "grad_norm": 826.05517578125, - "learning_rate": 4.117373745512628e-05, - "loss": 92.1798, - "step": 87110 - }, - { - "epoch": 0.351975823882804, - "grad_norm": 2575.160888671875, - "learning_rate": 4.117107555570111e-05, - "loss": 80.2855, - "step": 87120 - }, - { - "epoch": 0.35201622514817166, - "grad_norm": 635.9917602539062, - "learning_rate": 4.1168413341012496e-05, - "loss": 51.3892, - "step": 87130 - }, - { - "epoch": 0.3520566264135393, - "grad_norm": 586.3646240234375, - "learning_rate": 4.116575081111235e-05, - "loss": 74.7427, - "step": 87140 - }, - { - "epoch": 0.3520970276789069, - "grad_norm": 379.6501770019531, - "learning_rate": 4.116308796605255e-05, - "loss": 42.7718, - "step": 87150 - }, - { - "epoch": 0.3521374289442745, - "grad_norm": 757.53076171875, - "learning_rate": 4.116042480588505e-05, - "loss": 79.6147, - "step": 87160 - }, - { - "epoch": 0.35217783020964216, - "grad_norm": 521.4224853515625, - "learning_rate": 4.1157761330661734e-05, - "loss": 114.8519, - "step": 87170 - }, - { - "epoch": 0.3522182314750098, - "grad_norm": 543.6570434570312, - "learning_rate": 4.115509754043454e-05, - "loss": 67.7495, - "step": 87180 - }, - { - "epoch": 0.35225863274037744, - "grad_norm": 635.4970703125, - "learning_rate": 4.115243343525541e-05, - "loss": 78.6889, - "step": 87190 - }, - { - "epoch": 0.3522990340057451, - "grad_norm": 1206.6654052734375, - "learning_rate": 4.1149769015176275e-05, - "loss": 56.5697, - "step": 87200 - }, - { - "epoch": 0.35233943527111267, - "grad_norm": 811.91455078125, - "learning_rate": 4.114710428024907e-05, - "loss": 78.0935, - "step": 87210 - }, - { - "epoch": 0.3523798365364803, - "grad_norm": 1116.96240234375, - "learning_rate": 4.114443923052577e-05, - "loss": 59.2988, - "step": 87220 - }, - { - "epoch": 0.35242023780184795, - "grad_norm": 623.7386474609375, - "learning_rate": 4.11417738660583e-05, - "loss": 56.9475, - "step": 87230 - }, - { - "epoch": 0.3524606390672156, - "grad_norm": 1740.817138671875, - "learning_rate": 4.113910818689864e-05, - "loss": 65.6938, - "step": 87240 - }, - { - "epoch": 0.3525010403325832, - "grad_norm": 752.4931640625, - "learning_rate": 4.113644219309877e-05, - "loss": 83.3021, - "step": 87250 - }, - { - "epoch": 0.35254144159795087, - "grad_norm": 1242.556640625, - "learning_rate": 4.1133775884710634e-05, - "loss": 59.4292, - "step": 87260 - }, - { - "epoch": 0.3525818428633185, - "grad_norm": 1105.26318359375, - "learning_rate": 4.113110926178625e-05, - "loss": 52.1607, - "step": 87270 - }, - { - "epoch": 0.3526222441286861, - "grad_norm": 941.704345703125, - "learning_rate": 4.112844232437757e-05, - "loss": 44.7782, - "step": 87280 - }, - { - "epoch": 0.35266264539405373, - "grad_norm": 975.8741455078125, - "learning_rate": 4.112577507253661e-05, - "loss": 63.2205, - "step": 87290 - }, - { - "epoch": 0.35270304665942137, - "grad_norm": 267.49029541015625, - "learning_rate": 4.112310750631536e-05, - "loss": 55.718, - "step": 87300 - }, - { - "epoch": 0.352743447924789, - "grad_norm": 492.7359619140625, - "learning_rate": 4.112043962576583e-05, - "loss": 87.6296, - "step": 87310 - }, - { - "epoch": 0.35278384919015665, - "grad_norm": 723.97412109375, - "learning_rate": 4.1117771430940035e-05, - "loss": 72.1795, - "step": 87320 - }, - { - "epoch": 0.3528242504555243, - "grad_norm": 1166.4947509765625, - "learning_rate": 4.111510292188998e-05, - "loss": 61.6153, - "step": 87330 - }, - { - "epoch": 0.3528646517208919, - "grad_norm": 943.3729248046875, - "learning_rate": 4.111243409866769e-05, - "loss": 78.3224, - "step": 87340 - }, - { - "epoch": 0.3529050529862595, - "grad_norm": 1874.787353515625, - "learning_rate": 4.110976496132522e-05, - "loss": 43.5677, - "step": 87350 - }, - { - "epoch": 0.35294545425162716, - "grad_norm": 567.2123413085938, - "learning_rate": 4.1107095509914584e-05, - "loss": 114.9888, - "step": 87360 - }, - { - "epoch": 0.3529858555169948, - "grad_norm": 1504.210205078125, - "learning_rate": 4.1104425744487826e-05, - "loss": 63.7791, - "step": 87370 - }, - { - "epoch": 0.35302625678236244, - "grad_norm": 252.47015380859375, - "learning_rate": 4.1101755665096996e-05, - "loss": 46.304, - "step": 87380 - }, - { - "epoch": 0.3530666580477301, - "grad_norm": 952.9064331054688, - "learning_rate": 4.109908527179415e-05, - "loss": 68.7049, - "step": 87390 - }, - { - "epoch": 0.35310705931309766, - "grad_norm": 3070.967041015625, - "learning_rate": 4.109641456463135e-05, - "loss": 70.3882, - "step": 87400 - }, - { - "epoch": 0.3531474605784653, - "grad_norm": 1895.307861328125, - "learning_rate": 4.109374354366066e-05, - "loss": 44.8805, - "step": 87410 - }, - { - "epoch": 0.35318786184383294, - "grad_norm": 1331.394287109375, - "learning_rate": 4.109107220893415e-05, - "loss": 59.9866, - "step": 87420 - }, - { - "epoch": 0.3532282631092006, - "grad_norm": 249.96646118164062, - "learning_rate": 4.1088400560503905e-05, - "loss": 66.2172, - "step": 87430 - }, - { - "epoch": 0.3532686643745682, - "grad_norm": 947.6691284179688, - "learning_rate": 4.108572859842201e-05, - "loss": 90.3438, - "step": 87440 - }, - { - "epoch": 0.35330906563993586, - "grad_norm": 573.4298706054688, - "learning_rate": 4.108305632274055e-05, - "loss": 34.3865, - "step": 87450 - }, - { - "epoch": 0.3533494669053035, - "grad_norm": 602.94384765625, - "learning_rate": 4.108038373351163e-05, - "loss": 51.0906, - "step": 87460 - }, - { - "epoch": 0.3533898681706711, - "grad_norm": 3697.798095703125, - "learning_rate": 4.107771083078735e-05, - "loss": 70.8239, - "step": 87470 - }, - { - "epoch": 0.3534302694360387, - "grad_norm": 549.9475708007812, - "learning_rate": 4.107503761461983e-05, - "loss": 48.9701, - "step": 87480 - }, - { - "epoch": 0.35347067070140636, - "grad_norm": 651.7734985351562, - "learning_rate": 4.107236408506116e-05, - "loss": 87.1631, - "step": 87490 - }, - { - "epoch": 0.353511071966774, - "grad_norm": 854.0162963867188, - "learning_rate": 4.1069690242163484e-05, - "loss": 39.3587, - "step": 87500 - }, - { - "epoch": 0.35355147323214164, - "grad_norm": 845.365234375, - "learning_rate": 4.106701608597893e-05, - "loss": 80.0646, - "step": 87510 - }, - { - "epoch": 0.3535918744975093, - "grad_norm": 571.5369262695312, - "learning_rate": 4.106434161655962e-05, - "loss": 54.8925, - "step": 87520 - }, - { - "epoch": 0.35363227576287687, - "grad_norm": 514.8745727539062, - "learning_rate": 4.106166683395769e-05, - "loss": 80.812, - "step": 87530 - }, - { - "epoch": 0.3536726770282445, - "grad_norm": 895.7685546875, - "learning_rate": 4.105899173822531e-05, - "loss": 52.7344, - "step": 87540 - }, - { - "epoch": 0.35371307829361215, - "grad_norm": 408.03485107421875, - "learning_rate": 4.1056316329414616e-05, - "loss": 63.1606, - "step": 87550 - }, - { - "epoch": 0.3537534795589798, - "grad_norm": 794.3991088867188, - "learning_rate": 4.105364060757776e-05, - "loss": 81.9505, - "step": 87560 - }, - { - "epoch": 0.35379388082434743, - "grad_norm": 308.87799072265625, - "learning_rate": 4.1050964572766923e-05, - "loss": 71.2676, - "step": 87570 - }, - { - "epoch": 0.35383428208971507, - "grad_norm": 743.8997192382812, - "learning_rate": 4.104828822503427e-05, - "loss": 54.9144, - "step": 87580 - }, - { - "epoch": 0.3538746833550827, - "grad_norm": 800.4918212890625, - "learning_rate": 4.104561156443197e-05, - "loss": 48.0048, - "step": 87590 - }, - { - "epoch": 0.3539150846204503, - "grad_norm": 217.0595245361328, - "learning_rate": 4.104293459101222e-05, - "loss": 71.4233, - "step": 87600 - }, - { - "epoch": 0.35395548588581793, - "grad_norm": 778.3648071289062, - "learning_rate": 4.104025730482719e-05, - "loss": 66.6853, - "step": 87610 - }, - { - "epoch": 0.3539958871511856, - "grad_norm": 712.687255859375, - "learning_rate": 4.103757970592909e-05, - "loss": 59.8492, - "step": 87620 - }, - { - "epoch": 0.3540362884165532, - "grad_norm": 366.0815734863281, - "learning_rate": 4.1034901794370116e-05, - "loss": 49.8096, - "step": 87630 - }, - { - "epoch": 0.35407668968192085, - "grad_norm": 1185.479736328125, - "learning_rate": 4.1032223570202474e-05, - "loss": 83.2932, - "step": 87640 - }, - { - "epoch": 0.3541170909472885, - "grad_norm": 411.51373291015625, - "learning_rate": 4.102954503347839e-05, - "loss": 45.1826, - "step": 87650 - }, - { - "epoch": 0.3541574922126561, - "grad_norm": 749.7902221679688, - "learning_rate": 4.102686618425006e-05, - "loss": 56.4452, - "step": 87660 - }, - { - "epoch": 0.3541978934780237, - "grad_norm": 573.7445068359375, - "learning_rate": 4.102418702256973e-05, - "loss": 59.8702, - "step": 87670 - }, - { - "epoch": 0.35423829474339136, - "grad_norm": 1168.4649658203125, - "learning_rate": 4.1021507548489625e-05, - "loss": 66.0522, - "step": 87680 - }, - { - "epoch": 0.354278696008759, - "grad_norm": 949.6201171875, - "learning_rate": 4.1018827762061985e-05, - "loss": 82.0665, - "step": 87690 - }, - { - "epoch": 0.35431909727412664, - "grad_norm": 1565.3529052734375, - "learning_rate": 4.101614766333904e-05, - "loss": 102.3075, - "step": 87700 - }, - { - "epoch": 0.3543594985394943, - "grad_norm": 1542.8006591796875, - "learning_rate": 4.101346725237305e-05, - "loss": 66.2509, - "step": 87710 - }, - { - "epoch": 0.35439989980486186, - "grad_norm": 527.8439331054688, - "learning_rate": 4.1010786529216284e-05, - "loss": 71.4895, - "step": 87720 - }, - { - "epoch": 0.3544403010702295, - "grad_norm": 1168.476318359375, - "learning_rate": 4.100810549392099e-05, - "loss": 85.4038, - "step": 87730 - }, - { - "epoch": 0.35448070233559714, - "grad_norm": 714.1607666015625, - "learning_rate": 4.100542414653943e-05, - "loss": 51.9524, - "step": 87740 - }, - { - "epoch": 0.3545211036009648, - "grad_norm": 2820.892578125, - "learning_rate": 4.100274248712389e-05, - "loss": 84.763, - "step": 87750 - }, - { - "epoch": 0.3545615048663324, - "grad_norm": 1056.6544189453125, - "learning_rate": 4.1000060515726647e-05, - "loss": 46.4493, - "step": 87760 - }, - { - "epoch": 0.35460190613170006, - "grad_norm": 637.3196411132812, - "learning_rate": 4.0997378232399984e-05, - "loss": 64.2221, - "step": 87770 - }, - { - "epoch": 0.3546423073970677, - "grad_norm": 1249.30078125, - "learning_rate": 4.09946956371962e-05, - "loss": 62.3628, - "step": 87780 - }, - { - "epoch": 0.3546827086624353, - "grad_norm": 562.5096435546875, - "learning_rate": 4.0992012730167584e-05, - "loss": 56.4057, - "step": 87790 - }, - { - "epoch": 0.3547231099278029, - "grad_norm": 515.3623046875, - "learning_rate": 4.098932951136645e-05, - "loss": 75.1756, - "step": 87800 - }, - { - "epoch": 0.35476351119317057, - "grad_norm": 1004.6653442382812, - "learning_rate": 4.098664598084511e-05, - "loss": 87.7658, - "step": 87810 - }, - { - "epoch": 0.3548039124585382, - "grad_norm": 514.6812133789062, - "learning_rate": 4.0983962138655873e-05, - "loss": 63.0205, - "step": 87820 - }, - { - "epoch": 0.35484431372390585, - "grad_norm": 327.2781677246094, - "learning_rate": 4.0981277984851066e-05, - "loss": 64.0254, - "step": 87830 - }, - { - "epoch": 0.3548847149892735, - "grad_norm": 945.9813842773438, - "learning_rate": 4.097859351948301e-05, - "loss": 71.4724, - "step": 87840 - }, - { - "epoch": 0.35492511625464107, - "grad_norm": 1146.121337890625, - "learning_rate": 4.0975908742604055e-05, - "loss": 74.1327, - "step": 87850 - }, - { - "epoch": 0.3549655175200087, - "grad_norm": 2756.161376953125, - "learning_rate": 4.097322365426653e-05, - "loss": 60.8623, - "step": 87860 - }, - { - "epoch": 0.35500591878537635, - "grad_norm": 616.4971923828125, - "learning_rate": 4.097053825452278e-05, - "loss": 74.3813, - "step": 87870 - }, - { - "epoch": 0.355046320050744, - "grad_norm": 796.4453735351562, - "learning_rate": 4.0967852543425175e-05, - "loss": 65.7487, - "step": 87880 - }, - { - "epoch": 0.35508672131611163, - "grad_norm": 377.42138671875, - "learning_rate": 4.0965166521026065e-05, - "loss": 46.341, - "step": 87890 - }, - { - "epoch": 0.35512712258147927, - "grad_norm": 605.7762451171875, - "learning_rate": 4.096248018737781e-05, - "loss": 81.4626, - "step": 87900 - }, - { - "epoch": 0.3551675238468469, - "grad_norm": 408.6437072753906, - "learning_rate": 4.095979354253279e-05, - "loss": 85.3279, - "step": 87910 - }, - { - "epoch": 0.3552079251122145, - "grad_norm": 673.0602416992188, - "learning_rate": 4.095710658654337e-05, - "loss": 35.6571, - "step": 87920 - }, - { - "epoch": 0.35524832637758214, - "grad_norm": 562.7644653320312, - "learning_rate": 4.0954419319461946e-05, - "loss": 68.498, - "step": 87930 - }, - { - "epoch": 0.3552887276429498, - "grad_norm": 1345.530029296875, - "learning_rate": 4.09517317413409e-05, - "loss": 54.9481, - "step": 87940 - }, - { - "epoch": 0.3553291289083174, - "grad_norm": 670.7591552734375, - "learning_rate": 4.094904385223264e-05, - "loss": 95.4115, - "step": 87950 - }, - { - "epoch": 0.35536953017368506, - "grad_norm": 6252.8359375, - "learning_rate": 4.094635565218955e-05, - "loss": 85.2645, - "step": 87960 - }, - { - "epoch": 0.3554099314390527, - "grad_norm": 699.56982421875, - "learning_rate": 4.094366714126405e-05, - "loss": 61.0998, - "step": 87970 - }, - { - "epoch": 0.3554503327044203, - "grad_norm": 572.2498779296875, - "learning_rate": 4.094097831950855e-05, - "loss": 55.9745, - "step": 87980 - }, - { - "epoch": 0.3554907339697879, - "grad_norm": 477.888671875, - "learning_rate": 4.093828918697547e-05, - "loss": 64.4699, - "step": 87990 - }, - { - "epoch": 0.35553113523515556, - "grad_norm": 1221.31982421875, - "learning_rate": 4.093559974371725e-05, - "loss": 60.6648, - "step": 88000 - }, - { - "epoch": 0.3555715365005232, - "grad_norm": 390.103759765625, - "learning_rate": 4.09329099897863e-05, - "loss": 51.6583, - "step": 88010 - }, - { - "epoch": 0.35561193776589084, - "grad_norm": 1909.0634765625, - "learning_rate": 4.0930219925235056e-05, - "loss": 62.6641, - "step": 88020 - }, - { - "epoch": 0.3556523390312585, - "grad_norm": 0.0, - "learning_rate": 4.0927529550115986e-05, - "loss": 54.3005, - "step": 88030 - }, - { - "epoch": 0.35569274029662606, - "grad_norm": 1166.9232177734375, - "learning_rate": 4.0924838864481516e-05, - "loss": 77.7679, - "step": 88040 - }, - { - "epoch": 0.3557331415619937, - "grad_norm": 736.1539916992188, - "learning_rate": 4.092214786838413e-05, - "loss": 55.0295, - "step": 88050 - }, - { - "epoch": 0.35577354282736134, - "grad_norm": 957.0504150390625, - "learning_rate": 4.0919456561876256e-05, - "loss": 72.6961, - "step": 88060 - }, - { - "epoch": 0.355813944092729, - "grad_norm": 687.1393432617188, - "learning_rate": 4.091676494501039e-05, - "loss": 60.3476, - "step": 88070 - }, - { - "epoch": 0.3558543453580966, - "grad_norm": 1779.8883056640625, - "learning_rate": 4.0914073017838996e-05, - "loss": 85.0388, - "step": 88080 - }, - { - "epoch": 0.35589474662346426, - "grad_norm": 1815.7283935546875, - "learning_rate": 4.091138078041455e-05, - "loss": 67.7276, - "step": 88090 - }, - { - "epoch": 0.3559351478888319, - "grad_norm": 510.8993835449219, - "learning_rate": 4.090868823278956e-05, - "loss": 74.9138, - "step": 88100 - }, - { - "epoch": 0.3559755491541995, - "grad_norm": 1010.1596069335938, - "learning_rate": 4.090599537501649e-05, - "loss": 70.1757, - "step": 88110 - }, - { - "epoch": 0.35601595041956713, - "grad_norm": 806.6387329101562, - "learning_rate": 4.090330220714785e-05, - "loss": 64.2958, - "step": 88120 - }, - { - "epoch": 0.35605635168493477, - "grad_norm": 1274.768310546875, - "learning_rate": 4.090060872923615e-05, - "loss": 98.0021, - "step": 88130 - }, - { - "epoch": 0.3560967529503024, - "grad_norm": 819.7002563476562, - "learning_rate": 4.089791494133389e-05, - "loss": 52.4494, - "step": 88140 - }, - { - "epoch": 0.35613715421567005, - "grad_norm": 1136.931396484375, - "learning_rate": 4.0895220843493606e-05, - "loss": 105.6924, - "step": 88150 - }, - { - "epoch": 0.3561775554810377, - "grad_norm": 2234.421630859375, - "learning_rate": 4.0892526435767795e-05, - "loss": 103.7004, - "step": 88160 - }, - { - "epoch": 0.3562179567464053, - "grad_norm": 841.349853515625, - "learning_rate": 4.088983171820901e-05, - "loss": 52.9514, - "step": 88170 - }, - { - "epoch": 0.3562583580117729, - "grad_norm": 725.3030395507812, - "learning_rate": 4.088713669086977e-05, - "loss": 49.6578, - "step": 88180 - }, - { - "epoch": 0.35629875927714055, - "grad_norm": 1026.1910400390625, - "learning_rate": 4.088444135380262e-05, - "loss": 63.2285, - "step": 88190 - }, - { - "epoch": 0.3563391605425082, - "grad_norm": 1608.8538818359375, - "learning_rate": 4.088174570706011e-05, - "loss": 65.9221, - "step": 88200 - }, - { - "epoch": 0.35637956180787583, - "grad_norm": 608.7267456054688, - "learning_rate": 4.0879049750694795e-05, - "loss": 59.0359, - "step": 88210 - }, - { - "epoch": 0.3564199630732435, - "grad_norm": 3048.426513671875, - "learning_rate": 4.0876353484759224e-05, - "loss": 100.1788, - "step": 88220 - }, - { - "epoch": 0.35646036433861106, - "grad_norm": 540.722412109375, - "learning_rate": 4.087365690930597e-05, - "loss": 66.9113, - "step": 88230 - }, - { - "epoch": 0.3565007656039787, - "grad_norm": 781.4616088867188, - "learning_rate": 4.0870960024387596e-05, - "loss": 103.4365, - "step": 88240 - }, - { - "epoch": 0.35654116686934634, - "grad_norm": 1404.780029296875, - "learning_rate": 4.086826283005669e-05, - "loss": 58.6043, - "step": 88250 - }, - { - "epoch": 0.356581568134714, - "grad_norm": 440.78472900390625, - "learning_rate": 4.0865565326365835e-05, - "loss": 43.4884, - "step": 88260 - }, - { - "epoch": 0.3566219694000816, - "grad_norm": 425.8787841796875, - "learning_rate": 4.086286751336761e-05, - "loss": 44.2717, - "step": 88270 - }, - { - "epoch": 0.35666237066544926, - "grad_norm": 386.0218200683594, - "learning_rate": 4.0860169391114625e-05, - "loss": 52.1067, - "step": 88280 - }, - { - "epoch": 0.3567027719308169, - "grad_norm": 421.0408935546875, - "learning_rate": 4.085747095965946e-05, - "loss": 63.7093, - "step": 88290 - }, - { - "epoch": 0.3567431731961845, - "grad_norm": 688.8589477539062, - "learning_rate": 4.085477221905474e-05, - "loss": 63.6186, - "step": 88300 - }, - { - "epoch": 0.3567835744615521, - "grad_norm": 2102.650634765625, - "learning_rate": 4.085207316935308e-05, - "loss": 120.8342, - "step": 88310 - }, - { - "epoch": 0.35682397572691976, - "grad_norm": 644.7640380859375, - "learning_rate": 4.084937381060708e-05, - "loss": 71.6069, - "step": 88320 - }, - { - "epoch": 0.3568643769922874, - "grad_norm": 718.741455078125, - "learning_rate": 4.084667414286939e-05, - "loss": 49.9685, - "step": 88330 - }, - { - "epoch": 0.35690477825765504, - "grad_norm": 1353.2684326171875, - "learning_rate": 4.0843974166192614e-05, - "loss": 89.7978, - "step": 88340 - }, - { - "epoch": 0.3569451795230227, - "grad_norm": 518.8037109375, - "learning_rate": 4.0841273880629416e-05, - "loss": 68.2225, - "step": 88350 - }, - { - "epoch": 0.35698558078839027, - "grad_norm": 491.72265625, - "learning_rate": 4.083857328623243e-05, - "loss": 62.1392, - "step": 88360 - }, - { - "epoch": 0.3570259820537579, - "grad_norm": 713.7077026367188, - "learning_rate": 4.0835872383054296e-05, - "loss": 59.1912, - "step": 88370 - }, - { - "epoch": 0.35706638331912555, - "grad_norm": 546.5416259765625, - "learning_rate": 4.083317117114768e-05, - "loss": 65.1588, - "step": 88380 - }, - { - "epoch": 0.3571067845844932, - "grad_norm": 749.3577270507812, - "learning_rate": 4.083046965056524e-05, - "loss": 65.049, - "step": 88390 - }, - { - "epoch": 0.3571471858498608, - "grad_norm": 808.5428466796875, - "learning_rate": 4.082776782135964e-05, - "loss": 59.8705, - "step": 88400 - }, - { - "epoch": 0.35718758711522847, - "grad_norm": 372.74261474609375, - "learning_rate": 4.082506568358357e-05, - "loss": 62.2298, - "step": 88410 - }, - { - "epoch": 0.3572279883805961, - "grad_norm": 574.08447265625, - "learning_rate": 4.082236323728968e-05, - "loss": 56.4968, - "step": 88420 - }, - { - "epoch": 0.3572683896459637, - "grad_norm": 901.8555297851562, - "learning_rate": 4.0819660482530684e-05, - "loss": 67.4959, - "step": 88430 - }, - { - "epoch": 0.35730879091133133, - "grad_norm": 2012.2177734375, - "learning_rate": 4.0816957419359264e-05, - "loss": 60.3428, - "step": 88440 - }, - { - "epoch": 0.35734919217669897, - "grad_norm": 434.2742614746094, - "learning_rate": 4.0814254047828116e-05, - "loss": 55.9326, - "step": 88450 - }, - { - "epoch": 0.3573895934420666, - "grad_norm": 1581.9569091796875, - "learning_rate": 4.081155036798994e-05, - "loss": 80.8084, - "step": 88460 - }, - { - "epoch": 0.35742999470743425, - "grad_norm": 980.014404296875, - "learning_rate": 4.080884637989745e-05, - "loss": 53.9677, - "step": 88470 - }, - { - "epoch": 0.3574703959728019, - "grad_norm": 438.5036926269531, - "learning_rate": 4.080614208360336e-05, - "loss": 46.5922, - "step": 88480 - }, - { - "epoch": 0.3575107972381695, - "grad_norm": 1167.0660400390625, - "learning_rate": 4.080343747916039e-05, - "loss": 60.1899, - "step": 88490 - }, - { - "epoch": 0.3575511985035371, - "grad_norm": 1493.02734375, - "learning_rate": 4.080073256662127e-05, - "loss": 55.8727, - "step": 88500 - }, - { - "epoch": 0.35759159976890476, - "grad_norm": 643.5103759765625, - "learning_rate": 4.079802734603874e-05, - "loss": 75.9446, - "step": 88510 - }, - { - "epoch": 0.3576320010342724, - "grad_norm": 619.2804565429688, - "learning_rate": 4.079532181746553e-05, - "loss": 59.9599, - "step": 88520 - }, - { - "epoch": 0.35767240229964004, - "grad_norm": 699.5809936523438, - "learning_rate": 4.079261598095439e-05, - "loss": 75.2107, - "step": 88530 - }, - { - "epoch": 0.3577128035650077, - "grad_norm": 2605.017578125, - "learning_rate": 4.078990983655807e-05, - "loss": 78.6188, - "step": 88540 - }, - { - "epoch": 0.35775320483037526, - "grad_norm": 483.28961181640625, - "learning_rate": 4.078720338432933e-05, - "loss": 52.8732, - "step": 88550 - }, - { - "epoch": 0.3577936060957429, - "grad_norm": 371.9551086425781, - "learning_rate": 4.078449662432093e-05, - "loss": 67.858, - "step": 88560 - }, - { - "epoch": 0.35783400736111054, - "grad_norm": 539.9357299804688, - "learning_rate": 4.078178955658565e-05, - "loss": 62.4049, - "step": 88570 - }, - { - "epoch": 0.3578744086264782, - "grad_norm": 571.8364868164062, - "learning_rate": 4.077908218117625e-05, - "loss": 80.4749, - "step": 88580 - }, - { - "epoch": 0.3579148098918458, - "grad_norm": 1833.5181884765625, - "learning_rate": 4.077637449814552e-05, - "loss": 59.1497, - "step": 88590 - }, - { - "epoch": 0.35795521115721346, - "grad_norm": 1636.6201171875, - "learning_rate": 4.077366650754624e-05, - "loss": 69.4785, - "step": 88600 - }, - { - "epoch": 0.3579956124225811, - "grad_norm": 2520.92822265625, - "learning_rate": 4.077095820943122e-05, - "loss": 92.7431, - "step": 88610 - }, - { - "epoch": 0.3580360136879487, - "grad_norm": 781.56298828125, - "learning_rate": 4.0768249603853245e-05, - "loss": 66.4768, - "step": 88620 - }, - { - "epoch": 0.3580764149533163, - "grad_norm": 462.7001037597656, - "learning_rate": 4.0765540690865134e-05, - "loss": 39.243, - "step": 88630 - }, - { - "epoch": 0.35811681621868396, - "grad_norm": 1026.7464599609375, - "learning_rate": 4.076283147051968e-05, - "loss": 68.4941, - "step": 88640 - }, - { - "epoch": 0.3581572174840516, - "grad_norm": 558.1005859375, - "learning_rate": 4.0760121942869725e-05, - "loss": 75.5133, - "step": 88650 - }, - { - "epoch": 0.35819761874941924, - "grad_norm": 1109.0501708984375, - "learning_rate": 4.075741210796806e-05, - "loss": 89.089, - "step": 88660 - }, - { - "epoch": 0.3582380200147869, - "grad_norm": 1361.9794921875, - "learning_rate": 4.075470196586755e-05, - "loss": 81.0406, - "step": 88670 - }, - { - "epoch": 0.35827842128015447, - "grad_norm": 390.9439697265625, - "learning_rate": 4.075199151662101e-05, - "loss": 67.3965, - "step": 88680 - }, - { - "epoch": 0.3583188225455221, - "grad_norm": 433.46759033203125, - "learning_rate": 4.074928076028128e-05, - "loss": 59.471, - "step": 88690 - }, - { - "epoch": 0.35835922381088975, - "grad_norm": 3093.1083984375, - "learning_rate": 4.074656969690122e-05, - "loss": 67.4171, - "step": 88700 - }, - { - "epoch": 0.3583996250762574, - "grad_norm": 650.4321899414062, - "learning_rate": 4.0743858326533674e-05, - "loss": 57.6206, - "step": 88710 - }, - { - "epoch": 0.35844002634162503, - "grad_norm": 693.6144409179688, - "learning_rate": 4.0741146649231504e-05, - "loss": 69.4233, - "step": 88720 - }, - { - "epoch": 0.35848042760699267, - "grad_norm": 1305.1888427734375, - "learning_rate": 4.0738434665047575e-05, - "loss": 66.7869, - "step": 88730 - }, - { - "epoch": 0.3585208288723603, - "grad_norm": 671.9458618164062, - "learning_rate": 4.0735722374034764e-05, - "loss": 94.4328, - "step": 88740 - }, - { - "epoch": 0.3585612301377279, - "grad_norm": 782.94873046875, - "learning_rate": 4.073300977624594e-05, - "loss": 59.1198, - "step": 88750 - }, - { - "epoch": 0.35860163140309553, - "grad_norm": 808.3102416992188, - "learning_rate": 4.073029687173399e-05, - "loss": 65.3728, - "step": 88760 - }, - { - "epoch": 0.3586420326684632, - "grad_norm": 2808.516357421875, - "learning_rate": 4.0727583660551806e-05, - "loss": 57.111, - "step": 88770 - }, - { - "epoch": 0.3586824339338308, - "grad_norm": 905.2903442382812, - "learning_rate": 4.0724870142752284e-05, - "loss": 80.5446, - "step": 88780 - }, - { - "epoch": 0.35872283519919845, - "grad_norm": 245.458251953125, - "learning_rate": 4.0722156318388315e-05, - "loss": 81.0946, - "step": 88790 - }, - { - "epoch": 0.3587632364645661, - "grad_norm": 1166.0023193359375, - "learning_rate": 4.071944218751282e-05, - "loss": 62.5315, - "step": 88800 - }, - { - "epoch": 0.3588036377299337, - "grad_norm": 1779.5706787109375, - "learning_rate": 4.0716727750178704e-05, - "loss": 73.8024, - "step": 88810 - }, - { - "epoch": 0.3588440389953013, - "grad_norm": 3553.527587890625, - "learning_rate": 4.071401300643889e-05, - "loss": 107.6503, - "step": 88820 - }, - { - "epoch": 0.35888444026066896, - "grad_norm": 990.9931030273438, - "learning_rate": 4.0711297956346306e-05, - "loss": 51.3191, - "step": 88830 - }, - { - "epoch": 0.3589248415260366, - "grad_norm": 567.8832397460938, - "learning_rate": 4.070858259995387e-05, - "loss": 85.3655, - "step": 88840 - }, - { - "epoch": 0.35896524279140424, - "grad_norm": 978.09130859375, - "learning_rate": 4.070586693731454e-05, - "loss": 86.0509, - "step": 88850 - }, - { - "epoch": 0.3590056440567719, - "grad_norm": 654.3468017578125, - "learning_rate": 4.0703150968481246e-05, - "loss": 80.2084, - "step": 88860 - }, - { - "epoch": 0.35904604532213946, - "grad_norm": 219.86631774902344, - "learning_rate": 4.070043469350694e-05, - "loss": 61.1951, - "step": 88870 - }, - { - "epoch": 0.3590864465875071, - "grad_norm": 804.8790283203125, - "learning_rate": 4.069771811244457e-05, - "loss": 75.843, - "step": 88880 - }, - { - "epoch": 0.35912684785287474, - "grad_norm": 546.8237915039062, - "learning_rate": 4.0695001225347104e-05, - "loss": 51.4436, - "step": 88890 - }, - { - "epoch": 0.3591672491182424, - "grad_norm": 584.9393920898438, - "learning_rate": 4.0692284032267516e-05, - "loss": 50.8256, - "step": 88900 - }, - { - "epoch": 0.35920765038361, - "grad_norm": 2287.378173828125, - "learning_rate": 4.0689566533258765e-05, - "loss": 85.8929, - "step": 88910 - }, - { - "epoch": 0.35924805164897766, - "grad_norm": 798.82470703125, - "learning_rate": 4.068684872837384e-05, - "loss": 54.3008, - "step": 88920 - }, - { - "epoch": 0.3592884529143453, - "grad_norm": 631.5094604492188, - "learning_rate": 4.068413061766572e-05, - "loss": 67.0688, - "step": 88930 - }, - { - "epoch": 0.3593288541797129, - "grad_norm": 788.1356811523438, - "learning_rate": 4.068141220118741e-05, - "loss": 63.0645, - "step": 88940 - }, - { - "epoch": 0.3593692554450805, - "grad_norm": 424.5524597167969, - "learning_rate": 4.067869347899188e-05, - "loss": 54.8084, - "step": 88950 - }, - { - "epoch": 0.35940965671044817, - "grad_norm": 653.3056640625, - "learning_rate": 4.067597445113216e-05, - "loss": 50.6989, - "step": 88960 - }, - { - "epoch": 0.3594500579758158, - "grad_norm": 343.56512451171875, - "learning_rate": 4.067325511766124e-05, - "loss": 82.852, - "step": 88970 - }, - { - "epoch": 0.35949045924118345, - "grad_norm": 750.8988647460938, - "learning_rate": 4.067053547863215e-05, - "loss": 85.4746, - "step": 88980 - }, - { - "epoch": 0.3595308605065511, - "grad_norm": 1303.02197265625, - "learning_rate": 4.06678155340979e-05, - "loss": 50.7246, - "step": 88990 - }, - { - "epoch": 0.35957126177191867, - "grad_norm": 777.9240112304688, - "learning_rate": 4.066509528411152e-05, - "loss": 62.3671, - "step": 89000 - }, - { - "epoch": 0.3596116630372863, - "grad_norm": 304.01275634765625, - "learning_rate": 4.066237472872604e-05, - "loss": 68.3459, - "step": 89010 - }, - { - "epoch": 0.35965206430265395, - "grad_norm": 1499.3465576171875, - "learning_rate": 4.0659653867994496e-05, - "loss": 69.0432, - "step": 89020 - }, - { - "epoch": 0.3596924655680216, - "grad_norm": 1114.2425537109375, - "learning_rate": 4.065693270196995e-05, - "loss": 68.1484, - "step": 89030 - }, - { - "epoch": 0.35973286683338923, - "grad_norm": 738.8199462890625, - "learning_rate": 4.065421123070543e-05, - "loss": 59.836, - "step": 89040 - }, - { - "epoch": 0.35977326809875687, - "grad_norm": 616.7838134765625, - "learning_rate": 4.065148945425401e-05, - "loss": 61.9072, - "step": 89050 - }, - { - "epoch": 0.3598136693641245, - "grad_norm": 215.50350952148438, - "learning_rate": 4.064876737266874e-05, - "loss": 81.0354, - "step": 89060 - }, - { - "epoch": 0.3598540706294921, - "grad_norm": 344.2589111328125, - "learning_rate": 4.06460449860027e-05, - "loss": 61.0841, - "step": 89070 - }, - { - "epoch": 0.35989447189485974, - "grad_norm": 740.9758911132812, - "learning_rate": 4.064332229430895e-05, - "loss": 53.366, - "step": 89080 - }, - { - "epoch": 0.3599348731602274, - "grad_norm": 717.7742919921875, - "learning_rate": 4.0640599297640584e-05, - "loss": 42.3575, - "step": 89090 - }, - { - "epoch": 0.359975274425595, - "grad_norm": 1074.0352783203125, - "learning_rate": 4.063787599605068e-05, - "loss": 79.3194, - "step": 89100 - }, - { - "epoch": 0.36001567569096266, - "grad_norm": 1248.140869140625, - "learning_rate": 4.063515238959233e-05, - "loss": 60.0265, - "step": 89110 - }, - { - "epoch": 0.3600560769563303, - "grad_norm": 400.5279235839844, - "learning_rate": 4.063242847831864e-05, - "loss": 56.0902, - "step": 89120 - }, - { - "epoch": 0.3600964782216979, - "grad_norm": 4143.07568359375, - "learning_rate": 4.06297042622827e-05, - "loss": 77.4453, - "step": 89130 - }, - { - "epoch": 0.3601368794870655, - "grad_norm": 520.5917358398438, - "learning_rate": 4.062697974153764e-05, - "loss": 56.0878, - "step": 89140 - }, - { - "epoch": 0.36017728075243316, - "grad_norm": 1186.0679931640625, - "learning_rate": 4.062425491613656e-05, - "loss": 70.4133, - "step": 89150 - }, - { - "epoch": 0.3602176820178008, - "grad_norm": 972.8978271484375, - "learning_rate": 4.062152978613258e-05, - "loss": 47.6216, - "step": 89160 - }, - { - "epoch": 0.36025808328316844, - "grad_norm": 1175.881591796875, - "learning_rate": 4.061880435157884e-05, - "loss": 52.5779, - "step": 89170 - }, - { - "epoch": 0.3602984845485361, - "grad_norm": 1707.835205078125, - "learning_rate": 4.061607861252847e-05, - "loss": 49.4881, - "step": 89180 - }, - { - "epoch": 0.36033888581390366, - "grad_norm": 898.6934814453125, - "learning_rate": 4.0613352569034615e-05, - "loss": 83.9531, - "step": 89190 - }, - { - "epoch": 0.3603792870792713, - "grad_norm": 791.3284912109375, - "learning_rate": 4.0610626221150394e-05, - "loss": 56.8294, - "step": 89200 - }, - { - "epoch": 0.36041968834463894, - "grad_norm": 439.3869323730469, - "learning_rate": 4.060789956892899e-05, - "loss": 78.4056, - "step": 89210 - }, - { - "epoch": 0.3604600896100066, - "grad_norm": 306.3957214355469, - "learning_rate": 4.060517261242355e-05, - "loss": 53.2591, - "step": 89220 - }, - { - "epoch": 0.3605004908753742, - "grad_norm": 862.3554077148438, - "learning_rate": 4.060244535168723e-05, - "loss": 57.5533, - "step": 89230 - }, - { - "epoch": 0.36054089214074186, - "grad_norm": 631.755859375, - "learning_rate": 4.0599717786773204e-05, - "loss": 46.8937, - "step": 89240 - }, - { - "epoch": 0.3605812934061095, - "grad_norm": 1259.4813232421875, - "learning_rate": 4.059698991773466e-05, - "loss": 62.3537, - "step": 89250 - }, - { - "epoch": 0.3606216946714771, - "grad_norm": 467.6385192871094, - "learning_rate": 4.059426174462476e-05, - "loss": 65.602, - "step": 89260 - }, - { - "epoch": 0.36066209593684473, - "grad_norm": 492.99609375, - "learning_rate": 4.0591533267496694e-05, - "loss": 106.2255, - "step": 89270 - }, - { - "epoch": 0.36070249720221237, - "grad_norm": 893.2991943359375, - "learning_rate": 4.058880448640367e-05, - "loss": 96.5574, - "step": 89280 - }, - { - "epoch": 0.36074289846758, - "grad_norm": 645.7114868164062, - "learning_rate": 4.058607540139887e-05, - "loss": 90.8627, - "step": 89290 - }, - { - "epoch": 0.36078329973294765, - "grad_norm": 3922.514404296875, - "learning_rate": 4.0583346012535506e-05, - "loss": 111.6305, - "step": 89300 - }, - { - "epoch": 0.3608237009983153, - "grad_norm": 904.625, - "learning_rate": 4.058061631986679e-05, - "loss": 79.8804, - "step": 89310 - }, - { - "epoch": 0.3608641022636829, - "grad_norm": 384.1736755371094, - "learning_rate": 4.057788632344593e-05, - "loss": 60.1387, - "step": 89320 - }, - { - "epoch": 0.3609045035290505, - "grad_norm": 675.1691284179688, - "learning_rate": 4.0575156023326166e-05, - "loss": 60.848, - "step": 89330 - }, - { - "epoch": 0.36094490479441815, - "grad_norm": 963.3970947265625, - "learning_rate": 4.0572425419560714e-05, - "loss": 57.821, - "step": 89340 - }, - { - "epoch": 0.3609853060597858, - "grad_norm": 0.0, - "learning_rate": 4.056969451220282e-05, - "loss": 72.7515, - "step": 89350 - }, - { - "epoch": 0.36102570732515343, - "grad_norm": 610.1162719726562, - "learning_rate": 4.0566963301305705e-05, - "loss": 50.6369, - "step": 89360 - }, - { - "epoch": 0.3610661085905211, - "grad_norm": 643.9298706054688, - "learning_rate": 4.056423178692262e-05, - "loss": 39.3118, - "step": 89370 - }, - { - "epoch": 0.3611065098558887, - "grad_norm": 531.35693359375, - "learning_rate": 4.056149996910683e-05, - "loss": 75.3905, - "step": 89380 - }, - { - "epoch": 0.3611469111212563, - "grad_norm": 503.90777587890625, - "learning_rate": 4.05587678479116e-05, - "loss": 65.8769, - "step": 89390 - }, - { - "epoch": 0.36118731238662394, - "grad_norm": 1145.5985107421875, - "learning_rate": 4.055603542339016e-05, - "loss": 51.2441, - "step": 89400 - }, - { - "epoch": 0.3612277136519916, - "grad_norm": 411.92626953125, - "learning_rate": 4.055330269559581e-05, - "loss": 35.5195, - "step": 89410 - }, - { - "epoch": 0.3612681149173592, - "grad_norm": 745.9331665039062, - "learning_rate": 4.055056966458182e-05, - "loss": 58.2253, - "step": 89420 - }, - { - "epoch": 0.36130851618272686, - "grad_norm": 5621.34814453125, - "learning_rate": 4.054783633040146e-05, - "loss": 78.9581, - "step": 89430 - }, - { - "epoch": 0.3613489174480945, - "grad_norm": 5587.95263671875, - "learning_rate": 4.054510269310803e-05, - "loss": 83.4821, - "step": 89440 - }, - { - "epoch": 0.3613893187134621, - "grad_norm": 402.0248107910156, - "learning_rate": 4.0542368752754825e-05, - "loss": 57.0781, - "step": 89450 - }, - { - "epoch": 0.3614297199788297, - "grad_norm": 1605.563720703125, - "learning_rate": 4.053963450939513e-05, - "loss": 90.5012, - "step": 89460 - }, - { - "epoch": 0.36147012124419736, - "grad_norm": 471.9956359863281, - "learning_rate": 4.053689996308227e-05, - "loss": 71.2476, - "step": 89470 - }, - { - "epoch": 0.361510522509565, - "grad_norm": 0.0, - "learning_rate": 4.053416511386954e-05, - "loss": 45.8093, - "step": 89480 - }, - { - "epoch": 0.36155092377493264, - "grad_norm": 800.1768188476562, - "learning_rate": 4.0531429961810264e-05, - "loss": 46.3764, - "step": 89490 - }, - { - "epoch": 0.3615913250403003, - "grad_norm": 731.527587890625, - "learning_rate": 4.052869450695776e-05, - "loss": 78.3263, - "step": 89500 - }, - { - "epoch": 0.36163172630566787, - "grad_norm": 1999.2166748046875, - "learning_rate": 4.052595874936537e-05, - "loss": 70.2021, - "step": 89510 - }, - { - "epoch": 0.3616721275710355, - "grad_norm": 0.0, - "learning_rate": 4.0523222689086414e-05, - "loss": 75.0036, - "step": 89520 - }, - { - "epoch": 0.36171252883640315, - "grad_norm": 896.73388671875, - "learning_rate": 4.052048632617424e-05, - "loss": 73.4701, - "step": 89530 - }, - { - "epoch": 0.3617529301017708, - "grad_norm": 1283.2412109375, - "learning_rate": 4.05177496606822e-05, - "loss": 50.1073, - "step": 89540 - }, - { - "epoch": 0.3617933313671384, - "grad_norm": 543.897705078125, - "learning_rate": 4.0515012692663646e-05, - "loss": 52.8131, - "step": 89550 - }, - { - "epoch": 0.36183373263250607, - "grad_norm": 1975.94384765625, - "learning_rate": 4.051227542217192e-05, - "loss": 92.001, - "step": 89560 - }, - { - "epoch": 0.3618741338978737, - "grad_norm": 247.90350341796875, - "learning_rate": 4.0509537849260404e-05, - "loss": 31.5658, - "step": 89570 - }, - { - "epoch": 0.3619145351632413, - "grad_norm": 859.4539184570312, - "learning_rate": 4.0506799973982465e-05, - "loss": 55.6403, - "step": 89580 - }, - { - "epoch": 0.36195493642860893, - "grad_norm": 1263.11474609375, - "learning_rate": 4.0504061796391474e-05, - "loss": 59.762, - "step": 89590 - }, - { - "epoch": 0.36199533769397657, - "grad_norm": 263.7984313964844, - "learning_rate": 4.050132331654082e-05, - "loss": 36.6857, - "step": 89600 - }, - { - "epoch": 0.3620357389593442, - "grad_norm": 599.0194702148438, - "learning_rate": 4.0498584534483877e-05, - "loss": 101.6762, - "step": 89610 - }, - { - "epoch": 0.36207614022471185, - "grad_norm": 1161.4105224609375, - "learning_rate": 4.0495845450274064e-05, - "loss": 73.1191, - "step": 89620 - }, - { - "epoch": 0.3621165414900795, - "grad_norm": 977.6044921875, - "learning_rate": 4.0493106063964754e-05, - "loss": 63.5236, - "step": 89630 - }, - { - "epoch": 0.3621569427554471, - "grad_norm": 653.26708984375, - "learning_rate": 4.0490366375609376e-05, - "loss": 99.8486, - "step": 89640 - }, - { - "epoch": 0.3621973440208147, - "grad_norm": 1127.0865478515625, - "learning_rate": 4.048762638526132e-05, - "loss": 51.8638, - "step": 89650 - }, - { - "epoch": 0.36223774528618236, - "grad_norm": 565.7835693359375, - "learning_rate": 4.048488609297402e-05, - "loss": 61.2772, - "step": 89660 - }, - { - "epoch": 0.36227814655155, - "grad_norm": 774.4669799804688, - "learning_rate": 4.0482145498800884e-05, - "loss": 61.0465, - "step": 89670 - }, - { - "epoch": 0.36231854781691764, - "grad_norm": 2059.27880859375, - "learning_rate": 4.047940460279537e-05, - "loss": 80.3147, - "step": 89680 - }, - { - "epoch": 0.3623589490822853, - "grad_norm": 890.2815551757812, - "learning_rate": 4.0476663405010874e-05, - "loss": 85.4335, - "step": 89690 - }, - { - "epoch": 0.3623993503476529, - "grad_norm": 1310.5477294921875, - "learning_rate": 4.047392190550087e-05, - "loss": 51.1065, - "step": 89700 - }, - { - "epoch": 0.3624397516130205, - "grad_norm": 640.0499877929688, - "learning_rate": 4.047118010431879e-05, - "loss": 79.5663, - "step": 89710 - }, - { - "epoch": 0.36248015287838814, - "grad_norm": 773.0057373046875, - "learning_rate": 4.0468438001518084e-05, - "loss": 56.4114, - "step": 89720 - }, - { - "epoch": 0.3625205541437558, - "grad_norm": 652.1478881835938, - "learning_rate": 4.046569559715221e-05, - "loss": 52.9334, - "step": 89730 - }, - { - "epoch": 0.3625609554091234, - "grad_norm": 1459.3680419921875, - "learning_rate": 4.0462952891274655e-05, - "loss": 71.04, - "step": 89740 - }, - { - "epoch": 0.36260135667449106, - "grad_norm": 1920.9996337890625, - "learning_rate": 4.046020988393885e-05, - "loss": 61.0197, - "step": 89750 - }, - { - "epoch": 0.3626417579398587, - "grad_norm": 1488.5030517578125, - "learning_rate": 4.045746657519831e-05, - "loss": 76.9832, - "step": 89760 - }, - { - "epoch": 0.3626821592052263, - "grad_norm": 852.4432983398438, - "learning_rate": 4.04547229651065e-05, - "loss": 61.3612, - "step": 89770 - }, - { - "epoch": 0.3627225604705939, - "grad_norm": 748.500244140625, - "learning_rate": 4.0451979053716906e-05, - "loss": 61.8753, - "step": 89780 - }, - { - "epoch": 0.36276296173596156, - "grad_norm": 988.9141235351562, - "learning_rate": 4.044923484108303e-05, - "loss": 53.8847, - "step": 89790 - }, - { - "epoch": 0.3628033630013292, - "grad_norm": 1280.90673828125, - "learning_rate": 4.044649032725836e-05, - "loss": 63.5749, - "step": 89800 - }, - { - "epoch": 0.36284376426669684, - "grad_norm": 1100.82177734375, - "learning_rate": 4.044374551229641e-05, - "loss": 87.8433, - "step": 89810 - }, - { - "epoch": 0.3628841655320645, - "grad_norm": 936.5944213867188, - "learning_rate": 4.0441000396250694e-05, - "loss": 62.9258, - "step": 89820 - }, - { - "epoch": 0.36292456679743207, - "grad_norm": 701.0770874023438, - "learning_rate": 4.0438254979174725e-05, - "loss": 50.3677, - "step": 89830 - }, - { - "epoch": 0.3629649680627997, - "grad_norm": 0.0, - "learning_rate": 4.043550926112203e-05, - "loss": 60.1964, - "step": 89840 - }, - { - "epoch": 0.36300536932816735, - "grad_norm": 546.4686889648438, - "learning_rate": 4.043276324214613e-05, - "loss": 53.8509, - "step": 89850 - }, - { - "epoch": 0.363045770593535, - "grad_norm": 1106.510009765625, - "learning_rate": 4.043001692230056e-05, - "loss": 69.3517, - "step": 89860 - }, - { - "epoch": 0.36308617185890263, - "grad_norm": 665.1596069335938, - "learning_rate": 4.042727030163888e-05, - "loss": 53.5625, - "step": 89870 - }, - { - "epoch": 0.36312657312427027, - "grad_norm": 640.2094116210938, - "learning_rate": 4.042452338021461e-05, - "loss": 90.0176, - "step": 89880 - }, - { - "epoch": 0.3631669743896379, - "grad_norm": 552.5271606445312, - "learning_rate": 4.0421776158081326e-05, - "loss": 39.7842, - "step": 89890 - }, - { - "epoch": 0.3632073756550055, - "grad_norm": 609.4852905273438, - "learning_rate": 4.041902863529256e-05, - "loss": 98.3096, - "step": 89900 - }, - { - "epoch": 0.36324777692037313, - "grad_norm": 1036.63671875, - "learning_rate": 4.041628081190191e-05, - "loss": 74.554, - "step": 89910 - }, - { - "epoch": 0.3632881781857408, - "grad_norm": 296.92547607421875, - "learning_rate": 4.041353268796293e-05, - "loss": 75.188, - "step": 89920 - }, - { - "epoch": 0.3633285794511084, - "grad_norm": 404.4271240234375, - "learning_rate": 4.041078426352918e-05, - "loss": 43.2436, - "step": 89930 - }, - { - "epoch": 0.36336898071647605, - "grad_norm": 1445.6839599609375, - "learning_rate": 4.0408035538654264e-05, - "loss": 48.7288, - "step": 89940 - }, - { - "epoch": 0.3634093819818437, - "grad_norm": 745.0489501953125, - "learning_rate": 4.040528651339176e-05, - "loss": 77.1038, - "step": 89950 - }, - { - "epoch": 0.3634497832472113, - "grad_norm": 543.2232055664062, - "learning_rate": 4.0402537187795274e-05, - "loss": 56.8202, - "step": 89960 - }, - { - "epoch": 0.3634901845125789, - "grad_norm": 557.49072265625, - "learning_rate": 4.039978756191839e-05, - "loss": 62.1664, - "step": 89970 - }, - { - "epoch": 0.36353058577794656, - "grad_norm": 612.9962768554688, - "learning_rate": 4.039703763581472e-05, - "loss": 82.6836, - "step": 89980 - }, - { - "epoch": 0.3635709870433142, - "grad_norm": 921.2637939453125, - "learning_rate": 4.039428740953787e-05, - "loss": 50.0055, - "step": 89990 - }, - { - "epoch": 0.36361138830868184, - "grad_norm": 1612.236328125, - "learning_rate": 4.039153688314145e-05, - "loss": 85.7117, - "step": 90000 - }, - { - "epoch": 0.3636517895740495, - "grad_norm": 656.6378173828125, - "learning_rate": 4.038878605667912e-05, - "loss": 84.2719, - "step": 90010 - }, - { - "epoch": 0.3636921908394171, - "grad_norm": 1089.3055419921875, - "learning_rate": 4.038603493020447e-05, - "loss": 76.3893, - "step": 90020 - }, - { - "epoch": 0.3637325921047847, - "grad_norm": 954.1974487304688, - "learning_rate": 4.038328350377115e-05, - "loss": 111.787, - "step": 90030 - }, - { - "epoch": 0.36377299337015234, - "grad_norm": 725.2201538085938, - "learning_rate": 4.0380531777432794e-05, - "loss": 60.9843, - "step": 90040 - }, - { - "epoch": 0.36381339463552, - "grad_norm": 1051.9312744140625, - "learning_rate": 4.037777975124306e-05, - "loss": 75.9995, - "step": 90050 - }, - { - "epoch": 0.3638537959008876, - "grad_norm": 1385.2197265625, - "learning_rate": 4.037502742525559e-05, - "loss": 85.4595, - "step": 90060 - }, - { - "epoch": 0.36389419716625526, - "grad_norm": 505.546875, - "learning_rate": 4.037227479952404e-05, - "loss": 56.1493, - "step": 90070 - }, - { - "epoch": 0.3639345984316229, - "grad_norm": 749.1838989257812, - "learning_rate": 4.036952187410208e-05, - "loss": 63.6978, - "step": 90080 - }, - { - "epoch": 0.3639749996969905, - "grad_norm": 534.1312866210938, - "learning_rate": 4.036676864904338e-05, - "loss": 57.1218, - "step": 90090 - }, - { - "epoch": 0.3640154009623581, - "grad_norm": 921.06005859375, - "learning_rate": 4.036401512440161e-05, - "loss": 58.3411, - "step": 90100 - }, - { - "epoch": 0.36405580222772577, - "grad_norm": 679.614990234375, - "learning_rate": 4.0361261300230465e-05, - "loss": 66.4312, - "step": 90110 - }, - { - "epoch": 0.3640962034930934, - "grad_norm": 511.14501953125, - "learning_rate": 4.035850717658362e-05, - "loss": 106.1777, - "step": 90120 - }, - { - "epoch": 0.36413660475846105, - "grad_norm": 1034.344482421875, - "learning_rate": 4.035575275351476e-05, - "loss": 78.6351, - "step": 90130 - }, - { - "epoch": 0.3641770060238287, - "grad_norm": 880.7720947265625, - "learning_rate": 4.0352998031077604e-05, - "loss": 67.1489, - "step": 90140 - }, - { - "epoch": 0.36421740728919627, - "grad_norm": 1041.2879638671875, - "learning_rate": 4.035024300932584e-05, - "loss": 59.1619, - "step": 90150 - }, - { - "epoch": 0.3642578085545639, - "grad_norm": 414.6109924316406, - "learning_rate": 4.0347487688313194e-05, - "loss": 57.6462, - "step": 90160 - }, - { - "epoch": 0.36429820981993155, - "grad_norm": 803.5387573242188, - "learning_rate": 4.034473206809337e-05, - "loss": 62.8699, - "step": 90170 - }, - { - "epoch": 0.3643386110852992, - "grad_norm": 982.3478393554688, - "learning_rate": 4.0341976148720095e-05, - "loss": 97.5246, - "step": 90180 - }, - { - "epoch": 0.36437901235066683, - "grad_norm": 951.2240600585938, - "learning_rate": 4.03392199302471e-05, - "loss": 51.8766, - "step": 90190 - }, - { - "epoch": 0.36441941361603447, - "grad_norm": 754.9638061523438, - "learning_rate": 4.033646341272811e-05, - "loss": 59.1747, - "step": 90200 - }, - { - "epoch": 0.3644598148814021, - "grad_norm": 1095.536376953125, - "learning_rate": 4.033370659621687e-05, - "loss": 43.2328, - "step": 90210 - }, - { - "epoch": 0.3645002161467697, - "grad_norm": 915.0740966796875, - "learning_rate": 4.033094948076713e-05, - "loss": 77.3148, - "step": 90220 - }, - { - "epoch": 0.36454061741213734, - "grad_norm": 502.77545166015625, - "learning_rate": 4.032819206643263e-05, - "loss": 41.8404, - "step": 90230 - }, - { - "epoch": 0.364581018677505, - "grad_norm": 352.51165771484375, - "learning_rate": 4.032543435326714e-05, - "loss": 55.2335, - "step": 90240 - }, - { - "epoch": 0.3646214199428726, - "grad_norm": 1066.30615234375, - "learning_rate": 4.0322676341324415e-05, - "loss": 87.1459, - "step": 90250 - }, - { - "epoch": 0.36466182120824026, - "grad_norm": 486.5985107421875, - "learning_rate": 4.0319918030658225e-05, - "loss": 67.0294, - "step": 90260 - }, - { - "epoch": 0.3647022224736079, - "grad_norm": 637.4968872070312, - "learning_rate": 4.031715942132235e-05, - "loss": 67.5639, - "step": 90270 - }, - { - "epoch": 0.3647426237389755, - "grad_norm": 919.53759765625, - "learning_rate": 4.031440051337056e-05, - "loss": 88.9865, - "step": 90280 - }, - { - "epoch": 0.3647830250043431, - "grad_norm": 551.5164184570312, - "learning_rate": 4.031164130685665e-05, - "loss": 68.1998, - "step": 90290 - }, - { - "epoch": 0.36482342626971076, - "grad_norm": 1867.7833251953125, - "learning_rate": 4.030888180183441e-05, - "loss": 80.087, - "step": 90300 - }, - { - "epoch": 0.3648638275350784, - "grad_norm": 731.1893920898438, - "learning_rate": 4.030612199835764e-05, - "loss": 49.1685, - "step": 90310 - }, - { - "epoch": 0.36490422880044604, - "grad_norm": 1434.2115478515625, - "learning_rate": 4.030336189648014e-05, - "loss": 79.8181, - "step": 90320 - }, - { - "epoch": 0.3649446300658137, - "grad_norm": 5654.36669921875, - "learning_rate": 4.030060149625573e-05, - "loss": 106.5813, - "step": 90330 - }, - { - "epoch": 0.3649850313311813, - "grad_norm": 1383.1339111328125, - "learning_rate": 4.02978407977382e-05, - "loss": 81.1142, - "step": 90340 - }, - { - "epoch": 0.3650254325965489, - "grad_norm": 1352.9986572265625, - "learning_rate": 4.0295079800981395e-05, - "loss": 75.8624, - "step": 90350 - }, - { - "epoch": 0.36506583386191654, - "grad_norm": 821.6564331054688, - "learning_rate": 4.029231850603914e-05, - "loss": 87.5254, - "step": 90360 - }, - { - "epoch": 0.3651062351272842, - "grad_norm": 375.5804138183594, - "learning_rate": 4.028955691296526e-05, - "loss": 43.4082, - "step": 90370 - }, - { - "epoch": 0.3651466363926518, - "grad_norm": 2169.375244140625, - "learning_rate": 4.0286795021813594e-05, - "loss": 67.1946, - "step": 90380 - }, - { - "epoch": 0.36518703765801946, - "grad_norm": 7678.23828125, - "learning_rate": 4.0284032832637985e-05, - "loss": 96.252, - "step": 90390 - }, - { - "epoch": 0.3652274389233871, - "grad_norm": 1672.2906494140625, - "learning_rate": 4.028127034549229e-05, - "loss": 76.2553, - "step": 90400 - }, - { - "epoch": 0.3652678401887547, - "grad_norm": 959.1575927734375, - "learning_rate": 4.027850756043037e-05, - "loss": 55.6581, - "step": 90410 - }, - { - "epoch": 0.36530824145412233, - "grad_norm": 646.5990600585938, - "learning_rate": 4.0275744477506074e-05, - "loss": 79.772, - "step": 90420 - }, - { - "epoch": 0.36534864271948997, - "grad_norm": 905.00439453125, - "learning_rate": 4.027298109677327e-05, - "loss": 40.6796, - "step": 90430 - }, - { - "epoch": 0.3653890439848576, - "grad_norm": 800.4560546875, - "learning_rate": 4.027021741828584e-05, - "loss": 52.6281, - "step": 90440 - }, - { - "epoch": 0.36542944525022525, - "grad_norm": 507.7906188964844, - "learning_rate": 4.0267453442097664e-05, - "loss": 72.2229, - "step": 90450 - }, - { - "epoch": 0.3654698465155929, - "grad_norm": 371.3627624511719, - "learning_rate": 4.026468916826262e-05, - "loss": 65.535, - "step": 90460 - }, - { - "epoch": 0.3655102477809605, - "grad_norm": 393.8462219238281, - "learning_rate": 4.02619245968346e-05, - "loss": 75.8037, - "step": 90470 - }, - { - "epoch": 0.3655506490463281, - "grad_norm": 460.1830749511719, - "learning_rate": 4.0259159727867504e-05, - "loss": 82.6051, - "step": 90480 - }, - { - "epoch": 0.36559105031169575, - "grad_norm": 521.975341796875, - "learning_rate": 4.025639456141523e-05, - "loss": 61.9708, - "step": 90490 - }, - { - "epoch": 0.3656314515770634, - "grad_norm": 1583.7652587890625, - "learning_rate": 4.02536290975317e-05, - "loss": 64.9353, - "step": 90500 - }, - { - "epoch": 0.36567185284243103, - "grad_norm": 465.25244140625, - "learning_rate": 4.02508633362708e-05, - "loss": 65.8302, - "step": 90510 - }, - { - "epoch": 0.3657122541077987, - "grad_norm": 293.57965087890625, - "learning_rate": 4.024809727768648e-05, - "loss": 45.0099, - "step": 90520 - }, - { - "epoch": 0.3657526553731663, - "grad_norm": 536.3772583007812, - "learning_rate": 4.024533092183266e-05, - "loss": 74.4069, - "step": 90530 - }, - { - "epoch": 0.3657930566385339, - "grad_norm": 479.0234680175781, - "learning_rate": 4.024256426876325e-05, - "loss": 51.6421, - "step": 90540 - }, - { - "epoch": 0.36583345790390154, - "grad_norm": 1907.06494140625, - "learning_rate": 4.02397973185322e-05, - "loss": 77.2531, - "step": 90550 - }, - { - "epoch": 0.3658738591692692, - "grad_norm": 1270.7117919921875, - "learning_rate": 4.023703007119347e-05, - "loss": 59.475, - "step": 90560 - }, - { - "epoch": 0.3659142604346368, - "grad_norm": 380.741943359375, - "learning_rate": 4.023426252680098e-05, - "loss": 59.4132, - "step": 90570 - }, - { - "epoch": 0.36595466170000446, - "grad_norm": 1124.605224609375, - "learning_rate": 4.023149468540871e-05, - "loss": 67.3577, - "step": 90580 - }, - { - "epoch": 0.3659950629653721, - "grad_norm": 1151.210205078125, - "learning_rate": 4.02287265470706e-05, - "loss": 63.5007, - "step": 90590 - }, - { - "epoch": 0.3660354642307397, - "grad_norm": 0.0, - "learning_rate": 4.022595811184064e-05, - "loss": 38.7889, - "step": 90600 - }, - { - "epoch": 0.3660758654961073, - "grad_norm": 531.6884155273438, - "learning_rate": 4.022318937977277e-05, - "loss": 70.17, - "step": 90610 - }, - { - "epoch": 0.36611626676147496, - "grad_norm": 928.3363647460938, - "learning_rate": 4.022042035092101e-05, - "loss": 63.7609, - "step": 90620 - }, - { - "epoch": 0.3661566680268426, - "grad_norm": 1050.515380859375, - "learning_rate": 4.02176510253393e-05, - "loss": 62.498, - "step": 90630 - }, - { - "epoch": 0.36619706929221024, - "grad_norm": 1077.6502685546875, - "learning_rate": 4.021488140308165e-05, - "loss": 90.2313, - "step": 90640 - }, - { - "epoch": 0.3662374705575779, - "grad_norm": 1099.2191162109375, - "learning_rate": 4.021211148420205e-05, - "loss": 65.9301, - "step": 90650 - }, - { - "epoch": 0.3662778718229455, - "grad_norm": 633.58935546875, - "learning_rate": 4.020934126875452e-05, - "loss": 41.3643, - "step": 90660 - }, - { - "epoch": 0.3663182730883131, - "grad_norm": 1048.8428955078125, - "learning_rate": 4.0206570756793046e-05, - "loss": 63.0074, - "step": 90670 - }, - { - "epoch": 0.36635867435368075, - "grad_norm": 369.955810546875, - "learning_rate": 4.020379994837164e-05, - "loss": 76.6917, - "step": 90680 - }, - { - "epoch": 0.3663990756190484, - "grad_norm": 654.5360717773438, - "learning_rate": 4.020102884354433e-05, - "loss": 71.9556, - "step": 90690 - }, - { - "epoch": 0.366439476884416, - "grad_norm": 689.0057983398438, - "learning_rate": 4.019825744236514e-05, - "loss": 68.2348, - "step": 90700 - }, - { - "epoch": 0.36647987814978367, - "grad_norm": 408.1641540527344, - "learning_rate": 4.0195485744888096e-05, - "loss": 99.4495, - "step": 90710 - }, - { - "epoch": 0.3665202794151513, - "grad_norm": 817.236328125, - "learning_rate": 4.019271375116722e-05, - "loss": 72.3637, - "step": 90720 - }, - { - "epoch": 0.3665606806805189, - "grad_norm": 1630.006103515625, - "learning_rate": 4.018994146125659e-05, - "loss": 85.6407, - "step": 90730 - }, - { - "epoch": 0.36660108194588653, - "grad_norm": 1074.2464599609375, - "learning_rate": 4.0187168875210216e-05, - "loss": 59.7042, - "step": 90740 - }, - { - "epoch": 0.36664148321125417, - "grad_norm": 380.92657470703125, - "learning_rate": 4.018439599308217e-05, - "loss": 81.9409, - "step": 90750 - }, - { - "epoch": 0.3666818844766218, - "grad_norm": 0.0, - "learning_rate": 4.0181622814926504e-05, - "loss": 81.8555, - "step": 90760 - }, - { - "epoch": 0.36672228574198945, - "grad_norm": 703.6498413085938, - "learning_rate": 4.0178849340797285e-05, - "loss": 67.2053, - "step": 90770 - }, - { - "epoch": 0.3667626870073571, - "grad_norm": 706.4108276367188, - "learning_rate": 4.0176075570748596e-05, - "loss": 49.0458, - "step": 90780 - }, - { - "epoch": 0.3668030882727247, - "grad_norm": 2707.36572265625, - "learning_rate": 4.017330150483449e-05, - "loss": 53.4966, - "step": 90790 - }, - { - "epoch": 0.3668434895380923, - "grad_norm": 472.0765075683594, - "learning_rate": 4.017052714310906e-05, - "loss": 38.695, - "step": 90800 - }, - { - "epoch": 0.36688389080345996, - "grad_norm": 786.6532592773438, - "learning_rate": 4.0167752485626385e-05, - "loss": 52.5584, - "step": 90810 - }, - { - "epoch": 0.3669242920688276, - "grad_norm": 680.0988159179688, - "learning_rate": 4.0164977532440584e-05, - "loss": 55.2876, - "step": 90820 - }, - { - "epoch": 0.36696469333419524, - "grad_norm": 1092.18359375, - "learning_rate": 4.0162202283605725e-05, - "loss": 55.0247, - "step": 90830 - }, - { - "epoch": 0.3670050945995629, - "grad_norm": 944.074951171875, - "learning_rate": 4.015942673917593e-05, - "loss": 72.8051, - "step": 90840 - }, - { - "epoch": 0.3670454958649305, - "grad_norm": 1217.025634765625, - "learning_rate": 4.015665089920531e-05, - "loss": 74.4885, - "step": 90850 - }, - { - "epoch": 0.3670858971302981, - "grad_norm": 710.3255004882812, - "learning_rate": 4.0153874763747976e-05, - "loss": 72.1298, - "step": 90860 - }, - { - "epoch": 0.36712629839566574, - "grad_norm": 494.13946533203125, - "learning_rate": 4.015109833285805e-05, - "loss": 66.7202, - "step": 90870 - }, - { - "epoch": 0.3671666996610334, - "grad_norm": 693.8568115234375, - "learning_rate": 4.0148321606589656e-05, - "loss": 60.7179, - "step": 90880 - }, - { - "epoch": 0.367207100926401, - "grad_norm": 548.3522338867188, - "learning_rate": 4.014554458499694e-05, - "loss": 47.7584, - "step": 90890 - }, - { - "epoch": 0.36724750219176866, - "grad_norm": 885.053466796875, - "learning_rate": 4.014276726813404e-05, - "loss": 26.558, - "step": 90900 - }, - { - "epoch": 0.3672879034571363, - "grad_norm": 1183.4205322265625, - "learning_rate": 4.013998965605509e-05, - "loss": 71.821, - "step": 90910 - }, - { - "epoch": 0.3673283047225039, - "grad_norm": 1207.58203125, - "learning_rate": 4.013721174881425e-05, - "loss": 49.7049, - "step": 90920 - }, - { - "epoch": 0.3673687059878715, - "grad_norm": 1142.948486328125, - "learning_rate": 4.013443354646567e-05, - "loss": 74.3073, - "step": 90930 - }, - { - "epoch": 0.36740910725323916, - "grad_norm": 0.0, - "learning_rate": 4.0131655049063514e-05, - "loss": 50.2877, - "step": 90940 - }, - { - "epoch": 0.3674495085186068, - "grad_norm": 2414.29541015625, - "learning_rate": 4.012887625666195e-05, - "loss": 76.8267, - "step": 90950 - }, - { - "epoch": 0.36748990978397444, - "grad_norm": 682.6073608398438, - "learning_rate": 4.012609716931517e-05, - "loss": 46.8848, - "step": 90960 - }, - { - "epoch": 0.3675303110493421, - "grad_norm": 648.0629272460938, - "learning_rate": 4.012331778707732e-05, - "loss": 57.106, - "step": 90970 - }, - { - "epoch": 0.3675707123147097, - "grad_norm": 976.9451293945312, - "learning_rate": 4.012053811000262e-05, - "loss": 56.0116, - "step": 90980 - }, - { - "epoch": 0.3676111135800773, - "grad_norm": 712.3170166015625, - "learning_rate": 4.0117758138145235e-05, - "loss": 72.1889, - "step": 90990 - }, - { - "epoch": 0.36765151484544495, - "grad_norm": 579.1261596679688, - "learning_rate": 4.011497787155938e-05, - "loss": 54.2764, - "step": 91000 - }, - { - "epoch": 0.3676919161108126, - "grad_norm": 885.5346069335938, - "learning_rate": 4.0112197310299235e-05, - "loss": 78.5092, - "step": 91010 - }, - { - "epoch": 0.36773231737618023, - "grad_norm": 661.86572265625, - "learning_rate": 4.010941645441904e-05, - "loss": 68.8041, - "step": 91020 - }, - { - "epoch": 0.36777271864154787, - "grad_norm": 474.86328125, - "learning_rate": 4.010663530397298e-05, - "loss": 62.9406, - "step": 91030 - }, - { - "epoch": 0.3678131199069155, - "grad_norm": 476.5716857910156, - "learning_rate": 4.01038538590153e-05, - "loss": 102.7171, - "step": 91040 - }, - { - "epoch": 0.3678535211722831, - "grad_norm": 696.08837890625, - "learning_rate": 4.0101072119600196e-05, - "loss": 97.6508, - "step": 91050 - }, - { - "epoch": 0.36789392243765073, - "grad_norm": 751.9807739257812, - "learning_rate": 4.009829008578192e-05, - "loss": 85.6849, - "step": 91060 - }, - { - "epoch": 0.3679343237030184, - "grad_norm": 505.8662109375, - "learning_rate": 4.0095507757614717e-05, - "loss": 54.5299, - "step": 91070 - }, - { - "epoch": 0.367974724968386, - "grad_norm": 711.6263427734375, - "learning_rate": 4.009272513515281e-05, - "loss": 58.3999, - "step": 91080 - }, - { - "epoch": 0.36801512623375365, - "grad_norm": 623.3084106445312, - "learning_rate": 4.008994221845046e-05, - "loss": 65.1364, - "step": 91090 - }, - { - "epoch": 0.3680555274991213, - "grad_norm": 479.5476379394531, - "learning_rate": 4.0087159007561916e-05, - "loss": 40.831, - "step": 91100 - }, - { - "epoch": 0.3680959287644889, - "grad_norm": 969.3098754882812, - "learning_rate": 4.0084375502541446e-05, - "loss": 64.9479, - "step": 91110 - }, - { - "epoch": 0.3681363300298565, - "grad_norm": 267.7486572265625, - "learning_rate": 4.00815917034433e-05, - "loss": 107.4876, - "step": 91120 - }, - { - "epoch": 0.36817673129522416, - "grad_norm": 1258.0924072265625, - "learning_rate": 4.007880761032177e-05, - "loss": 63.7383, - "step": 91130 - }, - { - "epoch": 0.3682171325605918, - "grad_norm": 664.6405029296875, - "learning_rate": 4.0076023223231105e-05, - "loss": 65.6308, - "step": 91140 - }, - { - "epoch": 0.36825753382595944, - "grad_norm": 869.2908325195312, - "learning_rate": 4.007323854222562e-05, - "loss": 48.834, - "step": 91150 - }, - { - "epoch": 0.3682979350913271, - "grad_norm": 359.8714599609375, - "learning_rate": 4.007045356735959e-05, - "loss": 51.4937, - "step": 91160 - }, - { - "epoch": 0.3683383363566947, - "grad_norm": 454.6327209472656, - "learning_rate": 4.00676682986873e-05, - "loss": 80.0302, - "step": 91170 - }, - { - "epoch": 0.3683787376220623, - "grad_norm": 582.725341796875, - "learning_rate": 4.006488273626307e-05, - "loss": 81.7921, - "step": 91180 - }, - { - "epoch": 0.36841913888742994, - "grad_norm": 998.3577880859375, - "learning_rate": 4.006209688014119e-05, - "loss": 40.4945, - "step": 91190 - }, - { - "epoch": 0.3684595401527976, - "grad_norm": 951.7125854492188, - "learning_rate": 4.005931073037596e-05, - "loss": 54.5525, - "step": 91200 - }, - { - "epoch": 0.3684999414181652, - "grad_norm": 517.168701171875, - "learning_rate": 4.005652428702173e-05, - "loss": 52.3269, - "step": 91210 - }, - { - "epoch": 0.36854034268353286, - "grad_norm": 982.7772216796875, - "learning_rate": 4.0053737550132816e-05, - "loss": 65.8004, - "step": 91220 - }, - { - "epoch": 0.3685807439489005, - "grad_norm": 0.0, - "learning_rate": 4.005095051976353e-05, - "loss": 58.1448, - "step": 91230 - }, - { - "epoch": 0.3686211452142681, - "grad_norm": 1081.756103515625, - "learning_rate": 4.0048163195968214e-05, - "loss": 51.2047, - "step": 91240 - }, - { - "epoch": 0.3686615464796357, - "grad_norm": 837.7747802734375, - "learning_rate": 4.0045375578801214e-05, - "loss": 66.0523, - "step": 91250 - }, - { - "epoch": 0.36870194774500337, - "grad_norm": 316.6372375488281, - "learning_rate": 4.004258766831686e-05, - "loss": 64.1896, - "step": 91260 - }, - { - "epoch": 0.368742349010371, - "grad_norm": 820.6416015625, - "learning_rate": 4.0039799464569524e-05, - "loss": 83.5785, - "step": 91270 - }, - { - "epoch": 0.36878275027573865, - "grad_norm": 1047.304443359375, - "learning_rate": 4.003701096761355e-05, - "loss": 64.335, - "step": 91280 - }, - { - "epoch": 0.3688231515411063, - "grad_norm": 882.9609375, - "learning_rate": 4.0034222177503314e-05, - "loss": 74.856, - "step": 91290 - }, - { - "epoch": 0.36886355280647387, - "grad_norm": 758.5153198242188, - "learning_rate": 4.003143309429317e-05, - "loss": 57.2044, - "step": 91300 - }, - { - "epoch": 0.3689039540718415, - "grad_norm": 637.7432861328125, - "learning_rate": 4.0028643718037496e-05, - "loss": 67.143, - "step": 91310 - }, - { - "epoch": 0.36894435533720915, - "grad_norm": 982.8681640625, - "learning_rate": 4.0025854048790677e-05, - "loss": 56.672, - "step": 91320 - }, - { - "epoch": 0.3689847566025768, - "grad_norm": 975.6199951171875, - "learning_rate": 4.00230640866071e-05, - "loss": 77.8675, - "step": 91330 - }, - { - "epoch": 0.36902515786794443, - "grad_norm": 463.59063720703125, - "learning_rate": 4.0020273831541155e-05, - "loss": 47.0041, - "step": 91340 - }, - { - "epoch": 0.36906555913331207, - "grad_norm": 1927.79541015625, - "learning_rate": 4.001748328364724e-05, - "loss": 100.6176, - "step": 91350 - }, - { - "epoch": 0.3691059603986797, - "grad_norm": 614.8819580078125, - "learning_rate": 4.001469244297975e-05, - "loss": 63.3884, - "step": 91360 - }, - { - "epoch": 0.3691463616640473, - "grad_norm": 578.9991455078125, - "learning_rate": 4.00119013095931e-05, - "loss": 38.7527, - "step": 91370 - }, - { - "epoch": 0.36918676292941494, - "grad_norm": 424.689697265625, - "learning_rate": 4.0009109883541715e-05, - "loss": 71.5254, - "step": 91380 - }, - { - "epoch": 0.3692271641947826, - "grad_norm": 592.7898559570312, - "learning_rate": 4.000631816488001e-05, - "loss": 88.0966, - "step": 91390 - }, - { - "epoch": 0.3692675654601502, - "grad_norm": 699.6017456054688, - "learning_rate": 4.000352615366239e-05, - "loss": 65.126, - "step": 91400 - }, - { - "epoch": 0.36930796672551786, - "grad_norm": 3042.03662109375, - "learning_rate": 4.0000733849943313e-05, - "loss": 60.7142, - "step": 91410 - }, - { - "epoch": 0.3693483679908855, - "grad_norm": 799.2810668945312, - "learning_rate": 3.999794125377721e-05, - "loss": 36.586, - "step": 91420 - }, - { - "epoch": 0.3693887692562531, - "grad_norm": 968.9716796875, - "learning_rate": 3.999514836521851e-05, - "loss": 69.6768, - "step": 91430 - }, - { - "epoch": 0.3694291705216207, - "grad_norm": 1178.7864990234375, - "learning_rate": 3.999235518432168e-05, - "loss": 61.2758, - "step": 91440 - }, - { - "epoch": 0.36946957178698836, - "grad_norm": 490.9415283203125, - "learning_rate": 3.998956171114116e-05, - "loss": 92.8936, - "step": 91450 - }, - { - "epoch": 0.369509973052356, - "grad_norm": 429.26019287109375, - "learning_rate": 3.998676794573142e-05, - "loss": 54.8671, - "step": 91460 - }, - { - "epoch": 0.36955037431772364, - "grad_norm": 556.7578125, - "learning_rate": 3.998397388814693e-05, - "loss": 50.8339, - "step": 91470 - }, - { - "epoch": 0.3695907755830913, - "grad_norm": 745.5537719726562, - "learning_rate": 3.9981179538442146e-05, - "loss": 75.3068, - "step": 91480 - }, - { - "epoch": 0.3696311768484589, - "grad_norm": 1237.29638671875, - "learning_rate": 3.9978384896671564e-05, - "loss": 71.3501, - "step": 91490 - }, - { - "epoch": 0.3696715781138265, - "grad_norm": 846.2611083984375, - "learning_rate": 3.997558996288965e-05, - "loss": 68.3017, - "step": 91500 - }, - { - "epoch": 0.36971197937919414, - "grad_norm": 1087.683837890625, - "learning_rate": 3.9972794737150895e-05, - "loss": 89.0834, - "step": 91510 - }, - { - "epoch": 0.3697523806445618, - "grad_norm": 462.5960998535156, - "learning_rate": 3.996999921950981e-05, - "loss": 72.2417, - "step": 91520 - }, - { - "epoch": 0.3697927819099294, - "grad_norm": 623.9275512695312, - "learning_rate": 3.9967203410020875e-05, - "loss": 79.5964, - "step": 91530 - }, - { - "epoch": 0.36983318317529706, - "grad_norm": 2321.183837890625, - "learning_rate": 3.99644073087386e-05, - "loss": 100.5441, - "step": 91540 - }, - { - "epoch": 0.3698735844406647, - "grad_norm": 931.0929565429688, - "learning_rate": 3.9961610915717515e-05, - "loss": 69.4745, - "step": 91550 - }, - { - "epoch": 0.3699139857060323, - "grad_norm": 459.2430419921875, - "learning_rate": 3.9958814231012115e-05, - "loss": 48.2317, - "step": 91560 - }, - { - "epoch": 0.36995438697139993, - "grad_norm": 2218.796630859375, - "learning_rate": 3.9956017254676923e-05, - "loss": 73.4239, - "step": 91570 - }, - { - "epoch": 0.36999478823676757, - "grad_norm": 579.8865966796875, - "learning_rate": 3.995321998676648e-05, - "loss": 69.1222, - "step": 91580 - }, - { - "epoch": 0.3700351895021352, - "grad_norm": 805.66943359375, - "learning_rate": 3.995042242733532e-05, - "loss": 40.6279, - "step": 91590 - }, - { - "epoch": 0.37007559076750285, - "grad_norm": 1477.2010498046875, - "learning_rate": 3.9947624576437975e-05, - "loss": 54.183, - "step": 91600 - }, - { - "epoch": 0.3701159920328705, - "grad_norm": 1422.080078125, - "learning_rate": 3.994482643412899e-05, - "loss": 44.8291, - "step": 91610 - }, - { - "epoch": 0.3701563932982381, - "grad_norm": 596.1489868164062, - "learning_rate": 3.994202800046292e-05, - "loss": 50.5651, - "step": 91620 - }, - { - "epoch": 0.3701967945636057, - "grad_norm": 697.937744140625, - "learning_rate": 3.993922927549432e-05, - "loss": 51.1245, - "step": 91630 - }, - { - "epoch": 0.37023719582897335, - "grad_norm": 660.627197265625, - "learning_rate": 3.993643025927776e-05, - "loss": 73.9116, - "step": 91640 - }, - { - "epoch": 0.370277597094341, - "grad_norm": 735.249267578125, - "learning_rate": 3.9933630951867805e-05, - "loss": 41.1959, - "step": 91650 - }, - { - "epoch": 0.37031799835970863, - "grad_norm": 1717.015380859375, - "learning_rate": 3.9930831353319023e-05, - "loss": 66.5677, - "step": 91660 - }, - { - "epoch": 0.3703583996250763, - "grad_norm": 628.260009765625, - "learning_rate": 3.992803146368599e-05, - "loss": 50.1199, - "step": 91670 - }, - { - "epoch": 0.3703988008904439, - "grad_norm": 1197.982666015625, - "learning_rate": 3.99252312830233e-05, - "loss": 53.4161, - "step": 91680 - }, - { - "epoch": 0.3704392021558115, - "grad_norm": 759.8359375, - "learning_rate": 3.992243081138555e-05, - "loss": 49.2217, - "step": 91690 - }, - { - "epoch": 0.37047960342117914, - "grad_norm": 881.2600708007812, - "learning_rate": 3.9919630048827314e-05, - "loss": 75.7815, - "step": 91700 - }, - { - "epoch": 0.3705200046865468, - "grad_norm": 356.75958251953125, - "learning_rate": 3.991682899540322e-05, - "loss": 67.6897, - "step": 91710 - }, - { - "epoch": 0.3705604059519144, - "grad_norm": 1126.136474609375, - "learning_rate": 3.9914027651167866e-05, - "loss": 67.9846, - "step": 91720 - }, - { - "epoch": 0.37060080721728206, - "grad_norm": 467.5633544921875, - "learning_rate": 3.9911226016175866e-05, - "loss": 77.1551, - "step": 91730 - }, - { - "epoch": 0.3706412084826497, - "grad_norm": 590.6211547851562, - "learning_rate": 3.990842409048183e-05, - "loss": 52.3267, - "step": 91740 - }, - { - "epoch": 0.3706816097480173, - "grad_norm": 2134.7158203125, - "learning_rate": 3.99056218741404e-05, - "loss": 89.5028, - "step": 91750 - }, - { - "epoch": 0.3707220110133849, - "grad_norm": 545.798583984375, - "learning_rate": 3.990281936720619e-05, - "loss": 59.62, - "step": 91760 - }, - { - "epoch": 0.37076241227875256, - "grad_norm": 2029.9058837890625, - "learning_rate": 3.990001656973385e-05, - "loss": 59.6643, - "step": 91770 - }, - { - "epoch": 0.3708028135441202, - "grad_norm": 413.22491455078125, - "learning_rate": 3.9897213481778006e-05, - "loss": 48.1997, - "step": 91780 - }, - { - "epoch": 0.37084321480948784, - "grad_norm": 1251.12744140625, - "learning_rate": 3.9894410103393323e-05, - "loss": 59.8171, - "step": 91790 - }, - { - "epoch": 0.3708836160748555, - "grad_norm": 520.3812866210938, - "learning_rate": 3.989160643463445e-05, - "loss": 71.2307, - "step": 91800 - }, - { - "epoch": 0.3709240173402231, - "grad_norm": 866.8963012695312, - "learning_rate": 3.988880247555604e-05, - "loss": 102.0608, - "step": 91810 - }, - { - "epoch": 0.3709644186055907, - "grad_norm": 546.447998046875, - "learning_rate": 3.9885998226212764e-05, - "loss": 48.1409, - "step": 91820 - }, - { - "epoch": 0.37100481987095835, - "grad_norm": 1045.4818115234375, - "learning_rate": 3.988319368665928e-05, - "loss": 96.8548, - "step": 91830 - }, - { - "epoch": 0.371045221136326, - "grad_norm": 1263.72314453125, - "learning_rate": 3.988038885695028e-05, - "loss": 51.555, - "step": 91840 - }, - { - "epoch": 0.3710856224016936, - "grad_norm": 929.8193359375, - "learning_rate": 3.987758373714044e-05, - "loss": 48.9912, - "step": 91850 - }, - { - "epoch": 0.37112602366706127, - "grad_norm": 1874.9107666015625, - "learning_rate": 3.987477832728444e-05, - "loss": 48.1193, - "step": 91860 - }, - { - "epoch": 0.3711664249324289, - "grad_norm": 449.92388916015625, - "learning_rate": 3.987197262743697e-05, - "loss": 60.4172, - "step": 91870 - }, - { - "epoch": 0.3712068261977965, - "grad_norm": 3075.2314453125, - "learning_rate": 3.986916663765275e-05, - "loss": 66.7121, - "step": 91880 - }, - { - "epoch": 0.37124722746316413, - "grad_norm": 805.412841796875, - "learning_rate": 3.9866360357986467e-05, - "loss": 60.1465, - "step": 91890 - }, - { - "epoch": 0.37128762872853177, - "grad_norm": 925.1229248046875, - "learning_rate": 3.9863553788492834e-05, - "loss": 40.3686, - "step": 91900 - }, - { - "epoch": 0.3713280299938994, - "grad_norm": 600.6539306640625, - "learning_rate": 3.9860746929226567e-05, - "loss": 66.7164, - "step": 91910 - }, - { - "epoch": 0.37136843125926705, - "grad_norm": 1517.1585693359375, - "learning_rate": 3.985793978024239e-05, - "loss": 42.1975, - "step": 91920 - }, - { - "epoch": 0.3714088325246347, - "grad_norm": 1709.787353515625, - "learning_rate": 3.985513234159502e-05, - "loss": 61.1718, - "step": 91930 - }, - { - "epoch": 0.3714492337900023, - "grad_norm": 610.5380249023438, - "learning_rate": 3.985232461333921e-05, - "loss": 66.061, - "step": 91940 - }, - { - "epoch": 0.3714896350553699, - "grad_norm": 1254.192138671875, - "learning_rate": 3.984951659552968e-05, - "loss": 61.3981, - "step": 91950 - }, - { - "epoch": 0.37153003632073756, - "grad_norm": 725.9743041992188, - "learning_rate": 3.984670828822118e-05, - "loss": 50.754, - "step": 91960 - }, - { - "epoch": 0.3715704375861052, - "grad_norm": 898.7955322265625, - "learning_rate": 3.984389969146846e-05, - "loss": 80.66, - "step": 91970 - }, - { - "epoch": 0.37161083885147284, - "grad_norm": 1205.29345703125, - "learning_rate": 3.9841090805326264e-05, - "loss": 88.857, - "step": 91980 - }, - { - "epoch": 0.3716512401168405, - "grad_norm": 304.8878479003906, - "learning_rate": 3.983828162984937e-05, - "loss": 39.1823, - "step": 91990 - }, - { - "epoch": 0.3716916413822081, - "grad_norm": 474.86700439453125, - "learning_rate": 3.983547216509254e-05, - "loss": 55.8331, - "step": 92000 - }, - { - "epoch": 0.3717320426475757, - "grad_norm": 401.64044189453125, - "learning_rate": 3.9832662411110536e-05, - "loss": 69.07, - "step": 92010 - }, - { - "epoch": 0.37177244391294334, - "grad_norm": 338.5098571777344, - "learning_rate": 3.9829852367958144e-05, - "loss": 49.6506, - "step": 92020 - }, - { - "epoch": 0.371812845178311, - "grad_norm": 616.5515747070312, - "learning_rate": 3.9827042035690145e-05, - "loss": 55.922, - "step": 92030 - }, - { - "epoch": 0.3718532464436786, - "grad_norm": 696.16357421875, - "learning_rate": 3.9824231414361324e-05, - "loss": 56.3399, - "step": 92040 - }, - { - "epoch": 0.37189364770904626, - "grad_norm": 555.5895385742188, - "learning_rate": 3.982142050402649e-05, - "loss": 57.3369, - "step": 92050 - }, - { - "epoch": 0.3719340489744139, - "grad_norm": 777.5509033203125, - "learning_rate": 3.9818609304740414e-05, - "loss": 72.6481, - "step": 92060 - }, - { - "epoch": 0.3719744502397815, - "grad_norm": 752.47705078125, - "learning_rate": 3.981579781655794e-05, - "loss": 71.8169, - "step": 92070 - }, - { - "epoch": 0.3720148515051491, - "grad_norm": 973.05419921875, - "learning_rate": 3.981298603953385e-05, - "loss": 84.1057, - "step": 92080 - }, - { - "epoch": 0.37205525277051676, - "grad_norm": 956.1757202148438, - "learning_rate": 3.9810173973722974e-05, - "loss": 57.5237, - "step": 92090 - }, - { - "epoch": 0.3720956540358844, - "grad_norm": 606.1538696289062, - "learning_rate": 3.980736161918013e-05, - "loss": 51.6493, - "step": 92100 - }, - { - "epoch": 0.37213605530125204, - "grad_norm": 401.66314697265625, - "learning_rate": 3.980454897596014e-05, - "loss": 74.039, - "step": 92110 - }, - { - "epoch": 0.3721764565666197, - "grad_norm": 469.2101745605469, - "learning_rate": 3.980173604411786e-05, - "loss": 62.7403, - "step": 92120 - }, - { - "epoch": 0.3722168578319873, - "grad_norm": 814.7521362304688, - "learning_rate": 3.979892282370811e-05, - "loss": 93.9603, - "step": 92130 - }, - { - "epoch": 0.3722572590973549, - "grad_norm": 1050.54296875, - "learning_rate": 3.979610931478574e-05, - "loss": 79.703, - "step": 92140 - }, - { - "epoch": 0.37229766036272255, - "grad_norm": 426.4966735839844, - "learning_rate": 3.97932955174056e-05, - "loss": 74.3453, - "step": 92150 - }, - { - "epoch": 0.3723380616280902, - "grad_norm": 587.7415161132812, - "learning_rate": 3.979048143162255e-05, - "loss": 58.7905, - "step": 92160 - }, - { - "epoch": 0.37237846289345783, - "grad_norm": 1028.9864501953125, - "learning_rate": 3.978766705749145e-05, - "loss": 49.0045, - "step": 92170 - }, - { - "epoch": 0.37241886415882547, - "grad_norm": 722.3172607421875, - "learning_rate": 3.9784852395067166e-05, - "loss": 50.1978, - "step": 92180 - }, - { - "epoch": 0.3724592654241931, - "grad_norm": 1028.6400146484375, - "learning_rate": 3.978203744440457e-05, - "loss": 65.8073, - "step": 92190 - }, - { - "epoch": 0.3724996666895607, - "grad_norm": 350.545654296875, - "learning_rate": 3.977922220555855e-05, - "loss": 66.0355, - "step": 92200 - }, - { - "epoch": 0.37254006795492833, - "grad_norm": 888.4639282226562, - "learning_rate": 3.977640667858398e-05, - "loss": 61.8959, - "step": 92210 - }, - { - "epoch": 0.372580469220296, - "grad_norm": 938.3932495117188, - "learning_rate": 3.977359086353576e-05, - "loss": 69.4164, - "step": 92220 - }, - { - "epoch": 0.3726208704856636, - "grad_norm": 1005.269775390625, - "learning_rate": 3.977077476046877e-05, - "loss": 65.6192, - "step": 92230 - }, - { - "epoch": 0.37266127175103125, - "grad_norm": 557.5337524414062, - "learning_rate": 3.976795836943793e-05, - "loss": 59.2249, - "step": 92240 - }, - { - "epoch": 0.3727016730163989, - "grad_norm": 2857.20703125, - "learning_rate": 3.976514169049814e-05, - "loss": 68.5465, - "step": 92250 - }, - { - "epoch": 0.3727420742817665, - "grad_norm": 1169.2440185546875, - "learning_rate": 3.97623247237043e-05, - "loss": 46.9372, - "step": 92260 - }, - { - "epoch": 0.3727824755471341, - "grad_norm": 561.6387329101562, - "learning_rate": 3.9759507469111346e-05, - "loss": 59.3901, - "step": 92270 - }, - { - "epoch": 0.37282287681250176, - "grad_norm": 835.4718017578125, - "learning_rate": 3.9756689926774196e-05, - "loss": 85.8709, - "step": 92280 - }, - { - "epoch": 0.3728632780778694, - "grad_norm": 789.12841796875, - "learning_rate": 3.975387209674778e-05, - "loss": 71.775, - "step": 92290 - }, - { - "epoch": 0.37290367934323704, - "grad_norm": 0.0, - "learning_rate": 3.9751053979087035e-05, - "loss": 47.8555, - "step": 92300 - }, - { - "epoch": 0.3729440806086047, - "grad_norm": 776.5151977539062, - "learning_rate": 3.9748235573846894e-05, - "loss": 70.7797, - "step": 92310 - }, - { - "epoch": 0.3729844818739723, - "grad_norm": 987.219482421875, - "learning_rate": 3.97454168810823e-05, - "loss": 74.3305, - "step": 92320 - }, - { - "epoch": 0.3730248831393399, - "grad_norm": 508.5974426269531, - "learning_rate": 3.974259790084822e-05, - "loss": 80.4925, - "step": 92330 - }, - { - "epoch": 0.37306528440470754, - "grad_norm": 784.2496948242188, - "learning_rate": 3.973977863319961e-05, - "loss": 75.617, - "step": 92340 - }, - { - "epoch": 0.3731056856700752, - "grad_norm": 418.7388916015625, - "learning_rate": 3.973695907819142e-05, - "loss": 49.6771, - "step": 92350 - }, - { - "epoch": 0.3731460869354428, - "grad_norm": 1385.4676513671875, - "learning_rate": 3.973413923587862e-05, - "loss": 54.8952, - "step": 92360 - }, - { - "epoch": 0.37318648820081046, - "grad_norm": 1004.26611328125, - "learning_rate": 3.97313191063162e-05, - "loss": 66.2516, - "step": 92370 - }, - { - "epoch": 0.3732268894661781, - "grad_norm": 331.77972412109375, - "learning_rate": 3.9728498689559126e-05, - "loss": 62.8471, - "step": 92380 - }, - { - "epoch": 0.3732672907315457, - "grad_norm": 799.3560180664062, - "learning_rate": 3.972567798566238e-05, - "loss": 76.5567, - "step": 92390 - }, - { - "epoch": 0.3733076919969133, - "grad_norm": 733.6048583984375, - "learning_rate": 3.9722856994680966e-05, - "loss": 60.599, - "step": 92400 - }, - { - "epoch": 0.37334809326228097, - "grad_norm": 610.0665893554688, - "learning_rate": 3.9720035716669876e-05, - "loss": 58.7812, - "step": 92410 - }, - { - "epoch": 0.3733884945276486, - "grad_norm": 757.6383666992188, - "learning_rate": 3.971721415168411e-05, - "loss": 72.6537, - "step": 92420 - }, - { - "epoch": 0.37342889579301625, - "grad_norm": 628.03515625, - "learning_rate": 3.971439229977869e-05, - "loss": 58.028, - "step": 92430 - }, - { - "epoch": 0.3734692970583839, - "grad_norm": 567.295654296875, - "learning_rate": 3.9711570161008596e-05, - "loss": 54.5905, - "step": 92440 - }, - { - "epoch": 0.3735096983237515, - "grad_norm": 453.08258056640625, - "learning_rate": 3.9708747735428886e-05, - "loss": 53.7484, - "step": 92450 - }, - { - "epoch": 0.3735500995891191, - "grad_norm": 426.8596496582031, - "learning_rate": 3.9705925023094554e-05, - "loss": 73.5093, - "step": 92460 - }, - { - "epoch": 0.37359050085448675, - "grad_norm": 372.20684814453125, - "learning_rate": 3.970310202406064e-05, - "loss": 44.4281, - "step": 92470 - }, - { - "epoch": 0.3736309021198544, - "grad_norm": 714.4765014648438, - "learning_rate": 3.970027873838219e-05, - "loss": 78.6218, - "step": 92480 - }, - { - "epoch": 0.37367130338522203, - "grad_norm": 709.8063354492188, - "learning_rate": 3.969745516611424e-05, - "loss": 55.8991, - "step": 92490 - }, - { - "epoch": 0.37371170465058967, - "grad_norm": 840.8602294921875, - "learning_rate": 3.969463130731183e-05, - "loss": 68.27, - "step": 92500 - }, - { - "epoch": 0.3737521059159573, - "grad_norm": 1879.0968017578125, - "learning_rate": 3.969180716203002e-05, - "loss": 98.6429, - "step": 92510 - }, - { - "epoch": 0.3737925071813249, - "grad_norm": 925.2473754882812, - "learning_rate": 3.9688982730323865e-05, - "loss": 74.3842, - "step": 92520 - }, - { - "epoch": 0.37383290844669254, - "grad_norm": 1487.61962890625, - "learning_rate": 3.968615801224843e-05, - "loss": 81.2509, - "step": 92530 - }, - { - "epoch": 0.3738733097120602, - "grad_norm": 585.5298461914062, - "learning_rate": 3.968333300785878e-05, - "loss": 89.9086, - "step": 92540 - }, - { - "epoch": 0.3739137109774278, - "grad_norm": 598.8243408203125, - "learning_rate": 3.968050771720999e-05, - "loss": 52.4405, - "step": 92550 - }, - { - "epoch": 0.37395411224279546, - "grad_norm": 451.022705078125, - "learning_rate": 3.967768214035715e-05, - "loss": 71.7542, - "step": 92560 - }, - { - "epoch": 0.3739945135081631, - "grad_norm": 768.9769287109375, - "learning_rate": 3.967485627735534e-05, - "loss": 92.9239, - "step": 92570 - }, - { - "epoch": 0.3740349147735307, - "grad_norm": 854.9164428710938, - "learning_rate": 3.967203012825965e-05, - "loss": 71.4999, - "step": 92580 - }, - { - "epoch": 0.3740753160388983, - "grad_norm": 471.7916259765625, - "learning_rate": 3.966920369312518e-05, - "loss": 47.2625, - "step": 92590 - }, - { - "epoch": 0.37411571730426596, - "grad_norm": 478.0005187988281, - "learning_rate": 3.966637697200703e-05, - "loss": 56.9574, - "step": 92600 - }, - { - "epoch": 0.3741561185696336, - "grad_norm": 968.0185546875, - "learning_rate": 3.9663549964960314e-05, - "loss": 62.3624, - "step": 92610 - }, - { - "epoch": 0.37419651983500124, - "grad_norm": 867.2205810546875, - "learning_rate": 3.966072267204014e-05, - "loss": 33.5701, - "step": 92620 - }, - { - "epoch": 0.3742369211003689, - "grad_norm": 941.1777954101562, - "learning_rate": 3.965789509330163e-05, - "loss": 78.525, - "step": 92630 - }, - { - "epoch": 0.3742773223657365, - "grad_norm": 612.6570434570312, - "learning_rate": 3.965506722879991e-05, - "loss": 40.913, - "step": 92640 - }, - { - "epoch": 0.3743177236311041, - "grad_norm": 127.48741149902344, - "learning_rate": 3.965223907859011e-05, - "loss": 63.8331, - "step": 92650 - }, - { - "epoch": 0.37435812489647174, - "grad_norm": 443.2701721191406, - "learning_rate": 3.964941064272736e-05, - "loss": 71.8824, - "step": 92660 - }, - { - "epoch": 0.3743985261618394, - "grad_norm": 431.3937072753906, - "learning_rate": 3.9646581921266815e-05, - "loss": 74.9664, - "step": 92670 - }, - { - "epoch": 0.374438927427207, - "grad_norm": 852.2191772460938, - "learning_rate": 3.964375291426361e-05, - "loss": 51.0597, - "step": 92680 - }, - { - "epoch": 0.37447932869257466, - "grad_norm": 493.4492492675781, - "learning_rate": 3.96409236217729e-05, - "loss": 39.8483, - "step": 92690 - }, - { - "epoch": 0.3745197299579423, - "grad_norm": 363.88836669921875, - "learning_rate": 3.963809404384985e-05, - "loss": 44.7268, - "step": 92700 - }, - { - "epoch": 0.3745601312233099, - "grad_norm": 2506.282470703125, - "learning_rate": 3.9635264180549624e-05, - "loss": 79.4526, - "step": 92710 - }, - { - "epoch": 0.37460053248867753, - "grad_norm": 1399.570556640625, - "learning_rate": 3.963243403192739e-05, - "loss": 62.9623, - "step": 92720 - }, - { - "epoch": 0.37464093375404517, - "grad_norm": 502.1692810058594, - "learning_rate": 3.9629603598038314e-05, - "loss": 73.1174, - "step": 92730 - }, - { - "epoch": 0.3746813350194128, - "grad_norm": 478.8690185546875, - "learning_rate": 3.962677287893758e-05, - "loss": 66.815, - "step": 92740 - }, - { - "epoch": 0.37472173628478045, - "grad_norm": 563.9537963867188, - "learning_rate": 3.962394187468039e-05, - "loss": 72.0108, - "step": 92750 - }, - { - "epoch": 0.3747621375501481, - "grad_norm": 761.1668090820312, - "learning_rate": 3.962111058532192e-05, - "loss": 42.6949, - "step": 92760 - }, - { - "epoch": 0.37480253881551573, - "grad_norm": 795.1704711914062, - "learning_rate": 3.961827901091737e-05, - "loss": 58.1634, - "step": 92770 - }, - { - "epoch": 0.3748429400808833, - "grad_norm": 811.5784301757812, - "learning_rate": 3.9615447151521945e-05, - "loss": 75.2723, - "step": 92780 - }, - { - "epoch": 0.37488334134625095, - "grad_norm": 444.902099609375, - "learning_rate": 3.961261500719085e-05, - "loss": 62.2803, - "step": 92790 - }, - { - "epoch": 0.3749237426116186, - "grad_norm": 530.8676147460938, - "learning_rate": 3.960978257797931e-05, - "loss": 62.1482, - "step": 92800 - }, - { - "epoch": 0.37496414387698623, - "grad_norm": 230.55746459960938, - "learning_rate": 3.9606949863942526e-05, - "loss": 60.5643, - "step": 92810 - }, - { - "epoch": 0.3750045451423539, - "grad_norm": 656.6939697265625, - "learning_rate": 3.960411686513574e-05, - "loss": 49.9396, - "step": 92820 - }, - { - "epoch": 0.3750449464077215, - "grad_norm": 1161.34765625, - "learning_rate": 3.960128358161418e-05, - "loss": 50.9719, - "step": 92830 - }, - { - "epoch": 0.3750853476730891, - "grad_norm": 768.6620483398438, - "learning_rate": 3.9598450013433075e-05, - "loss": 68.8406, - "step": 92840 - }, - { - "epoch": 0.37512574893845674, - "grad_norm": 512.8631591796875, - "learning_rate": 3.9595616160647674e-05, - "loss": 61.0759, - "step": 92850 - }, - { - "epoch": 0.3751661502038244, - "grad_norm": 341.27178955078125, - "learning_rate": 3.959278202331322e-05, - "loss": 50.0951, - "step": 92860 - }, - { - "epoch": 0.375206551469192, - "grad_norm": 1139.0494384765625, - "learning_rate": 3.9589947601484974e-05, - "loss": 72.3378, - "step": 92870 - }, - { - "epoch": 0.37524695273455966, - "grad_norm": 703.4913940429688, - "learning_rate": 3.9587112895218184e-05, - "loss": 57.7523, - "step": 92880 - }, - { - "epoch": 0.3752873539999273, - "grad_norm": 1080.4427490234375, - "learning_rate": 3.958427790456811e-05, - "loss": 68.2021, - "step": 92890 - }, - { - "epoch": 0.3753277552652949, - "grad_norm": 669.9296264648438, - "learning_rate": 3.958144262959004e-05, - "loss": 71.9688, - "step": 92900 - }, - { - "epoch": 0.3753681565306625, - "grad_norm": 945.6260986328125, - "learning_rate": 3.9578607070339235e-05, - "loss": 72.3592, - "step": 92910 - }, - { - "epoch": 0.37540855779603016, - "grad_norm": 310.77203369140625, - "learning_rate": 3.957577122687098e-05, - "loss": 53.5487, - "step": 92920 - }, - { - "epoch": 0.3754489590613978, - "grad_norm": 602.9820556640625, - "learning_rate": 3.957293509924056e-05, - "loss": 61.1102, - "step": 92930 - }, - { - "epoch": 0.37548936032676544, - "grad_norm": 257.7418518066406, - "learning_rate": 3.9570098687503274e-05, - "loss": 56.4356, - "step": 92940 - }, - { - "epoch": 0.3755297615921331, - "grad_norm": 896.652587890625, - "learning_rate": 3.9567261991714404e-05, - "loss": 63.6779, - "step": 92950 - }, - { - "epoch": 0.3755701628575007, - "grad_norm": 1012.0924072265625, - "learning_rate": 3.9564425011929265e-05, - "loss": 68.4286, - "step": 92960 - }, - { - "epoch": 0.3756105641228683, - "grad_norm": 525.5440673828125, - "learning_rate": 3.956158774820316e-05, - "loss": 53.852, - "step": 92970 - }, - { - "epoch": 0.37565096538823595, - "grad_norm": 657.0477905273438, - "learning_rate": 3.955875020059141e-05, - "loss": 79.606, - "step": 92980 - }, - { - "epoch": 0.3756913666536036, - "grad_norm": 1086.502197265625, - "learning_rate": 3.955591236914933e-05, - "loss": 46.6891, - "step": 92990 - }, - { - "epoch": 0.3757317679189712, - "grad_norm": 1276.398193359375, - "learning_rate": 3.955307425393224e-05, - "loss": 83.8669, - "step": 93000 - }, - { - "epoch": 0.37577216918433887, - "grad_norm": 1241.0797119140625, - "learning_rate": 3.955023585499547e-05, - "loss": 65.6787, - "step": 93010 - }, - { - "epoch": 0.3758125704497065, - "grad_norm": 516.9462890625, - "learning_rate": 3.954739717239437e-05, - "loss": 80.84, - "step": 93020 - }, - { - "epoch": 0.3758529717150741, - "grad_norm": 475.4250183105469, - "learning_rate": 3.954455820618427e-05, - "loss": 72.3098, - "step": 93030 - }, - { - "epoch": 0.37589337298044173, - "grad_norm": 950.8383178710938, - "learning_rate": 3.954171895642052e-05, - "loss": 76.2437, - "step": 93040 - }, - { - "epoch": 0.37593377424580937, - "grad_norm": 631.9634399414062, - "learning_rate": 3.953887942315847e-05, - "loss": 42.5692, - "step": 93050 - }, - { - "epoch": 0.375974175511177, - "grad_norm": 942.7606201171875, - "learning_rate": 3.953603960645349e-05, - "loss": 79.1041, - "step": 93060 - }, - { - "epoch": 0.37601457677654465, - "grad_norm": 1219.8829345703125, - "learning_rate": 3.953319950636092e-05, - "loss": 107.0019, - "step": 93070 - }, - { - "epoch": 0.3760549780419123, - "grad_norm": 688.3053588867188, - "learning_rate": 3.953035912293616e-05, - "loss": 64.1869, - "step": 93080 - }, - { - "epoch": 0.37609537930727993, - "grad_norm": 632.4610595703125, - "learning_rate": 3.9527518456234544e-05, - "loss": 57.2202, - "step": 93090 - }, - { - "epoch": 0.3761357805726475, - "grad_norm": 323.7148742675781, - "learning_rate": 3.95246775063115e-05, - "loss": 45.6848, - "step": 93100 - }, - { - "epoch": 0.37617618183801516, - "grad_norm": 789.0226440429688, - "learning_rate": 3.952183627322238e-05, - "loss": 76.8052, - "step": 93110 - }, - { - "epoch": 0.3762165831033828, - "grad_norm": 631.6780395507812, - "learning_rate": 3.951899475702259e-05, - "loss": 57.1333, - "step": 93120 - }, - { - "epoch": 0.37625698436875044, - "grad_norm": 1251.869140625, - "learning_rate": 3.951615295776752e-05, - "loss": 120.3193, - "step": 93130 - }, - { - "epoch": 0.3762973856341181, - "grad_norm": 525.0264892578125, - "learning_rate": 3.951331087551257e-05, - "loss": 77.0742, - "step": 93140 - }, - { - "epoch": 0.3763377868994857, - "grad_norm": 927.7319946289062, - "learning_rate": 3.951046851031315e-05, - "loss": 68.461, - "step": 93150 - }, - { - "epoch": 0.3763781881648533, - "grad_norm": 388.8170471191406, - "learning_rate": 3.950762586222468e-05, - "loss": 33.0051, - "step": 93160 - }, - { - "epoch": 0.37641858943022094, - "grad_norm": 462.57025146484375, - "learning_rate": 3.950478293130258e-05, - "loss": 57.6939, - "step": 93170 - }, - { - "epoch": 0.3764589906955886, - "grad_norm": 818.913818359375, - "learning_rate": 3.950193971760226e-05, - "loss": 47.7335, - "step": 93180 - }, - { - "epoch": 0.3764993919609562, - "grad_norm": 2200.9609375, - "learning_rate": 3.949909622117918e-05, - "loss": 64.2052, - "step": 93190 - }, - { - "epoch": 0.37653979322632386, - "grad_norm": 908.902099609375, - "learning_rate": 3.9496252442088733e-05, - "loss": 36.8062, - "step": 93200 - }, - { - "epoch": 0.3765801944916915, - "grad_norm": 740.1572265625, - "learning_rate": 3.949340838038639e-05, - "loss": 68.5629, - "step": 93210 - }, - { - "epoch": 0.3766205957570591, - "grad_norm": 1135.758544921875, - "learning_rate": 3.949056403612758e-05, - "loss": 62.5657, - "step": 93220 - }, - { - "epoch": 0.3766609970224267, - "grad_norm": 2026.0989990234375, - "learning_rate": 3.9487719409367774e-05, - "loss": 74.2239, - "step": 93230 - }, - { - "epoch": 0.37670139828779436, - "grad_norm": 864.1495361328125, - "learning_rate": 3.948487450016242e-05, - "loss": 92.9084, - "step": 93240 - }, - { - "epoch": 0.376741799553162, - "grad_norm": 810.767333984375, - "learning_rate": 3.948202930856697e-05, - "loss": 80.3417, - "step": 93250 - }, - { - "epoch": 0.37678220081852964, - "grad_norm": 170.60748291015625, - "learning_rate": 3.947918383463691e-05, - "loss": 58.1501, - "step": 93260 - }, - { - "epoch": 0.3768226020838973, - "grad_norm": 521.3902587890625, - "learning_rate": 3.947633807842771e-05, - "loss": 24.7328, - "step": 93270 - }, - { - "epoch": 0.3768630033492649, - "grad_norm": 615.25048828125, - "learning_rate": 3.947349203999484e-05, - "loss": 69.8992, - "step": 93280 - }, - { - "epoch": 0.3769034046146325, - "grad_norm": 616.8837280273438, - "learning_rate": 3.9470645719393794e-05, - "loss": 48.8142, - "step": 93290 - }, - { - "epoch": 0.37694380588000015, - "grad_norm": 1704.8671875, - "learning_rate": 3.946779911668006e-05, - "loss": 62.0936, - "step": 93300 - }, - { - "epoch": 0.3769842071453678, - "grad_norm": 816.0055541992188, - "learning_rate": 3.9464952231909135e-05, - "loss": 56.5077, - "step": 93310 - }, - { - "epoch": 0.37702460841073543, - "grad_norm": 501.01641845703125, - "learning_rate": 3.946210506513651e-05, - "loss": 51.1529, - "step": 93320 - }, - { - "epoch": 0.37706500967610307, - "grad_norm": 1191.9462890625, - "learning_rate": 3.945925761641771e-05, - "loss": 56.0396, - "step": 93330 - }, - { - "epoch": 0.3771054109414707, - "grad_norm": 910.802734375, - "learning_rate": 3.945640988580824e-05, - "loss": 83.711, - "step": 93340 - }, - { - "epoch": 0.3771458122068383, - "grad_norm": 1067.3336181640625, - "learning_rate": 3.9453561873363615e-05, - "loss": 57.1342, - "step": 93350 - }, - { - "epoch": 0.37718621347220593, - "grad_norm": 894.1754150390625, - "learning_rate": 3.945071357913935e-05, - "loss": 53.6304, - "step": 93360 - }, - { - "epoch": 0.3772266147375736, - "grad_norm": 760.7733764648438, - "learning_rate": 3.9447865003191e-05, - "loss": 67.4268, - "step": 93370 - }, - { - "epoch": 0.3772670160029412, - "grad_norm": 771.8887939453125, - "learning_rate": 3.9445016145574074e-05, - "loss": 69.5092, - "step": 93380 - }, - { - "epoch": 0.37730741726830885, - "grad_norm": 459.5794372558594, - "learning_rate": 3.9442167006344124e-05, - "loss": 46.507, - "step": 93390 - }, - { - "epoch": 0.3773478185336765, - "grad_norm": 2548.902099609375, - "learning_rate": 3.943931758555669e-05, - "loss": 73.9357, - "step": 93400 - }, - { - "epoch": 0.37738821979904413, - "grad_norm": 1343.5716552734375, - "learning_rate": 3.9436467883267334e-05, - "loss": 52.8962, - "step": 93410 - }, - { - "epoch": 0.3774286210644117, - "grad_norm": 1176.486083984375, - "learning_rate": 3.9433617899531597e-05, - "loss": 71.4715, - "step": 93420 - }, - { - "epoch": 0.37746902232977936, - "grad_norm": 3818.821533203125, - "learning_rate": 3.943076763440505e-05, - "loss": 73.5444, - "step": 93430 - }, - { - "epoch": 0.377509423595147, - "grad_norm": 1363.684814453125, - "learning_rate": 3.942791708794326e-05, - "loss": 45.8224, - "step": 93440 - }, - { - "epoch": 0.37754982486051464, - "grad_norm": 602.17138671875, - "learning_rate": 3.9425066260201796e-05, - "loss": 56.5135, - "step": 93450 - }, - { - "epoch": 0.3775902261258823, - "grad_norm": 533.1290893554688, - "learning_rate": 3.942221515123623e-05, - "loss": 47.6091, - "step": 93460 - }, - { - "epoch": 0.3776306273912499, - "grad_norm": 868.53125, - "learning_rate": 3.941936376110217e-05, - "loss": 60.5942, - "step": 93470 - }, - { - "epoch": 0.3776710286566175, - "grad_norm": 581.7073364257812, - "learning_rate": 3.9416512089855184e-05, - "loss": 52.1139, - "step": 93480 - }, - { - "epoch": 0.37771142992198514, - "grad_norm": 983.8074951171875, - "learning_rate": 3.941366013755087e-05, - "loss": 46.983, - "step": 93490 - }, - { - "epoch": 0.3777518311873528, - "grad_norm": 1439.6392822265625, - "learning_rate": 3.941080790424484e-05, - "loss": 56.4244, - "step": 93500 - }, - { - "epoch": 0.3777922324527204, - "grad_norm": 1822.520751953125, - "learning_rate": 3.940795538999268e-05, - "loss": 77.834, - "step": 93510 - }, - { - "epoch": 0.37783263371808806, - "grad_norm": 437.8319396972656, - "learning_rate": 3.940510259485002e-05, - "loss": 47.9851, - "step": 93520 - }, - { - "epoch": 0.3778730349834557, - "grad_norm": 1477.3594970703125, - "learning_rate": 3.9402249518872456e-05, - "loss": 104.0979, - "step": 93530 - }, - { - "epoch": 0.3779134362488233, - "grad_norm": 517.4141235351562, - "learning_rate": 3.939939616211563e-05, - "loss": 46.596, - "step": 93540 - }, - { - "epoch": 0.3779538375141909, - "grad_norm": 673.5197143554688, - "learning_rate": 3.9396542524635175e-05, - "loss": 61.4171, - "step": 93550 - }, - { - "epoch": 0.37799423877955857, - "grad_norm": 679.6529541015625, - "learning_rate": 3.939368860648669e-05, - "loss": 50.3042, - "step": 93560 - }, - { - "epoch": 0.3780346400449262, - "grad_norm": 1323.72314453125, - "learning_rate": 3.939083440772585e-05, - "loss": 52.2302, - "step": 93570 - }, - { - "epoch": 0.37807504131029385, - "grad_norm": 1001.907958984375, - "learning_rate": 3.938797992840828e-05, - "loss": 66.2964, - "step": 93580 - }, - { - "epoch": 0.3781154425756615, - "grad_norm": 615.739990234375, - "learning_rate": 3.9385125168589635e-05, - "loss": 72.0375, - "step": 93590 - }, - { - "epoch": 0.3781558438410291, - "grad_norm": 190.5300750732422, - "learning_rate": 3.938227012832557e-05, - "loss": 38.2591, - "step": 93600 - }, - { - "epoch": 0.3781962451063967, - "grad_norm": 4424.119140625, - "learning_rate": 3.9379414807671736e-05, - "loss": 86.606, - "step": 93610 - }, - { - "epoch": 0.37823664637176435, - "grad_norm": 1783.222900390625, - "learning_rate": 3.937655920668382e-05, - "loss": 45.5719, - "step": 93620 - }, - { - "epoch": 0.378277047637132, - "grad_norm": 508.1633605957031, - "learning_rate": 3.937370332541747e-05, - "loss": 46.5244, - "step": 93630 - }, - { - "epoch": 0.37831744890249963, - "grad_norm": 1679.250732421875, - "learning_rate": 3.937084716392838e-05, - "loss": 105.5736, - "step": 93640 - }, - { - "epoch": 0.37835785016786727, - "grad_norm": 992.9168090820312, - "learning_rate": 3.936799072227222e-05, - "loss": 66.7025, - "step": 93650 - }, - { - "epoch": 0.3783982514332349, - "grad_norm": 709.3222045898438, - "learning_rate": 3.936513400050469e-05, - "loss": 49.8676, - "step": 93660 - }, - { - "epoch": 0.3784386526986025, - "grad_norm": 1248.9063720703125, - "learning_rate": 3.936227699868147e-05, - "loss": 67.6602, - "step": 93670 - }, - { - "epoch": 0.37847905396397014, - "grad_norm": 494.0985107421875, - "learning_rate": 3.9359419716858274e-05, - "loss": 52.3046, - "step": 93680 - }, - { - "epoch": 0.3785194552293378, - "grad_norm": 317.8280334472656, - "learning_rate": 3.9356562155090795e-05, - "loss": 37.2544, - "step": 93690 - }, - { - "epoch": 0.3785598564947054, - "grad_norm": 2666.53662109375, - "learning_rate": 3.935370431343475e-05, - "loss": 82.7156, - "step": 93700 - }, - { - "epoch": 0.37860025776007306, - "grad_norm": 1855.157958984375, - "learning_rate": 3.935084619194584e-05, - "loss": 79.1015, - "step": 93710 - }, - { - "epoch": 0.3786406590254407, - "grad_norm": 621.322265625, - "learning_rate": 3.93479877906798e-05, - "loss": 57.4279, - "step": 93720 - }, - { - "epoch": 0.37868106029080834, - "grad_norm": 785.6000366210938, - "learning_rate": 3.934512910969235e-05, - "loss": 75.8258, - "step": 93730 - }, - { - "epoch": 0.3787214615561759, - "grad_norm": 507.967041015625, - "learning_rate": 3.934227014903922e-05, - "loss": 63.3973, - "step": 93740 - }, - { - "epoch": 0.37876186282154356, - "grad_norm": 1563.5101318359375, - "learning_rate": 3.933941090877615e-05, - "loss": 52.865, - "step": 93750 - }, - { - "epoch": 0.3788022640869112, - "grad_norm": 571.6914672851562, - "learning_rate": 3.933655138895889e-05, - "loss": 61.8429, - "step": 93760 - }, - { - "epoch": 0.37884266535227884, - "grad_norm": 927.9751586914062, - "learning_rate": 3.9333691589643177e-05, - "loss": 51.3117, - "step": 93770 - }, - { - "epoch": 0.3788830666176465, - "grad_norm": 582.5885009765625, - "learning_rate": 3.9330831510884755e-05, - "loss": 85.4374, - "step": 93780 - }, - { - "epoch": 0.3789234678830141, - "grad_norm": 681.2293701171875, - "learning_rate": 3.932797115273941e-05, - "loss": 75.4987, - "step": 93790 - }, - { - "epoch": 0.3789638691483817, - "grad_norm": 720.52392578125, - "learning_rate": 3.932511051526289e-05, - "loss": 83.3338, - "step": 93800 - }, - { - "epoch": 0.37900427041374934, - "grad_norm": 888.4859008789062, - "learning_rate": 3.9322249598510955e-05, - "loss": 67.421, - "step": 93810 - }, - { - "epoch": 0.379044671679117, - "grad_norm": 1525.38134765625, - "learning_rate": 3.93193884025394e-05, - "loss": 85.0812, - "step": 93820 - }, - { - "epoch": 0.3790850729444846, - "grad_norm": 705.2191772460938, - "learning_rate": 3.931652692740399e-05, - "loss": 67.6637, - "step": 93830 - }, - { - "epoch": 0.37912547420985226, - "grad_norm": 1749.32568359375, - "learning_rate": 3.931366517316052e-05, - "loss": 68.8921, - "step": 93840 - }, - { - "epoch": 0.3791658754752199, - "grad_norm": 751.9481201171875, - "learning_rate": 3.9310803139864775e-05, - "loss": 123.6256, - "step": 93850 - }, - { - "epoch": 0.3792062767405875, - "grad_norm": 476.33636474609375, - "learning_rate": 3.9307940827572555e-05, - "loss": 50.916, - "step": 93860 - }, - { - "epoch": 0.37924667800595513, - "grad_norm": 267.0869140625, - "learning_rate": 3.9305078236339666e-05, - "loss": 57.5847, - "step": 93870 - }, - { - "epoch": 0.37928707927132277, - "grad_norm": 953.2073364257812, - "learning_rate": 3.930221536622191e-05, - "loss": 61.0358, - "step": 93880 - }, - { - "epoch": 0.3793274805366904, - "grad_norm": 630.3345336914062, - "learning_rate": 3.9299352217275105e-05, - "loss": 70.866, - "step": 93890 - }, - { - "epoch": 0.37936788180205805, - "grad_norm": 356.209228515625, - "learning_rate": 3.9296488789555066e-05, - "loss": 45.0516, - "step": 93900 - }, - { - "epoch": 0.3794082830674257, - "grad_norm": 410.72320556640625, - "learning_rate": 3.9293625083117616e-05, - "loss": 73.401, - "step": 93910 - }, - { - "epoch": 0.37944868433279333, - "grad_norm": 855.3485107421875, - "learning_rate": 3.9290761098018585e-05, - "loss": 55.5836, - "step": 93920 - }, - { - "epoch": 0.3794890855981609, - "grad_norm": 787.3507690429688, - "learning_rate": 3.928789683431381e-05, - "loss": 55.8925, - "step": 93930 - }, - { - "epoch": 0.37952948686352855, - "grad_norm": 600.7017822265625, - "learning_rate": 3.928503229205913e-05, - "loss": 60.9399, - "step": 93940 - }, - { - "epoch": 0.3795698881288962, - "grad_norm": 561.0108032226562, - "learning_rate": 3.928216747131039e-05, - "loss": 69.8876, - "step": 93950 - }, - { - "epoch": 0.37961028939426383, - "grad_norm": 1152.1119384765625, - "learning_rate": 3.927930237212345e-05, - "loss": 76.2245, - "step": 93960 - }, - { - "epoch": 0.3796506906596315, - "grad_norm": 743.5120239257812, - "learning_rate": 3.9276436994554144e-05, - "loss": 57.824, - "step": 93970 - }, - { - "epoch": 0.3796910919249991, - "grad_norm": 733.8156127929688, - "learning_rate": 3.927357133865836e-05, - "loss": 42.9731, - "step": 93980 - }, - { - "epoch": 0.3797314931903667, - "grad_norm": 431.87158203125, - "learning_rate": 3.927070540449195e-05, - "loss": 63.5863, - "step": 93990 - }, - { - "epoch": 0.37977189445573434, - "grad_norm": 751.9428100585938, - "learning_rate": 3.92678391921108e-05, - "loss": 78.9025, - "step": 94000 - }, - { - "epoch": 0.379812295721102, - "grad_norm": 767.0701293945312, - "learning_rate": 3.926497270157077e-05, - "loss": 42.9222, - "step": 94010 - }, - { - "epoch": 0.3798526969864696, - "grad_norm": 768.52197265625, - "learning_rate": 3.926210593292775e-05, - "loss": 48.6413, - "step": 94020 - }, - { - "epoch": 0.37989309825183726, - "grad_norm": 618.26806640625, - "learning_rate": 3.925923888623764e-05, - "loss": 54.2861, - "step": 94030 - }, - { - "epoch": 0.3799334995172049, - "grad_norm": 647.1188354492188, - "learning_rate": 3.925637156155633e-05, - "loss": 70.6201, - "step": 94040 - }, - { - "epoch": 0.37997390078257254, - "grad_norm": 886.6661987304688, - "learning_rate": 3.925350395893971e-05, - "loss": 85.2603, - "step": 94050 - }, - { - "epoch": 0.3800143020479401, - "grad_norm": 1664.85693359375, - "learning_rate": 3.925063607844369e-05, - "loss": 55.2438, - "step": 94060 - }, - { - "epoch": 0.38005470331330776, - "grad_norm": 788.0062866210938, - "learning_rate": 3.9247767920124176e-05, - "loss": 73.1715, - "step": 94070 - }, - { - "epoch": 0.3800951045786754, - "grad_norm": 435.3290710449219, - "learning_rate": 3.924489948403711e-05, - "loss": 70.93, - "step": 94080 - }, - { - "epoch": 0.38013550584404304, - "grad_norm": 143.54469299316406, - "learning_rate": 3.924203077023839e-05, - "loss": 31.3308, - "step": 94090 - }, - { - "epoch": 0.3801759071094107, - "grad_norm": 1035.6220703125, - "learning_rate": 3.923916177878394e-05, - "loss": 59.6959, - "step": 94100 - }, - { - "epoch": 0.3802163083747783, - "grad_norm": 1633.117431640625, - "learning_rate": 3.9236292509729697e-05, - "loss": 77.1727, - "step": 94110 - }, - { - "epoch": 0.3802567096401459, - "grad_norm": 421.79974365234375, - "learning_rate": 3.9233422963131616e-05, - "loss": 45.4822, - "step": 94120 - }, - { - "epoch": 0.38029711090551355, - "grad_norm": 953.0307006835938, - "learning_rate": 3.9230553139045617e-05, - "loss": 98.6893, - "step": 94130 - }, - { - "epoch": 0.3803375121708812, - "grad_norm": 1626.166015625, - "learning_rate": 3.922768303752766e-05, - "loss": 73.0103, - "step": 94140 - }, - { - "epoch": 0.3803779134362488, - "grad_norm": 1201.143310546875, - "learning_rate": 3.92248126586337e-05, - "loss": 49.0338, - "step": 94150 - }, - { - "epoch": 0.38041831470161647, - "grad_norm": 1199.5015869140625, - "learning_rate": 3.922194200241969e-05, - "loss": 49.4027, - "step": 94160 - }, - { - "epoch": 0.3804587159669841, - "grad_norm": 948.0160522460938, - "learning_rate": 3.92190710689416e-05, - "loss": 47.4217, - "step": 94170 - }, - { - "epoch": 0.3804991172323517, - "grad_norm": 1361.0826416015625, - "learning_rate": 3.92161998582554e-05, - "loss": 68.6793, - "step": 94180 - }, - { - "epoch": 0.38053951849771933, - "grad_norm": 691.0305786132812, - "learning_rate": 3.9213328370417065e-05, - "loss": 86.5888, - "step": 94190 - }, - { - "epoch": 0.38057991976308697, - "grad_norm": 448.7333984375, - "learning_rate": 3.9210456605482576e-05, - "loss": 45.7264, - "step": 94200 - }, - { - "epoch": 0.3806203210284546, - "grad_norm": 926.4880981445312, - "learning_rate": 3.920758456350792e-05, - "loss": 73.8021, - "step": 94210 - }, - { - "epoch": 0.38066072229382225, - "grad_norm": 542.78662109375, - "learning_rate": 3.9204712244549085e-05, - "loss": 65.7361, - "step": 94220 - }, - { - "epoch": 0.3807011235591899, - "grad_norm": 3470.885986328125, - "learning_rate": 3.9201839648662074e-05, - "loss": 63.5717, - "step": 94230 - }, - { - "epoch": 0.38074152482455753, - "grad_norm": 596.5708618164062, - "learning_rate": 3.919896677590289e-05, - "loss": 69.1542, - "step": 94240 - }, - { - "epoch": 0.3807819260899251, - "grad_norm": 797.0637817382812, - "learning_rate": 3.919609362632753e-05, - "loss": 106.799, - "step": 94250 - }, - { - "epoch": 0.38082232735529276, - "grad_norm": 641.5678100585938, - "learning_rate": 3.9193220199992025e-05, - "loss": 42.721, - "step": 94260 - }, - { - "epoch": 0.3808627286206604, - "grad_norm": 370.0324401855469, - "learning_rate": 3.919034649695238e-05, - "loss": 47.1425, - "step": 94270 - }, - { - "epoch": 0.38090312988602804, - "grad_norm": 1143.82861328125, - "learning_rate": 3.918747251726463e-05, - "loss": 51.9892, - "step": 94280 - }, - { - "epoch": 0.3809435311513957, - "grad_norm": 859.6338500976562, - "learning_rate": 3.9184598260984795e-05, - "loss": 64.1427, - "step": 94290 - }, - { - "epoch": 0.3809839324167633, - "grad_norm": 423.8070068359375, - "learning_rate": 3.9181723728168916e-05, - "loss": 75.3932, - "step": 94300 - }, - { - "epoch": 0.3810243336821309, - "grad_norm": 945.8095703125, - "learning_rate": 3.9178848918873027e-05, - "loss": 47.6868, - "step": 94310 - }, - { - "epoch": 0.38106473494749854, - "grad_norm": 1396.765625, - "learning_rate": 3.9175973833153186e-05, - "loss": 53.5173, - "step": 94320 - }, - { - "epoch": 0.3811051362128662, - "grad_norm": 1253.411376953125, - "learning_rate": 3.9173098471065434e-05, - "loss": 48.6057, - "step": 94330 - }, - { - "epoch": 0.3811455374782338, - "grad_norm": 627.4695434570312, - "learning_rate": 3.9170222832665825e-05, - "loss": 68.0112, - "step": 94340 - }, - { - "epoch": 0.38118593874360146, - "grad_norm": 918.7919311523438, - "learning_rate": 3.9167346918010425e-05, - "loss": 102.3109, - "step": 94350 - }, - { - "epoch": 0.3812263400089691, - "grad_norm": 1036.47265625, - "learning_rate": 3.9164470727155314e-05, - "loss": 77.9767, - "step": 94360 - }, - { - "epoch": 0.3812667412743367, - "grad_norm": 1388.8292236328125, - "learning_rate": 3.916159426015655e-05, - "loss": 74.0313, - "step": 94370 - }, - { - "epoch": 0.3813071425397043, - "grad_norm": 761.83203125, - "learning_rate": 3.9158717517070214e-05, - "loss": 58.2245, - "step": 94380 - }, - { - "epoch": 0.38134754380507196, - "grad_norm": 416.04351806640625, - "learning_rate": 3.915584049795239e-05, - "loss": 50.2299, - "step": 94390 - }, - { - "epoch": 0.3813879450704396, - "grad_norm": 1973.710205078125, - "learning_rate": 3.915296320285917e-05, - "loss": 56.33, - "step": 94400 - }, - { - "epoch": 0.38142834633580724, - "grad_norm": 895.8132934570312, - "learning_rate": 3.915008563184664e-05, - "loss": 54.553, - "step": 94410 - }, - { - "epoch": 0.3814687476011749, - "grad_norm": 521.8209838867188, - "learning_rate": 3.9147207784970914e-05, - "loss": 69.8902, - "step": 94420 - }, - { - "epoch": 0.3815091488665425, - "grad_norm": 1060.228515625, - "learning_rate": 3.914432966228808e-05, - "loss": 72.3957, - "step": 94430 - }, - { - "epoch": 0.3815495501319101, - "grad_norm": 478.51885986328125, - "learning_rate": 3.914145126385426e-05, - "loss": 68.0388, - "step": 94440 - }, - { - "epoch": 0.38158995139727775, - "grad_norm": 855.6203002929688, - "learning_rate": 3.9138572589725576e-05, - "loss": 52.0944, - "step": 94450 - }, - { - "epoch": 0.3816303526626454, - "grad_norm": 2904.56298828125, - "learning_rate": 3.9135693639958125e-05, - "loss": 50.8609, - "step": 94460 - }, - { - "epoch": 0.38167075392801303, - "grad_norm": 1457.2967529296875, - "learning_rate": 3.913281441460806e-05, - "loss": 86.8889, - "step": 94470 - }, - { - "epoch": 0.38171115519338067, - "grad_norm": 745.7070922851562, - "learning_rate": 3.91299349137315e-05, - "loss": 63.1558, - "step": 94480 - }, - { - "epoch": 0.3817515564587483, - "grad_norm": 832.9826049804688, - "learning_rate": 3.912705513738458e-05, - "loss": 59.3702, - "step": 94490 - }, - { - "epoch": 0.3817919577241159, - "grad_norm": 768.7559204101562, - "learning_rate": 3.912417508562345e-05, - "loss": 65.5276, - "step": 94500 - }, - { - "epoch": 0.38183235898948353, - "grad_norm": 657.02734375, - "learning_rate": 3.912129475850426e-05, - "loss": 71.6129, - "step": 94510 - }, - { - "epoch": 0.3818727602548512, - "grad_norm": 1496.1007080078125, - "learning_rate": 3.911841415608315e-05, - "loss": 83.0548, - "step": 94520 - }, - { - "epoch": 0.3819131615202188, - "grad_norm": 1321.64697265625, - "learning_rate": 3.911553327841629e-05, - "loss": 42.8167, - "step": 94530 - }, - { - "epoch": 0.38195356278558645, - "grad_norm": 434.47039794921875, - "learning_rate": 3.9112652125559845e-05, - "loss": 28.5604, - "step": 94540 - }, - { - "epoch": 0.3819939640509541, - "grad_norm": 1304.2777099609375, - "learning_rate": 3.910977069756998e-05, - "loss": 54.2573, - "step": 94550 - }, - { - "epoch": 0.38203436531632173, - "grad_norm": 1068.87939453125, - "learning_rate": 3.9106888994502864e-05, - "loss": 34.1891, - "step": 94560 - }, - { - "epoch": 0.3820747665816893, - "grad_norm": 4277.7587890625, - "learning_rate": 3.9104007016414695e-05, - "loss": 87.1632, - "step": 94570 - }, - { - "epoch": 0.38211516784705696, - "grad_norm": 96.375244140625, - "learning_rate": 3.910112476336164e-05, - "loss": 76.1936, - "step": 94580 - }, - { - "epoch": 0.3821555691124246, - "grad_norm": 775.3291015625, - "learning_rate": 3.90982422353999e-05, - "loss": 74.7171, - "step": 94590 - }, - { - "epoch": 0.38219597037779224, - "grad_norm": 696.175537109375, - "learning_rate": 3.909535943258567e-05, - "loss": 54.9566, - "step": 94600 - }, - { - "epoch": 0.3822363716431599, - "grad_norm": 724.5578002929688, - "learning_rate": 3.909247635497516e-05, - "loss": 77.6065, - "step": 94610 - }, - { - "epoch": 0.3822767729085275, - "grad_norm": 393.4891662597656, - "learning_rate": 3.9089593002624555e-05, - "loss": 65.3597, - "step": 94620 - }, - { - "epoch": 0.3823171741738951, - "grad_norm": 1047.53662109375, - "learning_rate": 3.908670937559008e-05, - "loss": 63.4875, - "step": 94630 - }, - { - "epoch": 0.38235757543926274, - "grad_norm": 1051.636962890625, - "learning_rate": 3.908382547392796e-05, - "loss": 39.8985, - "step": 94640 - }, - { - "epoch": 0.3823979767046304, - "grad_norm": 705.0889892578125, - "learning_rate": 3.908094129769442e-05, - "loss": 52.4874, - "step": 94650 - }, - { - "epoch": 0.382438377969998, - "grad_norm": 1593.9310302734375, - "learning_rate": 3.907805684694566e-05, - "loss": 82.5799, - "step": 94660 - }, - { - "epoch": 0.38247877923536566, - "grad_norm": 462.7720947265625, - "learning_rate": 3.9075172121737945e-05, - "loss": 48.7856, - "step": 94670 - }, - { - "epoch": 0.3825191805007333, - "grad_norm": 1333.7330322265625, - "learning_rate": 3.907228712212751e-05, - "loss": 88.1809, - "step": 94680 - }, - { - "epoch": 0.3825595817661009, - "grad_norm": 606.8469848632812, - "learning_rate": 3.906940184817057e-05, - "loss": 63.5642, - "step": 94690 - }, - { - "epoch": 0.3825999830314685, - "grad_norm": 280.5736389160156, - "learning_rate": 3.906651629992342e-05, - "loss": 54.9645, - "step": 94700 - }, - { - "epoch": 0.38264038429683617, - "grad_norm": 1871.5345458984375, - "learning_rate": 3.906363047744229e-05, - "loss": 162.7685, - "step": 94710 - }, - { - "epoch": 0.3826807855622038, - "grad_norm": 605.5426025390625, - "learning_rate": 3.9060744380783435e-05, - "loss": 43.6138, - "step": 94720 - }, - { - "epoch": 0.38272118682757145, - "grad_norm": 2624.923583984375, - "learning_rate": 3.9057858010003137e-05, - "loss": 108.0628, - "step": 94730 - }, - { - "epoch": 0.3827615880929391, - "grad_norm": 978.3139038085938, - "learning_rate": 3.905497136515766e-05, - "loss": 51.3885, - "step": 94740 - }, - { - "epoch": 0.3828019893583067, - "grad_norm": 2022.71435546875, - "learning_rate": 3.905208444630327e-05, - "loss": 58.5064, - "step": 94750 - }, - { - "epoch": 0.3828423906236743, - "grad_norm": 395.371826171875, - "learning_rate": 3.9049197253496264e-05, - "loss": 60.7483, - "step": 94760 - }, - { - "epoch": 0.38288279188904195, - "grad_norm": 848.9832153320312, - "learning_rate": 3.904630978679292e-05, - "loss": 57.199, - "step": 94770 - }, - { - "epoch": 0.3829231931544096, - "grad_norm": 1061.2220458984375, - "learning_rate": 3.9043422046249544e-05, - "loss": 64.7823, - "step": 94780 - }, - { - "epoch": 0.38296359441977723, - "grad_norm": 824.2564086914062, - "learning_rate": 3.904053403192242e-05, - "loss": 58.263, - "step": 94790 - }, - { - "epoch": 0.38300399568514487, - "grad_norm": 1173.652099609375, - "learning_rate": 3.903764574386786e-05, - "loss": 80.1155, - "step": 94800 - }, - { - "epoch": 0.3830443969505125, - "grad_norm": 596.0165405273438, - "learning_rate": 3.903475718214217e-05, - "loss": 64.6889, - "step": 94810 - }, - { - "epoch": 0.3830847982158801, - "grad_norm": 544.8338012695312, - "learning_rate": 3.9031868346801656e-05, - "loss": 90.0342, - "step": 94820 - }, - { - "epoch": 0.38312519948124774, - "grad_norm": 582.4503173828125, - "learning_rate": 3.902897923790265e-05, - "loss": 67.4698, - "step": 94830 - }, - { - "epoch": 0.3831656007466154, - "grad_norm": 1065.5283203125, - "learning_rate": 3.902608985550147e-05, - "loss": 64.2469, - "step": 94840 - }, - { - "epoch": 0.383206002011983, - "grad_norm": 718.9613037109375, - "learning_rate": 3.902320019965445e-05, - "loss": 50.7413, - "step": 94850 - }, - { - "epoch": 0.38324640327735066, - "grad_norm": 497.2271423339844, - "learning_rate": 3.902031027041793e-05, - "loss": 46.6418, - "step": 94860 - }, - { - "epoch": 0.3832868045427183, - "grad_norm": 767.0538330078125, - "learning_rate": 3.901742006784822e-05, - "loss": 86.6283, - "step": 94870 - }, - { - "epoch": 0.38332720580808594, - "grad_norm": 101.88063049316406, - "learning_rate": 3.9014529592001705e-05, - "loss": 80.5774, - "step": 94880 - }, - { - "epoch": 0.3833676070734535, - "grad_norm": 479.095947265625, - "learning_rate": 3.901163884293472e-05, - "loss": 44.5601, - "step": 94890 - }, - { - "epoch": 0.38340800833882116, - "grad_norm": 560.9264526367188, - "learning_rate": 3.900874782070362e-05, - "loss": 68.0652, - "step": 94900 - }, - { - "epoch": 0.3834484096041888, - "grad_norm": 530.4874877929688, - "learning_rate": 3.900585652536477e-05, - "loss": 87.2366, - "step": 94910 - }, - { - "epoch": 0.38348881086955644, - "grad_norm": 779.4183959960938, - "learning_rate": 3.900296495697453e-05, - "loss": 70.1493, - "step": 94920 - }, - { - "epoch": 0.3835292121349241, - "grad_norm": 0.0, - "learning_rate": 3.9000073115589286e-05, - "loss": 59.7547, - "step": 94930 - }, - { - "epoch": 0.3835696134002917, - "grad_norm": 516.5095825195312, - "learning_rate": 3.899718100126541e-05, - "loss": 50.7623, - "step": 94940 - }, - { - "epoch": 0.3836100146656593, - "grad_norm": 569.387939453125, - "learning_rate": 3.899428861405928e-05, - "loss": 83.759, - "step": 94950 - }, - { - "epoch": 0.38365041593102694, - "grad_norm": 586.040771484375, - "learning_rate": 3.899139595402729e-05, - "loss": 42.7253, - "step": 94960 - }, - { - "epoch": 0.3836908171963946, - "grad_norm": 2201.12646484375, - "learning_rate": 3.898850302122583e-05, - "loss": 56.9618, - "step": 94970 - }, - { - "epoch": 0.3837312184617622, - "grad_norm": 511.3911437988281, - "learning_rate": 3.898560981571131e-05, - "loss": 55.5062, - "step": 94980 - }, - { - "epoch": 0.38377161972712986, - "grad_norm": 383.2709045410156, - "learning_rate": 3.8982716337540115e-05, - "loss": 60.895, - "step": 94990 - }, - { - "epoch": 0.3838120209924975, - "grad_norm": 768.9598388671875, - "learning_rate": 3.897982258676867e-05, - "loss": 59.8323, - "step": 95000 - }, - { - "epoch": 0.3838524222578651, - "grad_norm": 1006.3447875976562, - "learning_rate": 3.897692856345339e-05, - "loss": 43.6415, - "step": 95010 - }, - { - "epoch": 0.38389282352323273, - "grad_norm": 774.2186889648438, - "learning_rate": 3.897403426765069e-05, - "loss": 101.709, - "step": 95020 - }, - { - "epoch": 0.38393322478860037, - "grad_norm": 1448.4361572265625, - "learning_rate": 3.8971139699417e-05, - "loss": 91.2065, - "step": 95030 - }, - { - "epoch": 0.383973626053968, - "grad_norm": 771.1356811523438, - "learning_rate": 3.896824485880874e-05, - "loss": 60.503, - "step": 95040 - }, - { - "epoch": 0.38401402731933565, - "grad_norm": 726.0272827148438, - "learning_rate": 3.8965349745882365e-05, - "loss": 70.719, - "step": 95050 - }, - { - "epoch": 0.3840544285847033, - "grad_norm": 791.23193359375, - "learning_rate": 3.896245436069431e-05, - "loss": 83.7272, - "step": 95060 - }, - { - "epoch": 0.38409482985007093, - "grad_norm": 805.988037109375, - "learning_rate": 3.8959558703301015e-05, - "loss": 62.7216, - "step": 95070 - }, - { - "epoch": 0.3841352311154385, - "grad_norm": 318.8636169433594, - "learning_rate": 3.895666277375892e-05, - "loss": 59.8062, - "step": 95080 - }, - { - "epoch": 0.38417563238080615, - "grad_norm": 0.0, - "learning_rate": 3.8953766572124515e-05, - "loss": 46.7181, - "step": 95090 - }, - { - "epoch": 0.3842160336461738, - "grad_norm": 616.468017578125, - "learning_rate": 3.895087009845425e-05, - "loss": 55.656, - "step": 95100 - }, - { - "epoch": 0.38425643491154143, - "grad_norm": 872.3570556640625, - "learning_rate": 3.8947973352804584e-05, - "loss": 78.4981, - "step": 95110 - }, - { - "epoch": 0.3842968361769091, - "grad_norm": 765.6026000976562, - "learning_rate": 3.894507633523199e-05, - "loss": 54.1042, - "step": 95120 - }, - { - "epoch": 0.3843372374422767, - "grad_norm": 903.8804931640625, - "learning_rate": 3.894217904579296e-05, - "loss": 79.6192, - "step": 95130 - }, - { - "epoch": 0.3843776387076443, - "grad_norm": 725.60498046875, - "learning_rate": 3.8939281484543974e-05, - "loss": 84.4251, - "step": 95140 - }, - { - "epoch": 0.38441803997301194, - "grad_norm": 1229.2586669921875, - "learning_rate": 3.893638365154152e-05, - "loss": 125.7261, - "step": 95150 - }, - { - "epoch": 0.3844584412383796, - "grad_norm": 727.9944458007812, - "learning_rate": 3.8933485546842094e-05, - "loss": 55.8314, - "step": 95160 - }, - { - "epoch": 0.3844988425037472, - "grad_norm": 732.46337890625, - "learning_rate": 3.893058717050218e-05, - "loss": 43.0125, - "step": 95170 - }, - { - "epoch": 0.38453924376911486, - "grad_norm": 948.4493408203125, - "learning_rate": 3.892768852257831e-05, - "loss": 48.8631, - "step": 95180 - }, - { - "epoch": 0.3845796450344825, - "grad_norm": 395.610595703125, - "learning_rate": 3.892478960312698e-05, - "loss": 97.7245, - "step": 95190 - }, - { - "epoch": 0.38462004629985014, - "grad_norm": 480.57672119140625, - "learning_rate": 3.8921890412204705e-05, - "loss": 67.3274, - "step": 95200 - }, - { - "epoch": 0.3846604475652177, - "grad_norm": 601.1005859375, - "learning_rate": 3.891899094986801e-05, - "loss": 76.2722, - "step": 95210 - }, - { - "epoch": 0.38470084883058536, - "grad_norm": 1527.212646484375, - "learning_rate": 3.891609121617342e-05, - "loss": 30.4016, - "step": 95220 - }, - { - "epoch": 0.384741250095953, - "grad_norm": 598.6000366210938, - "learning_rate": 3.8913191211177464e-05, - "loss": 38.4764, - "step": 95230 - }, - { - "epoch": 0.38478165136132064, - "grad_norm": 1061.5889892578125, - "learning_rate": 3.891029093493669e-05, - "loss": 57.333, - "step": 95240 - }, - { - "epoch": 0.3848220526266883, - "grad_norm": 719.52392578125, - "learning_rate": 3.8907390387507625e-05, - "loss": 66.065, - "step": 95250 - }, - { - "epoch": 0.3848624538920559, - "grad_norm": 723.9736328125, - "learning_rate": 3.890448956894682e-05, - "loss": 41.1686, - "step": 95260 - }, - { - "epoch": 0.3849028551574235, - "grad_norm": 789.1625366210938, - "learning_rate": 3.8901588479310846e-05, - "loss": 112.5225, - "step": 95270 - }, - { - "epoch": 0.38494325642279115, - "grad_norm": 321.8541259765625, - "learning_rate": 3.889868711865624e-05, - "loss": 43.6828, - "step": 95280 - }, - { - "epoch": 0.3849836576881588, - "grad_norm": 449.05401611328125, - "learning_rate": 3.8895785487039574e-05, - "loss": 82.2265, - "step": 95290 - }, - { - "epoch": 0.3850240589535264, - "grad_norm": 677.4368286132812, - "learning_rate": 3.8892883584517415e-05, - "loss": 52.7545, - "step": 95300 - }, - { - "epoch": 0.38506446021889407, - "grad_norm": 960.1495361328125, - "learning_rate": 3.888998141114634e-05, - "loss": 51.1408, - "step": 95310 - }, - { - "epoch": 0.3851048614842617, - "grad_norm": 3223.043701171875, - "learning_rate": 3.8887078966982925e-05, - "loss": 75.4026, - "step": 95320 - }, - { - "epoch": 0.3851452627496293, - "grad_norm": 1701.433837890625, - "learning_rate": 3.888417625208376e-05, - "loss": 65.9606, - "step": 95330 - }, - { - "epoch": 0.38518566401499693, - "grad_norm": 1047.423583984375, - "learning_rate": 3.888127326650542e-05, - "loss": 53.6032, - "step": 95340 - }, - { - "epoch": 0.38522606528036457, - "grad_norm": 591.1990356445312, - "learning_rate": 3.887837001030452e-05, - "loss": 67.6553, - "step": 95350 - }, - { - "epoch": 0.3852664665457322, - "grad_norm": 1093.99365234375, - "learning_rate": 3.887546648353765e-05, - "loss": 44.322, - "step": 95360 - }, - { - "epoch": 0.38530686781109985, - "grad_norm": 545.5051879882812, - "learning_rate": 3.887256268626142e-05, - "loss": 76.8624, - "step": 95370 - }, - { - "epoch": 0.3853472690764675, - "grad_norm": 1276.1842041015625, - "learning_rate": 3.886965861853244e-05, - "loss": 48.7556, - "step": 95380 - }, - { - "epoch": 0.38538767034183513, - "grad_norm": 739.0816650390625, - "learning_rate": 3.886675428040732e-05, - "loss": 62.3025, - "step": 95390 - }, - { - "epoch": 0.3854280716072027, - "grad_norm": 655.6229858398438, - "learning_rate": 3.8863849671942685e-05, - "loss": 51.2894, - "step": 95400 - }, - { - "epoch": 0.38546847287257036, - "grad_norm": 835.5565795898438, - "learning_rate": 3.886094479319517e-05, - "loss": 63.1423, - "step": 95410 - }, - { - "epoch": 0.385508874137938, - "grad_norm": 983.1041259765625, - "learning_rate": 3.885803964422139e-05, - "loss": 65.168, - "step": 95420 - }, - { - "epoch": 0.38554927540330564, - "grad_norm": 928.8274536132812, - "learning_rate": 3.885513422507799e-05, - "loss": 54.2019, - "step": 95430 - }, - { - "epoch": 0.3855896766686733, - "grad_norm": 716.9531860351562, - "learning_rate": 3.885222853582163e-05, - "loss": 69.1313, - "step": 95440 - }, - { - "epoch": 0.3856300779340409, - "grad_norm": 1002.2490234375, - "learning_rate": 3.8849322576508934e-05, - "loss": 57.0536, - "step": 95450 - }, - { - "epoch": 0.3856704791994085, - "grad_norm": 721.2386474609375, - "learning_rate": 3.884641634719657e-05, - "loss": 75.6969, - "step": 95460 - }, - { - "epoch": 0.38571088046477614, - "grad_norm": 673.9929809570312, - "learning_rate": 3.884350984794118e-05, - "loss": 68.2813, - "step": 95470 - }, - { - "epoch": 0.3857512817301438, - "grad_norm": 728.1707153320312, - "learning_rate": 3.8840603078799445e-05, - "loss": 53.9483, - "step": 95480 - }, - { - "epoch": 0.3857916829955114, - "grad_norm": 823.33740234375, - "learning_rate": 3.883769603982803e-05, - "loss": 51.6572, - "step": 95490 - }, - { - "epoch": 0.38583208426087906, - "grad_norm": 1103.5313720703125, - "learning_rate": 3.883478873108361e-05, - "loss": 49.7478, - "step": 95500 - }, - { - "epoch": 0.3858724855262467, - "grad_norm": 1030.14404296875, - "learning_rate": 3.883188115262285e-05, - "loss": 52.5702, - "step": 95510 - }, - { - "epoch": 0.38591288679161434, - "grad_norm": 937.8312377929688, - "learning_rate": 3.8828973304502446e-05, - "loss": 66.6772, - "step": 95520 - }, - { - "epoch": 0.3859532880569819, - "grad_norm": 1410.5428466796875, - "learning_rate": 3.88260651867791e-05, - "loss": 55.7531, - "step": 95530 - }, - { - "epoch": 0.38599368932234956, - "grad_norm": 480.3839111328125, - "learning_rate": 3.8823156799509484e-05, - "loss": 61.0244, - "step": 95540 - }, - { - "epoch": 0.3860340905877172, - "grad_norm": 471.7522888183594, - "learning_rate": 3.8820248142750316e-05, - "loss": 86.2028, - "step": 95550 - }, - { - "epoch": 0.38607449185308484, - "grad_norm": 531.3958129882812, - "learning_rate": 3.881733921655829e-05, - "loss": 88.4188, - "step": 95560 - }, - { - "epoch": 0.3861148931184525, - "grad_norm": 878.5484008789062, - "learning_rate": 3.881443002099012e-05, - "loss": 64.7605, - "step": 95570 - }, - { - "epoch": 0.3861552943838201, - "grad_norm": 371.7911682128906, - "learning_rate": 3.8811520556102535e-05, - "loss": 51.486, - "step": 95580 - }, - { - "epoch": 0.3861956956491877, - "grad_norm": 529.8397216796875, - "learning_rate": 3.880861082195224e-05, - "loss": 72.443, - "step": 95590 - }, - { - "epoch": 0.38623609691455535, - "grad_norm": 988.7250366210938, - "learning_rate": 3.880570081859597e-05, - "loss": 83.2871, - "step": 95600 - }, - { - "epoch": 0.386276498179923, - "grad_norm": 433.3272705078125, - "learning_rate": 3.880279054609045e-05, - "loss": 45.9184, - "step": 95610 - }, - { - "epoch": 0.38631689944529063, - "grad_norm": 1444.31787109375, - "learning_rate": 3.8799880004492425e-05, - "loss": 67.2129, - "step": 95620 - }, - { - "epoch": 0.38635730071065827, - "grad_norm": 507.1636047363281, - "learning_rate": 3.879696919385864e-05, - "loss": 44.7271, - "step": 95630 - }, - { - "epoch": 0.3863977019760259, - "grad_norm": 1155.373046875, - "learning_rate": 3.879405811424583e-05, - "loss": 66.5983, - "step": 95640 - }, - { - "epoch": 0.3864381032413935, - "grad_norm": 490.9678955078125, - "learning_rate": 3.879114676571076e-05, - "loss": 60.9269, - "step": 95650 - }, - { - "epoch": 0.38647850450676113, - "grad_norm": 0.0, - "learning_rate": 3.878823514831018e-05, - "loss": 56.5793, - "step": 95660 - }, - { - "epoch": 0.3865189057721288, - "grad_norm": 1740.7720947265625, - "learning_rate": 3.878532326210086e-05, - "loss": 74.9253, - "step": 95670 - }, - { - "epoch": 0.3865593070374964, - "grad_norm": 437.5474853515625, - "learning_rate": 3.8782411107139564e-05, - "loss": 280.9561, - "step": 95680 - }, - { - "epoch": 0.38659970830286405, - "grad_norm": 1296.4512939453125, - "learning_rate": 3.877949868348307e-05, - "loss": 73.3886, - "step": 95690 - }, - { - "epoch": 0.3866401095682317, - "grad_norm": 372.0828552246094, - "learning_rate": 3.877658599118815e-05, - "loss": 70.0043, - "step": 95700 - }, - { - "epoch": 0.38668051083359933, - "grad_norm": 462.0464782714844, - "learning_rate": 3.87736730303116e-05, - "loss": 69.3731, - "step": 95710 - }, - { - "epoch": 0.3867209120989669, - "grad_norm": 674.0639038085938, - "learning_rate": 3.87707598009102e-05, - "loss": 82.2932, - "step": 95720 - }, - { - "epoch": 0.38676131336433456, - "grad_norm": 281.2317810058594, - "learning_rate": 3.8767846303040746e-05, - "loss": 66.2907, - "step": 95730 - }, - { - "epoch": 0.3868017146297022, - "grad_norm": 531.0217895507812, - "learning_rate": 3.876493253676004e-05, - "loss": 68.4629, - "step": 95740 - }, - { - "epoch": 0.38684211589506984, - "grad_norm": 871.7710571289062, - "learning_rate": 3.8762018502124894e-05, - "loss": 61.8916, - "step": 95750 - }, - { - "epoch": 0.3868825171604375, - "grad_norm": 600.0103759765625, - "learning_rate": 3.875910419919211e-05, - "loss": 86.7091, - "step": 95760 - }, - { - "epoch": 0.3869229184258051, - "grad_norm": 631.7959594726562, - "learning_rate": 3.87561896280185e-05, - "loss": 57.5459, - "step": 95770 - }, - { - "epoch": 0.3869633196911727, - "grad_norm": 459.5212097167969, - "learning_rate": 3.8753274788660894e-05, - "loss": 38.7924, - "step": 95780 - }, - { - "epoch": 0.38700372095654034, - "grad_norm": 722.750244140625, - "learning_rate": 3.875035968117612e-05, - "loss": 61.4237, - "step": 95790 - }, - { - "epoch": 0.387044122221908, - "grad_norm": 1434.1351318359375, - "learning_rate": 3.8747444305621e-05, - "loss": 45.202, - "step": 95800 - }, - { - "epoch": 0.3870845234872756, - "grad_norm": 1343.0447998046875, - "learning_rate": 3.874452866205237e-05, - "loss": 74.4229, - "step": 95810 - }, - { - "epoch": 0.38712492475264326, - "grad_norm": 851.3370971679688, - "learning_rate": 3.874161275052709e-05, - "loss": 57.2729, - "step": 95820 - }, - { - "epoch": 0.3871653260180109, - "grad_norm": 519.22216796875, - "learning_rate": 3.873869657110198e-05, - "loss": 52.2334, - "step": 95830 - }, - { - "epoch": 0.38720572728337854, - "grad_norm": 1355.063720703125, - "learning_rate": 3.873578012383393e-05, - "loss": 80.8646, - "step": 95840 - }, - { - "epoch": 0.3872461285487461, - "grad_norm": 615.1976318359375, - "learning_rate": 3.873286340877975e-05, - "loss": 58.7473, - "step": 95850 - }, - { - "epoch": 0.38728652981411377, - "grad_norm": 780.4457397460938, - "learning_rate": 3.8729946425996345e-05, - "loss": 72.4027, - "step": 95860 - }, - { - "epoch": 0.3873269310794814, - "grad_norm": 345.5774841308594, - "learning_rate": 3.8727029175540554e-05, - "loss": 61.2218, - "step": 95870 - }, - { - "epoch": 0.38736733234484905, - "grad_norm": 546.84228515625, - "learning_rate": 3.872411165746927e-05, - "loss": 47.3672, - "step": 95880 - }, - { - "epoch": 0.3874077336102167, - "grad_norm": 1022.2073364257812, - "learning_rate": 3.872119387183936e-05, - "loss": 74.9494, - "step": 95890 - }, - { - "epoch": 0.3874481348755843, - "grad_norm": 789.552978515625, - "learning_rate": 3.8718275818707715e-05, - "loss": 64.7529, - "step": 95900 - }, - { - "epoch": 0.3874885361409519, - "grad_norm": 446.9588623046875, - "learning_rate": 3.8715357498131214e-05, - "loss": 66.2357, - "step": 95910 - }, - { - "epoch": 0.38752893740631955, - "grad_norm": 432.23651123046875, - "learning_rate": 3.871243891016676e-05, - "loss": 65.1847, - "step": 95920 - }, - { - "epoch": 0.3875693386716872, - "grad_norm": 382.0699462890625, - "learning_rate": 3.870952005487125e-05, - "loss": 46.396, - "step": 95930 - }, - { - "epoch": 0.38760973993705483, - "grad_norm": 553.678955078125, - "learning_rate": 3.870660093230159e-05, - "loss": 59.2287, - "step": 95940 - }, - { - "epoch": 0.38765014120242247, - "grad_norm": 397.8702392578125, - "learning_rate": 3.870368154251469e-05, - "loss": 57.8399, - "step": 95950 - }, - { - "epoch": 0.3876905424677901, - "grad_norm": 776.6229248046875, - "learning_rate": 3.870076188556746e-05, - "loss": 49.7638, - "step": 95960 - }, - { - "epoch": 0.3877309437331577, - "grad_norm": 940.7283325195312, - "learning_rate": 3.869784196151682e-05, - "loss": 69.5812, - "step": 95970 - }, - { - "epoch": 0.38777134499852534, - "grad_norm": 1250.880859375, - "learning_rate": 3.869492177041971e-05, - "loss": 86.8493, - "step": 95980 - }, - { - "epoch": 0.387811746263893, - "grad_norm": 718.7807006835938, - "learning_rate": 3.8692001312333036e-05, - "loss": 64.5188, - "step": 95990 - }, - { - "epoch": 0.3878521475292606, - "grad_norm": 1685.44921875, - "learning_rate": 3.868908058731376e-05, - "loss": 81.74, - "step": 96000 - }, - { - "epoch": 0.38789254879462826, - "grad_norm": 1482.5496826171875, - "learning_rate": 3.8686159595418805e-05, - "loss": 85.64, - "step": 96010 - }, - { - "epoch": 0.3879329500599959, - "grad_norm": 494.4794006347656, - "learning_rate": 3.868323833670512e-05, - "loss": 75.5731, - "step": 96020 - }, - { - "epoch": 0.38797335132536354, - "grad_norm": 1048.5169677734375, - "learning_rate": 3.868031681122966e-05, - "loss": 100.7712, - "step": 96030 - }, - { - "epoch": 0.3880137525907311, - "grad_norm": 431.1014099121094, - "learning_rate": 3.867739501904938e-05, - "loss": 41.6055, - "step": 96040 - }, - { - "epoch": 0.38805415385609876, - "grad_norm": 670.1493530273438, - "learning_rate": 3.867447296022124e-05, - "loss": 55.9565, - "step": 96050 - }, - { - "epoch": 0.3880945551214664, - "grad_norm": 826.1647338867188, - "learning_rate": 3.8671550634802216e-05, - "loss": 64.8045, - "step": 96060 - }, - { - "epoch": 0.38813495638683404, - "grad_norm": 408.30645751953125, - "learning_rate": 3.866862804284928e-05, - "loss": 48.8189, - "step": 96070 - }, - { - "epoch": 0.3881753576522017, - "grad_norm": 523.0665283203125, - "learning_rate": 3.8665705184419386e-05, - "loss": 39.3677, - "step": 96080 - }, - { - "epoch": 0.3882157589175693, - "grad_norm": 1171.9281005859375, - "learning_rate": 3.8662782059569546e-05, - "loss": 76.4366, - "step": 96090 - }, - { - "epoch": 0.3882561601829369, - "grad_norm": 688.5985717773438, - "learning_rate": 3.865985866835673e-05, - "loss": 54.2586, - "step": 96100 - }, - { - "epoch": 0.38829656144830454, - "grad_norm": 703.115234375, - "learning_rate": 3.865693501083794e-05, - "loss": 41.2508, - "step": 96110 - }, - { - "epoch": 0.3883369627136722, - "grad_norm": 764.5994262695312, - "learning_rate": 3.865401108707017e-05, - "loss": 70.6168, - "step": 96120 - }, - { - "epoch": 0.3883773639790398, - "grad_norm": 653.46240234375, - "learning_rate": 3.8651086897110424e-05, - "loss": 95.4823, - "step": 96130 - }, - { - "epoch": 0.38841776524440746, - "grad_norm": 1796.30029296875, - "learning_rate": 3.864816244101571e-05, - "loss": 83.386, - "step": 96140 - }, - { - "epoch": 0.3884581665097751, - "grad_norm": 552.94677734375, - "learning_rate": 3.8645237718843044e-05, - "loss": 56.03, - "step": 96150 - }, - { - "epoch": 0.38849856777514274, - "grad_norm": 776.851806640625, - "learning_rate": 3.864231273064944e-05, - "loss": 82.2406, - "step": 96160 - }, - { - "epoch": 0.38853896904051033, - "grad_norm": 465.9934997558594, - "learning_rate": 3.8639387476491926e-05, - "loss": 68.4747, - "step": 96170 - }, - { - "epoch": 0.38857937030587797, - "grad_norm": 2382.02685546875, - "learning_rate": 3.863646195642754e-05, - "loss": 96.2178, - "step": 96180 - }, - { - "epoch": 0.3886197715712456, - "grad_norm": 781.694580078125, - "learning_rate": 3.8633536170513296e-05, - "loss": 69.0198, - "step": 96190 - }, - { - "epoch": 0.38866017283661325, - "grad_norm": 822.1322631835938, - "learning_rate": 3.8630610118806254e-05, - "loss": 56.8671, - "step": 96200 - }, - { - "epoch": 0.3887005741019809, - "grad_norm": 518.6736450195312, - "learning_rate": 3.862768380136345e-05, - "loss": 43.1214, - "step": 96210 - }, - { - "epoch": 0.38874097536734853, - "grad_norm": 1137.239013671875, - "learning_rate": 3.862475721824193e-05, - "loss": 77.5319, - "step": 96220 - }, - { - "epoch": 0.3887813766327161, - "grad_norm": 113.21827697753906, - "learning_rate": 3.862183036949875e-05, - "loss": 64.3933, - "step": 96230 - }, - { - "epoch": 0.38882177789808375, - "grad_norm": 1088.9544677734375, - "learning_rate": 3.861890325519098e-05, - "loss": 69.0374, - "step": 96240 - }, - { - "epoch": 0.3888621791634514, - "grad_norm": 1370.9381103515625, - "learning_rate": 3.861597587537568e-05, - "loss": 59.8645, - "step": 96250 - }, - { - "epoch": 0.38890258042881903, - "grad_norm": 523.1607055664062, - "learning_rate": 3.861304823010991e-05, - "loss": 71.4097, - "step": 96260 - }, - { - "epoch": 0.3889429816941867, - "grad_norm": 959.0990600585938, - "learning_rate": 3.861012031945077e-05, - "loss": 42.5465, - "step": 96270 - }, - { - "epoch": 0.3889833829595543, - "grad_norm": 660.4682006835938, - "learning_rate": 3.8607192143455326e-05, - "loss": 55.936, - "step": 96280 - }, - { - "epoch": 0.3890237842249219, - "grad_norm": 748.14990234375, - "learning_rate": 3.860426370218067e-05, - "loss": 43.6322, - "step": 96290 - }, - { - "epoch": 0.38906418549028954, - "grad_norm": 584.9511108398438, - "learning_rate": 3.860133499568387e-05, - "loss": 70.2999, - "step": 96300 - }, - { - "epoch": 0.3891045867556572, - "grad_norm": 1015.5634765625, - "learning_rate": 3.859840602402206e-05, - "loss": 109.6341, - "step": 96310 - }, - { - "epoch": 0.3891449880210248, - "grad_norm": 1177.64697265625, - "learning_rate": 3.859547678725231e-05, - "loss": 63.7178, - "step": 96320 - }, - { - "epoch": 0.38918538928639246, - "grad_norm": 480.65618896484375, - "learning_rate": 3.859254728543175e-05, - "loss": 62.31, - "step": 96330 - }, - { - "epoch": 0.3892257905517601, - "grad_norm": 1283.1507568359375, - "learning_rate": 3.8589617518617485e-05, - "loss": 61.4974, - "step": 96340 - }, - { - "epoch": 0.38926619181712774, - "grad_norm": 1167.147216796875, - "learning_rate": 3.858668748686662e-05, - "loss": 48.8035, - "step": 96350 - }, - { - "epoch": 0.3893065930824953, - "grad_norm": 793.9154052734375, - "learning_rate": 3.858375719023629e-05, - "loss": 68.4192, - "step": 96360 - }, - { - "epoch": 0.38934699434786296, - "grad_norm": 838.4378051757812, - "learning_rate": 3.8580826628783625e-05, - "loss": 86.5221, - "step": 96370 - }, - { - "epoch": 0.3893873956132306, - "grad_norm": 797.8261108398438, - "learning_rate": 3.857789580256575e-05, - "loss": 63.2042, - "step": 96380 - }, - { - "epoch": 0.38942779687859824, - "grad_norm": 601.6024169921875, - "learning_rate": 3.857496471163981e-05, - "loss": 52.494, - "step": 96390 - }, - { - "epoch": 0.3894681981439659, - "grad_norm": 693.2849731445312, - "learning_rate": 3.8572033356062943e-05, - "loss": 88.3605, - "step": 96400 - }, - { - "epoch": 0.3895085994093335, - "grad_norm": 1151.2178955078125, - "learning_rate": 3.8569101735892296e-05, - "loss": 47.6969, - "step": 96410 - }, - { - "epoch": 0.3895490006747011, - "grad_norm": 534.7440185546875, - "learning_rate": 3.856616985118502e-05, - "loss": 56.3229, - "step": 96420 - }, - { - "epoch": 0.38958940194006875, - "grad_norm": 883.9851684570312, - "learning_rate": 3.8563237701998286e-05, - "loss": 44.7732, - "step": 96430 - }, - { - "epoch": 0.3896298032054364, - "grad_norm": 903.25341796875, - "learning_rate": 3.856030528838925e-05, - "loss": 49.8167, - "step": 96440 - }, - { - "epoch": 0.389670204470804, - "grad_norm": 1717.411376953125, - "learning_rate": 3.8557372610415074e-05, - "loss": 56.202, - "step": 96450 - }, - { - "epoch": 0.38971060573617167, - "grad_norm": 1218.4908447265625, - "learning_rate": 3.8554439668132946e-05, - "loss": 48.3608, - "step": 96460 - }, - { - "epoch": 0.3897510070015393, - "grad_norm": 884.7052001953125, - "learning_rate": 3.855150646160003e-05, - "loss": 46.3305, - "step": 96470 - }, - { - "epoch": 0.38979140826690695, - "grad_norm": 360.12017822265625, - "learning_rate": 3.854857299087353e-05, - "loss": 37.3111, - "step": 96480 - }, - { - "epoch": 0.38983180953227453, - "grad_norm": 1448.187744140625, - "learning_rate": 3.8545639256010625e-05, - "loss": 71.6807, - "step": 96490 - }, - { - "epoch": 0.38987221079764217, - "grad_norm": 935.8035278320312, - "learning_rate": 3.85427052570685e-05, - "loss": 60.7646, - "step": 96500 - }, - { - "epoch": 0.3899126120630098, - "grad_norm": 805.13037109375, - "learning_rate": 3.853977099410436e-05, - "loss": 46.3225, - "step": 96510 - }, - { - "epoch": 0.38995301332837745, - "grad_norm": 562.0592041015625, - "learning_rate": 3.853683646717543e-05, - "loss": 62.2224, - "step": 96520 - }, - { - "epoch": 0.3899934145937451, - "grad_norm": 1327.85498046875, - "learning_rate": 3.853390167633889e-05, - "loss": 66.2915, - "step": 96530 - }, - { - "epoch": 0.39003381585911273, - "grad_norm": 1803.8189697265625, - "learning_rate": 3.8530966621651976e-05, - "loss": 89.2004, - "step": 96540 - }, - { - "epoch": 0.3900742171244803, - "grad_norm": 363.34783935546875, - "learning_rate": 3.8528031303171895e-05, - "loss": 38.5132, - "step": 96550 - }, - { - "epoch": 0.39011461838984796, - "grad_norm": 1082.673095703125, - "learning_rate": 3.852509572095588e-05, - "loss": 58.43, - "step": 96560 - }, - { - "epoch": 0.3901550196552156, - "grad_norm": 1270.4439697265625, - "learning_rate": 3.852215987506117e-05, - "loss": 55.3348, - "step": 96570 - }, - { - "epoch": 0.39019542092058324, - "grad_norm": 478.66815185546875, - "learning_rate": 3.851922376554499e-05, - "loss": 59.5862, - "step": 96580 - }, - { - "epoch": 0.3902358221859509, - "grad_norm": 772.7987670898438, - "learning_rate": 3.851628739246457e-05, - "loss": 53.7853, - "step": 96590 - }, - { - "epoch": 0.3902762234513185, - "grad_norm": 353.50360107421875, - "learning_rate": 3.851335075587718e-05, - "loss": 68.1417, - "step": 96600 - }, - { - "epoch": 0.3903166247166861, - "grad_norm": 3968.27587890625, - "learning_rate": 3.8510413855840056e-05, - "loss": 92.6137, - "step": 96610 - }, - { - "epoch": 0.39035702598205374, - "grad_norm": 624.1312255859375, - "learning_rate": 3.850747669241046e-05, - "loss": 74.6902, - "step": 96620 - }, - { - "epoch": 0.3903974272474214, - "grad_norm": 1058.5302734375, - "learning_rate": 3.850453926564565e-05, - "loss": 67.8454, - "step": 96630 - }, - { - "epoch": 0.390437828512789, - "grad_norm": 1056.72021484375, - "learning_rate": 3.85016015756029e-05, - "loss": 88.1807, - "step": 96640 - }, - { - "epoch": 0.39047822977815666, - "grad_norm": 1824.6705322265625, - "learning_rate": 3.849866362233947e-05, - "loss": 71.9311, - "step": 96650 - }, - { - "epoch": 0.3905186310435243, - "grad_norm": 608.2755126953125, - "learning_rate": 3.849572540591264e-05, - "loss": 45.0875, - "step": 96660 - }, - { - "epoch": 0.39055903230889194, - "grad_norm": 665.3267822265625, - "learning_rate": 3.84927869263797e-05, - "loss": 67.2472, - "step": 96670 - }, - { - "epoch": 0.3905994335742595, - "grad_norm": 752.25927734375, - "learning_rate": 3.848984818379793e-05, - "loss": 108.7115, - "step": 96680 - }, - { - "epoch": 0.39063983483962716, - "grad_norm": 393.086181640625, - "learning_rate": 3.848690917822463e-05, - "loss": 61.483, - "step": 96690 - }, - { - "epoch": 0.3906802361049948, - "grad_norm": 1095.4296875, - "learning_rate": 3.8483969909717087e-05, - "loss": 57.5574, - "step": 96700 - }, - { - "epoch": 0.39072063737036244, - "grad_norm": 841.97705078125, - "learning_rate": 3.8481030378332614e-05, - "loss": 68.264, - "step": 96710 - }, - { - "epoch": 0.3907610386357301, - "grad_norm": 410.4032897949219, - "learning_rate": 3.84780905841285e-05, - "loss": 69.9679, - "step": 96720 - }, - { - "epoch": 0.3908014399010977, - "grad_norm": 513.5309448242188, - "learning_rate": 3.8475150527162085e-05, - "loss": 80.5341, - "step": 96730 - }, - { - "epoch": 0.3908418411664653, - "grad_norm": 451.75445556640625, - "learning_rate": 3.847221020749067e-05, - "loss": 45.8152, - "step": 96740 - }, - { - "epoch": 0.39088224243183295, - "grad_norm": 1032.3883056640625, - "learning_rate": 3.8469269625171576e-05, - "loss": 84.8716, - "step": 96750 - }, - { - "epoch": 0.3909226436972006, - "grad_norm": 472.958251953125, - "learning_rate": 3.846632878026214e-05, - "loss": 49.6663, - "step": 96760 - }, - { - "epoch": 0.39096304496256823, - "grad_norm": 969.470703125, - "learning_rate": 3.8463387672819696e-05, - "loss": 63.3029, - "step": 96770 - }, - { - "epoch": 0.39100344622793587, - "grad_norm": 1277.8431396484375, - "learning_rate": 3.846044630290158e-05, - "loss": 56.5187, - "step": 96780 - }, - { - "epoch": 0.3910438474933035, - "grad_norm": 364.2802734375, - "learning_rate": 3.845750467056511e-05, - "loss": 59.6623, - "step": 96790 - }, - { - "epoch": 0.39108424875867115, - "grad_norm": 982.3886108398438, - "learning_rate": 3.8454562775867684e-05, - "loss": 85.9636, - "step": 96800 - }, - { - "epoch": 0.39112465002403873, - "grad_norm": 416.16619873046875, - "learning_rate": 3.8451620618866616e-05, - "loss": 84.5701, - "step": 96810 - }, - { - "epoch": 0.3911650512894064, - "grad_norm": 627.9135131835938, - "learning_rate": 3.844867819961928e-05, - "loss": 44.0341, - "step": 96820 - }, - { - "epoch": 0.391205452554774, - "grad_norm": 1361.487060546875, - "learning_rate": 3.8445735518183043e-05, - "loss": 48.6817, - "step": 96830 - }, - { - "epoch": 0.39124585382014165, - "grad_norm": 1403.5098876953125, - "learning_rate": 3.8442792574615275e-05, - "loss": 49.1378, - "step": 96840 - }, - { - "epoch": 0.3912862550855093, - "grad_norm": 1468.5533447265625, - "learning_rate": 3.843984936897334e-05, - "loss": 71.1556, - "step": 96850 - }, - { - "epoch": 0.39132665635087693, - "grad_norm": 2194.382080078125, - "learning_rate": 3.843690590131462e-05, - "loss": 89.6682, - "step": 96860 - }, - { - "epoch": 0.3913670576162445, - "grad_norm": 899.1420288085938, - "learning_rate": 3.84339621716965e-05, - "loss": 59.679, - "step": 96870 - }, - { - "epoch": 0.39140745888161216, - "grad_norm": 726.7706298828125, - "learning_rate": 3.843101818017637e-05, - "loss": 81.8773, - "step": 96880 - }, - { - "epoch": 0.3914478601469798, - "grad_norm": 441.8895568847656, - "learning_rate": 3.8428073926811625e-05, - "loss": 87.1567, - "step": 96890 - }, - { - "epoch": 0.39148826141234744, - "grad_norm": 788.19921875, - "learning_rate": 3.842512941165968e-05, - "loss": 81.7942, - "step": 96900 - }, - { - "epoch": 0.3915286626777151, - "grad_norm": 0.0, - "learning_rate": 3.842218463477791e-05, - "loss": 45.6026, - "step": 96910 - }, - { - "epoch": 0.3915690639430827, - "grad_norm": 864.9554443359375, - "learning_rate": 3.841923959622375e-05, - "loss": 49.6541, - "step": 96920 - }, - { - "epoch": 0.3916094652084503, - "grad_norm": 903.1978149414062, - "learning_rate": 3.84162942960546e-05, - "loss": 76.4913, - "step": 96930 - }, - { - "epoch": 0.39164986647381794, - "grad_norm": 848.7103271484375, - "learning_rate": 3.841334873432789e-05, - "loss": 67.0842, - "step": 96940 - }, - { - "epoch": 0.3916902677391856, - "grad_norm": 898.5715942382812, - "learning_rate": 3.841040291110103e-05, - "loss": 40.5335, - "step": 96950 - }, - { - "epoch": 0.3917306690045532, - "grad_norm": 422.8970031738281, - "learning_rate": 3.840745682643147e-05, - "loss": 62.1978, - "step": 96960 - }, - { - "epoch": 0.39177107026992086, - "grad_norm": 404.5113220214844, - "learning_rate": 3.840451048037663e-05, - "loss": 56.2681, - "step": 96970 - }, - { - "epoch": 0.3918114715352885, - "grad_norm": 1311.5926513671875, - "learning_rate": 3.8401563872993966e-05, - "loss": 63.1407, - "step": 96980 - }, - { - "epoch": 0.39185187280065614, - "grad_norm": 477.2160339355469, - "learning_rate": 3.839861700434091e-05, - "loss": 51.8024, - "step": 96990 - }, - { - "epoch": 0.3918922740660237, - "grad_norm": 835.2833862304688, - "learning_rate": 3.8395669874474915e-05, - "loss": 48.2391, - "step": 97000 - }, - { - "epoch": 0.39193267533139137, - "grad_norm": 906.5626831054688, - "learning_rate": 3.839272248345344e-05, - "loss": 61.7122, - "step": 97010 - }, - { - "epoch": 0.391973076596759, - "grad_norm": 321.434326171875, - "learning_rate": 3.838977483133395e-05, - "loss": 66.5921, - "step": 97020 - }, - { - "epoch": 0.39201347786212665, - "grad_norm": 408.02197265625, - "learning_rate": 3.838682691817391e-05, - "loss": 47.1528, - "step": 97030 - }, - { - "epoch": 0.3920538791274943, - "grad_norm": 953.2274780273438, - "learning_rate": 3.8383878744030776e-05, - "loss": 47.6821, - "step": 97040 - }, - { - "epoch": 0.3920942803928619, - "grad_norm": 541.6514282226562, - "learning_rate": 3.8380930308962036e-05, - "loss": 60.5129, - "step": 97050 - }, - { - "epoch": 0.3921346816582295, - "grad_norm": 443.59527587890625, - "learning_rate": 3.837798161302518e-05, - "loss": 67.278, - "step": 97060 - }, - { - "epoch": 0.39217508292359715, - "grad_norm": 1681.9339599609375, - "learning_rate": 3.8375032656277684e-05, - "loss": 59.9378, - "step": 97070 - }, - { - "epoch": 0.3922154841889648, - "grad_norm": 696.7039184570312, - "learning_rate": 3.837208343877703e-05, - "loss": 49.9206, - "step": 97080 - }, - { - "epoch": 0.39225588545433243, - "grad_norm": 885.4489135742188, - "learning_rate": 3.8369133960580724e-05, - "loss": 69.6553, - "step": 97090 - }, - { - "epoch": 0.39229628671970007, - "grad_norm": 688.5819702148438, - "learning_rate": 3.836618422174628e-05, - "loss": 80.361, - "step": 97100 - }, - { - "epoch": 0.3923366879850677, - "grad_norm": 757.7618408203125, - "learning_rate": 3.83632342223312e-05, - "loss": 49.0418, - "step": 97110 - }, - { - "epoch": 0.39237708925043535, - "grad_norm": 435.7521057128906, - "learning_rate": 3.836028396239297e-05, - "loss": 52.3195, - "step": 97120 - }, - { - "epoch": 0.39241749051580294, - "grad_norm": 622.2557983398438, - "learning_rate": 3.8357333441989134e-05, - "loss": 69.8973, - "step": 97130 - }, - { - "epoch": 0.3924578917811706, - "grad_norm": 2009.5267333984375, - "learning_rate": 3.835438266117721e-05, - "loss": 63.6782, - "step": 97140 - }, - { - "epoch": 0.3924982930465382, - "grad_norm": 851.5496826171875, - "learning_rate": 3.835143162001472e-05, - "loss": 79.4912, - "step": 97150 - }, - { - "epoch": 0.39253869431190586, - "grad_norm": 496.26544189453125, - "learning_rate": 3.834848031855919e-05, - "loss": 53.252, - "step": 97160 - }, - { - "epoch": 0.3925790955772735, - "grad_norm": 664.895263671875, - "learning_rate": 3.8345528756868164e-05, - "loss": 51.9402, - "step": 97170 - }, - { - "epoch": 0.39261949684264114, - "grad_norm": 963.7388305664062, - "learning_rate": 3.8342576934999184e-05, - "loss": 57.3356, - "step": 97180 - }, - { - "epoch": 0.3926598981080087, - "grad_norm": 706.6784057617188, - "learning_rate": 3.83396248530098e-05, - "loss": 74.5728, - "step": 97190 - }, - { - "epoch": 0.39270029937337636, - "grad_norm": 486.8099365234375, - "learning_rate": 3.8336672510957574e-05, - "loss": 64.1127, - "step": 97200 - }, - { - "epoch": 0.392740700638744, - "grad_norm": 563.300048828125, - "learning_rate": 3.833371990890003e-05, - "loss": 53.4073, - "step": 97210 - }, - { - "epoch": 0.39278110190411164, - "grad_norm": 382.8632507324219, - "learning_rate": 3.8330767046894765e-05, - "loss": 44.1544, - "step": 97220 - }, - { - "epoch": 0.3928215031694793, - "grad_norm": 944.963134765625, - "learning_rate": 3.8327813924999326e-05, - "loss": 64.7648, - "step": 97230 - }, - { - "epoch": 0.3928619044348469, - "grad_norm": 428.9286193847656, - "learning_rate": 3.83248605432713e-05, - "loss": 57.617, - "step": 97240 - }, - { - "epoch": 0.3929023057002145, - "grad_norm": 742.8692016601562, - "learning_rate": 3.832190690176825e-05, - "loss": 44.4798, - "step": 97250 - }, - { - "epoch": 0.39294270696558214, - "grad_norm": 139.9200439453125, - "learning_rate": 3.831895300054777e-05, - "loss": 73.2557, - "step": 97260 - }, - { - "epoch": 0.3929831082309498, - "grad_norm": 2734.72802734375, - "learning_rate": 3.8315998839667445e-05, - "loss": 73.5291, - "step": 97270 - }, - { - "epoch": 0.3930235094963174, - "grad_norm": 1699.1915283203125, - "learning_rate": 3.8313044419184873e-05, - "loss": 92.5831, - "step": 97280 - }, - { - "epoch": 0.39306391076168506, - "grad_norm": 1669.2913818359375, - "learning_rate": 3.831008973915764e-05, - "loss": 54.4022, - "step": 97290 - }, - { - "epoch": 0.3931043120270527, - "grad_norm": 1141.6053466796875, - "learning_rate": 3.830713479964335e-05, - "loss": 85.7646, - "step": 97300 - }, - { - "epoch": 0.39314471329242034, - "grad_norm": 1941.885986328125, - "learning_rate": 3.8304179600699626e-05, - "loss": 74.6156, - "step": 97310 - }, - { - "epoch": 0.39318511455778793, - "grad_norm": 877.8158569335938, - "learning_rate": 3.830122414238406e-05, - "loss": 56.3947, - "step": 97320 - }, - { - "epoch": 0.39322551582315557, - "grad_norm": 600.1071166992188, - "learning_rate": 3.829826842475429e-05, - "loss": 56.1246, - "step": 97330 - }, - { - "epoch": 0.3932659170885232, - "grad_norm": 1540.666259765625, - "learning_rate": 3.8295312447867924e-05, - "loss": 58.5337, - "step": 97340 - }, - { - "epoch": 0.39330631835389085, - "grad_norm": 620.038330078125, - "learning_rate": 3.82923562117826e-05, - "loss": 40.1906, - "step": 97350 - }, - { - "epoch": 0.3933467196192585, - "grad_norm": 649.1076049804688, - "learning_rate": 3.828939971655595e-05, - "loss": 86.4342, - "step": 97360 - }, - { - "epoch": 0.39338712088462613, - "grad_norm": 1530.02001953125, - "learning_rate": 3.828644296224562e-05, - "loss": 61.6796, - "step": 97370 - }, - { - "epoch": 0.3934275221499937, - "grad_norm": 1663.7215576171875, - "learning_rate": 3.8283485948909224e-05, - "loss": 59.6131, - "step": 97380 - }, - { - "epoch": 0.39346792341536135, - "grad_norm": 509.4885559082031, - "learning_rate": 3.828052867660445e-05, - "loss": 60.7145, - "step": 97390 - }, - { - "epoch": 0.393508324680729, - "grad_norm": 586.28955078125, - "learning_rate": 3.827757114538892e-05, - "loss": 95.243, - "step": 97400 - }, - { - "epoch": 0.39354872594609663, - "grad_norm": 808.3604736328125, - "learning_rate": 3.82746133553203e-05, - "loss": 66.3657, - "step": 97410 - }, - { - "epoch": 0.3935891272114643, - "grad_norm": 307.7069396972656, - "learning_rate": 3.827165530645627e-05, - "loss": 69.5492, - "step": 97420 - }, - { - "epoch": 0.3936295284768319, - "grad_norm": 1287.857666015625, - "learning_rate": 3.8268696998854486e-05, - "loss": 74.0201, - "step": 97430 - }, - { - "epoch": 0.3936699297421995, - "grad_norm": 1741.658935546875, - "learning_rate": 3.826573843257262e-05, - "loss": 95.2432, - "step": 97440 - }, - { - "epoch": 0.39371033100756714, - "grad_norm": 530.0614624023438, - "learning_rate": 3.826277960766835e-05, - "loss": 76.4804, - "step": 97450 - }, - { - "epoch": 0.3937507322729348, - "grad_norm": 449.9317932128906, - "learning_rate": 3.8259820524199374e-05, - "loss": 58.2512, - "step": 97460 - }, - { - "epoch": 0.3937911335383024, - "grad_norm": 273.4659729003906, - "learning_rate": 3.8256861182223366e-05, - "loss": 50.8024, - "step": 97470 - }, - { - "epoch": 0.39383153480367006, - "grad_norm": 1517.554931640625, - "learning_rate": 3.8253901581798016e-05, - "loss": 65.5294, - "step": 97480 - }, - { - "epoch": 0.3938719360690377, - "grad_norm": 286.39971923828125, - "learning_rate": 3.825094172298104e-05, - "loss": 41.5933, - "step": 97490 - }, - { - "epoch": 0.39391233733440534, - "grad_norm": 1205.6583251953125, - "learning_rate": 3.824798160583012e-05, - "loss": 59.1793, - "step": 97500 - }, - { - "epoch": 0.3939527385997729, - "grad_norm": 1654.855224609375, - "learning_rate": 3.824502123040299e-05, - "loss": 71.1765, - "step": 97510 - }, - { - "epoch": 0.39399313986514056, - "grad_norm": 636.5582275390625, - "learning_rate": 3.824206059675736e-05, - "loss": 76.6066, - "step": 97520 - }, - { - "epoch": 0.3940335411305082, - "grad_norm": 878.0634765625, - "learning_rate": 3.823909970495092e-05, - "loss": 66.1705, - "step": 97530 - }, - { - "epoch": 0.39407394239587584, - "grad_norm": 733.1356811523438, - "learning_rate": 3.8236138555041434e-05, - "loss": 86.5831, - "step": 97540 - }, - { - "epoch": 0.3941143436612435, - "grad_norm": 372.1419982910156, - "learning_rate": 3.823317714708661e-05, - "loss": 52.7755, - "step": 97550 - }, - { - "epoch": 0.3941547449266111, - "grad_norm": 978.9861450195312, - "learning_rate": 3.823021548114417e-05, - "loss": 55.6883, - "step": 97560 - }, - { - "epoch": 0.3941951461919787, - "grad_norm": 339.8622741699219, - "learning_rate": 3.822725355727188e-05, - "loss": 51.7447, - "step": 97570 - }, - { - "epoch": 0.39423554745734635, - "grad_norm": 610.7191162109375, - "learning_rate": 3.8224291375527464e-05, - "loss": 49.7207, - "step": 97580 - }, - { - "epoch": 0.394275948722714, - "grad_norm": 1472.842041015625, - "learning_rate": 3.822132893596869e-05, - "loss": 55.3662, - "step": 97590 - }, - { - "epoch": 0.3943163499880816, - "grad_norm": 558.3397827148438, - "learning_rate": 3.821836623865329e-05, - "loss": 91.5254, - "step": 97600 - }, - { - "epoch": 0.39435675125344927, - "grad_norm": 770.9660034179688, - "learning_rate": 3.821540328363905e-05, - "loss": 58.9525, - "step": 97610 - }, - { - "epoch": 0.3943971525188169, - "grad_norm": 900.81982421875, - "learning_rate": 3.821244007098371e-05, - "loss": 49.8541, - "step": 97620 - }, - { - "epoch": 0.39443755378418455, - "grad_norm": 472.5091247558594, - "learning_rate": 3.820947660074504e-05, - "loss": 70.776, - "step": 97630 - }, - { - "epoch": 0.39447795504955213, - "grad_norm": 1419.308349609375, - "learning_rate": 3.820651287298084e-05, - "loss": 62.8083, - "step": 97640 - }, - { - "epoch": 0.39451835631491977, - "grad_norm": 1251.2393798828125, - "learning_rate": 3.8203548887748865e-05, - "loss": 98.8956, - "step": 97650 - }, - { - "epoch": 0.3945587575802874, - "grad_norm": 568.4578857421875, - "learning_rate": 3.8200584645106904e-05, - "loss": 52.2117, - "step": 97660 - }, - { - "epoch": 0.39459915884565505, - "grad_norm": 1025.6453857421875, - "learning_rate": 3.819762014511275e-05, - "loss": 77.8451, - "step": 97670 - }, - { - "epoch": 0.3946395601110227, - "grad_norm": 1376.6019287109375, - "learning_rate": 3.81946553878242e-05, - "loss": 63.783, - "step": 97680 - }, - { - "epoch": 0.39467996137639033, - "grad_norm": 1392.58544921875, - "learning_rate": 3.819169037329905e-05, - "loss": 51.8016, - "step": 97690 - }, - { - "epoch": 0.3947203626417579, - "grad_norm": 737.3711547851562, - "learning_rate": 3.8188725101595094e-05, - "loss": 44.1351, - "step": 97700 - }, - { - "epoch": 0.39476076390712556, - "grad_norm": 675.7130737304688, - "learning_rate": 3.818575957277016e-05, - "loss": 70.7781, - "step": 97710 - }, - { - "epoch": 0.3948011651724932, - "grad_norm": 522.35302734375, - "learning_rate": 3.8182793786882065e-05, - "loss": 54.7861, - "step": 97720 - }, - { - "epoch": 0.39484156643786084, - "grad_norm": 1283.5611572265625, - "learning_rate": 3.817982774398861e-05, - "loss": 86.0404, - "step": 97730 - }, - { - "epoch": 0.3948819677032285, - "grad_norm": 837.8106079101562, - "learning_rate": 3.817686144414762e-05, - "loss": 72.8473, - "step": 97740 - }, - { - "epoch": 0.3949223689685961, - "grad_norm": 2973.33740234375, - "learning_rate": 3.8173894887416945e-05, - "loss": 72.2817, - "step": 97750 - }, - { - "epoch": 0.3949627702339637, - "grad_norm": 433.107421875, - "learning_rate": 3.8170928073854396e-05, - "loss": 71.6905, - "step": 97760 - }, - { - "epoch": 0.39500317149933134, - "grad_norm": 559.8467407226562, - "learning_rate": 3.816796100351783e-05, - "loss": 59.3247, - "step": 97770 - }, - { - "epoch": 0.395043572764699, - "grad_norm": 676.9853515625, - "learning_rate": 3.8164993676465074e-05, - "loss": 59.816, - "step": 97780 - }, - { - "epoch": 0.3950839740300666, - "grad_norm": 1209.71923828125, - "learning_rate": 3.816202609275401e-05, - "loss": 78.3244, - "step": 97790 - }, - { - "epoch": 0.39512437529543426, - "grad_norm": 407.7916259765625, - "learning_rate": 3.8159058252442446e-05, - "loss": 40.6203, - "step": 97800 - }, - { - "epoch": 0.3951647765608019, - "grad_norm": 0.0, - "learning_rate": 3.815609015558829e-05, - "loss": 68.3833, - "step": 97810 - }, - { - "epoch": 0.39520517782616954, - "grad_norm": 235.8328094482422, - "learning_rate": 3.815312180224937e-05, - "loss": 47.2443, - "step": 97820 - }, - { - "epoch": 0.3952455790915371, - "grad_norm": 620.359619140625, - "learning_rate": 3.8150153192483566e-05, - "loss": 53.1402, - "step": 97830 - }, - { - "epoch": 0.39528598035690476, - "grad_norm": 1514.925537109375, - "learning_rate": 3.814718432634876e-05, - "loss": 83.4849, - "step": 97840 - }, - { - "epoch": 0.3953263816222724, - "grad_norm": 195.32774353027344, - "learning_rate": 3.8144215203902834e-05, - "loss": 58.4113, - "step": 97850 - }, - { - "epoch": 0.39536678288764004, - "grad_norm": 610.07568359375, - "learning_rate": 3.814124582520365e-05, - "loss": 47.7628, - "step": 97860 - }, - { - "epoch": 0.3954071841530077, - "grad_norm": 1019.016845703125, - "learning_rate": 3.813827619030913e-05, - "loss": 66.7314, - "step": 97870 - }, - { - "epoch": 0.3954475854183753, - "grad_norm": 340.5806884765625, - "learning_rate": 3.813530629927714e-05, - "loss": 37.2535, - "step": 97880 - }, - { - "epoch": 0.3954879866837429, - "grad_norm": 705.3890991210938, - "learning_rate": 3.81323361521656e-05, - "loss": 63.5229, - "step": 97890 - }, - { - "epoch": 0.39552838794911055, - "grad_norm": 911.9359741210938, - "learning_rate": 3.81293657490324e-05, - "loss": 107.5619, - "step": 97900 - }, - { - "epoch": 0.3955687892144782, - "grad_norm": 448.5742492675781, - "learning_rate": 3.812639508993545e-05, - "loss": 67.1163, - "step": 97910 - }, - { - "epoch": 0.39560919047984583, - "grad_norm": 292.3797607421875, - "learning_rate": 3.8123424174932674e-05, - "loss": 46.8763, - "step": 97920 - }, - { - "epoch": 0.39564959174521347, - "grad_norm": 516.19287109375, - "learning_rate": 3.812045300408199e-05, - "loss": 60.135, - "step": 97930 - }, - { - "epoch": 0.3956899930105811, - "grad_norm": 1726.6441650390625, - "learning_rate": 3.811748157744132e-05, - "loss": 111.1981, - "step": 97940 - }, - { - "epoch": 0.39573039427594875, - "grad_norm": 617.226806640625, - "learning_rate": 3.8114509895068586e-05, - "loss": 40.3687, - "step": 97950 - }, - { - "epoch": 0.39577079554131633, - "grad_norm": 1564.2078857421875, - "learning_rate": 3.811153795702174e-05, - "loss": 57.8685, - "step": 97960 - }, - { - "epoch": 0.395811196806684, - "grad_norm": 543.6961669921875, - "learning_rate": 3.81085657633587e-05, - "loss": 63.6772, - "step": 97970 - }, - { - "epoch": 0.3958515980720516, - "grad_norm": 619.5729370117188, - "learning_rate": 3.810559331413743e-05, - "loss": 53.4549, - "step": 97980 - }, - { - "epoch": 0.39589199933741925, - "grad_norm": 1818.6915283203125, - "learning_rate": 3.810262060941587e-05, - "loss": 46.7142, - "step": 97990 - }, - { - "epoch": 0.3959324006027869, - "grad_norm": 423.151611328125, - "learning_rate": 3.8099647649251986e-05, - "loss": 68.5805, - "step": 98000 - }, - { - "epoch": 0.39597280186815453, - "grad_norm": 797.4796752929688, - "learning_rate": 3.809667443370372e-05, - "loss": 43.375, - "step": 98010 - }, - { - "epoch": 0.3960132031335221, - "grad_norm": 478.58349609375, - "learning_rate": 3.809370096282902e-05, - "loss": 63.259, - "step": 98020 - }, - { - "epoch": 0.39605360439888976, - "grad_norm": 477.88470458984375, - "learning_rate": 3.8090727236685906e-05, - "loss": 85.8146, - "step": 98030 - }, - { - "epoch": 0.3960940056642574, - "grad_norm": 358.82342529296875, - "learning_rate": 3.808775325533232e-05, - "loss": 46.4715, - "step": 98040 - }, - { - "epoch": 0.39613440692962504, - "grad_norm": 1070.005615234375, - "learning_rate": 3.808477901882624e-05, - "loss": 50.1758, - "step": 98050 - }, - { - "epoch": 0.3961748081949927, - "grad_norm": 568.8511352539062, - "learning_rate": 3.808180452722566e-05, - "loss": 53.4005, - "step": 98060 - }, - { - "epoch": 0.3962152094603603, - "grad_norm": 523.6063232421875, - "learning_rate": 3.8078829780588564e-05, - "loss": 65.6613, - "step": 98070 - }, - { - "epoch": 0.3962556107257279, - "grad_norm": 1423.616943359375, - "learning_rate": 3.8075854778972955e-05, - "loss": 88.5826, - "step": 98080 - }, - { - "epoch": 0.39629601199109554, - "grad_norm": 821.159912109375, - "learning_rate": 3.807287952243682e-05, - "loss": 52.51, - "step": 98090 - }, - { - "epoch": 0.3963364132564632, - "grad_norm": 1087.4752197265625, - "learning_rate": 3.8069904011038165e-05, - "loss": 62.1011, - "step": 98100 - }, - { - "epoch": 0.3963768145218308, - "grad_norm": 981.5172119140625, - "learning_rate": 3.806692824483501e-05, - "loss": 43.4204, - "step": 98110 - }, - { - "epoch": 0.39641721578719846, - "grad_norm": 1140.0291748046875, - "learning_rate": 3.806395222388536e-05, - "loss": 67.1618, - "step": 98120 - }, - { - "epoch": 0.3964576170525661, - "grad_norm": 863.1648559570312, - "learning_rate": 3.8060975948247223e-05, - "loss": 71.7122, - "step": 98130 - }, - { - "epoch": 0.39649801831793374, - "grad_norm": 631.3798217773438, - "learning_rate": 3.805799941797865e-05, - "loss": 42.8692, - "step": 98140 - }, - { - "epoch": 0.3965384195833013, - "grad_norm": 1376.2213134765625, - "learning_rate": 3.805502263313765e-05, - "loss": 55.8479, - "step": 98150 - }, - { - "epoch": 0.39657882084866897, - "grad_norm": 476.51446533203125, - "learning_rate": 3.805204559378227e-05, - "loss": 57.2946, - "step": 98160 - }, - { - "epoch": 0.3966192221140366, - "grad_norm": 941.9556884765625, - "learning_rate": 3.804906829997053e-05, - "loss": 71.0839, - "step": 98170 - }, - { - "epoch": 0.39665962337940425, - "grad_norm": 764.8499755859375, - "learning_rate": 3.804609075176049e-05, - "loss": 61.4329, - "step": 98180 - }, - { - "epoch": 0.3967000246447719, - "grad_norm": 804.7611694335938, - "learning_rate": 3.8043112949210194e-05, - "loss": 50.3516, - "step": 98190 - }, - { - "epoch": 0.3967404259101395, - "grad_norm": 757.18798828125, - "learning_rate": 3.80401348923777e-05, - "loss": 65.6766, - "step": 98200 - }, - { - "epoch": 0.3967808271755071, - "grad_norm": 802.1757202148438, - "learning_rate": 3.803715658132105e-05, - "loss": 51.4916, - "step": 98210 - }, - { - "epoch": 0.39682122844087475, - "grad_norm": 677.951904296875, - "learning_rate": 3.803417801609833e-05, - "loss": 40.4412, - "step": 98220 - }, - { - "epoch": 0.3968616297062424, - "grad_norm": 960.4522094726562, - "learning_rate": 3.803119919676761e-05, - "loss": 53.576, - "step": 98230 - }, - { - "epoch": 0.39690203097161003, - "grad_norm": 1985.3116455078125, - "learning_rate": 3.802822012338694e-05, - "loss": 62.2902, - "step": 98240 - }, - { - "epoch": 0.39694243223697767, - "grad_norm": 1060.7294921875, - "learning_rate": 3.802524079601442e-05, - "loss": 59.7074, - "step": 98250 - }, - { - "epoch": 0.3969828335023453, - "grad_norm": 926.2281494140625, - "learning_rate": 3.802226121470811e-05, - "loss": 67.3111, - "step": 98260 - }, - { - "epoch": 0.39702323476771295, - "grad_norm": 558.5460205078125, - "learning_rate": 3.8019281379526114e-05, - "loss": 52.2698, - "step": 98270 - }, - { - "epoch": 0.39706363603308054, - "grad_norm": 1760.0323486328125, - "learning_rate": 3.8016301290526534e-05, - "loss": 63.7224, - "step": 98280 - }, - { - "epoch": 0.3971040372984482, - "grad_norm": 717.9991455078125, - "learning_rate": 3.8013320947767464e-05, - "loss": 61.1942, - "step": 98290 - }, - { - "epoch": 0.3971444385638158, - "grad_norm": 302.7607421875, - "learning_rate": 3.8010340351306997e-05, - "loss": 52.8783, - "step": 98300 - }, - { - "epoch": 0.39718483982918346, - "grad_norm": 603.1386108398438, - "learning_rate": 3.800735950120324e-05, - "loss": 61.837, - "step": 98310 - }, - { - "epoch": 0.3972252410945511, - "grad_norm": 714.36474609375, - "learning_rate": 3.8004378397514315e-05, - "loss": 57.4805, - "step": 98320 - }, - { - "epoch": 0.39726564235991874, - "grad_norm": 578.4430541992188, - "learning_rate": 3.800139704029835e-05, - "loss": 44.0688, - "step": 98330 - }, - { - "epoch": 0.3973060436252863, - "grad_norm": 556.3411254882812, - "learning_rate": 3.7998415429613444e-05, - "loss": 58.5663, - "step": 98340 - }, - { - "epoch": 0.39734644489065396, - "grad_norm": 402.5372314453125, - "learning_rate": 3.7995433565517735e-05, - "loss": 47.3463, - "step": 98350 - }, - { - "epoch": 0.3973868461560216, - "grad_norm": 652.9655151367188, - "learning_rate": 3.799245144806937e-05, - "loss": 77.4669, - "step": 98360 - }, - { - "epoch": 0.39742724742138924, - "grad_norm": 351.4555358886719, - "learning_rate": 3.7989469077326466e-05, - "loss": 55.3536, - "step": 98370 - }, - { - "epoch": 0.3974676486867569, - "grad_norm": 530.9819946289062, - "learning_rate": 3.798648645334718e-05, - "loss": 48.7565, - "step": 98380 - }, - { - "epoch": 0.3975080499521245, - "grad_norm": 997.8034057617188, - "learning_rate": 3.798350357618965e-05, - "loss": 69.4074, - "step": 98390 - }, - { - "epoch": 0.3975484512174921, - "grad_norm": 1051.143310546875, - "learning_rate": 3.798052044591204e-05, - "loss": 68.8657, - "step": 98400 - }, - { - "epoch": 0.39758885248285974, - "grad_norm": 763.1336059570312, - "learning_rate": 3.79775370625725e-05, - "loss": 65.6765, - "step": 98410 - }, - { - "epoch": 0.3976292537482274, - "grad_norm": 727.5631713867188, - "learning_rate": 3.797455342622919e-05, - "loss": 55.0416, - "step": 98420 - }, - { - "epoch": 0.397669655013595, - "grad_norm": 747.4799194335938, - "learning_rate": 3.797156953694028e-05, - "loss": 62.9093, - "step": 98430 - }, - { - "epoch": 0.39771005627896266, - "grad_norm": 568.0397338867188, - "learning_rate": 3.796858539476394e-05, - "loss": 57.0158, - "step": 98440 - }, - { - "epoch": 0.3977504575443303, - "grad_norm": 1187.3994140625, - "learning_rate": 3.7965600999758356e-05, - "loss": 75.5707, - "step": 98450 - }, - { - "epoch": 0.39779085880969794, - "grad_norm": 1377.0982666015625, - "learning_rate": 3.796261635198171e-05, - "loss": 110.1248, - "step": 98460 - }, - { - "epoch": 0.39783126007506553, - "grad_norm": 541.7080078125, - "learning_rate": 3.7959631451492176e-05, - "loss": 113.9989, - "step": 98470 - }, - { - "epoch": 0.39787166134043317, - "grad_norm": 1125.3931884765625, - "learning_rate": 3.7956646298347956e-05, - "loss": 53.4357, - "step": 98480 - }, - { - "epoch": 0.3979120626058008, - "grad_norm": 1457.5439453125, - "learning_rate": 3.795366089260725e-05, - "loss": 47.1348, - "step": 98490 - }, - { - "epoch": 0.39795246387116845, - "grad_norm": 639.0778198242188, - "learning_rate": 3.795067523432826e-05, - "loss": 53.8182, - "step": 98500 - }, - { - "epoch": 0.3979928651365361, - "grad_norm": 473.65032958984375, - "learning_rate": 3.794768932356918e-05, - "loss": 63.5976, - "step": 98510 - }, - { - "epoch": 0.39803326640190373, - "grad_norm": 1148.6119384765625, - "learning_rate": 3.7944703160388234e-05, - "loss": 43.6071, - "step": 98520 - }, - { - "epoch": 0.3980736676672713, - "grad_norm": 2093.45849609375, - "learning_rate": 3.794171674484363e-05, - "loss": 60.3479, - "step": 98530 - }, - { - "epoch": 0.39811406893263895, - "grad_norm": 1044.666748046875, - "learning_rate": 3.793873007699361e-05, - "loss": 88.364, - "step": 98540 - }, - { - "epoch": 0.3981544701980066, - "grad_norm": 597.9827270507812, - "learning_rate": 3.7935743156896375e-05, - "loss": 54.5251, - "step": 98550 - }, - { - "epoch": 0.39819487146337423, - "grad_norm": 353.5838928222656, - "learning_rate": 3.793275598461017e-05, - "loss": 66.4365, - "step": 98560 - }, - { - "epoch": 0.3982352727287419, - "grad_norm": 399.634521484375, - "learning_rate": 3.792976856019323e-05, - "loss": 31.9752, - "step": 98570 - }, - { - "epoch": 0.3982756739941095, - "grad_norm": 915.1165771484375, - "learning_rate": 3.792678088370379e-05, - "loss": 70.9123, - "step": 98580 - }, - { - "epoch": 0.39831607525947715, - "grad_norm": 863.623046875, - "learning_rate": 3.792379295520011e-05, - "loss": 69.3178, - "step": 98590 - }, - { - "epoch": 0.39835647652484474, - "grad_norm": 773.9990844726562, - "learning_rate": 3.792080477474043e-05, - "loss": 75.029, - "step": 98600 - }, - { - "epoch": 0.3983968777902124, - "grad_norm": 889.9473876953125, - "learning_rate": 3.7917816342383005e-05, - "loss": 80.714, - "step": 98610 - }, - { - "epoch": 0.39843727905558, - "grad_norm": 551.6591186523438, - "learning_rate": 3.7914827658186103e-05, - "loss": 59.7947, - "step": 98620 - }, - { - "epoch": 0.39847768032094766, - "grad_norm": 457.0266418457031, - "learning_rate": 3.791183872220798e-05, - "loss": 64.1721, - "step": 98630 - }, - { - "epoch": 0.3985180815863153, - "grad_norm": 778.4760131835938, - "learning_rate": 3.790884953450692e-05, - "loss": 53.5698, - "step": 98640 - }, - { - "epoch": 0.39855848285168294, - "grad_norm": 628.935791015625, - "learning_rate": 3.790586009514119e-05, - "loss": 45.5346, - "step": 98650 - }, - { - "epoch": 0.3985988841170505, - "grad_norm": 390.69134521484375, - "learning_rate": 3.790287040416908e-05, - "loss": 80.0498, - "step": 98660 - }, - { - "epoch": 0.39863928538241816, - "grad_norm": 585.2010498046875, - "learning_rate": 3.7899880461648865e-05, - "loss": 45.9213, - "step": 98670 - }, - { - "epoch": 0.3986796866477858, - "grad_norm": 537.3295288085938, - "learning_rate": 3.789689026763883e-05, - "loss": 52.588, - "step": 98680 - }, - { - "epoch": 0.39872008791315344, - "grad_norm": 1386.0994873046875, - "learning_rate": 3.789389982219729e-05, - "loss": 69.4656, - "step": 98690 - }, - { - "epoch": 0.3987604891785211, - "grad_norm": 471.1151123046875, - "learning_rate": 3.789090912538253e-05, - "loss": 52.704, - "step": 98700 - }, - { - "epoch": 0.3988008904438887, - "grad_norm": 407.30584716796875, - "learning_rate": 3.7887918177252855e-05, - "loss": 86.2156, - "step": 98710 - }, - { - "epoch": 0.3988412917092563, - "grad_norm": 563.2343139648438, - "learning_rate": 3.788492697786658e-05, - "loss": 79.5212, - "step": 98720 - }, - { - "epoch": 0.39888169297462395, - "grad_norm": 3062.3173828125, - "learning_rate": 3.788193552728204e-05, - "loss": 67.4379, - "step": 98730 - }, - { - "epoch": 0.3989220942399916, - "grad_norm": 581.1060791015625, - "learning_rate": 3.7878943825557516e-05, - "loss": 53.4574, - "step": 98740 - }, - { - "epoch": 0.3989624955053592, - "grad_norm": 495.8964538574219, - "learning_rate": 3.787595187275136e-05, - "loss": 58.9977, - "step": 98750 - }, - { - "epoch": 0.39900289677072687, - "grad_norm": 576.1397705078125, - "learning_rate": 3.7872959668921884e-05, - "loss": 87.9499, - "step": 98760 - }, - { - "epoch": 0.3990432980360945, - "grad_norm": 429.25439453125, - "learning_rate": 3.786996721412745e-05, - "loss": 42.8542, - "step": 98770 - }, - { - "epoch": 0.39908369930146215, - "grad_norm": 403.6635437011719, - "learning_rate": 3.7866974508426354e-05, - "loss": 43.5901, - "step": 98780 - }, - { - "epoch": 0.39912410056682973, - "grad_norm": 498.6553039550781, - "learning_rate": 3.786398155187698e-05, - "loss": 81.6215, - "step": 98790 - }, - { - "epoch": 0.39916450183219737, - "grad_norm": 779.7023315429688, - "learning_rate": 3.786098834453766e-05, - "loss": 47.5752, - "step": 98800 - }, - { - "epoch": 0.399204903097565, - "grad_norm": 615.2477416992188, - "learning_rate": 3.7857994886466755e-05, - "loss": 53.3112, - "step": 98810 - }, - { - "epoch": 0.39924530436293265, - "grad_norm": 2094.180908203125, - "learning_rate": 3.7855001177722615e-05, - "loss": 62.0935, - "step": 98820 - }, - { - "epoch": 0.3992857056283003, - "grad_norm": 477.36724853515625, - "learning_rate": 3.785200721836361e-05, - "loss": 75.4263, - "step": 98830 - }, - { - "epoch": 0.39932610689366793, - "grad_norm": 2158.408447265625, - "learning_rate": 3.7849013008448115e-05, - "loss": 99.4542, - "step": 98840 - }, - { - "epoch": 0.3993665081590355, - "grad_norm": 1072.518310546875, - "learning_rate": 3.784601854803449e-05, - "loss": 77.213, - "step": 98850 - }, - { - "epoch": 0.39940690942440316, - "grad_norm": 796.7136840820312, - "learning_rate": 3.784302383718113e-05, - "loss": 62.9405, - "step": 98860 - }, - { - "epoch": 0.3994473106897708, - "grad_norm": 716.7407836914062, - "learning_rate": 3.784002887594639e-05, - "loss": 66.7786, - "step": 98870 - }, - { - "epoch": 0.39948771195513844, - "grad_norm": 526.4265747070312, - "learning_rate": 3.783703366438868e-05, - "loss": 47.3223, - "step": 98880 - }, - { - "epoch": 0.3995281132205061, - "grad_norm": 420.97637939453125, - "learning_rate": 3.783403820256639e-05, - "loss": 43.0085, - "step": 98890 - }, - { - "epoch": 0.3995685144858737, - "grad_norm": 931.0668334960938, - "learning_rate": 3.783104249053793e-05, - "loss": 84.5414, - "step": 98900 - }, - { - "epoch": 0.39960891575124136, - "grad_norm": 729.2132568359375, - "learning_rate": 3.782804652836168e-05, - "loss": 59.9172, - "step": 98910 - }, - { - "epoch": 0.39964931701660894, - "grad_norm": 518.9829711914062, - "learning_rate": 3.782505031609607e-05, - "loss": 64.4889, - "step": 98920 - }, - { - "epoch": 0.3996897182819766, - "grad_norm": 756.1053466796875, - "learning_rate": 3.782205385379948e-05, - "loss": 73.3509, - "step": 98930 - }, - { - "epoch": 0.3997301195473442, - "grad_norm": 1349.8555908203125, - "learning_rate": 3.781905714153037e-05, - "loss": 70.6158, - "step": 98940 - }, - { - "epoch": 0.39977052081271186, - "grad_norm": 573.3903198242188, - "learning_rate": 3.781606017934713e-05, - "loss": 48.6632, - "step": 98950 - }, - { - "epoch": 0.3998109220780795, - "grad_norm": 510.5514221191406, - "learning_rate": 3.78130629673082e-05, - "loss": 65.3422, - "step": 98960 - }, - { - "epoch": 0.39985132334344714, - "grad_norm": 676.6386108398438, - "learning_rate": 3.781006550547202e-05, - "loss": 49.1923, - "step": 98970 - }, - { - "epoch": 0.3998917246088147, - "grad_norm": 1945.87353515625, - "learning_rate": 3.780706779389701e-05, - "loss": 109.869, - "step": 98980 - }, - { - "epoch": 0.39993212587418236, - "grad_norm": 607.2948608398438, - "learning_rate": 3.7804069832641615e-05, - "loss": 59.3958, - "step": 98990 - }, - { - "epoch": 0.39997252713955, - "grad_norm": 3488.2783203125, - "learning_rate": 3.780107162176429e-05, - "loss": 71.8975, - "step": 99000 - }, - { - "epoch": 0.40001292840491764, - "grad_norm": 869.2059326171875, - "learning_rate": 3.779807316132349e-05, - "loss": 71.5209, - "step": 99010 - }, - { - "epoch": 0.4000533296702853, - "grad_norm": 1111.3924560546875, - "learning_rate": 3.779507445137766e-05, - "loss": 72.5968, - "step": 99020 - }, - { - "epoch": 0.4000937309356529, - "grad_norm": 590.8203125, - "learning_rate": 3.779207549198527e-05, - "loss": 68.4518, - "step": 99030 - }, - { - "epoch": 0.4001341322010205, - "grad_norm": 1010.8024291992188, - "learning_rate": 3.778907628320477e-05, - "loss": 55.1932, - "step": 99040 - }, - { - "epoch": 0.40017453346638815, - "grad_norm": 1730.03271484375, - "learning_rate": 3.778607682509465e-05, - "loss": 72.5133, - "step": 99050 - }, - { - "epoch": 0.4002149347317558, - "grad_norm": 706.1375122070312, - "learning_rate": 3.7783077117713386e-05, - "loss": 51.3278, - "step": 99060 - }, - { - "epoch": 0.40025533599712343, - "grad_norm": 491.18145751953125, - "learning_rate": 3.778007716111945e-05, - "loss": 57.2659, - "step": 99070 - }, - { - "epoch": 0.40029573726249107, - "grad_norm": 967.4299926757812, - "learning_rate": 3.777707695537133e-05, - "loss": 54.0169, - "step": 99080 - }, - { - "epoch": 0.4003361385278587, - "grad_norm": 586.3837280273438, - "learning_rate": 3.777407650052751e-05, - "loss": 72.3556, - "step": 99090 - }, - { - "epoch": 0.40037653979322635, - "grad_norm": 618.3201904296875, - "learning_rate": 3.77710757966465e-05, - "loss": 57.029, - "step": 99100 - }, - { - "epoch": 0.40041694105859393, - "grad_norm": 753.272705078125, - "learning_rate": 3.7768074843786796e-05, - "loss": 61.8438, - "step": 99110 - }, - { - "epoch": 0.4004573423239616, - "grad_norm": 249.07408142089844, - "learning_rate": 3.776507364200689e-05, - "loss": 66.9332, - "step": 99120 - }, - { - "epoch": 0.4004977435893292, - "grad_norm": 371.7924499511719, - "learning_rate": 3.77620721913653e-05, - "loss": 57.5363, - "step": 99130 - }, - { - "epoch": 0.40053814485469685, - "grad_norm": 1456.956787109375, - "learning_rate": 3.7759070491920544e-05, - "loss": 66.1014, - "step": 99140 - }, - { - "epoch": 0.4005785461200645, - "grad_norm": 700.562744140625, - "learning_rate": 3.775606854373115e-05, - "loss": 60.8896, - "step": 99150 - }, - { - "epoch": 0.40061894738543213, - "grad_norm": 712.740966796875, - "learning_rate": 3.775306634685562e-05, - "loss": 57.9598, - "step": 99160 - }, - { - "epoch": 0.4006593486507997, - "grad_norm": 2491.177490234375, - "learning_rate": 3.7750063901352494e-05, - "loss": 66.2804, - "step": 99170 - }, - { - "epoch": 0.40069974991616736, - "grad_norm": 930.8803100585938, - "learning_rate": 3.774706120728032e-05, - "loss": 61.9584, - "step": 99180 - }, - { - "epoch": 0.400740151181535, - "grad_norm": 488.9775085449219, - "learning_rate": 3.774405826469762e-05, - "loss": 51.8706, - "step": 99190 - }, - { - "epoch": 0.40078055244690264, - "grad_norm": 1185.0855712890625, - "learning_rate": 3.7741055073662946e-05, - "loss": 59.6434, - "step": 99200 - }, - { - "epoch": 0.4008209537122703, - "grad_norm": 1065.01025390625, - "learning_rate": 3.773805163423484e-05, - "loss": 90.2759, - "step": 99210 - }, - { - "epoch": 0.4008613549776379, - "grad_norm": 1346.4959716796875, - "learning_rate": 3.773504794647187e-05, - "loss": 47.388, - "step": 99220 - }, - { - "epoch": 0.40090175624300556, - "grad_norm": 672.511962890625, - "learning_rate": 3.7732044010432564e-05, - "loss": 43.0481, - "step": 99230 - }, - { - "epoch": 0.40094215750837314, - "grad_norm": 655.6102294921875, - "learning_rate": 3.772903982617552e-05, - "loss": 57.7771, - "step": 99240 - }, - { - "epoch": 0.4009825587737408, - "grad_norm": 1324.095703125, - "learning_rate": 3.7726035393759285e-05, - "loss": 57.1472, - "step": 99250 - }, - { - "epoch": 0.4010229600391084, - "grad_norm": 1157.129150390625, - "learning_rate": 3.772303071324244e-05, - "loss": 60.1481, - "step": 99260 - }, - { - "epoch": 0.40106336130447606, - "grad_norm": 784.1769409179688, - "learning_rate": 3.772002578468356e-05, - "loss": 101.2776, - "step": 99270 - }, - { - "epoch": 0.4011037625698437, - "grad_norm": 682.7789306640625, - "learning_rate": 3.771702060814123e-05, - "loss": 86.0453, - "step": 99280 - }, - { - "epoch": 0.40114416383521134, - "grad_norm": 3423.19921875, - "learning_rate": 3.771401518367403e-05, - "loss": 74.2687, - "step": 99290 - }, - { - "epoch": 0.4011845651005789, - "grad_norm": 672.9019775390625, - "learning_rate": 3.771100951134057e-05, - "loss": 55.2397, - "step": 99300 - }, - { - "epoch": 0.40122496636594657, - "grad_norm": 642.078857421875, - "learning_rate": 3.770800359119943e-05, - "loss": 34.1067, - "step": 99310 - }, - { - "epoch": 0.4012653676313142, - "grad_norm": 986.0284423828125, - "learning_rate": 3.770499742330922e-05, - "loss": 64.7369, - "step": 99320 - }, - { - "epoch": 0.40130576889668185, - "grad_norm": 416.7942199707031, - "learning_rate": 3.770199100772853e-05, - "loss": 63.9787, - "step": 99330 - }, - { - "epoch": 0.4013461701620495, - "grad_norm": 1163.7266845703125, - "learning_rate": 3.7698984344515997e-05, - "loss": 55.3137, - "step": 99340 - }, - { - "epoch": 0.4013865714274171, - "grad_norm": 591.773681640625, - "learning_rate": 3.769597743373023e-05, - "loss": 54.0791, - "step": 99350 - }, - { - "epoch": 0.4014269726927847, - "grad_norm": 1694.82373046875, - "learning_rate": 3.769297027542985e-05, - "loss": 46.7332, - "step": 99360 - }, - { - "epoch": 0.40146737395815235, - "grad_norm": 717.4148559570312, - "learning_rate": 3.768996286967347e-05, - "loss": 55.8938, - "step": 99370 - }, - { - "epoch": 0.40150777522352, - "grad_norm": 591.4462890625, - "learning_rate": 3.768695521651973e-05, - "loss": 46.4734, - "step": 99380 - }, - { - "epoch": 0.40154817648888763, - "grad_norm": 1100.54150390625, - "learning_rate": 3.7683947316027276e-05, - "loss": 63.5928, - "step": 99390 - }, - { - "epoch": 0.40158857775425527, - "grad_norm": 421.6627197265625, - "learning_rate": 3.7680939168254733e-05, - "loss": 59.3417, - "step": 99400 - }, - { - "epoch": 0.4016289790196229, - "grad_norm": 307.5633544921875, - "learning_rate": 3.767793077326075e-05, - "loss": 94.9799, - "step": 99410 - }, - { - "epoch": 0.40166938028499055, - "grad_norm": 743.0738525390625, - "learning_rate": 3.767492213110397e-05, - "loss": 58.2898, - "step": 99420 - }, - { - "epoch": 0.40170978155035814, - "grad_norm": 724.935791015625, - "learning_rate": 3.767191324184308e-05, - "loss": 54.7535, - "step": 99430 - }, - { - "epoch": 0.4017501828157258, - "grad_norm": 1314.9210205078125, - "learning_rate": 3.7668904105536706e-05, - "loss": 76.4102, - "step": 99440 - }, - { - "epoch": 0.4017905840810934, - "grad_norm": 920.00537109375, - "learning_rate": 3.7665894722243525e-05, - "loss": 58.301, - "step": 99450 - }, - { - "epoch": 0.40183098534646106, - "grad_norm": 1673.596435546875, - "learning_rate": 3.76628850920222e-05, - "loss": 57.4322, - "step": 99460 - }, - { - "epoch": 0.4018713866118287, - "grad_norm": 826.5586547851562, - "learning_rate": 3.7659875214931426e-05, - "loss": 69.376, - "step": 99470 - }, - { - "epoch": 0.40191178787719634, - "grad_norm": 581.40283203125, - "learning_rate": 3.765686509102985e-05, - "loss": 36.2128, - "step": 99480 - }, - { - "epoch": 0.4019521891425639, - "grad_norm": 326.0263977050781, - "learning_rate": 3.765385472037618e-05, - "loss": 64.3921, - "step": 99490 - }, - { - "epoch": 0.40199259040793156, - "grad_norm": 729.6510009765625, - "learning_rate": 3.765084410302909e-05, - "loss": 63.8964, - "step": 99500 - }, - { - "epoch": 0.4020329916732992, - "grad_norm": 920.1556396484375, - "learning_rate": 3.76478332390473e-05, - "loss": 41.994, - "step": 99510 - }, - { - "epoch": 0.40207339293866684, - "grad_norm": 888.444091796875, - "learning_rate": 3.764482212848948e-05, - "loss": 70.7583, - "step": 99520 - }, - { - "epoch": 0.4021137942040345, - "grad_norm": 909.1885375976562, - "learning_rate": 3.7641810771414335e-05, - "loss": 67.3313, - "step": 99530 - }, - { - "epoch": 0.4021541954694021, - "grad_norm": 1571.4930419921875, - "learning_rate": 3.763879916788059e-05, - "loss": 66.0156, - "step": 99540 - }, - { - "epoch": 0.40219459673476976, - "grad_norm": 724.2689208984375, - "learning_rate": 3.763578731794695e-05, - "loss": 66.495, - "step": 99550 - }, - { - "epoch": 0.40223499800013734, - "grad_norm": 734.593505859375, - "learning_rate": 3.7632775221672115e-05, - "loss": 41.2429, - "step": 99560 - }, - { - "epoch": 0.402275399265505, - "grad_norm": 1102.271728515625, - "learning_rate": 3.7629762879114835e-05, - "loss": 71.2054, - "step": 99570 - }, - { - "epoch": 0.4023158005308726, - "grad_norm": 1053.3270263671875, - "learning_rate": 3.7626750290333824e-05, - "loss": 49.4548, - "step": 99580 - }, - { - "epoch": 0.40235620179624026, - "grad_norm": 774.8560180664062, - "learning_rate": 3.7623737455387814e-05, - "loss": 53.1317, - "step": 99590 - }, - { - "epoch": 0.4023966030616079, - "grad_norm": 792.8966064453125, - "learning_rate": 3.762072437433555e-05, - "loss": 63.3754, - "step": 99600 - }, - { - "epoch": 0.40243700432697554, - "grad_norm": 648.0517578125, - "learning_rate": 3.761771104723576e-05, - "loss": 41.7011, - "step": 99610 - }, - { - "epoch": 0.40247740559234313, - "grad_norm": 816.7391967773438, - "learning_rate": 3.76146974741472e-05, - "loss": 55.6581, - "step": 99620 - }, - { - "epoch": 0.40251780685771077, - "grad_norm": 2065.635498046875, - "learning_rate": 3.761168365512862e-05, - "loss": 76.0005, - "step": 99630 - }, - { - "epoch": 0.4025582081230784, - "grad_norm": 1137.087890625, - "learning_rate": 3.760866959023877e-05, - "loss": 58.8005, - "step": 99640 - }, - { - "epoch": 0.40259860938844605, - "grad_norm": 1612.660400390625, - "learning_rate": 3.760565527953641e-05, - "loss": 52.3195, - "step": 99650 - }, - { - "epoch": 0.4026390106538137, - "grad_norm": 1130.4898681640625, - "learning_rate": 3.7602640723080315e-05, - "loss": 58.5023, - "step": 99660 - }, - { - "epoch": 0.40267941191918133, - "grad_norm": 1282.1605224609375, - "learning_rate": 3.7599625920929254e-05, - "loss": 60.2157, - "step": 99670 - }, - { - "epoch": 0.4027198131845489, - "grad_norm": 626.7401733398438, - "learning_rate": 3.759661087314199e-05, - "loss": 61.224, - "step": 99680 - }, - { - "epoch": 0.40276021444991655, - "grad_norm": 403.7524108886719, - "learning_rate": 3.759359557977732e-05, - "loss": 43.2286, - "step": 99690 - }, - { - "epoch": 0.4028006157152842, - "grad_norm": 459.4110107421875, - "learning_rate": 3.759058004089402e-05, - "loss": 63.8654, - "step": 99700 - }, - { - "epoch": 0.40284101698065183, - "grad_norm": 689.9656982421875, - "learning_rate": 3.758756425655089e-05, - "loss": 55.9281, - "step": 99710 - }, - { - "epoch": 0.4028814182460195, - "grad_norm": 493.5693664550781, - "learning_rate": 3.7584548226806696e-05, - "loss": 46.4668, - "step": 99720 - }, - { - "epoch": 0.4029218195113871, - "grad_norm": 568.452880859375, - "learning_rate": 3.758153195172026e-05, - "loss": 85.0147, - "step": 99730 - }, - { - "epoch": 0.40296222077675475, - "grad_norm": 775.391845703125, - "learning_rate": 3.7578515431350384e-05, - "loss": 62.4054, - "step": 99740 - }, - { - "epoch": 0.40300262204212234, - "grad_norm": 1037.2498779296875, - "learning_rate": 3.757549866575588e-05, - "loss": 58.9337, - "step": 99750 - }, - { - "epoch": 0.40304302330749, - "grad_norm": 487.4396667480469, - "learning_rate": 3.757248165499555e-05, - "loss": 78.7348, - "step": 99760 - }, - { - "epoch": 0.4030834245728576, - "grad_norm": 1064.0037841796875, - "learning_rate": 3.7569464399128215e-05, - "loss": 43.4251, - "step": 99770 - }, - { - "epoch": 0.40312382583822526, - "grad_norm": 417.7305603027344, - "learning_rate": 3.75664468982127e-05, - "loss": 43.3443, - "step": 99780 - }, - { - "epoch": 0.4031642271035929, - "grad_norm": 1124.982177734375, - "learning_rate": 3.756342915230784e-05, - "loss": 72.6339, - "step": 99790 - }, - { - "epoch": 0.40320462836896054, - "grad_norm": 1895.08935546875, - "learning_rate": 3.7560411161472456e-05, - "loss": 87.87, - "step": 99800 - }, - { - "epoch": 0.4032450296343281, - "grad_norm": 885.612060546875, - "learning_rate": 3.755739292576539e-05, - "loss": 68.6611, - "step": 99810 - }, - { - "epoch": 0.40328543089969576, - "grad_norm": 1059.715576171875, - "learning_rate": 3.7554374445245474e-05, - "loss": 50.6557, - "step": 99820 - }, - { - "epoch": 0.4033258321650634, - "grad_norm": 1289.69677734375, - "learning_rate": 3.755135571997158e-05, - "loss": 55.3921, - "step": 99830 - }, - { - "epoch": 0.40336623343043104, - "grad_norm": 948.4808959960938, - "learning_rate": 3.7548336750002544e-05, - "loss": 52.4533, - "step": 99840 - }, - { - "epoch": 0.4034066346957987, - "grad_norm": 768.9962768554688, - "learning_rate": 3.7545317535397214e-05, - "loss": 52.6331, - "step": 99850 - }, - { - "epoch": 0.4034470359611663, - "grad_norm": 806.0327758789062, - "learning_rate": 3.754229807621446e-05, - "loss": 58.2938, - "step": 99860 - }, - { - "epoch": 0.40348743722653396, - "grad_norm": 631.0634155273438, - "learning_rate": 3.753927837251315e-05, - "loss": 68.7876, - "step": 99870 - }, - { - "epoch": 0.40352783849190155, - "grad_norm": 574.1565551757812, - "learning_rate": 3.753625842435216e-05, - "loss": 45.7764, - "step": 99880 - }, - { - "epoch": 0.4035682397572692, - "grad_norm": 1697.2900390625, - "learning_rate": 3.753323823179035e-05, - "loss": 65.6436, - "step": 99890 - }, - { - "epoch": 0.4036086410226368, - "grad_norm": 260.11083984375, - "learning_rate": 3.7530217794886606e-05, - "loss": 71.5105, - "step": 99900 - }, - { - "epoch": 0.40364904228800447, - "grad_norm": 932.4937133789062, - "learning_rate": 3.752719711369982e-05, - "loss": 61.679, - "step": 99910 - }, - { - "epoch": 0.4036894435533721, - "grad_norm": 676.4361572265625, - "learning_rate": 3.752417618828888e-05, - "loss": 90.9519, - "step": 99920 - }, - { - "epoch": 0.40372984481873975, - "grad_norm": 6873.72802734375, - "learning_rate": 3.752115501871267e-05, - "loss": 99.3489, - "step": 99930 - }, - { - "epoch": 0.40377024608410733, - "grad_norm": 0.0, - "learning_rate": 3.75181336050301e-05, - "loss": 58.3508, - "step": 99940 - }, - { - "epoch": 0.40381064734947497, - "grad_norm": 619.0031127929688, - "learning_rate": 3.751511194730007e-05, - "loss": 58.6171, - "step": 99950 - }, - { - "epoch": 0.4038510486148426, - "grad_norm": 963.3147583007812, - "learning_rate": 3.751209004558149e-05, - "loss": 60.5256, - "step": 99960 - }, - { - "epoch": 0.40389144988021025, - "grad_norm": 720.6400756835938, - "learning_rate": 3.750906789993327e-05, - "loss": 59.5601, - "step": 99970 - }, - { - "epoch": 0.4039318511455779, - "grad_norm": 1478.5609130859375, - "learning_rate": 3.7506045510414335e-05, - "loss": 46.86, - "step": 99980 - }, - { - "epoch": 0.40397225241094553, - "grad_norm": 427.1527099609375, - "learning_rate": 3.7503022877083606e-05, - "loss": 57.4738, - "step": 99990 - }, - { - "epoch": 0.4040126536763131, - "grad_norm": 967.2838134765625, - "learning_rate": 3.7500000000000003e-05, - "loss": 77.0952, - "step": 100000 - }, - { - "epoch": 0.40405305494168076, - "grad_norm": 896.5092163085938, - "learning_rate": 3.749697687922247e-05, - "loss": 49.8017, - "step": 100010 - }, - { - "epoch": 0.4040934562070484, - "grad_norm": 663.4006958007812, - "learning_rate": 3.749395351480993e-05, - "loss": 45.9336, - "step": 100020 - }, - { - "epoch": 0.40413385747241604, - "grad_norm": 471.2986145019531, - "learning_rate": 3.749092990682134e-05, - "loss": 62.4995, - "step": 100030 - }, - { - "epoch": 0.4041742587377837, - "grad_norm": 956.7597045898438, - "learning_rate": 3.748790605531565e-05, - "loss": 62.3297, - "step": 100040 - }, - { - "epoch": 0.4042146600031513, - "grad_norm": 2078.440185546875, - "learning_rate": 3.748488196035179e-05, - "loss": 129.3817, - "step": 100050 - }, - { - "epoch": 0.40425506126851896, - "grad_norm": 4852.7265625, - "learning_rate": 3.748185762198873e-05, - "loss": 86.8261, - "step": 100060 - }, - { - "epoch": 0.40429546253388654, - "grad_norm": 1042.48828125, - "learning_rate": 3.747883304028543e-05, - "loss": 51.4865, - "step": 100070 - }, - { - "epoch": 0.4043358637992542, - "grad_norm": 758.50048828125, - "learning_rate": 3.7475808215300854e-05, - "loss": 47.7026, - "step": 100080 - }, - { - "epoch": 0.4043762650646218, - "grad_norm": 769.8150024414062, - "learning_rate": 3.7472783147093985e-05, - "loss": 85.2325, - "step": 100090 - }, - { - "epoch": 0.40441666632998946, - "grad_norm": 777.2942504882812, - "learning_rate": 3.746975783572377e-05, - "loss": 67.7482, - "step": 100100 - }, - { - "epoch": 0.4044570675953571, - "grad_norm": 605.9794921875, - "learning_rate": 3.746673228124922e-05, - "loss": 53.6268, - "step": 100110 - }, - { - "epoch": 0.40449746886072474, - "grad_norm": 1080.2708740234375, - "learning_rate": 3.7463706483729296e-05, - "loss": 69.3192, - "step": 100120 - }, - { - "epoch": 0.4045378701260923, - "grad_norm": 725.3931884765625, - "learning_rate": 3.7460680443223004e-05, - "loss": 42.6117, - "step": 100130 - }, - { - "epoch": 0.40457827139145996, - "grad_norm": 629.57470703125, - "learning_rate": 3.745765415978933e-05, - "loss": 69.1875, - "step": 100140 - }, - { - "epoch": 0.4046186726568276, - "grad_norm": 788.4440307617188, - "learning_rate": 3.7454627633487274e-05, - "loss": 86.3007, - "step": 100150 - }, - { - "epoch": 0.40465907392219524, - "grad_norm": 879.8585815429688, - "learning_rate": 3.7451600864375844e-05, - "loss": 81.9851, - "step": 100160 - }, - { - "epoch": 0.4046994751875629, - "grad_norm": 578.5433349609375, - "learning_rate": 3.7448573852514035e-05, - "loss": 56.7397, - "step": 100170 - }, - { - "epoch": 0.4047398764529305, - "grad_norm": 731.2473754882812, - "learning_rate": 3.744554659796088e-05, - "loss": 54.396, - "step": 100180 - }, - { - "epoch": 0.40478027771829816, - "grad_norm": 718.974609375, - "learning_rate": 3.744251910077538e-05, - "loss": 47.8276, - "step": 100190 - }, - { - "epoch": 0.40482067898366575, - "grad_norm": 714.3526000976562, - "learning_rate": 3.7439491361016564e-05, - "loss": 68.6014, - "step": 100200 - }, - { - "epoch": 0.4048610802490334, - "grad_norm": 681.6476440429688, - "learning_rate": 3.743646337874346e-05, - "loss": 68.5919, - "step": 100210 - }, - { - "epoch": 0.40490148151440103, - "grad_norm": 709.8939819335938, - "learning_rate": 3.743343515401511e-05, - "loss": 60.53, - "step": 100220 - }, - { - "epoch": 0.40494188277976867, - "grad_norm": 730.64892578125, - "learning_rate": 3.743040668689053e-05, - "loss": 91.2893, - "step": 100230 - }, - { - "epoch": 0.4049822840451363, - "grad_norm": 1242.37451171875, - "learning_rate": 3.742737797742878e-05, - "loss": 50.5919, - "step": 100240 - }, - { - "epoch": 0.40502268531050395, - "grad_norm": 1279.793701171875, - "learning_rate": 3.742434902568889e-05, - "loss": 47.2868, - "step": 100250 - }, - { - "epoch": 0.40506308657587153, - "grad_norm": 446.11175537109375, - "learning_rate": 3.742131983172992e-05, - "loss": 43.8764, - "step": 100260 - }, - { - "epoch": 0.4051034878412392, - "grad_norm": 478.0585021972656, - "learning_rate": 3.741829039561092e-05, - "loss": 67.3198, - "step": 100270 - }, - { - "epoch": 0.4051438891066068, - "grad_norm": 1781.1514892578125, - "learning_rate": 3.741526071739097e-05, - "loss": 55.5198, - "step": 100280 - }, - { - "epoch": 0.40518429037197445, - "grad_norm": 601.347412109375, - "learning_rate": 3.741223079712911e-05, - "loss": 50.489, - "step": 100290 - }, - { - "epoch": 0.4052246916373421, - "grad_norm": 631.7415161132812, - "learning_rate": 3.7409200634884426e-05, - "loss": 52.5421, - "step": 100300 - }, - { - "epoch": 0.40526509290270973, - "grad_norm": 431.8924255371094, - "learning_rate": 3.740617023071598e-05, - "loss": 59.2278, - "step": 100310 - }, - { - "epoch": 0.4053054941680773, - "grad_norm": 1287.239501953125, - "learning_rate": 3.740313958468287e-05, - "loss": 63.1438, - "step": 100320 - }, - { - "epoch": 0.40534589543344496, - "grad_norm": 769.814208984375, - "learning_rate": 3.7400108696844156e-05, - "loss": 56.3332, - "step": 100330 - }, - { - "epoch": 0.4053862966988126, - "grad_norm": 372.637451171875, - "learning_rate": 3.739707756725894e-05, - "loss": 61.8536, - "step": 100340 - }, - { - "epoch": 0.40542669796418024, - "grad_norm": 537.6292114257812, - "learning_rate": 3.739404619598632e-05, - "loss": 75.01, - "step": 100350 - }, - { - "epoch": 0.4054670992295479, - "grad_norm": 1282.1539306640625, - "learning_rate": 3.7391014583085385e-05, - "loss": 42.7495, - "step": 100360 - }, - { - "epoch": 0.4055075004949155, - "grad_norm": 1255.515869140625, - "learning_rate": 3.738798272861525e-05, - "loss": 79.5692, - "step": 100370 - }, - { - "epoch": 0.40554790176028316, - "grad_norm": 1217.02294921875, - "learning_rate": 3.7384950632634995e-05, - "loss": 55.6681, - "step": 100380 - }, - { - "epoch": 0.40558830302565074, - "grad_norm": 480.9776306152344, - "learning_rate": 3.7381918295203774e-05, - "loss": 43.6625, - "step": 100390 - }, - { - "epoch": 0.4056287042910184, - "grad_norm": 861.3986206054688, - "learning_rate": 3.7378885716380664e-05, - "loss": 48.5241, - "step": 100400 - }, - { - "epoch": 0.405669105556386, - "grad_norm": 490.213134765625, - "learning_rate": 3.737585289622482e-05, - "loss": 60.6238, - "step": 100410 - }, - { - "epoch": 0.40570950682175366, - "grad_norm": 0.0, - "learning_rate": 3.7372819834795335e-05, - "loss": 82.8279, - "step": 100420 - }, - { - "epoch": 0.4057499080871213, - "grad_norm": 3635.81689453125, - "learning_rate": 3.736978653215136e-05, - "loss": 79.4986, - "step": 100430 - }, - { - "epoch": 0.40579030935248894, - "grad_norm": 803.3690795898438, - "learning_rate": 3.736675298835203e-05, - "loss": 84.2047, - "step": 100440 - }, - { - "epoch": 0.4058307106178565, - "grad_norm": 1111.7283935546875, - "learning_rate": 3.7363719203456495e-05, - "loss": 51.9508, - "step": 100450 - }, - { - "epoch": 0.40587111188322417, - "grad_norm": 472.92242431640625, - "learning_rate": 3.736068517752388e-05, - "loss": 47.1429, - "step": 100460 - }, - { - "epoch": 0.4059115131485918, - "grad_norm": 638.3177490234375, - "learning_rate": 3.735765091061334e-05, - "loss": 74.3898, - "step": 100470 - }, - { - "epoch": 0.40595191441395945, - "grad_norm": 877.7601318359375, - "learning_rate": 3.7354616402784035e-05, - "loss": 63.0587, - "step": 100480 - }, - { - "epoch": 0.4059923156793271, - "grad_norm": 625.0929565429688, - "learning_rate": 3.735158165409514e-05, - "loss": 38.2794, - "step": 100490 - }, - { - "epoch": 0.4060327169446947, - "grad_norm": 756.3575439453125, - "learning_rate": 3.7348546664605777e-05, - "loss": 64.2025, - "step": 100500 - }, - { - "epoch": 0.4060731182100623, - "grad_norm": 1217.9346923828125, - "learning_rate": 3.7345511434375145e-05, - "loss": 68.1389, - "step": 100510 - }, - { - "epoch": 0.40611351947542995, - "grad_norm": 873.300048828125, - "learning_rate": 3.734247596346242e-05, - "loss": 66.9642, - "step": 100520 - }, - { - "epoch": 0.4061539207407976, - "grad_norm": 959.7666015625, - "learning_rate": 3.733944025192677e-05, - "loss": 66.9598, - "step": 100530 - }, - { - "epoch": 0.40619432200616523, - "grad_norm": 789.1475830078125, - "learning_rate": 3.733640429982738e-05, - "loss": 58.7012, - "step": 100540 - }, - { - "epoch": 0.40623472327153287, - "grad_norm": 480.3835754394531, - "learning_rate": 3.7333368107223424e-05, - "loss": 43.2679, - "step": 100550 - }, - { - "epoch": 0.4062751245369005, - "grad_norm": 759.8263549804688, - "learning_rate": 3.7330331674174125e-05, - "loss": 66.052, - "step": 100560 - }, - { - "epoch": 0.40631552580226815, - "grad_norm": 859.6227416992188, - "learning_rate": 3.732729500073866e-05, - "loss": 56.3151, - "step": 100570 - }, - { - "epoch": 0.40635592706763574, - "grad_norm": 405.04998779296875, - "learning_rate": 3.732425808697622e-05, - "loss": 54.2167, - "step": 100580 - }, - { - "epoch": 0.4063963283330034, - "grad_norm": 821.4298706054688, - "learning_rate": 3.732122093294603e-05, - "loss": 67.599, - "step": 100590 - }, - { - "epoch": 0.406436729598371, - "grad_norm": 777.3472290039062, - "learning_rate": 3.731818353870729e-05, - "loss": 58.1797, - "step": 100600 - }, - { - "epoch": 0.40647713086373866, - "grad_norm": 1343.93359375, - "learning_rate": 3.731514590431922e-05, - "loss": 65.9451, - "step": 100610 - }, - { - "epoch": 0.4065175321291063, - "grad_norm": 800.9368896484375, - "learning_rate": 3.731210802984105e-05, - "loss": 67.0606, - "step": 100620 - }, - { - "epoch": 0.40655793339447394, - "grad_norm": 1185.2408447265625, - "learning_rate": 3.730906991533199e-05, - "loss": 76.7648, - "step": 100630 - }, - { - "epoch": 0.4065983346598415, - "grad_norm": 545.9066772460938, - "learning_rate": 3.7306031560851275e-05, - "loss": 42.591, - "step": 100640 - }, - { - "epoch": 0.40663873592520916, - "grad_norm": 551.5760498046875, - "learning_rate": 3.730299296645814e-05, - "loss": 81.1927, - "step": 100650 - }, - { - "epoch": 0.4066791371905768, - "grad_norm": 3141.09326171875, - "learning_rate": 3.729995413221183e-05, - "loss": 85.903, - "step": 100660 - }, - { - "epoch": 0.40671953845594444, - "grad_norm": 1117.46728515625, - "learning_rate": 3.7296915058171566e-05, - "loss": 59.2733, - "step": 100670 - }, - { - "epoch": 0.4067599397213121, - "grad_norm": 2471.260986328125, - "learning_rate": 3.729387574439662e-05, - "loss": 68.098, - "step": 100680 - }, - { - "epoch": 0.4068003409866797, - "grad_norm": 768.341552734375, - "learning_rate": 3.729083619094624e-05, - "loss": 73.1918, - "step": 100690 - }, - { - "epoch": 0.40684074225204736, - "grad_norm": 0.0, - "learning_rate": 3.7287796397879674e-05, - "loss": 28.0081, - "step": 100700 - }, - { - "epoch": 0.40688114351741494, - "grad_norm": 1264.1614990234375, - "learning_rate": 3.72847563652562e-05, - "loss": 45.6677, - "step": 100710 - }, - { - "epoch": 0.4069215447827826, - "grad_norm": 287.6547546386719, - "learning_rate": 3.7281716093135063e-05, - "loss": 60.4336, - "step": 100720 - }, - { - "epoch": 0.4069619460481502, - "grad_norm": 565.496826171875, - "learning_rate": 3.7278675581575564e-05, - "loss": 49.9453, - "step": 100730 - }, - { - "epoch": 0.40700234731351786, - "grad_norm": 513.9716796875, - "learning_rate": 3.7275634830636957e-05, - "loss": 48.138, - "step": 100740 - }, - { - "epoch": 0.4070427485788855, - "grad_norm": 798.354248046875, - "learning_rate": 3.727259384037852e-05, - "loss": 49.801, - "step": 100750 - }, - { - "epoch": 0.40708314984425314, - "grad_norm": 901.558837890625, - "learning_rate": 3.726955261085956e-05, - "loss": 38.1762, - "step": 100760 - }, - { - "epoch": 0.40712355110962073, - "grad_norm": 808.7131958007812, - "learning_rate": 3.726651114213935e-05, - "loss": 63.8376, - "step": 100770 - }, - { - "epoch": 0.40716395237498837, - "grad_norm": 726.5311279296875, - "learning_rate": 3.726346943427719e-05, - "loss": 51.7929, - "step": 100780 - }, - { - "epoch": 0.407204353640356, - "grad_norm": 367.88018798828125, - "learning_rate": 3.726042748733238e-05, - "loss": 85.8663, - "step": 100790 - }, - { - "epoch": 0.40724475490572365, - "grad_norm": 368.78338623046875, - "learning_rate": 3.725738530136422e-05, - "loss": 33.2297, - "step": 100800 - }, - { - "epoch": 0.4072851561710913, - "grad_norm": 942.588623046875, - "learning_rate": 3.7254342876432026e-05, - "loss": 74.4766, - "step": 100810 - }, - { - "epoch": 0.40732555743645893, - "grad_norm": 704.6798095703125, - "learning_rate": 3.7251300212595106e-05, - "loss": 84.4977, - "step": 100820 - }, - { - "epoch": 0.4073659587018265, - "grad_norm": 411.1459045410156, - "learning_rate": 3.724825730991279e-05, - "loss": 67.2837, - "step": 100830 - }, - { - "epoch": 0.40740635996719415, - "grad_norm": 969.4332885742188, - "learning_rate": 3.7245214168444386e-05, - "loss": 73.3558, - "step": 100840 - }, - { - "epoch": 0.4074467612325618, - "grad_norm": 857.5194091796875, - "learning_rate": 3.724217078824923e-05, - "loss": 87.0812, - "step": 100850 - }, - { - "epoch": 0.40748716249792943, - "grad_norm": 789.8101196289062, - "learning_rate": 3.723912716938665e-05, - "loss": 62.384, - "step": 100860 - }, - { - "epoch": 0.4075275637632971, - "grad_norm": 963.7822265625, - "learning_rate": 3.723608331191598e-05, - "loss": 70.5818, - "step": 100870 - }, - { - "epoch": 0.4075679650286647, - "grad_norm": 1622.1092529296875, - "learning_rate": 3.723303921589657e-05, - "loss": 73.844, - "step": 100880 - }, - { - "epoch": 0.40760836629403235, - "grad_norm": 4206.56787109375, - "learning_rate": 3.722999488138776e-05, - "loss": 86.4621, - "step": 100890 - }, - { - "epoch": 0.40764876755939994, - "grad_norm": 780.2285766601562, - "learning_rate": 3.722695030844891e-05, - "loss": 42.5627, - "step": 100900 - }, - { - "epoch": 0.4076891688247676, - "grad_norm": 1180.5223388671875, - "learning_rate": 3.7223905497139366e-05, - "loss": 59.9004, - "step": 100910 - }, - { - "epoch": 0.4077295700901352, - "grad_norm": 609.8385009765625, - "learning_rate": 3.722086044751849e-05, - "loss": 39.532, - "step": 100920 - }, - { - "epoch": 0.40776997135550286, - "grad_norm": 0.0, - "learning_rate": 3.721781515964565e-05, - "loss": 33.3144, - "step": 100930 - }, - { - "epoch": 0.4078103726208705, - "grad_norm": 1108.013671875, - "learning_rate": 3.721476963358021e-05, - "loss": 67.8884, - "step": 100940 - }, - { - "epoch": 0.40785077388623814, - "grad_norm": 503.3689270019531, - "learning_rate": 3.721172386938155e-05, - "loss": 63.0006, - "step": 100950 - }, - { - "epoch": 0.4078911751516057, - "grad_norm": 2834.478515625, - "learning_rate": 3.720867786710904e-05, - "loss": 74.9134, - "step": 100960 - }, - { - "epoch": 0.40793157641697336, - "grad_norm": 1194.1221923828125, - "learning_rate": 3.7205631626822074e-05, - "loss": 47.9969, - "step": 100970 - }, - { - "epoch": 0.407971977682341, - "grad_norm": 798.5789794921875, - "learning_rate": 3.7202585148580036e-05, - "loss": 86.6126, - "step": 100980 - }, - { - "epoch": 0.40801237894770864, - "grad_norm": 505.0960998535156, - "learning_rate": 3.7199538432442316e-05, - "loss": 59.1577, - "step": 100990 - }, - { - "epoch": 0.4080527802130763, - "grad_norm": 767.0346069335938, - "learning_rate": 3.719649147846832e-05, - "loss": 64.2209, - "step": 101000 - }, - { - "epoch": 0.4080931814784439, - "grad_norm": 577.536865234375, - "learning_rate": 3.7193444286717436e-05, - "loss": 64.3384, - "step": 101010 - }, - { - "epoch": 0.40813358274381156, - "grad_norm": 668.321533203125, - "learning_rate": 3.719039685724909e-05, - "loss": 39.2026, - "step": 101020 - }, - { - "epoch": 0.40817398400917915, - "grad_norm": 211.69288635253906, - "learning_rate": 3.718734919012267e-05, - "loss": 51.6079, - "step": 101030 - }, - { - "epoch": 0.4082143852745468, - "grad_norm": 391.42852783203125, - "learning_rate": 3.71843012853976e-05, - "loss": 42.6838, - "step": 101040 - }, - { - "epoch": 0.4082547865399144, - "grad_norm": 441.98419189453125, - "learning_rate": 3.718125314313331e-05, - "loss": 40.6746, - "step": 101050 - }, - { - "epoch": 0.40829518780528207, - "grad_norm": 772.721923828125, - "learning_rate": 3.7178204763389216e-05, - "loss": 68.107, - "step": 101060 - }, - { - "epoch": 0.4083355890706497, - "grad_norm": 286.0000305175781, - "learning_rate": 3.717515614622476e-05, - "loss": 79.75, - "step": 101070 - }, - { - "epoch": 0.40837599033601735, - "grad_norm": 966.0552368164062, - "learning_rate": 3.717210729169935e-05, - "loss": 72.2583, - "step": 101080 - }, - { - "epoch": 0.40841639160138493, - "grad_norm": 756.3565673828125, - "learning_rate": 3.7169058199872455e-05, - "loss": 69.5634, - "step": 101090 - }, - { - "epoch": 0.40845679286675257, - "grad_norm": 754.9552001953125, - "learning_rate": 3.71660088708035e-05, - "loss": 58.8931, - "step": 101100 - }, - { - "epoch": 0.4084971941321202, - "grad_norm": 1555.589599609375, - "learning_rate": 3.716295930455194e-05, - "loss": 79.8807, - "step": 101110 - }, - { - "epoch": 0.40853759539748785, - "grad_norm": 1325.0550537109375, - "learning_rate": 3.7159909501177226e-05, - "loss": 68.7145, - "step": 101120 - }, - { - "epoch": 0.4085779966628555, - "grad_norm": 620.8428955078125, - "learning_rate": 3.715685946073881e-05, - "loss": 55.6151, - "step": 101130 - }, - { - "epoch": 0.40861839792822313, - "grad_norm": 955.8616943359375, - "learning_rate": 3.7153809183296176e-05, - "loss": 66.1763, - "step": 101140 - }, - { - "epoch": 0.4086587991935907, - "grad_norm": 599.90234375, - "learning_rate": 3.715075866890876e-05, - "loss": 65.0991, - "step": 101150 - }, - { - "epoch": 0.40869920045895836, - "grad_norm": 3034.8486328125, - "learning_rate": 3.7147707917636046e-05, - "loss": 69.5837, - "step": 101160 - }, - { - "epoch": 0.408739601724326, - "grad_norm": 0.0, - "learning_rate": 3.7144656929537524e-05, - "loss": 45.1975, - "step": 101170 - }, - { - "epoch": 0.40878000298969364, - "grad_norm": 502.8624572753906, - "learning_rate": 3.714160570467266e-05, - "loss": 73.8093, - "step": 101180 - }, - { - "epoch": 0.4088204042550613, - "grad_norm": 0.0, - "learning_rate": 3.7138554243100934e-05, - "loss": 48.2301, - "step": 101190 - }, - { - "epoch": 0.4088608055204289, - "grad_norm": 648.0908203125, - "learning_rate": 3.713550254488185e-05, - "loss": 103.6744, - "step": 101200 - }, - { - "epoch": 0.40890120678579656, - "grad_norm": 1352.32421875, - "learning_rate": 3.71324506100749e-05, - "loss": 82.1861, - "step": 101210 - }, - { - "epoch": 0.40894160805116414, - "grad_norm": 1041.673583984375, - "learning_rate": 3.712939843873957e-05, - "loss": 40.6213, - "step": 101220 - }, - { - "epoch": 0.4089820093165318, - "grad_norm": 763.6616821289062, - "learning_rate": 3.7126346030935374e-05, - "loss": 55.786, - "step": 101230 - }, - { - "epoch": 0.4090224105818994, - "grad_norm": 892.0834350585938, - "learning_rate": 3.712329338672182e-05, - "loss": 60.7086, - "step": 101240 - }, - { - "epoch": 0.40906281184726706, - "grad_norm": 788.8603515625, - "learning_rate": 3.712024050615843e-05, - "loss": 53.5069, - "step": 101250 - }, - { - "epoch": 0.4091032131126347, - "grad_norm": 1101.8909912109375, - "learning_rate": 3.71171873893047e-05, - "loss": 47.3406, - "step": 101260 - }, - { - "epoch": 0.40914361437800234, - "grad_norm": 296.9659729003906, - "learning_rate": 3.711413403622017e-05, - "loss": 55.5417, - "step": 101270 - }, - { - "epoch": 0.4091840156433699, - "grad_norm": 653.2747802734375, - "learning_rate": 3.711108044696436e-05, - "loss": 71.033, - "step": 101280 - }, - { - "epoch": 0.40922441690873756, - "grad_norm": 671.96533203125, - "learning_rate": 3.710802662159679e-05, - "loss": 58.3862, - "step": 101290 - }, - { - "epoch": 0.4092648181741052, - "grad_norm": 519.48095703125, - "learning_rate": 3.710497256017702e-05, - "loss": 90.0393, - "step": 101300 - }, - { - "epoch": 0.40930521943947284, - "grad_norm": 837.6402587890625, - "learning_rate": 3.7101918262764576e-05, - "loss": 60.9863, - "step": 101310 - }, - { - "epoch": 0.4093456207048405, - "grad_norm": 907.7230224609375, - "learning_rate": 3.7098863729419e-05, - "loss": 80.4925, - "step": 101320 - }, - { - "epoch": 0.4093860219702081, - "grad_norm": 2056.220947265625, - "learning_rate": 3.709580896019985e-05, - "loss": 49.3788, - "step": 101330 - }, - { - "epoch": 0.40942642323557576, - "grad_norm": 531.6597900390625, - "learning_rate": 3.7092753955166674e-05, - "loss": 81.7104, - "step": 101340 - }, - { - "epoch": 0.40946682450094335, - "grad_norm": 714.7631225585938, - "learning_rate": 3.708969871437904e-05, - "loss": 63.8878, - "step": 101350 - }, - { - "epoch": 0.409507225766311, - "grad_norm": 1157.5352783203125, - "learning_rate": 3.7086643237896504e-05, - "loss": 41.3693, - "step": 101360 - }, - { - "epoch": 0.40954762703167863, - "grad_norm": 531.5262451171875, - "learning_rate": 3.708358752577863e-05, - "loss": 71.7268, - "step": 101370 - }, - { - "epoch": 0.40958802829704627, - "grad_norm": 447.32330322265625, - "learning_rate": 3.7080531578085e-05, - "loss": 66.6252, - "step": 101380 - }, - { - "epoch": 0.4096284295624139, - "grad_norm": 983.9207153320312, - "learning_rate": 3.707747539487519e-05, - "loss": 62.158, - "step": 101390 - }, - { - "epoch": 0.40966883082778155, - "grad_norm": 294.036376953125, - "learning_rate": 3.7074418976208766e-05, - "loss": 54.1539, - "step": 101400 - }, - { - "epoch": 0.40970923209314913, - "grad_norm": 2036.715087890625, - "learning_rate": 3.707136232214534e-05, - "loss": 53.9095, - "step": 101410 - }, - { - "epoch": 0.4097496333585168, - "grad_norm": 1761.55908203125, - "learning_rate": 3.706830543274449e-05, - "loss": 57.585, - "step": 101420 - }, - { - "epoch": 0.4097900346238844, - "grad_norm": 1144.66064453125, - "learning_rate": 3.706524830806581e-05, - "loss": 80.4368, - "step": 101430 - }, - { - "epoch": 0.40983043588925205, - "grad_norm": 457.09368896484375, - "learning_rate": 3.706219094816891e-05, - "loss": 80.4445, - "step": 101440 - }, - { - "epoch": 0.4098708371546197, - "grad_norm": 451.6939697265625, - "learning_rate": 3.705913335311338e-05, - "loss": 46.0204, - "step": 101450 - }, - { - "epoch": 0.40991123841998733, - "grad_norm": 675.3792724609375, - "learning_rate": 3.705607552295883e-05, - "loss": 29.4593, - "step": 101460 - }, - { - "epoch": 0.4099516396853549, - "grad_norm": 473.2438659667969, - "learning_rate": 3.7053017457764895e-05, - "loss": 69.6346, - "step": 101470 - }, - { - "epoch": 0.40999204095072256, - "grad_norm": 1086.820556640625, - "learning_rate": 3.704995915759117e-05, - "loss": 52.2497, - "step": 101480 - }, - { - "epoch": 0.4100324422160902, - "grad_norm": 1838.8997802734375, - "learning_rate": 3.704690062249729e-05, - "loss": 67.0797, - "step": 101490 - }, - { - "epoch": 0.41007284348145784, - "grad_norm": 1140.804931640625, - "learning_rate": 3.704384185254288e-05, - "loss": 59.0365, - "step": 101500 - }, - { - "epoch": 0.4101132447468255, - "grad_norm": 967.5361938476562, - "learning_rate": 3.7040782847787576e-05, - "loss": 62.3368, - "step": 101510 - }, - { - "epoch": 0.4101536460121931, - "grad_norm": 862.10107421875, - "learning_rate": 3.7037723608291015e-05, - "loss": 66.9464, - "step": 101520 - }, - { - "epoch": 0.41019404727756076, - "grad_norm": 333.78411865234375, - "learning_rate": 3.703466413411282e-05, - "loss": 57.9339, - "step": 101530 - }, - { - "epoch": 0.41023444854292834, - "grad_norm": 1024.932861328125, - "learning_rate": 3.703160442531266e-05, - "loss": 50.8443, - "step": 101540 - }, - { - "epoch": 0.410274849808296, - "grad_norm": 1006.3261108398438, - "learning_rate": 3.702854448195019e-05, - "loss": 66.2163, - "step": 101550 - }, - { - "epoch": 0.4103152510736636, - "grad_norm": 720.7424926757812, - "learning_rate": 3.7025484304085034e-05, - "loss": 61.6989, - "step": 101560 - }, - { - "epoch": 0.41035565233903126, - "grad_norm": 972.9140625, - "learning_rate": 3.702242389177687e-05, - "loss": 58.963, - "step": 101570 - }, - { - "epoch": 0.4103960536043989, - "grad_norm": 819.4244384765625, - "learning_rate": 3.701936324508537e-05, - "loss": 58.8996, - "step": 101580 - }, - { - "epoch": 0.41043645486976654, - "grad_norm": 1408.4876708984375, - "learning_rate": 3.7016302364070196e-05, - "loss": 104.3692, - "step": 101590 - }, - { - "epoch": 0.4104768561351341, - "grad_norm": 1001.0791625976562, - "learning_rate": 3.701324124879102e-05, - "loss": 76.1169, - "step": 101600 - }, - { - "epoch": 0.41051725740050177, - "grad_norm": 1362.98193359375, - "learning_rate": 3.701017989930752e-05, - "loss": 79.0587, - "step": 101610 - }, - { - "epoch": 0.4105576586658694, - "grad_norm": 494.7317199707031, - "learning_rate": 3.7007118315679384e-05, - "loss": 41.3256, - "step": 101620 - }, - { - "epoch": 0.41059805993123705, - "grad_norm": 418.2009582519531, - "learning_rate": 3.700405649796629e-05, - "loss": 77.2711, - "step": 101630 - }, - { - "epoch": 0.4106384611966047, - "grad_norm": 934.8997192382812, - "learning_rate": 3.700099444622794e-05, - "loss": 64.7147, - "step": 101640 - }, - { - "epoch": 0.4106788624619723, - "grad_norm": 829.1802978515625, - "learning_rate": 3.699793216052402e-05, - "loss": 57.4641, - "step": 101650 - }, - { - "epoch": 0.41071926372733997, - "grad_norm": 536.0474853515625, - "learning_rate": 3.699486964091423e-05, - "loss": 79.0541, - "step": 101660 - }, - { - "epoch": 0.41075966499270755, - "grad_norm": 808.2246704101562, - "learning_rate": 3.699180688745829e-05, - "loss": 50.0221, - "step": 101670 - }, - { - "epoch": 0.4108000662580752, - "grad_norm": 558.8209838867188, - "learning_rate": 3.6988743900215894e-05, - "loss": 52.1435, - "step": 101680 - }, - { - "epoch": 0.41084046752344283, - "grad_norm": 1482.892578125, - "learning_rate": 3.698568067924677e-05, - "loss": 49.8897, - "step": 101690 - }, - { - "epoch": 0.41088086878881047, - "grad_norm": 494.3254699707031, - "learning_rate": 3.698261722461063e-05, - "loss": 64.2092, - "step": 101700 - }, - { - "epoch": 0.4109212700541781, - "grad_norm": 455.20489501953125, - "learning_rate": 3.6979553536367194e-05, - "loss": 38.1696, - "step": 101710 - }, - { - "epoch": 0.41096167131954575, - "grad_norm": 528.5197143554688, - "learning_rate": 3.69764896145762e-05, - "loss": 42.2116, - "step": 101720 - }, - { - "epoch": 0.41100207258491334, - "grad_norm": 935.036865234375, - "learning_rate": 3.697342545929737e-05, - "loss": 76.0732, - "step": 101730 - }, - { - "epoch": 0.411042473850281, - "grad_norm": 1118.97216796875, - "learning_rate": 3.697036107059044e-05, - "loss": 37.8624, - "step": 101740 - }, - { - "epoch": 0.4110828751156486, - "grad_norm": 1046.9444580078125, - "learning_rate": 3.696729644851518e-05, - "loss": 34.6092, - "step": 101750 - }, - { - "epoch": 0.41112327638101626, - "grad_norm": 427.7893981933594, - "learning_rate": 3.696423159313129e-05, - "loss": 67.689, - "step": 101760 - }, - { - "epoch": 0.4111636776463839, - "grad_norm": 655.9987182617188, - "learning_rate": 3.696116650449856e-05, - "loss": 42.4555, - "step": 101770 - }, - { - "epoch": 0.41120407891175154, - "grad_norm": 911.2579956054688, - "learning_rate": 3.6958101182676726e-05, - "loss": 61.5867, - "step": 101780 - }, - { - "epoch": 0.4112444801771191, - "grad_norm": 2679.955078125, - "learning_rate": 3.6955035627725557e-05, - "loss": 62.913, - "step": 101790 - }, - { - "epoch": 0.41128488144248676, - "grad_norm": 736.22802734375, - "learning_rate": 3.695196983970481e-05, - "loss": 54.0211, - "step": 101800 - }, - { - "epoch": 0.4113252827078544, - "grad_norm": 1243.8758544921875, - "learning_rate": 3.694890381867425e-05, - "loss": 58.7315, - "step": 101810 - }, - { - "epoch": 0.41136568397322204, - "grad_norm": 828.197265625, - "learning_rate": 3.6945837564693666e-05, - "loss": 65.7947, - "step": 101820 - }, - { - "epoch": 0.4114060852385897, - "grad_norm": 682.7947387695312, - "learning_rate": 3.6942771077822835e-05, - "loss": 57.709, - "step": 101830 - }, - { - "epoch": 0.4114464865039573, - "grad_norm": 1457.8909912109375, - "learning_rate": 3.693970435812153e-05, - "loss": 53.162, - "step": 101840 - }, - { - "epoch": 0.41148688776932496, - "grad_norm": 1344.3275146484375, - "learning_rate": 3.693663740564953e-05, - "loss": 54.1553, - "step": 101850 - }, - { - "epoch": 0.41152728903469254, - "grad_norm": 0.0, - "learning_rate": 3.693357022046665e-05, - "loss": 47.7167, - "step": 101860 - }, - { - "epoch": 0.4115676903000602, - "grad_norm": 927.4996337890625, - "learning_rate": 3.693050280263268e-05, - "loss": 74.7277, - "step": 101870 - }, - { - "epoch": 0.4116080915654278, - "grad_norm": 401.41693115234375, - "learning_rate": 3.6927435152207406e-05, - "loss": 51.1262, - "step": 101880 - }, - { - "epoch": 0.41164849283079546, - "grad_norm": 756.7000122070312, - "learning_rate": 3.6924367269250644e-05, - "loss": 43.8885, - "step": 101890 - }, - { - "epoch": 0.4116888940961631, - "grad_norm": 1407.22412109375, - "learning_rate": 3.69212991538222e-05, - "loss": 51.1716, - "step": 101900 - }, - { - "epoch": 0.41172929536153074, - "grad_norm": 1306.109619140625, - "learning_rate": 3.691823080598189e-05, - "loss": 56.4997, - "step": 101910 - }, - { - "epoch": 0.41176969662689833, - "grad_norm": 890.89013671875, - "learning_rate": 3.6915162225789546e-05, - "loss": 88.45, - "step": 101920 - }, - { - "epoch": 0.41181009789226597, - "grad_norm": 195.86825561523438, - "learning_rate": 3.691209341330497e-05, - "loss": 70.7165, - "step": 101930 - }, - { - "epoch": 0.4118504991576336, - "grad_norm": 1073.44677734375, - "learning_rate": 3.690902436858801e-05, - "loss": 88.2749, - "step": 101940 - }, - { - "epoch": 0.41189090042300125, - "grad_norm": 310.966552734375, - "learning_rate": 3.690595509169848e-05, - "loss": 101.5226, - "step": 101950 - }, - { - "epoch": 0.4119313016883689, - "grad_norm": 515.8529663085938, - "learning_rate": 3.690288558269623e-05, - "loss": 55.0783, - "step": 101960 - }, - { - "epoch": 0.41197170295373653, - "grad_norm": 548.2571411132812, - "learning_rate": 3.68998158416411e-05, - "loss": 51.7741, - "step": 101970 - }, - { - "epoch": 0.41201210421910417, - "grad_norm": 712.4876708984375, - "learning_rate": 3.689674586859292e-05, - "loss": 55.8182, - "step": 101980 - }, - { - "epoch": 0.41205250548447175, - "grad_norm": 1289.3074951171875, - "learning_rate": 3.689367566361157e-05, - "loss": 83.5634, - "step": 101990 - }, - { - "epoch": 0.4120929067498394, - "grad_norm": 378.75164794921875, - "learning_rate": 3.689060522675689e-05, - "loss": 62.7789, - "step": 102000 - }, - { - "epoch": 0.41213330801520703, - "grad_norm": 788.7225341796875, - "learning_rate": 3.6887534558088727e-05, - "loss": 51.0795, - "step": 102010 - }, - { - "epoch": 0.4121737092805747, - "grad_norm": 419.6149597167969, - "learning_rate": 3.688446365766696e-05, - "loss": 72.9321, - "step": 102020 - }, - { - "epoch": 0.4122141105459423, - "grad_norm": 424.8109436035156, - "learning_rate": 3.688139252555146e-05, - "loss": 51.3233, - "step": 102030 - }, - { - "epoch": 0.41225451181130995, - "grad_norm": 340.1546630859375, - "learning_rate": 3.6878321161802104e-05, - "loss": 99.2203, - "step": 102040 - }, - { - "epoch": 0.41229491307667754, - "grad_norm": 572.083984375, - "learning_rate": 3.6875249566478745e-05, - "loss": 62.0742, - "step": 102050 - }, - { - "epoch": 0.4123353143420452, - "grad_norm": 639.38818359375, - "learning_rate": 3.687217773964129e-05, - "loss": 72.4136, - "step": 102060 - }, - { - "epoch": 0.4123757156074128, - "grad_norm": 405.4486389160156, - "learning_rate": 3.686910568134962e-05, - "loss": 57.4386, - "step": 102070 - }, - { - "epoch": 0.41241611687278046, - "grad_norm": 938.0454711914062, - "learning_rate": 3.686603339166362e-05, - "loss": 64.274, - "step": 102080 - }, - { - "epoch": 0.4124565181381481, - "grad_norm": 1430.1524658203125, - "learning_rate": 3.686296087064319e-05, - "loss": 62.3886, - "step": 102090 - }, - { - "epoch": 0.41249691940351574, - "grad_norm": 796.8814086914062, - "learning_rate": 3.685988811834823e-05, - "loss": 85.7702, - "step": 102100 - }, - { - "epoch": 0.4125373206688833, - "grad_norm": 1625.54833984375, - "learning_rate": 3.685681513483865e-05, - "loss": 86.8409, - "step": 102110 - }, - { - "epoch": 0.41257772193425096, - "grad_norm": 754.6946411132812, - "learning_rate": 3.685374192017436e-05, - "loss": 42.4442, - "step": 102120 - }, - { - "epoch": 0.4126181231996186, - "grad_norm": 599.8799438476562, - "learning_rate": 3.6850668474415255e-05, - "loss": 42.2114, - "step": 102130 - }, - { - "epoch": 0.41265852446498624, - "grad_norm": 919.1255493164062, - "learning_rate": 3.684759479762127e-05, - "loss": 66.864, - "step": 102140 - }, - { - "epoch": 0.4126989257303539, - "grad_norm": 352.7879943847656, - "learning_rate": 3.684452088985233e-05, - "loss": 46.0815, - "step": 102150 - }, - { - "epoch": 0.4127393269957215, - "grad_norm": 1374.6343994140625, - "learning_rate": 3.6841446751168355e-05, - "loss": 69.1762, - "step": 102160 - }, - { - "epoch": 0.41277972826108916, - "grad_norm": 597.5450439453125, - "learning_rate": 3.683837238162928e-05, - "loss": 85.0557, - "step": 102170 - }, - { - "epoch": 0.41282012952645675, - "grad_norm": 674.8424072265625, - "learning_rate": 3.683529778129503e-05, - "loss": 79.6359, - "step": 102180 - }, - { - "epoch": 0.4128605307918244, - "grad_norm": 624.8746337890625, - "learning_rate": 3.683222295022557e-05, - "loss": 39.7842, - "step": 102190 - }, - { - "epoch": 0.412900932057192, - "grad_norm": 1277.397705078125, - "learning_rate": 3.682914788848083e-05, - "loss": 58.8937, - "step": 102200 - }, - { - "epoch": 0.41294133332255967, - "grad_norm": 722.8589477539062, - "learning_rate": 3.682607259612076e-05, - "loss": 80.0508, - "step": 102210 - }, - { - "epoch": 0.4129817345879273, - "grad_norm": 824.6448364257812, - "learning_rate": 3.682299707320532e-05, - "loss": 53.1664, - "step": 102220 - }, - { - "epoch": 0.41302213585329495, - "grad_norm": 234.46766662597656, - "learning_rate": 3.681992131979446e-05, - "loss": 66.3412, - "step": 102230 - }, - { - "epoch": 0.41306253711866253, - "grad_norm": 1732.6822509765625, - "learning_rate": 3.681684533594815e-05, - "loss": 118.9763, - "step": 102240 - }, - { - "epoch": 0.41310293838403017, - "grad_norm": 985.6689453125, - "learning_rate": 3.681376912172636e-05, - "loss": 48.3016, - "step": 102250 - }, - { - "epoch": 0.4131433396493978, - "grad_norm": 534.2235107421875, - "learning_rate": 3.6810692677189046e-05, - "loss": 41.1004, - "step": 102260 - }, - { - "epoch": 0.41318374091476545, - "grad_norm": 0.0, - "learning_rate": 3.680761600239621e-05, - "loss": 47.6259, - "step": 102270 - }, - { - "epoch": 0.4132241421801331, - "grad_norm": 2476.089111328125, - "learning_rate": 3.680453909740782e-05, - "loss": 60.4867, - "step": 102280 - }, - { - "epoch": 0.41326454344550073, - "grad_norm": 733.741455078125, - "learning_rate": 3.680146196228386e-05, - "loss": 58.1537, - "step": 102290 - }, - { - "epoch": 0.41330494471086837, - "grad_norm": 543.4186401367188, - "learning_rate": 3.6798384597084325e-05, - "loss": 42.9503, - "step": 102300 - }, - { - "epoch": 0.41334534597623596, - "grad_norm": 535.3422241210938, - "learning_rate": 3.67953070018692e-05, - "loss": 76.8054, - "step": 102310 - }, - { - "epoch": 0.4133857472416036, - "grad_norm": 851.113037109375, - "learning_rate": 3.679222917669851e-05, - "loss": 53.6934, - "step": 102320 - }, - { - "epoch": 0.41342614850697124, - "grad_norm": 3898.60693359375, - "learning_rate": 3.6789151121632226e-05, - "loss": 58.586, - "step": 102330 - }, - { - "epoch": 0.4134665497723389, - "grad_norm": 425.6492919921875, - "learning_rate": 3.678607283673037e-05, - "loss": 61.4479, - "step": 102340 - }, - { - "epoch": 0.4135069510377065, - "grad_norm": 877.3761596679688, - "learning_rate": 3.678299432205296e-05, - "loss": 73.7707, - "step": 102350 - }, - { - "epoch": 0.41354735230307416, - "grad_norm": 763.9284057617188, - "learning_rate": 3.6779915577660015e-05, - "loss": 56.7363, - "step": 102360 - }, - { - "epoch": 0.41358775356844174, - "grad_norm": 1059.4683837890625, - "learning_rate": 3.677683660361155e-05, - "loss": 52.8749, - "step": 102370 - }, - { - "epoch": 0.4136281548338094, - "grad_norm": 408.92071533203125, - "learning_rate": 3.677375739996759e-05, - "loss": 53.0879, - "step": 102380 - }, - { - "epoch": 0.413668556099177, - "grad_norm": 529.644775390625, - "learning_rate": 3.677067796678817e-05, - "loss": 53.6878, - "step": 102390 - }, - { - "epoch": 0.41370895736454466, - "grad_norm": 888.7438354492188, - "learning_rate": 3.6767598304133324e-05, - "loss": 44.5858, - "step": 102400 - }, - { - "epoch": 0.4137493586299123, - "grad_norm": 1067.380615234375, - "learning_rate": 3.676451841206308e-05, - "loss": 42.4786, - "step": 102410 - }, - { - "epoch": 0.41378975989527994, - "grad_norm": 2619.344482421875, - "learning_rate": 3.67614382906375e-05, - "loss": 59.8869, - "step": 102420 - }, - { - "epoch": 0.4138301611606475, - "grad_norm": 730.7283325195312, - "learning_rate": 3.675835793991662e-05, - "loss": 53.2614, - "step": 102430 - }, - { - "epoch": 0.41387056242601516, - "grad_norm": 540.2338256835938, - "learning_rate": 3.67552773599605e-05, - "loss": 53.5669, - "step": 102440 - }, - { - "epoch": 0.4139109636913828, - "grad_norm": 424.4781799316406, - "learning_rate": 3.675219655082921e-05, - "loss": 40.3122, - "step": 102450 - }, - { - "epoch": 0.41395136495675044, - "grad_norm": 1885.5537109375, - "learning_rate": 3.6749115512582786e-05, - "loss": 59.4535, - "step": 102460 - }, - { - "epoch": 0.4139917662221181, - "grad_norm": 790.8214111328125, - "learning_rate": 3.674603424528131e-05, - "loss": 68.5578, - "step": 102470 - }, - { - "epoch": 0.4140321674874857, - "grad_norm": 788.549560546875, - "learning_rate": 3.674295274898485e-05, - "loss": 115.0106, - "step": 102480 - }, - { - "epoch": 0.41407256875285336, - "grad_norm": 3169.2861328125, - "learning_rate": 3.673987102375348e-05, - "loss": 78.3599, - "step": 102490 - }, - { - "epoch": 0.41411297001822095, - "grad_norm": 1180.6651611328125, - "learning_rate": 3.673678906964727e-05, - "loss": 69.2081, - "step": 102500 - }, - { - "epoch": 0.4141533712835886, - "grad_norm": 870.7091674804688, - "learning_rate": 3.673370688672632e-05, - "loss": 76.4967, - "step": 102510 - }, - { - "epoch": 0.41419377254895623, - "grad_norm": 535.1434326171875, - "learning_rate": 3.673062447505072e-05, - "loss": 37.986, - "step": 102520 - }, - { - "epoch": 0.41423417381432387, - "grad_norm": 574.4082641601562, - "learning_rate": 3.672754183468055e-05, - "loss": 43.2866, - "step": 102530 - }, - { - "epoch": 0.4142745750796915, - "grad_norm": 1545.2213134765625, - "learning_rate": 3.672445896567592e-05, - "loss": 84.1171, - "step": 102540 - }, - { - "epoch": 0.41431497634505915, - "grad_norm": 625.9170532226562, - "learning_rate": 3.6721375868096924e-05, - "loss": 44.562, - "step": 102550 - }, - { - "epoch": 0.41435537761042673, - "grad_norm": 948.0220336914062, - "learning_rate": 3.6718292542003666e-05, - "loss": 68.8121, - "step": 102560 - }, - { - "epoch": 0.4143957788757944, - "grad_norm": 400.49859619140625, - "learning_rate": 3.671520898745627e-05, - "loss": 59.8667, - "step": 102570 - }, - { - "epoch": 0.414436180141162, - "grad_norm": 329.8058776855469, - "learning_rate": 3.671212520451484e-05, - "loss": 76.8248, - "step": 102580 - }, - { - "epoch": 0.41447658140652965, - "grad_norm": 916.3284301757812, - "learning_rate": 3.670904119323949e-05, - "loss": 74.5609, - "step": 102590 - }, - { - "epoch": 0.4145169826718973, - "grad_norm": 561.3569946289062, - "learning_rate": 3.6705956953690364e-05, - "loss": 43.266, - "step": 102600 - }, - { - "epoch": 0.41455738393726493, - "grad_norm": 627.5663452148438, - "learning_rate": 3.670287248592758e-05, - "loss": 49.7998, - "step": 102610 - }, - { - "epoch": 0.4145977852026326, - "grad_norm": 919.4302368164062, - "learning_rate": 3.669978779001127e-05, - "loss": 72.0918, - "step": 102620 - }, - { - "epoch": 0.41463818646800016, - "grad_norm": 536.8106079101562, - "learning_rate": 3.669670286600157e-05, - "loss": 84.2724, - "step": 102630 - }, - { - "epoch": 0.4146785877333678, - "grad_norm": 926.4111328125, - "learning_rate": 3.6693617713958634e-05, - "loss": 49.7705, - "step": 102640 - }, - { - "epoch": 0.41471898899873544, - "grad_norm": 511.818359375, - "learning_rate": 3.66905323339426e-05, - "loss": 46.5629, - "step": 102650 - }, - { - "epoch": 0.4147593902641031, - "grad_norm": 1487.813720703125, - "learning_rate": 3.668744672601361e-05, - "loss": 55.226, - "step": 102660 - }, - { - "epoch": 0.4147997915294707, - "grad_norm": 1999.3150634765625, - "learning_rate": 3.668436089023184e-05, - "loss": 84.6968, - "step": 102670 - }, - { - "epoch": 0.41484019279483836, - "grad_norm": 443.5814208984375, - "learning_rate": 3.668127482665743e-05, - "loss": 59.7227, - "step": 102680 - }, - { - "epoch": 0.41488059406020594, - "grad_norm": 798.040283203125, - "learning_rate": 3.667818853535056e-05, - "loss": 88.5318, - "step": 102690 - }, - { - "epoch": 0.4149209953255736, - "grad_norm": 1065.25927734375, - "learning_rate": 3.667510201637139e-05, - "loss": 46.2704, - "step": 102700 - }, - { - "epoch": 0.4149613965909412, - "grad_norm": 213.02117919921875, - "learning_rate": 3.667201526978009e-05, - "loss": 72.9519, - "step": 102710 - }, - { - "epoch": 0.41500179785630886, - "grad_norm": 582.4226684570312, - "learning_rate": 3.6668928295636854e-05, - "loss": 43.031, - "step": 102720 - }, - { - "epoch": 0.4150421991216765, - "grad_norm": 550.1882934570312, - "learning_rate": 3.666584109400185e-05, - "loss": 56.0368, - "step": 102730 - }, - { - "epoch": 0.41508260038704414, - "grad_norm": 563.0902709960938, - "learning_rate": 3.666275366493526e-05, - "loss": 58.4682, - "step": 102740 - }, - { - "epoch": 0.4151230016524117, - "grad_norm": 1416.572998046875, - "learning_rate": 3.665966600849728e-05, - "loss": 48.4765, - "step": 102750 - }, - { - "epoch": 0.41516340291777937, - "grad_norm": 977.8565063476562, - "learning_rate": 3.665657812474812e-05, - "loss": 99.4884, - "step": 102760 - }, - { - "epoch": 0.415203804183147, - "grad_norm": 605.6978149414062, - "learning_rate": 3.6653490013747965e-05, - "loss": 62.5299, - "step": 102770 - }, - { - "epoch": 0.41524420544851465, - "grad_norm": 1297.1224365234375, - "learning_rate": 3.665040167555702e-05, - "loss": 53.5037, - "step": 102780 - }, - { - "epoch": 0.4152846067138823, - "grad_norm": 823.6423950195312, - "learning_rate": 3.664731311023549e-05, - "loss": 50.0008, - "step": 102790 - }, - { - "epoch": 0.4153250079792499, - "grad_norm": 611.2938842773438, - "learning_rate": 3.664422431784361e-05, - "loss": 50.3032, - "step": 102800 - }, - { - "epoch": 0.41536540924461757, - "grad_norm": 713.4339599609375, - "learning_rate": 3.6641135298441576e-05, - "loss": 65.9606, - "step": 102810 - }, - { - "epoch": 0.41540581050998515, - "grad_norm": 908.730712890625, - "learning_rate": 3.6638046052089616e-05, - "loss": 72.0269, - "step": 102820 - }, - { - "epoch": 0.4154462117753528, - "grad_norm": 714.7061767578125, - "learning_rate": 3.6634956578847954e-05, - "loss": 85.8837, - "step": 102830 - }, - { - "epoch": 0.41548661304072043, - "grad_norm": 490.82269287109375, - "learning_rate": 3.663186687877682e-05, - "loss": 57.8224, - "step": 102840 - }, - { - "epoch": 0.41552701430608807, - "grad_norm": 954.6878662109375, - "learning_rate": 3.662877695193646e-05, - "loss": 58.0162, - "step": 102850 - }, - { - "epoch": 0.4155674155714557, - "grad_norm": 220.2834930419922, - "learning_rate": 3.6625686798387106e-05, - "loss": 67.6011, - "step": 102860 - }, - { - "epoch": 0.41560781683682335, - "grad_norm": 1146.68994140625, - "learning_rate": 3.6622596418188995e-05, - "loss": 86.136, - "step": 102870 - }, - { - "epoch": 0.41564821810219094, - "grad_norm": 983.2677001953125, - "learning_rate": 3.661950581140239e-05, - "loss": 32.4779, - "step": 102880 - }, - { - "epoch": 0.4156886193675586, - "grad_norm": 694.6705322265625, - "learning_rate": 3.661641497808754e-05, - "loss": 52.4579, - "step": 102890 - }, - { - "epoch": 0.4157290206329262, - "grad_norm": 500.0966491699219, - "learning_rate": 3.66133239183047e-05, - "loss": 54.7675, - "step": 102900 - }, - { - "epoch": 0.41576942189829386, - "grad_norm": 847.8213500976562, - "learning_rate": 3.6610232632114124e-05, - "loss": 55.5015, - "step": 102910 - }, - { - "epoch": 0.4158098231636615, - "grad_norm": 1668.6177978515625, - "learning_rate": 3.6607141119576084e-05, - "loss": 98.7418, - "step": 102920 - }, - { - "epoch": 0.41585022442902914, - "grad_norm": 397.88885498046875, - "learning_rate": 3.6604049380750855e-05, - "loss": 49.9987, - "step": 102930 - }, - { - "epoch": 0.4158906256943968, - "grad_norm": 378.2391662597656, - "learning_rate": 3.660095741569871e-05, - "loss": 62.2791, - "step": 102940 - }, - { - "epoch": 0.41593102695976436, - "grad_norm": 539.7728271484375, - "learning_rate": 3.659786522447993e-05, - "loss": 57.2088, - "step": 102950 - }, - { - "epoch": 0.415971428225132, - "grad_norm": 925.6470336914062, - "learning_rate": 3.659477280715479e-05, - "loss": 54.7426, - "step": 102960 - }, - { - "epoch": 0.41601182949049964, - "grad_norm": 0.0, - "learning_rate": 3.659168016378359e-05, - "loss": 54.7942, - "step": 102970 - }, - { - "epoch": 0.4160522307558673, - "grad_norm": 1118.713623046875, - "learning_rate": 3.658858729442662e-05, - "loss": 45.2592, - "step": 102980 - }, - { - "epoch": 0.4160926320212349, - "grad_norm": 478.5465087890625, - "learning_rate": 3.658549419914417e-05, - "loss": 78.469, - "step": 102990 - }, - { - "epoch": 0.41613303328660256, - "grad_norm": 556.0774536132812, - "learning_rate": 3.6582400877996546e-05, - "loss": 56.5781, - "step": 103000 - }, - { - "epoch": 0.41617343455197015, - "grad_norm": 360.80340576171875, - "learning_rate": 3.6579307331044054e-05, - "loss": 52.6853, - "step": 103010 - }, - { - "epoch": 0.4162138358173378, - "grad_norm": 601.7576293945312, - "learning_rate": 3.657621355834701e-05, - "loss": 61.4476, - "step": 103020 - }, - { - "epoch": 0.4162542370827054, - "grad_norm": 592.1106567382812, - "learning_rate": 3.657311955996571e-05, - "loss": 51.4591, - "step": 103030 - }, - { - "epoch": 0.41629463834807306, - "grad_norm": 637.8342895507812, - "learning_rate": 3.657002533596049e-05, - "loss": 61.3845, - "step": 103040 - }, - { - "epoch": 0.4163350396134407, - "grad_norm": 371.7774963378906, - "learning_rate": 3.6566930886391674e-05, - "loss": 74.0396, - "step": 103050 - }, - { - "epoch": 0.41637544087880834, - "grad_norm": 375.3711242675781, - "learning_rate": 3.656383621131959e-05, - "loss": 56.7201, - "step": 103060 - }, - { - "epoch": 0.41641584214417593, - "grad_norm": 538.1626586914062, - "learning_rate": 3.656074131080457e-05, - "loss": 51.9634, - "step": 103070 - }, - { - "epoch": 0.41645624340954357, - "grad_norm": 1070.9267578125, - "learning_rate": 3.655764618490692e-05, - "loss": 92.4124, - "step": 103080 - }, - { - "epoch": 0.4164966446749112, - "grad_norm": 656.9863891601562, - "learning_rate": 3.655455083368703e-05, - "loss": 65.6765, - "step": 103090 - }, - { - "epoch": 0.41653704594027885, - "grad_norm": 841.900634765625, - "learning_rate": 3.655145525720522e-05, - "loss": 68.1387, - "step": 103100 - }, - { - "epoch": 0.4165774472056465, - "grad_norm": 409.5385437011719, - "learning_rate": 3.6548359455521836e-05, - "loss": 41.3072, - "step": 103110 - }, - { - "epoch": 0.41661784847101413, - "grad_norm": 598.8934936523438, - "learning_rate": 3.654526342869724e-05, - "loss": 64.1787, - "step": 103120 - }, - { - "epoch": 0.41665824973638177, - "grad_norm": 938.2333374023438, - "learning_rate": 3.654216717679179e-05, - "loss": 63.3869, - "step": 103130 - }, - { - "epoch": 0.41669865100174935, - "grad_norm": 0.0, - "learning_rate": 3.6539070699865853e-05, - "loss": 45.5103, - "step": 103140 - }, - { - "epoch": 0.416739052267117, - "grad_norm": 555.0664672851562, - "learning_rate": 3.653597399797979e-05, - "loss": 78.243, - "step": 103150 - }, - { - "epoch": 0.41677945353248463, - "grad_norm": 200.4738006591797, - "learning_rate": 3.6532877071193974e-05, - "loss": 45.6943, - "step": 103160 - }, - { - "epoch": 0.4168198547978523, - "grad_norm": 773.7369384765625, - "learning_rate": 3.652977991956878e-05, - "loss": 76.8474, - "step": 103170 - }, - { - "epoch": 0.4168602560632199, - "grad_norm": 1079.94775390625, - "learning_rate": 3.6526682543164595e-05, - "loss": 42.3459, - "step": 103180 - }, - { - "epoch": 0.41690065732858755, - "grad_norm": 919.0646362304688, - "learning_rate": 3.6523584942041794e-05, - "loss": 57.7047, - "step": 103190 - }, - { - "epoch": 0.41694105859395514, - "grad_norm": 891.546630859375, - "learning_rate": 3.6520487116260776e-05, - "loss": 73.0311, - "step": 103200 - }, - { - "epoch": 0.4169814598593228, - "grad_norm": 673.0984497070312, - "learning_rate": 3.6517389065881925e-05, - "loss": 52.7989, - "step": 103210 - }, - { - "epoch": 0.4170218611246904, - "grad_norm": 622.553955078125, - "learning_rate": 3.651429079096566e-05, - "loss": 61.8157, - "step": 103220 - }, - { - "epoch": 0.41706226239005806, - "grad_norm": 590.778564453125, - "learning_rate": 3.651119229157235e-05, - "loss": 42.1001, - "step": 103230 - }, - { - "epoch": 0.4171026636554257, - "grad_norm": 783.4470825195312, - "learning_rate": 3.650809356776242e-05, - "loss": 57.0175, - "step": 103240 - }, - { - "epoch": 0.41714306492079334, - "grad_norm": 1166.0804443359375, - "learning_rate": 3.6504994619596294e-05, - "loss": 72.4459, - "step": 103250 - }, - { - "epoch": 0.4171834661861609, - "grad_norm": 981.3162231445312, - "learning_rate": 3.650189544713437e-05, - "loss": 64.13, - "step": 103260 - }, - { - "epoch": 0.41722386745152856, - "grad_norm": 556.0223388671875, - "learning_rate": 3.649879605043707e-05, - "loss": 61.0497, - "step": 103270 - }, - { - "epoch": 0.4172642687168962, - "grad_norm": 1262.341064453125, - "learning_rate": 3.6495696429564823e-05, - "loss": 66.8657, - "step": 103280 - }, - { - "epoch": 0.41730466998226384, - "grad_norm": 322.91082763671875, - "learning_rate": 3.649259658457805e-05, - "loss": 62.4766, - "step": 103290 - }, - { - "epoch": 0.4173450712476315, - "grad_norm": 603.0479736328125, - "learning_rate": 3.6489496515537204e-05, - "loss": 65.0343, - "step": 103300 - }, - { - "epoch": 0.4173854725129991, - "grad_norm": 724.8381958007812, - "learning_rate": 3.648639622250269e-05, - "loss": 71.2941, - "step": 103310 - }, - { - "epoch": 0.41742587377836676, - "grad_norm": 709.285888671875, - "learning_rate": 3.648329570553498e-05, - "loss": 69.7722, - "step": 103320 - }, - { - "epoch": 0.41746627504373435, - "grad_norm": 1008.2940673828125, - "learning_rate": 3.648019496469451e-05, - "loss": 61.9354, - "step": 103330 - }, - { - "epoch": 0.417506676309102, - "grad_norm": 716.356689453125, - "learning_rate": 3.647709400004172e-05, - "loss": 102.8124, - "step": 103340 - }, - { - "epoch": 0.4175470775744696, - "grad_norm": 661.27880859375, - "learning_rate": 3.647399281163708e-05, - "loss": 54.8302, - "step": 103350 - }, - { - "epoch": 0.41758747883983727, - "grad_norm": 1043.45263671875, - "learning_rate": 3.647089139954104e-05, - "loss": 60.8454, - "step": 103360 - }, - { - "epoch": 0.4176278801052049, - "grad_norm": 862.45263671875, - "learning_rate": 3.646778976381407e-05, - "loss": 50.1279, - "step": 103370 - }, - { - "epoch": 0.41766828137057255, - "grad_norm": 924.0723266601562, - "learning_rate": 3.646468790451663e-05, - "loss": 41.2529, - "step": 103380 - }, - { - "epoch": 0.41770868263594013, - "grad_norm": 597.7651977539062, - "learning_rate": 3.64615858217092e-05, - "loss": 57.1287, - "step": 103390 - }, - { - "epoch": 0.41774908390130777, - "grad_norm": 379.25152587890625, - "learning_rate": 3.645848351545225e-05, - "loss": 62.9141, - "step": 103400 - }, - { - "epoch": 0.4177894851666754, - "grad_norm": 953.6447143554688, - "learning_rate": 3.645538098580627e-05, - "loss": 53.8604, - "step": 103410 - }, - { - "epoch": 0.41782988643204305, - "grad_norm": 575.6273193359375, - "learning_rate": 3.6452278232831735e-05, - "loss": 76.9551, - "step": 103420 - }, - { - "epoch": 0.4178702876974107, - "grad_norm": 1073.8568115234375, - "learning_rate": 3.644917525658914e-05, - "loss": 35.6009, - "step": 103430 - }, - { - "epoch": 0.41791068896277833, - "grad_norm": 1554.2855224609375, - "learning_rate": 3.644607205713898e-05, - "loss": 59.1008, - "step": 103440 - }, - { - "epoch": 0.41795109022814597, - "grad_norm": 619.3630981445312, - "learning_rate": 3.644296863454175e-05, - "loss": 48.0955, - "step": 103450 - }, - { - "epoch": 0.41799149149351356, - "grad_norm": 810.5908813476562, - "learning_rate": 3.643986498885796e-05, - "loss": 66.1217, - "step": 103460 - }, - { - "epoch": 0.4180318927588812, - "grad_norm": 2242.524658203125, - "learning_rate": 3.643676112014811e-05, - "loss": 79.2221, - "step": 103470 - }, - { - "epoch": 0.41807229402424884, - "grad_norm": 303.63385009765625, - "learning_rate": 3.643365702847272e-05, - "loss": 45.5608, - "step": 103480 - }, - { - "epoch": 0.4181126952896165, - "grad_norm": 1422.8160400390625, - "learning_rate": 3.643055271389229e-05, - "loss": 66.0965, - "step": 103490 - }, - { - "epoch": 0.4181530965549841, - "grad_norm": 741.1747436523438, - "learning_rate": 3.642744817646736e-05, - "loss": 45.8928, - "step": 103500 - }, - { - "epoch": 0.41819349782035176, - "grad_norm": 506.9162902832031, - "learning_rate": 3.642434341625844e-05, - "loss": 67.4611, - "step": 103510 - }, - { - "epoch": 0.41823389908571934, - "grad_norm": 592.9998168945312, - "learning_rate": 3.642123843332606e-05, - "loss": 63.5184, - "step": 103520 - }, - { - "epoch": 0.418274300351087, - "grad_norm": 303.4530334472656, - "learning_rate": 3.641813322773076e-05, - "loss": 50.6571, - "step": 103530 - }, - { - "epoch": 0.4183147016164546, - "grad_norm": 1942.93115234375, - "learning_rate": 3.641502779953307e-05, - "loss": 46.2943, - "step": 103540 - }, - { - "epoch": 0.41835510288182226, - "grad_norm": 748.298095703125, - "learning_rate": 3.6411922148793544e-05, - "loss": 54.8428, - "step": 103550 - }, - { - "epoch": 0.4183955041471899, - "grad_norm": 902.7544555664062, - "learning_rate": 3.640881627557271e-05, - "loss": 54.3055, - "step": 103560 - }, - { - "epoch": 0.41843590541255754, - "grad_norm": 1064.920654296875, - "learning_rate": 3.640571017993113e-05, - "loss": 58.3005, - "step": 103570 - }, - { - "epoch": 0.4184763066779251, - "grad_norm": 1428.6287841796875, - "learning_rate": 3.6402603861929374e-05, - "loss": 64.2658, - "step": 103580 - }, - { - "epoch": 0.41851670794329277, - "grad_norm": 503.11871337890625, - "learning_rate": 3.639949732162797e-05, - "loss": 61.6714, - "step": 103590 - }, - { - "epoch": 0.4185571092086604, - "grad_norm": 1219.691162109375, - "learning_rate": 3.639639055908751e-05, - "loss": 66.9562, - "step": 103600 - }, - { - "epoch": 0.41859751047402805, - "grad_norm": 223.49790954589844, - "learning_rate": 3.639328357436853e-05, - "loss": 55.7522, - "step": 103610 - }, - { - "epoch": 0.4186379117393957, - "grad_norm": 514.73681640625, - "learning_rate": 3.639017636753163e-05, - "loss": 71.0878, - "step": 103620 - }, - { - "epoch": 0.4186783130047633, - "grad_norm": 383.3266906738281, - "learning_rate": 3.638706893863739e-05, - "loss": 71.9139, - "step": 103630 - }, - { - "epoch": 0.41871871427013097, - "grad_norm": 539.1884765625, - "learning_rate": 3.638396128774636e-05, - "loss": 76.8819, - "step": 103640 - }, - { - "epoch": 0.41875911553549855, - "grad_norm": 1771.3558349609375, - "learning_rate": 3.6380853414919144e-05, - "loss": 65.5004, - "step": 103650 - }, - { - "epoch": 0.4187995168008662, - "grad_norm": 257.1325988769531, - "learning_rate": 3.6377745320216346e-05, - "loss": 84.9693, - "step": 103660 - }, - { - "epoch": 0.41883991806623383, - "grad_norm": 1277.81787109375, - "learning_rate": 3.6374637003698536e-05, - "loss": 59.5876, - "step": 103670 - }, - { - "epoch": 0.41888031933160147, - "grad_norm": 985.5901489257812, - "learning_rate": 3.637152846542633e-05, - "loss": 47.3202, - "step": 103680 - }, - { - "epoch": 0.4189207205969691, - "grad_norm": 943.6722412109375, - "learning_rate": 3.636841970546031e-05, - "loss": 52.7835, - "step": 103690 - }, - { - "epoch": 0.41896112186233675, - "grad_norm": 512.9838256835938, - "learning_rate": 3.63653107238611e-05, - "loss": 59.9564, - "step": 103700 - }, - { - "epoch": 0.41900152312770433, - "grad_norm": 886.2669067382812, - "learning_rate": 3.636220152068931e-05, - "loss": 69.5346, - "step": 103710 - }, - { - "epoch": 0.419041924393072, - "grad_norm": 866.3492431640625, - "learning_rate": 3.635909209600555e-05, - "loss": 68.2752, - "step": 103720 - }, - { - "epoch": 0.4190823256584396, - "grad_norm": 536.70947265625, - "learning_rate": 3.635598244987043e-05, - "loss": 56.4688, - "step": 103730 - }, - { - "epoch": 0.41912272692380725, - "grad_norm": 421.8968200683594, - "learning_rate": 3.6352872582344596e-05, - "loss": 63.9303, - "step": 103740 - }, - { - "epoch": 0.4191631281891749, - "grad_norm": 437.35546875, - "learning_rate": 3.634976249348867e-05, - "loss": 29.7742, - "step": 103750 - }, - { - "epoch": 0.41920352945454253, - "grad_norm": 1521.4986572265625, - "learning_rate": 3.634665218336328e-05, - "loss": 52.6372, - "step": 103760 - }, - { - "epoch": 0.4192439307199102, - "grad_norm": 814.3853759765625, - "learning_rate": 3.6343541652029064e-05, - "loss": 68.0177, - "step": 103770 - }, - { - "epoch": 0.41928433198527776, - "grad_norm": 495.0726318359375, - "learning_rate": 3.6340430899546656e-05, - "loss": 49.5452, - "step": 103780 - }, - { - "epoch": 0.4193247332506454, - "grad_norm": 1200.4290771484375, - "learning_rate": 3.633731992597672e-05, - "loss": 81.554, - "step": 103790 - }, - { - "epoch": 0.41936513451601304, - "grad_norm": 1105.92822265625, - "learning_rate": 3.633420873137988e-05, - "loss": 59.0107, - "step": 103800 - }, - { - "epoch": 0.4194055357813807, - "grad_norm": 965.9473266601562, - "learning_rate": 3.633109731581682e-05, - "loss": 52.3541, - "step": 103810 - }, - { - "epoch": 0.4194459370467483, - "grad_norm": 784.390625, - "learning_rate": 3.632798567934817e-05, - "loss": 42.5555, - "step": 103820 - }, - { - "epoch": 0.41948633831211596, - "grad_norm": 1471.4720458984375, - "learning_rate": 3.632487382203462e-05, - "loss": 61.9829, - "step": 103830 - }, - { - "epoch": 0.41952673957748354, - "grad_norm": 826.1594848632812, - "learning_rate": 3.632176174393682e-05, - "loss": 53.5257, - "step": 103840 - }, - { - "epoch": 0.4195671408428512, - "grad_norm": 526.9407958984375, - "learning_rate": 3.631864944511545e-05, - "loss": 58.7132, - "step": 103850 - }, - { - "epoch": 0.4196075421082188, - "grad_norm": 845.7454833984375, - "learning_rate": 3.6315536925631174e-05, - "loss": 44.2861, - "step": 103860 - }, - { - "epoch": 0.41964794337358646, - "grad_norm": 869.853515625, - "learning_rate": 3.631242418554469e-05, - "loss": 78.6071, - "step": 103870 - }, - { - "epoch": 0.4196883446389541, - "grad_norm": 858.0875244140625, - "learning_rate": 3.630931122491666e-05, - "loss": 53.3545, - "step": 103880 - }, - { - "epoch": 0.41972874590432174, - "grad_norm": 472.39337158203125, - "learning_rate": 3.6306198043807795e-05, - "loss": 44.7906, - "step": 103890 - }, - { - "epoch": 0.4197691471696893, - "grad_norm": 1292.262939453125, - "learning_rate": 3.630308464227877e-05, - "loss": 46.8033, - "step": 103900 - }, - { - "epoch": 0.41980954843505697, - "grad_norm": 667.8062133789062, - "learning_rate": 3.62999710203903e-05, - "loss": 75.6769, - "step": 103910 - }, - { - "epoch": 0.4198499497004246, - "grad_norm": 862.8394775390625, - "learning_rate": 3.629685717820307e-05, - "loss": 56.4103, - "step": 103920 - }, - { - "epoch": 0.41989035096579225, - "grad_norm": 348.3255920410156, - "learning_rate": 3.629374311577779e-05, - "loss": 44.0832, - "step": 103930 - }, - { - "epoch": 0.4199307522311599, - "grad_norm": 1450.09375, - "learning_rate": 3.629062883317519e-05, - "loss": 68.2507, - "step": 103940 - }, - { - "epoch": 0.4199711534965275, - "grad_norm": 537.00537109375, - "learning_rate": 3.628751433045596e-05, - "loss": 69.9783, - "step": 103950 - }, - { - "epoch": 0.42001155476189517, - "grad_norm": 581.5643310546875, - "learning_rate": 3.628439960768082e-05, - "loss": 63.0526, - "step": 103960 - }, - { - "epoch": 0.42005195602726275, - "grad_norm": 664.759521484375, - "learning_rate": 3.62812846649105e-05, - "loss": 56.3519, - "step": 103970 - }, - { - "epoch": 0.4200923572926304, - "grad_norm": 635.3744506835938, - "learning_rate": 3.6278169502205736e-05, - "loss": 61.1876, - "step": 103980 - }, - { - "epoch": 0.42013275855799803, - "grad_norm": 530.5980224609375, - "learning_rate": 3.627505411962724e-05, - "loss": 66.6494, - "step": 103990 - }, - { - "epoch": 0.42017315982336567, - "grad_norm": 758.6454467773438, - "learning_rate": 3.627193851723577e-05, - "loss": 58.0647, - "step": 104000 - }, - { - "epoch": 0.4202135610887333, - "grad_norm": 2163.636474609375, - "learning_rate": 3.6268822695092056e-05, - "loss": 46.5954, - "step": 104010 - }, - { - "epoch": 0.42025396235410095, - "grad_norm": 1194.97119140625, - "learning_rate": 3.626570665325684e-05, - "loss": 61.3154, - "step": 104020 - }, - { - "epoch": 0.42029436361946854, - "grad_norm": 1891.653076171875, - "learning_rate": 3.626259039179086e-05, - "loss": 59.1667, - "step": 104030 - }, - { - "epoch": 0.4203347648848362, - "grad_norm": 609.3426513671875, - "learning_rate": 3.6259473910754904e-05, - "loss": 41.8143, - "step": 104040 - }, - { - "epoch": 0.4203751661502038, - "grad_norm": 1182.2841796875, - "learning_rate": 3.625635721020969e-05, - "loss": 64.0979, - "step": 104050 - }, - { - "epoch": 0.42041556741557146, - "grad_norm": 960.9483642578125, - "learning_rate": 3.6253240290216e-05, - "loss": 49.4696, - "step": 104060 - }, - { - "epoch": 0.4204559686809391, - "grad_norm": 292.2507629394531, - "learning_rate": 3.62501231508346e-05, - "loss": 71.0735, - "step": 104070 - }, - { - "epoch": 0.42049636994630674, - "grad_norm": 782.7526245117188, - "learning_rate": 3.624700579212626e-05, - "loss": 55.9743, - "step": 104080 - }, - { - "epoch": 0.4205367712116744, - "grad_norm": 1657.7484130859375, - "learning_rate": 3.624388821415175e-05, - "loss": 35.3792, - "step": 104090 - }, - { - "epoch": 0.42057717247704196, - "grad_norm": 462.6631774902344, - "learning_rate": 3.624077041697185e-05, - "loss": 53.0793, - "step": 104100 - }, - { - "epoch": 0.4206175737424096, - "grad_norm": 606.4874267578125, - "learning_rate": 3.6237652400647345e-05, - "loss": 45.0725, - "step": 104110 - }, - { - "epoch": 0.42065797500777724, - "grad_norm": 842.7166137695312, - "learning_rate": 3.623453416523902e-05, - "loss": 67.8834, - "step": 104120 - }, - { - "epoch": 0.4206983762731449, - "grad_norm": 249.5082550048828, - "learning_rate": 3.623141571080766e-05, - "loss": 37.8796, - "step": 104130 - }, - { - "epoch": 0.4207387775385125, - "grad_norm": 343.2146911621094, - "learning_rate": 3.6228297037414074e-05, - "loss": 57.456, - "step": 104140 - }, - { - "epoch": 0.42077917880388016, - "grad_norm": 3797.084228515625, - "learning_rate": 3.622517814511906e-05, - "loss": 60.7318, - "step": 104150 - }, - { - "epoch": 0.42081958006924775, - "grad_norm": 617.2786254882812, - "learning_rate": 3.622205903398342e-05, - "loss": 59.6277, - "step": 104160 - }, - { - "epoch": 0.4208599813346154, - "grad_norm": 836.6347045898438, - "learning_rate": 3.6218939704067955e-05, - "loss": 69.2782, - "step": 104170 - }, - { - "epoch": 0.420900382599983, - "grad_norm": 988.6712646484375, - "learning_rate": 3.621582015543348e-05, - "loss": 71.7698, - "step": 104180 - }, - { - "epoch": 0.42094078386535067, - "grad_norm": 377.2164001464844, - "learning_rate": 3.621270038814083e-05, - "loss": 58.6837, - "step": 104190 - }, - { - "epoch": 0.4209811851307183, - "grad_norm": 498.7535095214844, - "learning_rate": 3.6209580402250815e-05, - "loss": 38.0926, - "step": 104200 - }, - { - "epoch": 0.42102158639608595, - "grad_norm": 403.9892883300781, - "learning_rate": 3.620646019782425e-05, - "loss": 51.1487, - "step": 104210 - }, - { - "epoch": 0.42106198766145353, - "grad_norm": 1094.6707763671875, - "learning_rate": 3.6203339774921976e-05, - "loss": 49.0772, - "step": 104220 - }, - { - "epoch": 0.42110238892682117, - "grad_norm": 1082.783935546875, - "learning_rate": 3.6200219133604816e-05, - "loss": 71.491, - "step": 104230 - }, - { - "epoch": 0.4211427901921888, - "grad_norm": 443.6391296386719, - "learning_rate": 3.6197098273933634e-05, - "loss": 57.1322, - "step": 104240 - }, - { - "epoch": 0.42118319145755645, - "grad_norm": 431.85430908203125, - "learning_rate": 3.619397719596924e-05, - "loss": 51.8768, - "step": 104250 - }, - { - "epoch": 0.4212235927229241, - "grad_norm": 2122.891845703125, - "learning_rate": 3.619085589977251e-05, - "loss": 62.3512, - "step": 104260 - }, - { - "epoch": 0.42126399398829173, - "grad_norm": 1347.9971923828125, - "learning_rate": 3.618773438540428e-05, - "loss": 68.3303, - "step": 104270 - }, - { - "epoch": 0.42130439525365937, - "grad_norm": 583.928955078125, - "learning_rate": 3.618461265292541e-05, - "loss": 52.5091, - "step": 104280 - }, - { - "epoch": 0.42134479651902695, - "grad_norm": 523.10546875, - "learning_rate": 3.618149070239676e-05, - "loss": 55.4588, - "step": 104290 - }, - { - "epoch": 0.4213851977843946, - "grad_norm": 891.4425659179688, - "learning_rate": 3.617836853387918e-05, - "loss": 56.3773, - "step": 104300 - }, - { - "epoch": 0.42142559904976223, - "grad_norm": 534.82470703125, - "learning_rate": 3.6175246147433563e-05, - "loss": 64.5376, - "step": 104310 - }, - { - "epoch": 0.4214660003151299, - "grad_norm": 529.0205688476562, - "learning_rate": 3.617212354312076e-05, - "loss": 50.4187, - "step": 104320 - }, - { - "epoch": 0.4215064015804975, - "grad_norm": 303.3257141113281, - "learning_rate": 3.616900072100166e-05, - "loss": 42.9221, - "step": 104330 - }, - { - "epoch": 0.42154680284586515, - "grad_norm": 1264.615478515625, - "learning_rate": 3.6165877681137136e-05, - "loss": 72.0312, - "step": 104340 - }, - { - "epoch": 0.42158720411123274, - "grad_norm": 1191.180908203125, - "learning_rate": 3.6162754423588085e-05, - "loss": 57.8124, - "step": 104350 - }, - { - "epoch": 0.4216276053766004, - "grad_norm": 366.5780944824219, - "learning_rate": 3.61596309484154e-05, - "loss": 56.5291, - "step": 104360 - }, - { - "epoch": 0.421668006641968, - "grad_norm": 512.8042602539062, - "learning_rate": 3.615650725567995e-05, - "loss": 57.7641, - "step": 104370 - }, - { - "epoch": 0.42170840790733566, - "grad_norm": 782.4150390625, - "learning_rate": 3.615338334544265e-05, - "loss": 41.4072, - "step": 104380 - }, - { - "epoch": 0.4217488091727033, - "grad_norm": 1751.27587890625, - "learning_rate": 3.615025921776439e-05, - "loss": 49.6701, - "step": 104390 - }, - { - "epoch": 0.42178921043807094, - "grad_norm": 1079.94189453125, - "learning_rate": 3.614713487270611e-05, - "loss": 46.7276, - "step": 104400 - }, - { - "epoch": 0.4218296117034386, - "grad_norm": 1060.4190673828125, - "learning_rate": 3.614401031032867e-05, - "loss": 106.2965, - "step": 104410 - }, - { - "epoch": 0.42187001296880616, - "grad_norm": 749.7593994140625, - "learning_rate": 3.614088553069303e-05, - "loss": 44.3926, - "step": 104420 - }, - { - "epoch": 0.4219104142341738, - "grad_norm": 628.2740478515625, - "learning_rate": 3.6137760533860074e-05, - "loss": 88.6748, - "step": 104430 - }, - { - "epoch": 0.42195081549954144, - "grad_norm": 461.367431640625, - "learning_rate": 3.613463531989076e-05, - "loss": 49.6176, - "step": 104440 - }, - { - "epoch": 0.4219912167649091, - "grad_norm": 873.3343505859375, - "learning_rate": 3.613150988884599e-05, - "loss": 61.126, - "step": 104450 - }, - { - "epoch": 0.4220316180302767, - "grad_norm": 393.72869873046875, - "learning_rate": 3.612838424078671e-05, - "loss": 38.0402, - "step": 104460 - }, - { - "epoch": 0.42207201929564436, - "grad_norm": 850.967041015625, - "learning_rate": 3.612525837577384e-05, - "loss": 66.1431, - "step": 104470 - }, - { - "epoch": 0.42211242056101195, - "grad_norm": 1090.09814453125, - "learning_rate": 3.6122132293868335e-05, - "loss": 82.8322, - "step": 104480 - }, - { - "epoch": 0.4221528218263796, - "grad_norm": 956.2141723632812, - "learning_rate": 3.611900599513114e-05, - "loss": 57.5093, - "step": 104490 - }, - { - "epoch": 0.4221932230917472, - "grad_norm": 407.1945495605469, - "learning_rate": 3.611587947962319e-05, - "loss": 50.0537, - "step": 104500 - }, - { - "epoch": 0.42223362435711487, - "grad_norm": 486.4323425292969, - "learning_rate": 3.6112752747405447e-05, - "loss": 52.2107, - "step": 104510 - }, - { - "epoch": 0.4222740256224825, - "grad_norm": 864.6810913085938, - "learning_rate": 3.6109625798538873e-05, - "loss": 50.7976, - "step": 104520 - }, - { - "epoch": 0.42231442688785015, - "grad_norm": 534.1397705078125, - "learning_rate": 3.6106498633084424e-05, - "loss": 48.8516, - "step": 104530 - }, - { - "epoch": 0.42235482815321773, - "grad_norm": 636.9712524414062, - "learning_rate": 3.610337125110307e-05, - "loss": 52.8733, - "step": 104540 - }, - { - "epoch": 0.42239522941858537, - "grad_norm": 1049.5821533203125, - "learning_rate": 3.610024365265577e-05, - "loss": 56.6238, - "step": 104550 - }, - { - "epoch": 0.422435630683953, - "grad_norm": 374.7794189453125, - "learning_rate": 3.6097115837803505e-05, - "loss": 73.0223, - "step": 104560 - }, - { - "epoch": 0.42247603194932065, - "grad_norm": 321.7105712890625, - "learning_rate": 3.609398780660726e-05, - "loss": 51.7502, - "step": 104570 - }, - { - "epoch": 0.4225164332146883, - "grad_norm": 833.6190185546875, - "learning_rate": 3.6090859559128e-05, - "loss": 44.2709, - "step": 104580 - }, - { - "epoch": 0.42255683448005593, - "grad_norm": 465.5598449707031, - "learning_rate": 3.6087731095426733e-05, - "loss": 54.5164, - "step": 104590 - }, - { - "epoch": 0.42259723574542357, - "grad_norm": 460.9774169921875, - "learning_rate": 3.608460241556443e-05, - "loss": 59.3148, - "step": 104600 - }, - { - "epoch": 0.42263763701079116, - "grad_norm": 390.5638427734375, - "learning_rate": 3.6081473519602105e-05, - "loss": 70.8456, - "step": 104610 - }, - { - "epoch": 0.4226780382761588, - "grad_norm": 1563.58642578125, - "learning_rate": 3.607834440760074e-05, - "loss": 57.9671, - "step": 104620 - }, - { - "epoch": 0.42271843954152644, - "grad_norm": 1309.08935546875, - "learning_rate": 3.607521507962136e-05, - "loss": 63.8607, - "step": 104630 - }, - { - "epoch": 0.4227588408068941, - "grad_norm": 2113.58447265625, - "learning_rate": 3.6072085535724956e-05, - "loss": 57.3463, - "step": 104640 - }, - { - "epoch": 0.4227992420722617, - "grad_norm": 1004.9712524414062, - "learning_rate": 3.606895577597255e-05, - "loss": 67.326, - "step": 104650 - }, - { - "epoch": 0.42283964333762936, - "grad_norm": 0.0, - "learning_rate": 3.606582580042513e-05, - "loss": 33.3827, - "step": 104660 - }, - { - "epoch": 0.42288004460299694, - "grad_norm": 594.0809936523438, - "learning_rate": 3.606269560914376e-05, - "loss": 48.728, - "step": 104670 - }, - { - "epoch": 0.4229204458683646, - "grad_norm": 891.5621948242188, - "learning_rate": 3.6059565202189435e-05, - "loss": 63.9774, - "step": 104680 - }, - { - "epoch": 0.4229608471337322, - "grad_norm": 622.961669921875, - "learning_rate": 3.605643457962319e-05, - "loss": 53.7502, - "step": 104690 - }, - { - "epoch": 0.42300124839909986, - "grad_norm": 899.34716796875, - "learning_rate": 3.605330374150607e-05, - "loss": 62.4448, - "step": 104700 - }, - { - "epoch": 0.4230416496644675, - "grad_norm": 698.8828735351562, - "learning_rate": 3.60501726878991e-05, - "loss": 72.48, - "step": 104710 - }, - { - "epoch": 0.42308205092983514, - "grad_norm": 591.2288208007812, - "learning_rate": 3.604704141886332e-05, - "loss": 50.3654, - "step": 104720 - }, - { - "epoch": 0.4231224521952028, - "grad_norm": 1411.56640625, - "learning_rate": 3.6043909934459785e-05, - "loss": 68.7279, - "step": 104730 - }, - { - "epoch": 0.42316285346057037, - "grad_norm": 3371.13134765625, - "learning_rate": 3.604077823474954e-05, - "loss": 72.1287, - "step": 104740 - }, - { - "epoch": 0.423203254725938, - "grad_norm": 0.0, - "learning_rate": 3.603764631979363e-05, - "loss": 61.998, - "step": 104750 - }, - { - "epoch": 0.42324365599130565, - "grad_norm": 704.408935546875, - "learning_rate": 3.603451418965313e-05, - "loss": 27.3794, - "step": 104760 - }, - { - "epoch": 0.4232840572566733, - "grad_norm": 973.9273681640625, - "learning_rate": 3.60313818443891e-05, - "loss": 87.7188, - "step": 104770 - }, - { - "epoch": 0.4233244585220409, - "grad_norm": 908.014404296875, - "learning_rate": 3.602824928406259e-05, - "loss": 51.2298, - "step": 104780 - }, - { - "epoch": 0.42336485978740857, - "grad_norm": 693.9036254882812, - "learning_rate": 3.602511650873469e-05, - "loss": 97.5675, - "step": 104790 - }, - { - "epoch": 0.42340526105277615, - "grad_norm": 1070.6943359375, - "learning_rate": 3.602198351846647e-05, - "loss": 70.0561, - "step": 104800 - }, - { - "epoch": 0.4234456623181438, - "grad_norm": 970.8385620117188, - "learning_rate": 3.6018850313319e-05, - "loss": 71.5289, - "step": 104810 - }, - { - "epoch": 0.42348606358351143, - "grad_norm": 1016.22021484375, - "learning_rate": 3.6015716893353376e-05, - "loss": 53.878, - "step": 104820 - }, - { - "epoch": 0.42352646484887907, - "grad_norm": 532.7132568359375, - "learning_rate": 3.601258325863067e-05, - "loss": 62.0343, - "step": 104830 - }, - { - "epoch": 0.4235668661142467, - "grad_norm": 94.07963562011719, - "learning_rate": 3.600944940921199e-05, - "loss": 67.1461, - "step": 104840 - }, - { - "epoch": 0.42360726737961435, - "grad_norm": 921.68017578125, - "learning_rate": 3.6006315345158434e-05, - "loss": 65.9748, - "step": 104850 - }, - { - "epoch": 0.42364766864498193, - "grad_norm": 1337.1324462890625, - "learning_rate": 3.600318106653108e-05, - "loss": 60.1088, - "step": 104860 - }, - { - "epoch": 0.4236880699103496, - "grad_norm": 522.1008911132812, - "learning_rate": 3.600004657339105e-05, - "loss": 55.0299, - "step": 104870 - }, - { - "epoch": 0.4237284711757172, - "grad_norm": 452.1679992675781, - "learning_rate": 3.5996911865799454e-05, - "loss": 42.4841, - "step": 104880 - }, - { - "epoch": 0.42376887244108485, - "grad_norm": 712.0603637695312, - "learning_rate": 3.59937769438174e-05, - "loss": 57.7705, - "step": 104890 - }, - { - "epoch": 0.4238092737064525, - "grad_norm": 486.09613037109375, - "learning_rate": 3.5990641807506e-05, - "loss": 39.141, - "step": 104900 - }, - { - "epoch": 0.42384967497182013, - "grad_norm": 1278.7674560546875, - "learning_rate": 3.598750645692638e-05, - "loss": 59.3348, - "step": 104910 - }, - { - "epoch": 0.4238900762371878, - "grad_norm": 1631.982666015625, - "learning_rate": 3.5984370892139666e-05, - "loss": 68.6455, - "step": 104920 - }, - { - "epoch": 0.42393047750255536, - "grad_norm": 466.52874755859375, - "learning_rate": 3.598123511320699e-05, - "loss": 37.1398, - "step": 104930 - }, - { - "epoch": 0.423970878767923, - "grad_norm": 1008.3381958007812, - "learning_rate": 3.597809912018947e-05, - "loss": 65.8413, - "step": 104940 - }, - { - "epoch": 0.42401128003329064, - "grad_norm": 738.3510131835938, - "learning_rate": 3.597496291314827e-05, - "loss": 67.1094, - "step": 104950 - }, - { - "epoch": 0.4240516812986583, - "grad_norm": 337.72796630859375, - "learning_rate": 3.5971826492144504e-05, - "loss": 63.2226, - "step": 104960 - }, - { - "epoch": 0.4240920825640259, - "grad_norm": 295.6142272949219, - "learning_rate": 3.5968689857239345e-05, - "loss": 65.1638, - "step": 104970 - }, - { - "epoch": 0.42413248382939356, - "grad_norm": 797.707275390625, - "learning_rate": 3.596555300849392e-05, - "loss": 77.6298, - "step": 104980 - }, - { - "epoch": 0.42417288509476114, - "grad_norm": 476.190185546875, - "learning_rate": 3.5962415945969405e-05, - "loss": 69.4243, - "step": 104990 - }, - { - "epoch": 0.4242132863601288, - "grad_norm": 412.16644287109375, - "learning_rate": 3.5959278669726935e-05, - "loss": 40.4091, - "step": 105000 - }, - { - "epoch": 0.4242536876254964, - "grad_norm": 565.7656860351562, - "learning_rate": 3.595614117982769e-05, - "loss": 68.7301, - "step": 105010 - }, - { - "epoch": 0.42429408889086406, - "grad_norm": 733.7787475585938, - "learning_rate": 3.5953003476332835e-05, - "loss": 54.2594, - "step": 105020 - }, - { - "epoch": 0.4243344901562317, - "grad_norm": 842.4686279296875, - "learning_rate": 3.5949865559303536e-05, - "loss": 65.4532, - "step": 105030 - }, - { - "epoch": 0.42437489142159934, - "grad_norm": 1525.9725341796875, - "learning_rate": 3.594672742880097e-05, - "loss": 51.1515, - "step": 105040 - }, - { - "epoch": 0.424415292686967, - "grad_norm": 1515.8177490234375, - "learning_rate": 3.594358908488632e-05, - "loss": 42.8702, - "step": 105050 - }, - { - "epoch": 0.42445569395233457, - "grad_norm": 922.44091796875, - "learning_rate": 3.594045052762076e-05, - "loss": 57.4078, - "step": 105060 - }, - { - "epoch": 0.4244960952177022, - "grad_norm": 1049.3338623046875, - "learning_rate": 3.5937311757065494e-05, - "loss": 55.3803, - "step": 105070 - }, - { - "epoch": 0.42453649648306985, - "grad_norm": 563.2704467773438, - "learning_rate": 3.5934172773281696e-05, - "loss": 28.5142, - "step": 105080 - }, - { - "epoch": 0.4245768977484375, - "grad_norm": 655.0813598632812, - "learning_rate": 3.593103357633058e-05, - "loss": 41.6843, - "step": 105090 - }, - { - "epoch": 0.4246172990138051, - "grad_norm": 684.439453125, - "learning_rate": 3.592789416627332e-05, - "loss": 50.5486, - "step": 105100 - }, - { - "epoch": 0.42465770027917277, - "grad_norm": 765.9920654296875, - "learning_rate": 3.592475454317115e-05, - "loss": 68.0954, - "step": 105110 - }, - { - "epoch": 0.42469810154454035, - "grad_norm": 1379.385009765625, - "learning_rate": 3.592161470708526e-05, - "loss": 65.1052, - "step": 105120 - }, - { - "epoch": 0.424738502809908, - "grad_norm": 296.92437744140625, - "learning_rate": 3.591847465807687e-05, - "loss": 46.8323, - "step": 105130 - }, - { - "epoch": 0.42477890407527563, - "grad_norm": 554.3091430664062, - "learning_rate": 3.59153343962072e-05, - "loss": 54.8523, - "step": 105140 - }, - { - "epoch": 0.42481930534064327, - "grad_norm": 529.84521484375, - "learning_rate": 3.5912193921537476e-05, - "loss": 44.9619, - "step": 105150 - }, - { - "epoch": 0.4248597066060109, - "grad_norm": 571.56591796875, - "learning_rate": 3.5909053234128895e-05, - "loss": 44.5782, - "step": 105160 - }, - { - "epoch": 0.42490010787137855, - "grad_norm": 598.4688110351562, - "learning_rate": 3.590591233404271e-05, - "loss": 75.1092, - "step": 105170 - }, - { - "epoch": 0.42494050913674614, - "grad_norm": 1925.43798828125, - "learning_rate": 3.590277122134015e-05, - "loss": 52.7716, - "step": 105180 - }, - { - "epoch": 0.4249809104021138, - "grad_norm": 435.9007263183594, - "learning_rate": 3.5899629896082454e-05, - "loss": 35.1308, - "step": 105190 - }, - { - "epoch": 0.4250213116674814, - "grad_norm": 492.9730224609375, - "learning_rate": 3.5896488358330856e-05, - "loss": 47.9135, - "step": 105200 - }, - { - "epoch": 0.42506171293284906, - "grad_norm": 1603.56591796875, - "learning_rate": 3.5893346608146607e-05, - "loss": 62.7425, - "step": 105210 - }, - { - "epoch": 0.4251021141982167, - "grad_norm": 715.2278442382812, - "learning_rate": 3.5890204645590964e-05, - "loss": 127.7718, - "step": 105220 - }, - { - "epoch": 0.42514251546358434, - "grad_norm": 572.968017578125, - "learning_rate": 3.588706247072518e-05, - "loss": 65.1566, - "step": 105230 - }, - { - "epoch": 0.425182916728952, - "grad_norm": 859.6871948242188, - "learning_rate": 3.588392008361049e-05, - "loss": 49.4484, - "step": 105240 - }, - { - "epoch": 0.42522331799431956, - "grad_norm": 1073.6341552734375, - "learning_rate": 3.588077748430819e-05, - "loss": 65.2147, - "step": 105250 - }, - { - "epoch": 0.4252637192596872, - "grad_norm": 802.9885864257812, - "learning_rate": 3.587763467287953e-05, - "loss": 44.3761, - "step": 105260 - }, - { - "epoch": 0.42530412052505484, - "grad_norm": 1480.44921875, - "learning_rate": 3.587449164938578e-05, - "loss": 53.1393, - "step": 105270 - }, - { - "epoch": 0.4253445217904225, - "grad_norm": 544.9711303710938, - "learning_rate": 3.5871348413888204e-05, - "loss": 64.2403, - "step": 105280 - }, - { - "epoch": 0.4253849230557901, - "grad_norm": 1001.657958984375, - "learning_rate": 3.586820496644811e-05, - "loss": 61.8277, - "step": 105290 - }, - { - "epoch": 0.42542532432115776, - "grad_norm": 1063.820068359375, - "learning_rate": 3.586506130712676e-05, - "loss": 47.9627, - "step": 105300 - }, - { - "epoch": 0.42546572558652535, - "grad_norm": 1031.9720458984375, - "learning_rate": 3.5861917435985445e-05, - "loss": 58.5947, - "step": 105310 - }, - { - "epoch": 0.425506126851893, - "grad_norm": 376.46630859375, - "learning_rate": 3.585877335308546e-05, - "loss": 62.4376, - "step": 105320 - }, - { - "epoch": 0.4255465281172606, - "grad_norm": 1131.775390625, - "learning_rate": 3.5855629058488095e-05, - "loss": 71.483, - "step": 105330 - }, - { - "epoch": 0.42558692938262827, - "grad_norm": 687.2857666015625, - "learning_rate": 3.585248455225466e-05, - "loss": 69.5148, - "step": 105340 - }, - { - "epoch": 0.4256273306479959, - "grad_norm": 473.1686706542969, - "learning_rate": 3.584933983444644e-05, - "loss": 89.6963, - "step": 105350 - }, - { - "epoch": 0.42566773191336355, - "grad_norm": 742.19873046875, - "learning_rate": 3.5846194905124757e-05, - "loss": 67.2487, - "step": 105360 - }, - { - "epoch": 0.4257081331787312, - "grad_norm": 369.0582275390625, - "learning_rate": 3.584304976435092e-05, - "loss": 84.1498, - "step": 105370 - }, - { - "epoch": 0.42574853444409877, - "grad_norm": 409.1021728515625, - "learning_rate": 3.5839904412186256e-05, - "loss": 63.912, - "step": 105380 - }, - { - "epoch": 0.4257889357094664, - "grad_norm": 777.0023193359375, - "learning_rate": 3.583675884869206e-05, - "loss": 52.3509, - "step": 105390 - }, - { - "epoch": 0.42582933697483405, - "grad_norm": 345.2575988769531, - "learning_rate": 3.5833613073929684e-05, - "loss": 37.5077, - "step": 105400 - }, - { - "epoch": 0.4258697382402017, - "grad_norm": 1329.019287109375, - "learning_rate": 3.583046708796043e-05, - "loss": 46.7334, - "step": 105410 - }, - { - "epoch": 0.42591013950556933, - "grad_norm": 574.7626953125, - "learning_rate": 3.582732089084566e-05, - "loss": 49.3083, - "step": 105420 - }, - { - "epoch": 0.42595054077093697, - "grad_norm": 0.0, - "learning_rate": 3.582417448264669e-05, - "loss": 53.3416, - "step": 105430 - }, - { - "epoch": 0.42599094203630455, - "grad_norm": 1714.5047607421875, - "learning_rate": 3.582102786342485e-05, - "loss": 67.8292, - "step": 105440 - }, - { - "epoch": 0.4260313433016722, - "grad_norm": 1370.195068359375, - "learning_rate": 3.581788103324152e-05, - "loss": 58.7996, - "step": 105450 - }, - { - "epoch": 0.42607174456703983, - "grad_norm": 1272.4110107421875, - "learning_rate": 3.581473399215802e-05, - "loss": 70.6126, - "step": 105460 - }, - { - "epoch": 0.4261121458324075, - "grad_norm": 662.2067260742188, - "learning_rate": 3.581158674023572e-05, - "loss": 51.143, - "step": 105470 - }, - { - "epoch": 0.4261525470977751, - "grad_norm": 803.9454956054688, - "learning_rate": 3.5808439277535964e-05, - "loss": 38.5995, - "step": 105480 - }, - { - "epoch": 0.42619294836314275, - "grad_norm": 507.4480285644531, - "learning_rate": 3.580529160412013e-05, - "loss": 56.6335, - "step": 105490 - }, - { - "epoch": 0.42623334962851034, - "grad_norm": 673.6394653320312, - "learning_rate": 3.580214372004956e-05, - "loss": 71.4622, - "step": 105500 - }, - { - "epoch": 0.426273750893878, - "grad_norm": 506.5180969238281, - "learning_rate": 3.579899562538564e-05, - "loss": 34.8514, - "step": 105510 - }, - { - "epoch": 0.4263141521592456, - "grad_norm": 912.122802734375, - "learning_rate": 3.5795847320189746e-05, - "loss": 57.6717, - "step": 105520 - }, - { - "epoch": 0.42635455342461326, - "grad_norm": 888.0779418945312, - "learning_rate": 3.5792698804523245e-05, - "loss": 55.5145, - "step": 105530 - }, - { - "epoch": 0.4263949546899809, - "grad_norm": 670.391845703125, - "learning_rate": 3.5789550078447526e-05, - "loss": 78.2021, - "step": 105540 - }, - { - "epoch": 0.42643535595534854, - "grad_norm": 418.4927673339844, - "learning_rate": 3.5786401142023975e-05, - "loss": 73.2309, - "step": 105550 - }, - { - "epoch": 0.4264757572207162, - "grad_norm": 579.839599609375, - "learning_rate": 3.5783251995313985e-05, - "loss": 49.0248, - "step": 105560 - }, - { - "epoch": 0.42651615848608376, - "grad_norm": 1221.287841796875, - "learning_rate": 3.5780102638378936e-05, - "loss": 48.0422, - "step": 105570 - }, - { - "epoch": 0.4265565597514514, - "grad_norm": 576.9136962890625, - "learning_rate": 3.577695307128024e-05, - "loss": 48.1065, - "step": 105580 - }, - { - "epoch": 0.42659696101681904, - "grad_norm": 682.1599731445312, - "learning_rate": 3.57738032940793e-05, - "loss": 80.5689, - "step": 105590 - }, - { - "epoch": 0.4266373622821867, - "grad_norm": 952.7507934570312, - "learning_rate": 3.577065330683751e-05, - "loss": 59.2839, - "step": 105600 - }, - { - "epoch": 0.4266777635475543, - "grad_norm": 755.3192749023438, - "learning_rate": 3.5767503109616296e-05, - "loss": 63.4179, - "step": 105610 - }, - { - "epoch": 0.42671816481292196, - "grad_norm": 528.3074340820312, - "learning_rate": 3.576435270247706e-05, - "loss": 49.0063, - "step": 105620 - }, - { - "epoch": 0.42675856607828955, - "grad_norm": 1181.072509765625, - "learning_rate": 3.5761202085481235e-05, - "loss": 53.1849, - "step": 105630 - }, - { - "epoch": 0.4267989673436572, - "grad_norm": 825.5591430664062, - "learning_rate": 3.575805125869022e-05, - "loss": 54.1307, - "step": 105640 - }, - { - "epoch": 0.4268393686090248, - "grad_norm": 1657.726806640625, - "learning_rate": 3.5754900222165465e-05, - "loss": 84.8316, - "step": 105650 - }, - { - "epoch": 0.42687976987439247, - "grad_norm": 707.2351684570312, - "learning_rate": 3.5751748975968394e-05, - "loss": 71.4824, - "step": 105660 - }, - { - "epoch": 0.4269201711397601, - "grad_norm": 514.521484375, - "learning_rate": 3.574859752016045e-05, - "loss": 60.5219, - "step": 105670 - }, - { - "epoch": 0.42696057240512775, - "grad_norm": 661.9301147460938, - "learning_rate": 3.574544585480305e-05, - "loss": 45.8945, - "step": 105680 - }, - { - "epoch": 0.4270009736704954, - "grad_norm": 416.9234924316406, - "learning_rate": 3.574229397995765e-05, - "loss": 63.908, - "step": 105690 - }, - { - "epoch": 0.42704137493586297, - "grad_norm": 835.8658447265625, - "learning_rate": 3.573914189568571e-05, - "loss": 77.679, - "step": 105700 - }, - { - "epoch": 0.4270817762012306, - "grad_norm": 1938.5263671875, - "learning_rate": 3.5735989602048665e-05, - "loss": 62.1259, - "step": 105710 - }, - { - "epoch": 0.42712217746659825, - "grad_norm": 825.2261962890625, - "learning_rate": 3.573283709910798e-05, - "loss": 50.9862, - "step": 105720 - }, - { - "epoch": 0.4271625787319659, - "grad_norm": 1108.54345703125, - "learning_rate": 3.572968438692509e-05, - "loss": 75.7806, - "step": 105730 - }, - { - "epoch": 0.42720297999733353, - "grad_norm": 338.0368347167969, - "learning_rate": 3.5726531465561504e-05, - "loss": 40.806, - "step": 105740 - }, - { - "epoch": 0.42724338126270117, - "grad_norm": 876.4752197265625, - "learning_rate": 3.572337833507865e-05, - "loss": 63.9452, - "step": 105750 - }, - { - "epoch": 0.42728378252806876, - "grad_norm": 429.4320983886719, - "learning_rate": 3.572022499553802e-05, - "loss": 79.8431, - "step": 105760 - }, - { - "epoch": 0.4273241837934364, - "grad_norm": 749.8831787109375, - "learning_rate": 3.5717071447001083e-05, - "loss": 54.6913, - "step": 105770 - }, - { - "epoch": 0.42736458505880404, - "grad_norm": 902.8178100585938, - "learning_rate": 3.571391768952932e-05, - "loss": 45.6217, - "step": 105780 - }, - { - "epoch": 0.4274049863241717, - "grad_norm": 1094.285400390625, - "learning_rate": 3.571076372318422e-05, - "loss": 76.6032, - "step": 105790 - }, - { - "epoch": 0.4274453875895393, - "grad_norm": 1502.871337890625, - "learning_rate": 3.570760954802726e-05, - "loss": 80.5316, - "step": 105800 - }, - { - "epoch": 0.42748578885490696, - "grad_norm": 572.2935180664062, - "learning_rate": 3.5704455164119945e-05, - "loss": 64.48, - "step": 105810 - }, - { - "epoch": 0.42752619012027454, - "grad_norm": 742.1455688476562, - "learning_rate": 3.5701300571523755e-05, - "loss": 82.0077, - "step": 105820 - }, - { - "epoch": 0.4275665913856422, - "grad_norm": 520.0400390625, - "learning_rate": 3.569814577030022e-05, - "loss": 74.5518, - "step": 105830 - }, - { - "epoch": 0.4276069926510098, - "grad_norm": 967.4630126953125, - "learning_rate": 3.569499076051081e-05, - "loss": 51.6548, - "step": 105840 - }, - { - "epoch": 0.42764739391637746, - "grad_norm": 1097.524658203125, - "learning_rate": 3.5691835542217054e-05, - "loss": 66.9995, - "step": 105850 - }, - { - "epoch": 0.4276877951817451, - "grad_norm": 1097.96533203125, - "learning_rate": 3.5688680115480455e-05, - "loss": 55.4627, - "step": 105860 - }, - { - "epoch": 0.42772819644711274, - "grad_norm": 936.7929077148438, - "learning_rate": 3.5685524480362543e-05, - "loss": 79.1348, - "step": 105870 - }, - { - "epoch": 0.4277685977124804, - "grad_norm": 427.792724609375, - "learning_rate": 3.568236863692482e-05, - "loss": 57.1244, - "step": 105880 - }, - { - "epoch": 0.42780899897784797, - "grad_norm": 430.7917785644531, - "learning_rate": 3.567921258522883e-05, - "loss": 45.7751, - "step": 105890 - }, - { - "epoch": 0.4278494002432156, - "grad_norm": 1268.4412841796875, - "learning_rate": 3.567605632533608e-05, - "loss": 51.2699, - "step": 105900 - }, - { - "epoch": 0.42788980150858325, - "grad_norm": 156.56045532226562, - "learning_rate": 3.5672899857308134e-05, - "loss": 58.0844, - "step": 105910 - }, - { - "epoch": 0.4279302027739509, - "grad_norm": 829.1176147460938, - "learning_rate": 3.56697431812065e-05, - "loss": 55.2921, - "step": 105920 - }, - { - "epoch": 0.4279706040393185, - "grad_norm": 610.2461547851562, - "learning_rate": 3.566658629709273e-05, - "loss": 50.3534, - "step": 105930 - }, - { - "epoch": 0.42801100530468617, - "grad_norm": 1424.289306640625, - "learning_rate": 3.566342920502837e-05, - "loss": 63.0704, - "step": 105940 - }, - { - "epoch": 0.42805140657005375, - "grad_norm": 3496.262939453125, - "learning_rate": 3.5660271905074974e-05, - "loss": 59.1756, - "step": 105950 - }, - { - "epoch": 0.4280918078354214, - "grad_norm": 413.6312561035156, - "learning_rate": 3.565711439729408e-05, - "loss": 55.2588, - "step": 105960 - }, - { - "epoch": 0.42813220910078903, - "grad_norm": 632.3339233398438, - "learning_rate": 3.565395668174725e-05, - "loss": 69.6535, - "step": 105970 - }, - { - "epoch": 0.42817261036615667, - "grad_norm": 1112.0599365234375, - "learning_rate": 3.565079875849605e-05, - "loss": 51.4274, - "step": 105980 - }, - { - "epoch": 0.4282130116315243, - "grad_norm": 127.4210433959961, - "learning_rate": 3.564764062760205e-05, - "loss": 64.7097, - "step": 105990 - }, - { - "epoch": 0.42825341289689195, - "grad_norm": 1343.2515869140625, - "learning_rate": 3.564448228912682e-05, - "loss": 50.9704, - "step": 106000 - }, - { - "epoch": 0.4282938141622596, - "grad_norm": 794.98779296875, - "learning_rate": 3.564132374313192e-05, - "loss": 38.4331, - "step": 106010 - }, - { - "epoch": 0.4283342154276272, - "grad_norm": 195.28814697265625, - "learning_rate": 3.5638164989678935e-05, - "loss": 61.8604, - "step": 106020 - }, - { - "epoch": 0.4283746166929948, - "grad_norm": 1086.4222412109375, - "learning_rate": 3.563500602882945e-05, - "loss": 65.9661, - "step": 106030 - }, - { - "epoch": 0.42841501795836245, - "grad_norm": 737.5653076171875, - "learning_rate": 3.5631846860645044e-05, - "loss": 55.4186, - "step": 106040 - }, - { - "epoch": 0.4284554192237301, - "grad_norm": 864.1533203125, - "learning_rate": 3.562868748518732e-05, - "loss": 82.7763, - "step": 106050 - }, - { - "epoch": 0.42849582048909773, - "grad_norm": 1645.748046875, - "learning_rate": 3.562552790251785e-05, - "loss": 70.2854, - "step": 106060 - }, - { - "epoch": 0.4285362217544654, - "grad_norm": 754.142578125, - "learning_rate": 3.562236811269824e-05, - "loss": 55.0108, - "step": 106070 - }, - { - "epoch": 0.42857662301983296, - "grad_norm": 408.4161376953125, - "learning_rate": 3.56192081157901e-05, - "loss": 69.7249, - "step": 106080 - }, - { - "epoch": 0.4286170242852006, - "grad_norm": 308.8442077636719, - "learning_rate": 3.561604791185503e-05, - "loss": 54.3556, - "step": 106090 - }, - { - "epoch": 0.42865742555056824, - "grad_norm": 1148.19677734375, - "learning_rate": 3.561288750095465e-05, - "loss": 57.7566, - "step": 106100 - }, - { - "epoch": 0.4286978268159359, - "grad_norm": 589.8562622070312, - "learning_rate": 3.560972688315055e-05, - "loss": 65.197, - "step": 106110 - }, - { - "epoch": 0.4287382280813035, - "grad_norm": 1979.934326171875, - "learning_rate": 3.5606566058504375e-05, - "loss": 67.4663, - "step": 106120 - }, - { - "epoch": 0.42877862934667116, - "grad_norm": 638.1552124023438, - "learning_rate": 3.560340502707773e-05, - "loss": 40.0643, - "step": 106130 - }, - { - "epoch": 0.42881903061203874, - "grad_norm": 1344.94580078125, - "learning_rate": 3.560024378893224e-05, - "loss": 71.7906, - "step": 106140 - }, - { - "epoch": 0.4288594318774064, - "grad_norm": 541.87353515625, - "learning_rate": 3.559708234412954e-05, - "loss": 89.126, - "step": 106150 - }, - { - "epoch": 0.428899833142774, - "grad_norm": 1129.01708984375, - "learning_rate": 3.559392069273127e-05, - "loss": 74.1695, - "step": 106160 - }, - { - "epoch": 0.42894023440814166, - "grad_norm": 741.240234375, - "learning_rate": 3.559075883479906e-05, - "loss": 77.3889, - "step": 106170 - }, - { - "epoch": 0.4289806356735093, - "grad_norm": 373.0411682128906, - "learning_rate": 3.558759677039455e-05, - "loss": 85.0204, - "step": 106180 - }, - { - "epoch": 0.42902103693887694, - "grad_norm": 705.1157836914062, - "learning_rate": 3.558443449957939e-05, - "loss": 77.1509, - "step": 106190 - }, - { - "epoch": 0.4290614382042446, - "grad_norm": 565.6890258789062, - "learning_rate": 3.5581272022415244e-05, - "loss": 62.9252, - "step": 106200 - }, - { - "epoch": 0.42910183946961217, - "grad_norm": 2649.929443359375, - "learning_rate": 3.5578109338963736e-05, - "loss": 79.2853, - "step": 106210 - }, - { - "epoch": 0.4291422407349798, - "grad_norm": 645.3712768554688, - "learning_rate": 3.557494644928654e-05, - "loss": 52.6527, - "step": 106220 - }, - { - "epoch": 0.42918264200034745, - "grad_norm": 446.864013671875, - "learning_rate": 3.5571783353445325e-05, - "loss": 54.5988, - "step": 106230 - }, - { - "epoch": 0.4292230432657151, - "grad_norm": 1004.2807006835938, - "learning_rate": 3.5568620051501756e-05, - "loss": 67.0789, - "step": 106240 - }, - { - "epoch": 0.4292634445310827, - "grad_norm": 611.4927368164062, - "learning_rate": 3.556545654351749e-05, - "loss": 38.8004, - "step": 106250 - }, - { - "epoch": 0.42930384579645037, - "grad_norm": 868.6104125976562, - "learning_rate": 3.556229282955421e-05, - "loss": 44.7674, - "step": 106260 - }, - { - "epoch": 0.42934424706181795, - "grad_norm": 1725.8865966796875, - "learning_rate": 3.5559128909673595e-05, - "loss": 71.4952, - "step": 106270 - }, - { - "epoch": 0.4293846483271856, - "grad_norm": 654.6998291015625, - "learning_rate": 3.555596478393733e-05, - "loss": 48.7473, - "step": 106280 - }, - { - "epoch": 0.42942504959255323, - "grad_norm": 1723.461669921875, - "learning_rate": 3.555280045240709e-05, - "loss": 62.5021, - "step": 106290 - }, - { - "epoch": 0.42946545085792087, - "grad_norm": 989.3924560546875, - "learning_rate": 3.554963591514457e-05, - "loss": 64.8296, - "step": 106300 - }, - { - "epoch": 0.4295058521232885, - "grad_norm": 767.1257934570312, - "learning_rate": 3.554647117221147e-05, - "loss": 58.58, - "step": 106310 - }, - { - "epoch": 0.42954625338865615, - "grad_norm": 958.0810546875, - "learning_rate": 3.554330622366949e-05, - "loss": 82.886, - "step": 106320 - }, - { - "epoch": 0.42958665465402374, - "grad_norm": 1372.3026123046875, - "learning_rate": 3.554014106958032e-05, - "loss": 91.1392, - "step": 106330 - }, - { - "epoch": 0.4296270559193914, - "grad_norm": 765.9713745117188, - "learning_rate": 3.5536975710005677e-05, - "loss": 43.6938, - "step": 106340 - }, - { - "epoch": 0.429667457184759, - "grad_norm": 2553.733642578125, - "learning_rate": 3.553381014500727e-05, - "loss": 54.1921, - "step": 106350 - }, - { - "epoch": 0.42970785845012666, - "grad_norm": 1080.475341796875, - "learning_rate": 3.5530644374646815e-05, - "loss": 62.0937, - "step": 106360 - }, - { - "epoch": 0.4297482597154943, - "grad_norm": 2564.04638671875, - "learning_rate": 3.5527478398986015e-05, - "loss": 63.5857, - "step": 106370 - }, - { - "epoch": 0.42978866098086194, - "grad_norm": 299.32989501953125, - "learning_rate": 3.552431221808661e-05, - "loss": 44.5124, - "step": 106380 - }, - { - "epoch": 0.4298290622462296, - "grad_norm": 631.6810302734375, - "learning_rate": 3.5521145832010314e-05, - "loss": 49.5127, - "step": 106390 - }, - { - "epoch": 0.42986946351159716, - "grad_norm": 1121.8922119140625, - "learning_rate": 3.551797924081887e-05, - "loss": 58.7079, - "step": 106400 - }, - { - "epoch": 0.4299098647769648, - "grad_norm": 657.3575439453125, - "learning_rate": 3.5514812444574004e-05, - "loss": 50.7894, - "step": 106410 - }, - { - "epoch": 0.42995026604233244, - "grad_norm": 662.6183471679688, - "learning_rate": 3.551164544333745e-05, - "loss": 79.3807, - "step": 106420 - }, - { - "epoch": 0.4299906673077001, - "grad_norm": 593.0071411132812, - "learning_rate": 3.550847823717096e-05, - "loss": 65.9797, - "step": 106430 - }, - { - "epoch": 0.4300310685730677, - "grad_norm": 755.3797607421875, - "learning_rate": 3.5505310826136286e-05, - "loss": 51.0179, - "step": 106440 - }, - { - "epoch": 0.43007146983843536, - "grad_norm": 1839.5137939453125, - "learning_rate": 3.5502143210295165e-05, - "loss": 62.1264, - "step": 106450 - }, - { - "epoch": 0.43011187110380295, - "grad_norm": 722.4067993164062, - "learning_rate": 3.549897538970934e-05, - "loss": 63.9707, - "step": 106460 - }, - { - "epoch": 0.4301522723691706, - "grad_norm": 465.209716796875, - "learning_rate": 3.54958073644406e-05, - "loss": 51.9835, - "step": 106470 - }, - { - "epoch": 0.4301926736345382, - "grad_norm": 348.4991760253906, - "learning_rate": 3.5492639134550695e-05, - "loss": 84.2273, - "step": 106480 - }, - { - "epoch": 0.43023307489990587, - "grad_norm": 1531.664794921875, - "learning_rate": 3.548947070010138e-05, - "loss": 81.6072, - "step": 106490 - }, - { - "epoch": 0.4302734761652735, - "grad_norm": 797.4585571289062, - "learning_rate": 3.548630206115443e-05, - "loss": 56.6595, - "step": 106500 - }, - { - "epoch": 0.43031387743064115, - "grad_norm": 412.25360107421875, - "learning_rate": 3.5483133217771625e-05, - "loss": 57.9216, - "step": 106510 - }, - { - "epoch": 0.4303542786960088, - "grad_norm": 647.94921875, - "learning_rate": 3.5479964170014746e-05, - "loss": 99.7013, - "step": 106520 - }, - { - "epoch": 0.43039467996137637, - "grad_norm": 649.0404052734375, - "learning_rate": 3.547679491794557e-05, - "loss": 46.1641, - "step": 106530 - }, - { - "epoch": 0.430435081226744, - "grad_norm": 434.8841552734375, - "learning_rate": 3.547362546162588e-05, - "loss": 61.1318, - "step": 106540 - }, - { - "epoch": 0.43047548249211165, - "grad_norm": 733.9017333984375, - "learning_rate": 3.547045580111746e-05, - "loss": 49.06, - "step": 106550 - }, - { - "epoch": 0.4305158837574793, - "grad_norm": 721.9252319335938, - "learning_rate": 3.546728593648213e-05, - "loss": 61.6053, - "step": 106560 - }, - { - "epoch": 0.43055628502284693, - "grad_norm": 553.4278564453125, - "learning_rate": 3.546411586778167e-05, - "loss": 59.98, - "step": 106570 - }, - { - "epoch": 0.43059668628821457, - "grad_norm": 410.0889892578125, - "learning_rate": 3.546094559507787e-05, - "loss": 44.391, - "step": 106580 - }, - { - "epoch": 0.43063708755358215, - "grad_norm": 429.70361328125, - "learning_rate": 3.5457775118432556e-05, - "loss": 90.1694, - "step": 106590 - }, - { - "epoch": 0.4306774888189498, - "grad_norm": 899.5925903320312, - "learning_rate": 3.545460443790753e-05, - "loss": 79.3271, - "step": 106600 - }, - { - "epoch": 0.43071789008431743, - "grad_norm": 1410.063232421875, - "learning_rate": 3.545143355356462e-05, - "loss": 75.9909, - "step": 106610 - }, - { - "epoch": 0.4307582913496851, - "grad_norm": 1006.2098999023438, - "learning_rate": 3.544826246546563e-05, - "loss": 61.4801, - "step": 106620 - }, - { - "epoch": 0.4307986926150527, - "grad_norm": 618.147216796875, - "learning_rate": 3.544509117367238e-05, - "loss": 56.4724, - "step": 106630 - }, - { - "epoch": 0.43083909388042035, - "grad_norm": 866.3955078125, - "learning_rate": 3.544191967824669e-05, - "loss": 47.1831, - "step": 106640 - }, - { - "epoch": 0.43087949514578794, - "grad_norm": 802.1903686523438, - "learning_rate": 3.543874797925042e-05, - "loss": 62.7335, - "step": 106650 - }, - { - "epoch": 0.4309198964111556, - "grad_norm": 722.4039306640625, - "learning_rate": 3.543557607674537e-05, - "loss": 57.5358, - "step": 106660 - }, - { - "epoch": 0.4309602976765232, - "grad_norm": 467.41937255859375, - "learning_rate": 3.543240397079339e-05, - "loss": 42.7253, - "step": 106670 - }, - { - "epoch": 0.43100069894189086, - "grad_norm": 421.191162109375, - "learning_rate": 3.542923166145633e-05, - "loss": 60.0862, - "step": 106680 - }, - { - "epoch": 0.4310411002072585, - "grad_norm": 797.8417358398438, - "learning_rate": 3.542605914879603e-05, - "loss": 69.1785, - "step": 106690 - }, - { - "epoch": 0.43108150147262614, - "grad_norm": 1393.1776123046875, - "learning_rate": 3.542288643287434e-05, - "loss": 42.2223, - "step": 106700 - }, - { - "epoch": 0.4311219027379938, - "grad_norm": 699.2664184570312, - "learning_rate": 3.5419713513753114e-05, - "loss": 71.7247, - "step": 106710 - }, - { - "epoch": 0.43116230400336136, - "grad_norm": 1001.225830078125, - "learning_rate": 3.54165403914942e-05, - "loss": 54.1351, - "step": 106720 - }, - { - "epoch": 0.431202705268729, - "grad_norm": 587.420166015625, - "learning_rate": 3.541336706615947e-05, - "loss": 62.8683, - "step": 106730 - }, - { - "epoch": 0.43124310653409664, - "grad_norm": 481.1246643066406, - "learning_rate": 3.541019353781079e-05, - "loss": 48.2253, - "step": 106740 - }, - { - "epoch": 0.4312835077994643, - "grad_norm": 694.5806274414062, - "learning_rate": 3.540701980651003e-05, - "loss": 50.7718, - "step": 106750 - }, - { - "epoch": 0.4313239090648319, - "grad_norm": 1048.5947265625, - "learning_rate": 3.540384587231906e-05, - "loss": 78.4464, - "step": 106760 - }, - { - "epoch": 0.43136431033019956, - "grad_norm": 372.85406494140625, - "learning_rate": 3.540067173529976e-05, - "loss": 48.4107, - "step": 106770 - }, - { - "epoch": 0.43140471159556715, - "grad_norm": 2308.911865234375, - "learning_rate": 3.5397497395514004e-05, - "loss": 73.4827, - "step": 106780 - }, - { - "epoch": 0.4314451128609348, - "grad_norm": 183.2915802001953, - "learning_rate": 3.5394322853023694e-05, - "loss": 35.4445, - "step": 106790 - }, - { - "epoch": 0.4314855141263024, - "grad_norm": 4619.44140625, - "learning_rate": 3.53911481078907e-05, - "loss": 50.2226, - "step": 106800 - }, - { - "epoch": 0.43152591539167007, - "grad_norm": 873.1148071289062, - "learning_rate": 3.5387973160176926e-05, - "loss": 51.0404, - "step": 106810 - }, - { - "epoch": 0.4315663166570377, - "grad_norm": 480.3205871582031, - "learning_rate": 3.538479800994426e-05, - "loss": 41.806, - "step": 106820 - }, - { - "epoch": 0.43160671792240535, - "grad_norm": 560.2127685546875, - "learning_rate": 3.538162265725462e-05, - "loss": 34.7096, - "step": 106830 - }, - { - "epoch": 0.431647119187773, - "grad_norm": 800.5224609375, - "learning_rate": 3.5378447102169895e-05, - "loss": 83.1514, - "step": 106840 - }, - { - "epoch": 0.43168752045314057, - "grad_norm": 403.2615661621094, - "learning_rate": 3.537527134475201e-05, - "loss": 81.7792, - "step": 106850 - }, - { - "epoch": 0.4317279217185082, - "grad_norm": 613.8062744140625, - "learning_rate": 3.537209538506286e-05, - "loss": 51.7972, - "step": 106860 - }, - { - "epoch": 0.43176832298387585, - "grad_norm": 794.0146484375, - "learning_rate": 3.5368919223164374e-05, - "loss": 78.0091, - "step": 106870 - }, - { - "epoch": 0.4318087242492435, - "grad_norm": 877.7191162109375, - "learning_rate": 3.536574285911847e-05, - "loss": 47.9676, - "step": 106880 - }, - { - "epoch": 0.43184912551461113, - "grad_norm": 1015.1839599609375, - "learning_rate": 3.5362566292987076e-05, - "loss": 56.4783, - "step": 106890 - }, - { - "epoch": 0.43188952677997877, - "grad_norm": 519.8533935546875, - "learning_rate": 3.535938952483211e-05, - "loss": 53.7836, - "step": 106900 - }, - { - "epoch": 0.43192992804534636, - "grad_norm": 356.93109130859375, - "learning_rate": 3.5356212554715506e-05, - "loss": 63.2428, - "step": 106910 - }, - { - "epoch": 0.431970329310714, - "grad_norm": 694.5162963867188, - "learning_rate": 3.535303538269922e-05, - "loss": 33.9129, - "step": 106920 - }, - { - "epoch": 0.43201073057608164, - "grad_norm": 796.4173583984375, - "learning_rate": 3.534985800884517e-05, - "loss": 58.3042, - "step": 106930 - }, - { - "epoch": 0.4320511318414493, - "grad_norm": 0.0, - "learning_rate": 3.5346680433215316e-05, - "loss": 38.0315, - "step": 106940 - }, - { - "epoch": 0.4320915331068169, - "grad_norm": 570.5447387695312, - "learning_rate": 3.5343502655871594e-05, - "loss": 58.1119, - "step": 106950 - }, - { - "epoch": 0.43213193437218456, - "grad_norm": 2234.41162109375, - "learning_rate": 3.534032467687597e-05, - "loss": 50.3924, - "step": 106960 - }, - { - "epoch": 0.43217233563755214, - "grad_norm": 470.1455993652344, - "learning_rate": 3.533714649629039e-05, - "loss": 51.558, - "step": 106970 - }, - { - "epoch": 0.4322127369029198, - "grad_norm": 641.6006469726562, - "learning_rate": 3.533396811417682e-05, - "loss": 46.6653, - "step": 106980 - }, - { - "epoch": 0.4322531381682874, - "grad_norm": 1771.727294921875, - "learning_rate": 3.533078953059721e-05, - "loss": 42.6348, - "step": 106990 - }, - { - "epoch": 0.43229353943365506, - "grad_norm": 885.59765625, - "learning_rate": 3.532761074561355e-05, - "loss": 58.1762, - "step": 107000 - }, - { - "epoch": 0.4323339406990227, - "grad_norm": 618.5126953125, - "learning_rate": 3.5324431759287796e-05, - "loss": 43.6578, - "step": 107010 - }, - { - "epoch": 0.43237434196439034, - "grad_norm": 740.6704711914062, - "learning_rate": 3.532125257168193e-05, - "loss": 49.2011, - "step": 107020 - }, - { - "epoch": 0.432414743229758, - "grad_norm": 713.0264892578125, - "learning_rate": 3.531807318285793e-05, - "loss": 67.6269, - "step": 107030 - }, - { - "epoch": 0.43245514449512557, - "grad_norm": 793.6551513671875, - "learning_rate": 3.531489359287779e-05, - "loss": 78.4188, - "step": 107040 - }, - { - "epoch": 0.4324955457604932, - "grad_norm": 1205.0350341796875, - "learning_rate": 3.531171380180348e-05, - "loss": 77.0156, - "step": 107050 - }, - { - "epoch": 0.43253594702586085, - "grad_norm": 1664.7984619140625, - "learning_rate": 3.530853380969701e-05, - "loss": 51.596, - "step": 107060 - }, - { - "epoch": 0.4325763482912285, - "grad_norm": 353.55523681640625, - "learning_rate": 3.5305353616620355e-05, - "loss": 36.4841, - "step": 107070 - }, - { - "epoch": 0.4326167495565961, - "grad_norm": 607.7645263671875, - "learning_rate": 3.5302173222635524e-05, - "loss": 56.0603, - "step": 107080 - }, - { - "epoch": 0.43265715082196377, - "grad_norm": 688.8688354492188, - "learning_rate": 3.529899262780453e-05, - "loss": 60.969, - "step": 107090 - }, - { - "epoch": 0.43269755208733135, - "grad_norm": 431.1716003417969, - "learning_rate": 3.529581183218937e-05, - "loss": 43.1526, - "step": 107100 - }, - { - "epoch": 0.432737953352699, - "grad_norm": 1323.6634521484375, - "learning_rate": 3.529263083585206e-05, - "loss": 64.5691, - "step": 107110 - }, - { - "epoch": 0.43277835461806663, - "grad_norm": 650.7156372070312, - "learning_rate": 3.528944963885461e-05, - "loss": 24.5781, - "step": 107120 - }, - { - "epoch": 0.43281875588343427, - "grad_norm": 1326.6907958984375, - "learning_rate": 3.528626824125905e-05, - "loss": 55.9819, - "step": 107130 - }, - { - "epoch": 0.4328591571488019, - "grad_norm": 727.043701171875, - "learning_rate": 3.528308664312739e-05, - "loss": 44.3257, - "step": 107140 - }, - { - "epoch": 0.43289955841416955, - "grad_norm": 719.6072998046875, - "learning_rate": 3.527990484452166e-05, - "loss": 46.9241, - "step": 107150 - }, - { - "epoch": 0.4329399596795372, - "grad_norm": 1634.4859619140625, - "learning_rate": 3.527672284550389e-05, - "loss": 87.1335, - "step": 107160 - }, - { - "epoch": 0.4329803609449048, - "grad_norm": 281.7586975097656, - "learning_rate": 3.527354064613612e-05, - "loss": 70.6858, - "step": 107170 - }, - { - "epoch": 0.4330207622102724, - "grad_norm": 1065.198974609375, - "learning_rate": 3.5270358246480386e-05, - "loss": 79.6831, - "step": 107180 - }, - { - "epoch": 0.43306116347564005, - "grad_norm": 263.0101623535156, - "learning_rate": 3.526717564659873e-05, - "loss": 61.8488, - "step": 107190 - }, - { - "epoch": 0.4331015647410077, - "grad_norm": 918.0792236328125, - "learning_rate": 3.52639928465532e-05, - "loss": 56.7985, - "step": 107200 - }, - { - "epoch": 0.43314196600637533, - "grad_norm": 454.3590393066406, - "learning_rate": 3.526080984640585e-05, - "loss": 59.4921, - "step": 107210 - }, - { - "epoch": 0.433182367271743, - "grad_norm": 332.873291015625, - "learning_rate": 3.525762664621872e-05, - "loss": 34.6081, - "step": 107220 - }, - { - "epoch": 0.43322276853711056, - "grad_norm": 1424.8919677734375, - "learning_rate": 3.5254443246053886e-05, - "loss": 54.6382, - "step": 107230 - }, - { - "epoch": 0.4332631698024782, - "grad_norm": 1493.1998291015625, - "learning_rate": 3.5251259645973394e-05, - "loss": 77.3529, - "step": 107240 - }, - { - "epoch": 0.43330357106784584, - "grad_norm": 310.91943359375, - "learning_rate": 3.524807584603932e-05, - "loss": 58.498, - "step": 107250 - }, - { - "epoch": 0.4333439723332135, - "grad_norm": 713.1577758789062, - "learning_rate": 3.5244891846313736e-05, - "loss": 48.7548, - "step": 107260 - }, - { - "epoch": 0.4333843735985811, - "grad_norm": 746.4721069335938, - "learning_rate": 3.5241707646858703e-05, - "loss": 67.363, - "step": 107270 - }, - { - "epoch": 0.43342477486394876, - "grad_norm": 1737.3177490234375, - "learning_rate": 3.523852324773631e-05, - "loss": 58.3859, - "step": 107280 - }, - { - "epoch": 0.43346517612931634, - "grad_norm": 240.0654754638672, - "learning_rate": 3.523533864900863e-05, - "loss": 89.2563, - "step": 107290 - }, - { - "epoch": 0.433505577394684, - "grad_norm": 697.5162353515625, - "learning_rate": 3.523215385073777e-05, - "loss": 66.2658, - "step": 107300 - }, - { - "epoch": 0.4335459786600516, - "grad_norm": 527.6311645507812, - "learning_rate": 3.52289688529858e-05, - "loss": 39.8144, - "step": 107310 - }, - { - "epoch": 0.43358637992541926, - "grad_norm": 1482.4761962890625, - "learning_rate": 3.5225783655814796e-05, - "loss": 56.7785, - "step": 107320 - }, - { - "epoch": 0.4336267811907869, - "grad_norm": 1850.587646484375, - "learning_rate": 3.522259825928689e-05, - "loss": 69.2113, - "step": 107330 - }, - { - "epoch": 0.43366718245615454, - "grad_norm": 821.7341918945312, - "learning_rate": 3.5219412663464167e-05, - "loss": 85.9736, - "step": 107340 - }, - { - "epoch": 0.4337075837215222, - "grad_norm": 1092.7030029296875, - "learning_rate": 3.521622686840873e-05, - "loss": 74.6831, - "step": 107350 - }, - { - "epoch": 0.43374798498688977, - "grad_norm": 633.818115234375, - "learning_rate": 3.521304087418269e-05, - "loss": 64.548, - "step": 107360 - }, - { - "epoch": 0.4337883862522574, - "grad_norm": 994.1685791015625, - "learning_rate": 3.520985468084816e-05, - "loss": 31.1137, - "step": 107370 - }, - { - "epoch": 0.43382878751762505, - "grad_norm": 887.774169921875, - "learning_rate": 3.520666828846726e-05, - "loss": 52.128, - "step": 107380 - }, - { - "epoch": 0.4338691887829927, - "grad_norm": 478.0501708984375, - "learning_rate": 3.52034816971021e-05, - "loss": 60.6667, - "step": 107390 - }, - { - "epoch": 0.4339095900483603, - "grad_norm": 1626.81884765625, - "learning_rate": 3.5200294906814824e-05, - "loss": 108.8241, - "step": 107400 - }, - { - "epoch": 0.43394999131372797, - "grad_norm": 482.78741455078125, - "learning_rate": 3.519710791766754e-05, - "loss": 57.1852, - "step": 107410 - }, - { - "epoch": 0.43399039257909555, - "grad_norm": 536.291259765625, - "learning_rate": 3.5193920729722384e-05, - "loss": 55.354, - "step": 107420 - }, - { - "epoch": 0.4340307938444632, - "grad_norm": 395.05181884765625, - "learning_rate": 3.51907333430415e-05, - "loss": 51.6174, - "step": 107430 - }, - { - "epoch": 0.43407119510983083, - "grad_norm": 504.0698547363281, - "learning_rate": 3.5187545757687015e-05, - "loss": 48.5033, - "step": 107440 - }, - { - "epoch": 0.43411159637519847, - "grad_norm": 938.1822509765625, - "learning_rate": 3.518435797372109e-05, - "loss": 77.9516, - "step": 107450 - }, - { - "epoch": 0.4341519976405661, - "grad_norm": 542.053955078125, - "learning_rate": 3.5181169991205866e-05, - "loss": 50.1242, - "step": 107460 - }, - { - "epoch": 0.43419239890593375, - "grad_norm": 945.6251831054688, - "learning_rate": 3.517798181020348e-05, - "loss": 82.4342, - "step": 107470 - }, - { - "epoch": 0.4342328001713014, - "grad_norm": 841.2509765625, - "learning_rate": 3.517479343077611e-05, - "loss": 48.6936, - "step": 107480 - }, - { - "epoch": 0.434273201436669, - "grad_norm": 1135.741943359375, - "learning_rate": 3.517160485298589e-05, - "loss": 57.5404, - "step": 107490 - }, - { - "epoch": 0.4343136027020366, - "grad_norm": 634.0276489257812, - "learning_rate": 3.516841607689501e-05, - "loss": 57.2011, - "step": 107500 - }, - { - "epoch": 0.43435400396740426, - "grad_norm": 684.9427490234375, - "learning_rate": 3.516522710256562e-05, - "loss": 63.644, - "step": 107510 - }, - { - "epoch": 0.4343944052327719, - "grad_norm": 707.3285522460938, - "learning_rate": 3.516203793005989e-05, - "loss": 44.6688, - "step": 107520 - }, - { - "epoch": 0.43443480649813954, - "grad_norm": 765.8327026367188, - "learning_rate": 3.515884855944e-05, - "loss": 46.0717, - "step": 107530 - }, - { - "epoch": 0.4344752077635072, - "grad_norm": 708.8120727539062, - "learning_rate": 3.515565899076813e-05, - "loss": 47.5356, - "step": 107540 - }, - { - "epoch": 0.43451560902887476, - "grad_norm": 779.5784912109375, - "learning_rate": 3.5152469224106454e-05, - "loss": 50.02, - "step": 107550 - }, - { - "epoch": 0.4345560102942424, - "grad_norm": 638.3084106445312, - "learning_rate": 3.514927925951717e-05, - "loss": 69.4422, - "step": 107560 - }, - { - "epoch": 0.43459641155961004, - "grad_norm": 0.0, - "learning_rate": 3.5146089097062456e-05, - "loss": 48.1131, - "step": 107570 - }, - { - "epoch": 0.4346368128249777, - "grad_norm": 613.4315185546875, - "learning_rate": 3.514289873680451e-05, - "loss": 39.7531, - "step": 107580 - }, - { - "epoch": 0.4346772140903453, - "grad_norm": 596.2418212890625, - "learning_rate": 3.513970817880554e-05, - "loss": 46.1025, - "step": 107590 - }, - { - "epoch": 0.43471761535571296, - "grad_norm": 742.696044921875, - "learning_rate": 3.513651742312774e-05, - "loss": 50.7197, - "step": 107600 - }, - { - "epoch": 0.43475801662108055, - "grad_norm": 517.7498168945312, - "learning_rate": 3.51333264698333e-05, - "loss": 72.2126, - "step": 107610 - }, - { - "epoch": 0.4347984178864482, - "grad_norm": 336.8522644042969, - "learning_rate": 3.5130135318984456e-05, - "loss": 97.5587, - "step": 107620 - }, - { - "epoch": 0.4348388191518158, - "grad_norm": 1882.3516845703125, - "learning_rate": 3.512694397064341e-05, - "loss": 46.2149, - "step": 107630 - }, - { - "epoch": 0.43487922041718347, - "grad_norm": 852.6275024414062, - "learning_rate": 3.512375242487236e-05, - "loss": 59.0864, - "step": 107640 - }, - { - "epoch": 0.4349196216825511, - "grad_norm": 1158.6456298828125, - "learning_rate": 3.512056068173356e-05, - "loss": 86.1055, - "step": 107650 - }, - { - "epoch": 0.43496002294791875, - "grad_norm": 1050.2406005859375, - "learning_rate": 3.511736874128922e-05, - "loss": 47.8806, - "step": 107660 - }, - { - "epoch": 0.4350004242132864, - "grad_norm": 576.0054931640625, - "learning_rate": 3.5114176603601564e-05, - "loss": 44.2818, - "step": 107670 - }, - { - "epoch": 0.43504082547865397, - "grad_norm": 3012.95849609375, - "learning_rate": 3.511098426873283e-05, - "loss": 75.0666, - "step": 107680 - }, - { - "epoch": 0.4350812267440216, - "grad_norm": 1107.3419189453125, - "learning_rate": 3.5107791736745244e-05, - "loss": 47.5885, - "step": 107690 - }, - { - "epoch": 0.43512162800938925, - "grad_norm": 486.3786926269531, - "learning_rate": 3.5104599007701054e-05, - "loss": 52.94, - "step": 107700 - }, - { - "epoch": 0.4351620292747569, - "grad_norm": 1440.493896484375, - "learning_rate": 3.510140608166251e-05, - "loss": 44.7074, - "step": 107710 - }, - { - "epoch": 0.43520243054012453, - "grad_norm": 665.9118041992188, - "learning_rate": 3.5098212958691854e-05, - "loss": 84.7358, - "step": 107720 - }, - { - "epoch": 0.43524283180549217, - "grad_norm": 670.6646118164062, - "learning_rate": 3.509501963885134e-05, - "loss": 47.459, - "step": 107730 - }, - { - "epoch": 0.43528323307085975, - "grad_norm": 1127.4244384765625, - "learning_rate": 3.509182612220322e-05, - "loss": 66.771, - "step": 107740 - }, - { - "epoch": 0.4353236343362274, - "grad_norm": 1750.5572509765625, - "learning_rate": 3.5088632408809755e-05, - "loss": 100.8368, - "step": 107750 - }, - { - "epoch": 0.43536403560159503, - "grad_norm": 694.012451171875, - "learning_rate": 3.50854384987332e-05, - "loss": 67.874, - "step": 107760 - }, - { - "epoch": 0.4354044368669627, - "grad_norm": 543.4619750976562, - "learning_rate": 3.508224439203583e-05, - "loss": 48.8384, - "step": 107770 - }, - { - "epoch": 0.4354448381323303, - "grad_norm": 1029.557373046875, - "learning_rate": 3.5079050088779926e-05, - "loss": 41.3493, - "step": 107780 - }, - { - "epoch": 0.43548523939769795, - "grad_norm": 404.4317321777344, - "learning_rate": 3.5075855589027746e-05, - "loss": 59.9128, - "step": 107790 - }, - { - "epoch": 0.4355256406630656, - "grad_norm": 459.72357177734375, - "learning_rate": 3.507266089284157e-05, - "loss": 49.0815, - "step": 107800 - }, - { - "epoch": 0.4355660419284332, - "grad_norm": 1144.3963623046875, - "learning_rate": 3.506946600028368e-05, - "loss": 93.688, - "step": 107810 - }, - { - "epoch": 0.4356064431938008, - "grad_norm": 1337.915771484375, - "learning_rate": 3.5066270911416373e-05, - "loss": 41.8152, - "step": 107820 - }, - { - "epoch": 0.43564684445916846, - "grad_norm": 537.2900390625, - "learning_rate": 3.506307562630194e-05, - "loss": 33.384, - "step": 107830 - }, - { - "epoch": 0.4356872457245361, - "grad_norm": 676.332275390625, - "learning_rate": 3.5059880145002654e-05, - "loss": 72.9059, - "step": 107840 - }, - { - "epoch": 0.43572764698990374, - "grad_norm": 1162.9178466796875, - "learning_rate": 3.505668446758083e-05, - "loss": 46.9766, - "step": 107850 - }, - { - "epoch": 0.4357680482552714, - "grad_norm": 1080.3890380859375, - "learning_rate": 3.505348859409876e-05, - "loss": 53.5876, - "step": 107860 - }, - { - "epoch": 0.43580844952063896, - "grad_norm": 1019.6876220703125, - "learning_rate": 3.5050292524618764e-05, - "loss": 39.1764, - "step": 107870 - }, - { - "epoch": 0.4358488507860066, - "grad_norm": 521.6009521484375, - "learning_rate": 3.5047096259203135e-05, - "loss": 69.4503, - "step": 107880 - }, - { - "epoch": 0.43588925205137424, - "grad_norm": 1152.7193603515625, - "learning_rate": 3.5043899797914187e-05, - "loss": 74.8687, - "step": 107890 - }, - { - "epoch": 0.4359296533167419, - "grad_norm": 993.6572265625, - "learning_rate": 3.504070314081425e-05, - "loss": 64.1491, - "step": 107900 - }, - { - "epoch": 0.4359700545821095, - "grad_norm": 3389.17578125, - "learning_rate": 3.503750628796563e-05, - "loss": 62.572, - "step": 107910 - }, - { - "epoch": 0.43601045584747716, - "grad_norm": 1042.504150390625, - "learning_rate": 3.503430923943066e-05, - "loss": 51.6757, - "step": 107920 - }, - { - "epoch": 0.43605085711284475, - "grad_norm": 1820.093505859375, - "learning_rate": 3.503111199527167e-05, - "loss": 46.9761, - "step": 107930 - }, - { - "epoch": 0.4360912583782124, - "grad_norm": 530.4893798828125, - "learning_rate": 3.5027914555550976e-05, - "loss": 80.2376, - "step": 107940 - }, - { - "epoch": 0.43613165964358, - "grad_norm": 673.11767578125, - "learning_rate": 3.502471692033094e-05, - "loss": 36.759, - "step": 107950 - }, - { - "epoch": 0.43617206090894767, - "grad_norm": 979.2339477539062, - "learning_rate": 3.5021519089673876e-05, - "loss": 53.1278, - "step": 107960 - }, - { - "epoch": 0.4362124621743153, - "grad_norm": 533.0077514648438, - "learning_rate": 3.501832106364213e-05, - "loss": 44.0872, - "step": 107970 - }, - { - "epoch": 0.43625286343968295, - "grad_norm": 2183.581298828125, - "learning_rate": 3.501512284229807e-05, - "loss": 74.3517, - "step": 107980 - }, - { - "epoch": 0.4362932647050506, - "grad_norm": 1532.984375, - "learning_rate": 3.5011924425704036e-05, - "loss": 55.7305, - "step": 107990 - }, - { - "epoch": 0.43633366597041817, - "grad_norm": 794.1177978515625, - "learning_rate": 3.5008725813922386e-05, - "loss": 39.9372, - "step": 108000 - }, - { - "epoch": 0.4363740672357858, - "grad_norm": 489.5924072265625, - "learning_rate": 3.5005527007015455e-05, - "loss": 51.468, - "step": 108010 - }, - { - "epoch": 0.43641446850115345, - "grad_norm": 1078.9996337890625, - "learning_rate": 3.500232800504563e-05, - "loss": 69.4806, - "step": 108020 - }, - { - "epoch": 0.4364548697665211, - "grad_norm": 880.2926025390625, - "learning_rate": 3.499912880807528e-05, - "loss": 53.5359, - "step": 108030 - }, - { - "epoch": 0.43649527103188873, - "grad_norm": 827.0285034179688, - "learning_rate": 3.4995929416166756e-05, - "loss": 50.6137, - "step": 108040 - }, - { - "epoch": 0.43653567229725637, - "grad_norm": 720.4219970703125, - "learning_rate": 3.499272982938244e-05, - "loss": 35.6677, - "step": 108050 - }, - { - "epoch": 0.43657607356262396, - "grad_norm": 739.00927734375, - "learning_rate": 3.4989530047784716e-05, - "loss": 70.8884, - "step": 108060 - }, - { - "epoch": 0.4366164748279916, - "grad_norm": 387.0387878417969, - "learning_rate": 3.498633007143596e-05, - "loss": 47.103, - "step": 108070 - }, - { - "epoch": 0.43665687609335924, - "grad_norm": 912.795654296875, - "learning_rate": 3.498312990039856e-05, - "loss": 56.658, - "step": 108080 - }, - { - "epoch": 0.4366972773587269, - "grad_norm": 1193.864990234375, - "learning_rate": 3.497992953473491e-05, - "loss": 77.9862, - "step": 108090 - }, - { - "epoch": 0.4367376786240945, - "grad_norm": 711.3486938476562, - "learning_rate": 3.4976728974507384e-05, - "loss": 31.2605, - "step": 108100 - }, - { - "epoch": 0.43677807988946216, - "grad_norm": 1228.09033203125, - "learning_rate": 3.497352821977839e-05, - "loss": 42.339, - "step": 108110 - }, - { - "epoch": 0.4368184811548298, - "grad_norm": 524.43798828125, - "learning_rate": 3.497032727061034e-05, - "loss": 59.911, - "step": 108120 - }, - { - "epoch": 0.4368588824201974, - "grad_norm": 255.4713134765625, - "learning_rate": 3.496712612706561e-05, - "loss": 63.1904, - "step": 108130 - }, - { - "epoch": 0.436899283685565, - "grad_norm": 261.4523620605469, - "learning_rate": 3.4963924789206636e-05, - "loss": 88.5668, - "step": 108140 - }, - { - "epoch": 0.43693968495093266, - "grad_norm": 680.0599365234375, - "learning_rate": 3.496072325709582e-05, - "loss": 45.9959, - "step": 108150 - }, - { - "epoch": 0.4369800862163003, - "grad_norm": 402.27239990234375, - "learning_rate": 3.495752153079557e-05, - "loss": 50.0955, - "step": 108160 - }, - { - "epoch": 0.43702048748166794, - "grad_norm": 0.0, - "learning_rate": 3.495431961036832e-05, - "loss": 44.4594, - "step": 108170 - }, - { - "epoch": 0.4370608887470356, - "grad_norm": 455.03326416015625, - "learning_rate": 3.495111749587647e-05, - "loss": 51.1414, - "step": 108180 - }, - { - "epoch": 0.43710129001240317, - "grad_norm": 438.03314208984375, - "learning_rate": 3.494791518738247e-05, - "loss": 112.6708, - "step": 108190 - }, - { - "epoch": 0.4371416912777708, - "grad_norm": 547.6517333984375, - "learning_rate": 3.494471268494875e-05, - "loss": 31.8104, - "step": 108200 - }, - { - "epoch": 0.43718209254313845, - "grad_norm": 541.253173828125, - "learning_rate": 3.494150998863772e-05, - "loss": 63.8281, - "step": 108210 - }, - { - "epoch": 0.4372224938085061, - "grad_norm": 710.3536376953125, - "learning_rate": 3.4938307098511846e-05, - "loss": 43.2936, - "step": 108220 - }, - { - "epoch": 0.4372628950738737, - "grad_norm": 795.8914184570312, - "learning_rate": 3.493510401463355e-05, - "loss": 48.5519, - "step": 108230 - }, - { - "epoch": 0.43730329633924137, - "grad_norm": 1529.8331298828125, - "learning_rate": 3.493190073706529e-05, - "loss": 42.2938, - "step": 108240 - }, - { - "epoch": 0.43734369760460895, - "grad_norm": 1139.19189453125, - "learning_rate": 3.4928697265869515e-05, - "loss": 43.6591, - "step": 108250 - }, - { - "epoch": 0.4373840988699766, - "grad_norm": 427.7257080078125, - "learning_rate": 3.492549360110868e-05, - "loss": 126.2329, - "step": 108260 - }, - { - "epoch": 0.43742450013534423, - "grad_norm": 489.84521484375, - "learning_rate": 3.4922289742845224e-05, - "loss": 51.3288, - "step": 108270 - }, - { - "epoch": 0.43746490140071187, - "grad_norm": 0.0, - "learning_rate": 3.491908569114164e-05, - "loss": 57.9718, - "step": 108280 - }, - { - "epoch": 0.4375053026660795, - "grad_norm": 901.902099609375, - "learning_rate": 3.491588144606035e-05, - "loss": 58.3395, - "step": 108290 - }, - { - "epoch": 0.43754570393144715, - "grad_norm": 549.3409423828125, - "learning_rate": 3.491267700766386e-05, - "loss": 82.0675, - "step": 108300 - }, - { - "epoch": 0.4375861051968148, - "grad_norm": 490.4176025390625, - "learning_rate": 3.490947237601462e-05, - "loss": 52.1806, - "step": 108310 - }, - { - "epoch": 0.4376265064621824, - "grad_norm": 650.8810424804688, - "learning_rate": 3.4906267551175124e-05, - "loss": 76.2948, - "step": 108320 - }, - { - "epoch": 0.43766690772755, - "grad_norm": 959.3955688476562, - "learning_rate": 3.4903062533207834e-05, - "loss": 76.9859, - "step": 108330 - }, - { - "epoch": 0.43770730899291765, - "grad_norm": 609.113037109375, - "learning_rate": 3.489985732217525e-05, - "loss": 34.4562, - "step": 108340 - }, - { - "epoch": 0.4377477102582853, - "grad_norm": 2289.5068359375, - "learning_rate": 3.4896651918139845e-05, - "loss": 71.8435, - "step": 108350 - }, - { - "epoch": 0.43778811152365293, - "grad_norm": 0.0, - "learning_rate": 3.489344632116412e-05, - "loss": 54.8811, - "step": 108360 - }, - { - "epoch": 0.4378285127890206, - "grad_norm": 416.5489196777344, - "learning_rate": 3.489024053131056e-05, - "loss": 54.7843, - "step": 108370 - }, - { - "epoch": 0.43786891405438816, - "grad_norm": 850.0728149414062, - "learning_rate": 3.488703454864167e-05, - "loss": 83.0363, - "step": 108380 - }, - { - "epoch": 0.4379093153197558, - "grad_norm": 580.010498046875, - "learning_rate": 3.488382837321995e-05, - "loss": 45.165, - "step": 108390 - }, - { - "epoch": 0.43794971658512344, - "grad_norm": 684.89111328125, - "learning_rate": 3.488062200510791e-05, - "loss": 78.0684, - "step": 108400 - }, - { - "epoch": 0.4379901178504911, - "grad_norm": 1013.9144287109375, - "learning_rate": 3.487741544436806e-05, - "loss": 63.4015, - "step": 108410 - }, - { - "epoch": 0.4380305191158587, - "grad_norm": 758.853759765625, - "learning_rate": 3.48742086910629e-05, - "loss": 71.6265, - "step": 108420 - }, - { - "epoch": 0.43807092038122636, - "grad_norm": 398.92095947265625, - "learning_rate": 3.487100174525498e-05, - "loss": 43.4276, - "step": 108430 - }, - { - "epoch": 0.438111321646594, - "grad_norm": 313.7893371582031, - "learning_rate": 3.4867794607006784e-05, - "loss": 48.0121, - "step": 108440 - }, - { - "epoch": 0.4381517229119616, - "grad_norm": 1349.3988037109375, - "learning_rate": 3.486458727638085e-05, - "loss": 70.9725, - "step": 108450 - }, - { - "epoch": 0.4381921241773292, - "grad_norm": 433.0930480957031, - "learning_rate": 3.486137975343971e-05, - "loss": 61.4865, - "step": 108460 - }, - { - "epoch": 0.43823252544269686, - "grad_norm": 1083.5059814453125, - "learning_rate": 3.48581720382459e-05, - "loss": 54.0927, - "step": 108470 - }, - { - "epoch": 0.4382729267080645, - "grad_norm": 2715.4462890625, - "learning_rate": 3.485496413086195e-05, - "loss": 87.5101, - "step": 108480 - }, - { - "epoch": 0.43831332797343214, - "grad_norm": 566.393798828125, - "learning_rate": 3.4851756031350394e-05, - "loss": 56.3198, - "step": 108490 - }, - { - "epoch": 0.4383537292387998, - "grad_norm": 815.7106323242188, - "learning_rate": 3.484854773977378e-05, - "loss": 61.3737, - "step": 108500 - }, - { - "epoch": 0.43839413050416737, - "grad_norm": 807.414306640625, - "learning_rate": 3.4845339256194666e-05, - "loss": 44.4692, - "step": 108510 - }, - { - "epoch": 0.438434531769535, - "grad_norm": 984.805419921875, - "learning_rate": 3.484213058067559e-05, - "loss": 75.2083, - "step": 108520 - }, - { - "epoch": 0.43847493303490265, - "grad_norm": 1864.611083984375, - "learning_rate": 3.483892171327911e-05, - "loss": 86.4765, - "step": 108530 - }, - { - "epoch": 0.4385153343002703, - "grad_norm": 1487.40869140625, - "learning_rate": 3.4835712654067785e-05, - "loss": 76.399, - "step": 108540 - }, - { - "epoch": 0.4385557355656379, - "grad_norm": 735.3146362304688, - "learning_rate": 3.483250340310418e-05, - "loss": 70.6352, - "step": 108550 - }, - { - "epoch": 0.43859613683100557, - "grad_norm": 807.2081298828125, - "learning_rate": 3.482929396045087e-05, - "loss": 48.8148, - "step": 108560 - }, - { - "epoch": 0.43863653809637315, - "grad_norm": 632.7271118164062, - "learning_rate": 3.48260843261704e-05, - "loss": 77.7649, - "step": 108570 - }, - { - "epoch": 0.4386769393617408, - "grad_norm": 897.5272216796875, - "learning_rate": 3.482287450032536e-05, - "loss": 51.7576, - "step": 108580 - }, - { - "epoch": 0.43871734062710843, - "grad_norm": 753.651611328125, - "learning_rate": 3.4819664482978325e-05, - "loss": 71.9576, - "step": 108590 - }, - { - "epoch": 0.43875774189247607, - "grad_norm": 524.542724609375, - "learning_rate": 3.481645427419188e-05, - "loss": 51.1556, - "step": 108600 - }, - { - "epoch": 0.4387981431578437, - "grad_norm": 578.78173828125, - "learning_rate": 3.48132438740286e-05, - "loss": 42.8052, - "step": 108610 - }, - { - "epoch": 0.43883854442321135, - "grad_norm": 634.4146118164062, - "learning_rate": 3.481003328255108e-05, - "loss": 56.4694, - "step": 108620 - }, - { - "epoch": 0.438878945688579, - "grad_norm": 823.702392578125, - "learning_rate": 3.480682249982191e-05, - "loss": 51.0967, - "step": 108630 - }, - { - "epoch": 0.4389193469539466, - "grad_norm": 625.2899169921875, - "learning_rate": 3.4803611525903685e-05, - "loss": 55.0041, - "step": 108640 - }, - { - "epoch": 0.4389597482193142, - "grad_norm": 251.00901794433594, - "learning_rate": 3.480040036085901e-05, - "loss": 52.5765, - "step": 108650 - }, - { - "epoch": 0.43900014948468186, - "grad_norm": 1115.88134765625, - "learning_rate": 3.479718900475049e-05, - "loss": 83.0873, - "step": 108660 - }, - { - "epoch": 0.4390405507500495, - "grad_norm": 9209.01953125, - "learning_rate": 3.479397745764071e-05, - "loss": 142.5869, - "step": 108670 - }, - { - "epoch": 0.43908095201541714, - "grad_norm": 704.7980346679688, - "learning_rate": 3.479076571959231e-05, - "loss": 47.6946, - "step": 108680 - }, - { - "epoch": 0.4391213532807848, - "grad_norm": 1096.587890625, - "learning_rate": 3.4787553790667896e-05, - "loss": 57.3606, - "step": 108690 - }, - { - "epoch": 0.43916175454615236, - "grad_norm": 685.4246215820312, - "learning_rate": 3.4784341670930065e-05, - "loss": 52.4217, - "step": 108700 - }, - { - "epoch": 0.43920215581152, - "grad_norm": 385.501220703125, - "learning_rate": 3.478112936044146e-05, - "loss": 43.921, - "step": 108710 - }, - { - "epoch": 0.43924255707688764, - "grad_norm": 648.174560546875, - "learning_rate": 3.477791685926471e-05, - "loss": 48.4695, - "step": 108720 - }, - { - "epoch": 0.4392829583422553, - "grad_norm": 201.7768096923828, - "learning_rate": 3.4774704167462434e-05, - "loss": 63.3514, - "step": 108730 - }, - { - "epoch": 0.4393233596076229, - "grad_norm": 579.0665283203125, - "learning_rate": 3.477149128509727e-05, - "loss": 59.8261, - "step": 108740 - }, - { - "epoch": 0.43936376087299056, - "grad_norm": 727.160888671875, - "learning_rate": 3.476827821223184e-05, - "loss": 48.0612, - "step": 108750 - }, - { - "epoch": 0.4394041621383582, - "grad_norm": 842.6546020507812, - "learning_rate": 3.4765064948928814e-05, - "loss": 52.5195, - "step": 108760 - }, - { - "epoch": 0.4394445634037258, - "grad_norm": 680.0587158203125, - "learning_rate": 3.4761851495250816e-05, - "loss": 83.2483, - "step": 108770 - }, - { - "epoch": 0.4394849646690934, - "grad_norm": 946.002197265625, - "learning_rate": 3.475863785126049e-05, - "loss": 61.0262, - "step": 108780 - }, - { - "epoch": 0.43952536593446107, - "grad_norm": 1292.424072265625, - "learning_rate": 3.47554240170205e-05, - "loss": 117.6964, - "step": 108790 - }, - { - "epoch": 0.4395657671998287, - "grad_norm": 593.932861328125, - "learning_rate": 3.475220999259349e-05, - "loss": 60.331, - "step": 108800 - }, - { - "epoch": 0.43960616846519635, - "grad_norm": 740.8704833984375, - "learning_rate": 3.4748995778042136e-05, - "loss": 88.3325, - "step": 108810 - }, - { - "epoch": 0.439646569730564, - "grad_norm": 495.8048400878906, - "learning_rate": 3.474578137342909e-05, - "loss": 46.6653, - "step": 108820 - }, - { - "epoch": 0.43968697099593157, - "grad_norm": 907.8876342773438, - "learning_rate": 3.474256677881701e-05, - "loss": 65.5185, - "step": 108830 - }, - { - "epoch": 0.4397273722612992, - "grad_norm": 1308.182861328125, - "learning_rate": 3.473935199426858e-05, - "loss": 109.5993, - "step": 108840 - }, - { - "epoch": 0.43976777352666685, - "grad_norm": 538.4534912109375, - "learning_rate": 3.4736137019846465e-05, - "loss": 47.8979, - "step": 108850 - }, - { - "epoch": 0.4398081747920345, - "grad_norm": 302.4723815917969, - "learning_rate": 3.4732921855613355e-05, - "loss": 60.7093, - "step": 108860 - }, - { - "epoch": 0.43984857605740213, - "grad_norm": 1104.8883056640625, - "learning_rate": 3.472970650163191e-05, - "loss": 61.7136, - "step": 108870 - }, - { - "epoch": 0.43988897732276977, - "grad_norm": 1319.9013671875, - "learning_rate": 3.4726490957964834e-05, - "loss": 71.9735, - "step": 108880 - }, - { - "epoch": 0.43992937858813735, - "grad_norm": 3414.708251953125, - "learning_rate": 3.472327522467481e-05, - "loss": 74.4029, - "step": 108890 - }, - { - "epoch": 0.439969779853505, - "grad_norm": 442.96405029296875, - "learning_rate": 3.4720059301824525e-05, - "loss": 74.8008, - "step": 108900 - }, - { - "epoch": 0.44001018111887263, - "grad_norm": 849.4617919921875, - "learning_rate": 3.4716843189476687e-05, - "loss": 67.8839, - "step": 108910 - }, - { - "epoch": 0.4400505823842403, - "grad_norm": 464.7969970703125, - "learning_rate": 3.471362688769398e-05, - "loss": 55.6521, - "step": 108920 - }, - { - "epoch": 0.4400909836496079, - "grad_norm": 426.9241943359375, - "learning_rate": 3.471041039653913e-05, - "loss": 53.6479, - "step": 108930 - }, - { - "epoch": 0.44013138491497555, - "grad_norm": 926.24853515625, - "learning_rate": 3.4707193716074816e-05, - "loss": 58.2524, - "step": 108940 - }, - { - "epoch": 0.4401717861803432, - "grad_norm": 1493.707763671875, - "learning_rate": 3.470397684636377e-05, - "loss": 60.5845, - "step": 108950 - }, - { - "epoch": 0.4402121874457108, - "grad_norm": 715.8504638671875, - "learning_rate": 3.4700759787468695e-05, - "loss": 61.8466, - "step": 108960 - }, - { - "epoch": 0.4402525887110784, - "grad_norm": 539.120849609375, - "learning_rate": 3.469754253945232e-05, - "loss": 50.327, - "step": 108970 - }, - { - "epoch": 0.44029298997644606, - "grad_norm": 644.7427978515625, - "learning_rate": 3.4694325102377355e-05, - "loss": 74.642, - "step": 108980 - }, - { - "epoch": 0.4403333912418137, - "grad_norm": 702.2979736328125, - "learning_rate": 3.469110747630653e-05, - "loss": 56.1246, - "step": 108990 - }, - { - "epoch": 0.44037379250718134, - "grad_norm": 1032.9031982421875, - "learning_rate": 3.4687889661302576e-05, - "loss": 56.0517, - "step": 109000 - }, - { - "epoch": 0.440414193772549, - "grad_norm": 972.0687866210938, - "learning_rate": 3.468467165742823e-05, - "loss": 56.0222, - "step": 109010 - }, - { - "epoch": 0.44045459503791656, - "grad_norm": 598.9658203125, - "learning_rate": 3.468145346474622e-05, - "loss": 57.363, - "step": 109020 - }, - { - "epoch": 0.4404949963032842, - "grad_norm": 616.7764282226562, - "learning_rate": 3.4678235083319296e-05, - "loss": 82.0413, - "step": 109030 - }, - { - "epoch": 0.44053539756865184, - "grad_norm": 318.1781005859375, - "learning_rate": 3.467501651321019e-05, - "loss": 62.9576, - "step": 109040 - }, - { - "epoch": 0.4405757988340195, - "grad_norm": 718.73681640625, - "learning_rate": 3.467179775448166e-05, - "loss": 66.6093, - "step": 109050 - }, - { - "epoch": 0.4406162000993871, - "grad_norm": 973.098388671875, - "learning_rate": 3.466857880719645e-05, - "loss": 65.0112, - "step": 109060 - }, - { - "epoch": 0.44065660136475476, - "grad_norm": 1736.9281005859375, - "learning_rate": 3.466535967141732e-05, - "loss": 67.5672, - "step": 109070 - }, - { - "epoch": 0.4406970026301224, - "grad_norm": 1171.598388671875, - "learning_rate": 3.466214034720702e-05, - "loss": 62.2778, - "step": 109080 - }, - { - "epoch": 0.44073740389549, - "grad_norm": 981.1380004882812, - "learning_rate": 3.4658920834628335e-05, - "loss": 72.6023, - "step": 109090 - }, - { - "epoch": 0.4407778051608576, - "grad_norm": 858.4905395507812, - "learning_rate": 3.4655701133744e-05, - "loss": 43.7386, - "step": 109100 - }, - { - "epoch": 0.44081820642622527, - "grad_norm": 734.6194458007812, - "learning_rate": 3.465248124461681e-05, - "loss": 41.7183, - "step": 109110 - }, - { - "epoch": 0.4408586076915929, - "grad_norm": 2607.31787109375, - "learning_rate": 3.4649261167309526e-05, - "loss": 105.1528, - "step": 109120 - }, - { - "epoch": 0.44089900895696055, - "grad_norm": 675.8436279296875, - "learning_rate": 3.464604090188493e-05, - "loss": 75.2605, - "step": 109130 - }, - { - "epoch": 0.4409394102223282, - "grad_norm": 472.8738098144531, - "learning_rate": 3.46428204484058e-05, - "loss": 50.3132, - "step": 109140 - }, - { - "epoch": 0.44097981148769577, - "grad_norm": 629.1958618164062, - "learning_rate": 3.463959980693492e-05, - "loss": 47.585, - "step": 109150 - }, - { - "epoch": 0.4410202127530634, - "grad_norm": 714.365478515625, - "learning_rate": 3.4636378977535075e-05, - "loss": 62.9666, - "step": 109160 - }, - { - "epoch": 0.44106061401843105, - "grad_norm": 1306.74169921875, - "learning_rate": 3.4633157960269056e-05, - "loss": 67.3224, - "step": 109170 - }, - { - "epoch": 0.4411010152837987, - "grad_norm": 810.0322265625, - "learning_rate": 3.462993675519968e-05, - "loss": 45.5099, - "step": 109180 - }, - { - "epoch": 0.44114141654916633, - "grad_norm": 822.7820434570312, - "learning_rate": 3.462671536238972e-05, - "loss": 47.5919, - "step": 109190 - }, - { - "epoch": 0.44118181781453397, - "grad_norm": 719.748046875, - "learning_rate": 3.462349378190199e-05, - "loss": 58.4106, - "step": 109200 - }, - { - "epoch": 0.44122221907990156, - "grad_norm": 1387.738525390625, - "learning_rate": 3.4620272013799286e-05, - "loss": 55.8598, - "step": 109210 - }, - { - "epoch": 0.4412626203452692, - "grad_norm": 291.135009765625, - "learning_rate": 3.461705005814444e-05, - "loss": 56.4821, - "step": 109220 - }, - { - "epoch": 0.44130302161063684, - "grad_norm": 2201.89453125, - "learning_rate": 3.4613827915000244e-05, - "loss": 112.0971, - "step": 109230 - }, - { - "epoch": 0.4413434228760045, - "grad_norm": 828.1795043945312, - "learning_rate": 3.461060558442952e-05, - "loss": 52.2993, - "step": 109240 - }, - { - "epoch": 0.4413838241413721, - "grad_norm": 493.1712341308594, - "learning_rate": 3.460738306649509e-05, - "loss": 38.8712, - "step": 109250 - }, - { - "epoch": 0.44142422540673976, - "grad_norm": 310.04742431640625, - "learning_rate": 3.4604160361259796e-05, - "loss": 54.1063, - "step": 109260 - }, - { - "epoch": 0.4414646266721074, - "grad_norm": 1105.275634765625, - "learning_rate": 3.460093746878644e-05, - "loss": 57.73, - "step": 109270 - }, - { - "epoch": 0.441505027937475, - "grad_norm": 422.4311828613281, - "learning_rate": 3.459771438913787e-05, - "loss": 76.8583, - "step": 109280 - }, - { - "epoch": 0.4415454292028426, - "grad_norm": 533.1647338867188, - "learning_rate": 3.459449112237691e-05, - "loss": 70.399, - "step": 109290 - }, - { - "epoch": 0.44158583046821026, - "grad_norm": 453.3349304199219, - "learning_rate": 3.459126766856641e-05, - "loss": 47.0437, - "step": 109300 - }, - { - "epoch": 0.4416262317335779, - "grad_norm": 868.0079345703125, - "learning_rate": 3.458804402776921e-05, - "loss": 49.0908, - "step": 109310 - }, - { - "epoch": 0.44166663299894554, - "grad_norm": 986.170166015625, - "learning_rate": 3.458482020004815e-05, - "loss": 54.6284, - "step": 109320 - }, - { - "epoch": 0.4417070342643132, - "grad_norm": 447.4167785644531, - "learning_rate": 3.4581596185466094e-05, - "loss": 59.609, - "step": 109330 - }, - { - "epoch": 0.44174743552968077, - "grad_norm": 641.2802124023438, - "learning_rate": 3.457837198408588e-05, - "loss": 85.7572, - "step": 109340 - }, - { - "epoch": 0.4417878367950484, - "grad_norm": 808.4306640625, - "learning_rate": 3.457514759597038e-05, - "loss": 64.826, - "step": 109350 - }, - { - "epoch": 0.44182823806041605, - "grad_norm": 202.7559051513672, - "learning_rate": 3.457192302118244e-05, - "loss": 52.6859, - "step": 109360 - }, - { - "epoch": 0.4418686393257837, - "grad_norm": 1533.5028076171875, - "learning_rate": 3.4568698259784945e-05, - "loss": 71.5681, - "step": 109370 - }, - { - "epoch": 0.4419090405911513, - "grad_norm": 788.40771484375, - "learning_rate": 3.4565473311840735e-05, - "loss": 56.7714, - "step": 109380 - }, - { - "epoch": 0.44194944185651897, - "grad_norm": 659.1009521484375, - "learning_rate": 3.4562248177412715e-05, - "loss": 44.7925, - "step": 109390 - }, - { - "epoch": 0.44198984312188655, - "grad_norm": 695.70068359375, - "learning_rate": 3.455902285656373e-05, - "loss": 95.6126, - "step": 109400 - }, - { - "epoch": 0.4420302443872542, - "grad_norm": 536.9435424804688, - "learning_rate": 3.4555797349356676e-05, - "loss": 52.6949, - "step": 109410 - }, - { - "epoch": 0.44207064565262183, - "grad_norm": 928.5408935546875, - "learning_rate": 3.455257165585444e-05, - "loss": 65.1302, - "step": 109420 - }, - { - "epoch": 0.44211104691798947, - "grad_norm": 635.9898071289062, - "learning_rate": 3.454934577611989e-05, - "loss": 88.1777, - "step": 109430 - }, - { - "epoch": 0.4421514481833571, - "grad_norm": 1163.3731689453125, - "learning_rate": 3.454611971021593e-05, - "loss": 75.0465, - "step": 109440 - }, - { - "epoch": 0.44219184944872475, - "grad_norm": 1179.76708984375, - "learning_rate": 3.454289345820546e-05, - "loss": 72.196, - "step": 109450 - }, - { - "epoch": 0.4422322507140924, - "grad_norm": 407.4288024902344, - "learning_rate": 3.453966702015137e-05, - "loss": 37.4781, - "step": 109460 - }, - { - "epoch": 0.44227265197946, - "grad_norm": 1263.00732421875, - "learning_rate": 3.453644039611656e-05, - "loss": 55.1117, - "step": 109470 - }, - { - "epoch": 0.4423130532448276, - "grad_norm": 441.8350830078125, - "learning_rate": 3.453321358616393e-05, - "loss": 34.3389, - "step": 109480 - }, - { - "epoch": 0.44235345451019525, - "grad_norm": 813.268798828125, - "learning_rate": 3.452998659035639e-05, - "loss": 61.2214, - "step": 109490 - }, - { - "epoch": 0.4423938557755629, - "grad_norm": 1138.2327880859375, - "learning_rate": 3.452675940875686e-05, - "loss": 69.0981, - "step": 109500 - }, - { - "epoch": 0.44243425704093053, - "grad_norm": 1215.769287109375, - "learning_rate": 3.452353204142824e-05, - "loss": 52.1015, - "step": 109510 - }, - { - "epoch": 0.4424746583062982, - "grad_norm": 169.15260314941406, - "learning_rate": 3.452030448843347e-05, - "loss": 49.8006, - "step": 109520 - }, - { - "epoch": 0.44251505957166576, - "grad_norm": 372.5907287597656, - "learning_rate": 3.451707674983546e-05, - "loss": 67.884, - "step": 109530 - }, - { - "epoch": 0.4425554608370334, - "grad_norm": 1219.9879150390625, - "learning_rate": 3.451384882569714e-05, - "loss": 48.5552, - "step": 109540 - }, - { - "epoch": 0.44259586210240104, - "grad_norm": 1142.11865234375, - "learning_rate": 3.4510620716081446e-05, - "loss": 50.2349, - "step": 109550 - }, - { - "epoch": 0.4426362633677687, - "grad_norm": 1006.7579956054688, - "learning_rate": 3.45073924210513e-05, - "loss": 100.4007, - "step": 109560 - }, - { - "epoch": 0.4426766646331363, - "grad_norm": 714.3269653320312, - "learning_rate": 3.4504163940669634e-05, - "loss": 65.3141, - "step": 109570 - }, - { - "epoch": 0.44271706589850396, - "grad_norm": 1006.07177734375, - "learning_rate": 3.4500935274999413e-05, - "loss": 98.7038, - "step": 109580 - }, - { - "epoch": 0.4427574671638716, - "grad_norm": 1100.7425537109375, - "learning_rate": 3.449770642410356e-05, - "loss": 58.6847, - "step": 109590 - }, - { - "epoch": 0.4427978684292392, - "grad_norm": 1185.401123046875, - "learning_rate": 3.4494477388045035e-05, - "loss": 56.3779, - "step": 109600 - }, - { - "epoch": 0.4428382696946068, - "grad_norm": 1830.9031982421875, - "learning_rate": 3.449124816688677e-05, - "loss": 71.803, - "step": 109610 - }, - { - "epoch": 0.44287867095997446, - "grad_norm": 639.5352172851562, - "learning_rate": 3.448801876069176e-05, - "loss": 59.7502, - "step": 109620 - }, - { - "epoch": 0.4429190722253421, - "grad_norm": 818.8923950195312, - "learning_rate": 3.4484789169522927e-05, - "loss": 65.0993, - "step": 109630 - }, - { - "epoch": 0.44295947349070974, - "grad_norm": 520.5197143554688, - "learning_rate": 3.448155939344324e-05, - "loss": 58.1046, - "step": 109640 - }, - { - "epoch": 0.4429998747560774, - "grad_norm": 2226.27978515625, - "learning_rate": 3.4478329432515674e-05, - "loss": 49.6613, - "step": 109650 - }, - { - "epoch": 0.44304027602144497, - "grad_norm": 3536.548095703125, - "learning_rate": 3.44750992868032e-05, - "loss": 60.4391, - "step": 109660 - }, - { - "epoch": 0.4430806772868126, - "grad_norm": 922.1171875, - "learning_rate": 3.447186895636879e-05, - "loss": 49.5081, - "step": 109670 - }, - { - "epoch": 0.44312107855218025, - "grad_norm": 485.9327697753906, - "learning_rate": 3.4468638441275415e-05, - "loss": 52.8482, - "step": 109680 - }, - { - "epoch": 0.4431614798175479, - "grad_norm": 1202.439208984375, - "learning_rate": 3.4465407741586056e-05, - "loss": 51.3917, - "step": 109690 - }, - { - "epoch": 0.4432018810829155, - "grad_norm": 3666.6962890625, - "learning_rate": 3.4462176857363704e-05, - "loss": 50.0755, - "step": 109700 - }, - { - "epoch": 0.44324228234828317, - "grad_norm": 566.9150390625, - "learning_rate": 3.445894578867134e-05, - "loss": 77.9221, - "step": 109710 - }, - { - "epoch": 0.44328268361365075, - "grad_norm": 695.4548950195312, - "learning_rate": 3.445571453557196e-05, - "loss": 92.9399, - "step": 109720 - }, - { - "epoch": 0.4433230848790184, - "grad_norm": 1101.68212890625, - "learning_rate": 3.445248309812856e-05, - "loss": 67.3483, - "step": 109730 - }, - { - "epoch": 0.44336348614438603, - "grad_norm": 782.0620727539062, - "learning_rate": 3.4449251476404135e-05, - "loss": 47.7574, - "step": 109740 - }, - { - "epoch": 0.44340388740975367, - "grad_norm": 404.2615661621094, - "learning_rate": 3.444601967046168e-05, - "loss": 68.9839, - "step": 109750 - }, - { - "epoch": 0.4434442886751213, - "grad_norm": 1344.5721435546875, - "learning_rate": 3.444278768036421e-05, - "loss": 51.2115, - "step": 109760 - }, - { - "epoch": 0.44348468994048895, - "grad_norm": 453.09136962890625, - "learning_rate": 3.443955550617474e-05, - "loss": 62.8084, - "step": 109770 - }, - { - "epoch": 0.4435250912058566, - "grad_norm": 690.1050415039062, - "learning_rate": 3.443632314795627e-05, - "loss": 37.6234, - "step": 109780 - }, - { - "epoch": 0.4435654924712242, - "grad_norm": 942.2594604492188, - "learning_rate": 3.443309060577182e-05, - "loss": 57.9371, - "step": 109790 - }, - { - "epoch": 0.4436058937365918, - "grad_norm": 2099.522216796875, - "learning_rate": 3.442985787968442e-05, - "loss": 96.0077, - "step": 109800 - }, - { - "epoch": 0.44364629500195946, - "grad_norm": 894.1741333007812, - "learning_rate": 3.4426624969757083e-05, - "loss": 43.2961, - "step": 109810 - }, - { - "epoch": 0.4436866962673271, - "grad_norm": 515.2462158203125, - "learning_rate": 3.442339187605283e-05, - "loss": 44.3043, - "step": 109820 - }, - { - "epoch": 0.44372709753269474, - "grad_norm": 1050.0601806640625, - "learning_rate": 3.442015859863472e-05, - "loss": 80.5312, - "step": 109830 - }, - { - "epoch": 0.4437674987980624, - "grad_norm": 2025.021484375, - "learning_rate": 3.4416925137565754e-05, - "loss": 60.7582, - "step": 109840 - }, - { - "epoch": 0.44380790006342996, - "grad_norm": 1199.0517578125, - "learning_rate": 3.4413691492908985e-05, - "loss": 62.2759, - "step": 109850 - }, - { - "epoch": 0.4438483013287976, - "grad_norm": 781.955078125, - "learning_rate": 3.441045766472745e-05, - "loss": 55.6269, - "step": 109860 - }, - { - "epoch": 0.44388870259416524, - "grad_norm": 1648.44140625, - "learning_rate": 3.440722365308421e-05, - "loss": 57.5258, - "step": 109870 - }, - { - "epoch": 0.4439291038595329, - "grad_norm": 1811.8358154296875, - "learning_rate": 3.440398945804229e-05, - "loss": 78.8444, - "step": 109880 - }, - { - "epoch": 0.4439695051249005, - "grad_norm": 678.7551879882812, - "learning_rate": 3.440075507966476e-05, - "loss": 58.5628, - "step": 109890 - }, - { - "epoch": 0.44400990639026816, - "grad_norm": 1020.2061157226562, - "learning_rate": 3.439752051801467e-05, - "loss": 41.0844, - "step": 109900 - }, - { - "epoch": 0.4440503076556358, - "grad_norm": 1242.7587890625, - "learning_rate": 3.439428577315508e-05, - "loss": 50.6694, - "step": 109910 - }, - { - "epoch": 0.4440907089210034, - "grad_norm": 1669.455810546875, - "learning_rate": 3.439105084514905e-05, - "loss": 61.132, - "step": 109920 - }, - { - "epoch": 0.444131110186371, - "grad_norm": 357.2478942871094, - "learning_rate": 3.4387815734059654e-05, - "loss": 64.2863, - "step": 109930 - }, - { - "epoch": 0.44417151145173867, - "grad_norm": 696.1216430664062, - "learning_rate": 3.438458043994995e-05, - "loss": 52.3681, - "step": 109940 - }, - { - "epoch": 0.4442119127171063, - "grad_norm": 1389.9837646484375, - "learning_rate": 3.438134496288302e-05, - "loss": 59.619, - "step": 109950 - }, - { - "epoch": 0.44425231398247395, - "grad_norm": 819.2481689453125, - "learning_rate": 3.437810930292195e-05, - "loss": 71.3323, - "step": 109960 - }, - { - "epoch": 0.4442927152478416, - "grad_norm": 541.750732421875, - "learning_rate": 3.43748734601298e-05, - "loss": 73.0371, - "step": 109970 - }, - { - "epoch": 0.44433311651320917, - "grad_norm": 923.3477172851562, - "learning_rate": 3.437163743456967e-05, - "loss": 50.583, - "step": 109980 - }, - { - "epoch": 0.4443735177785768, - "grad_norm": 1519.56005859375, - "learning_rate": 3.436840122630464e-05, - "loss": 63.0997, - "step": 109990 - }, - { - "epoch": 0.44441391904394445, - "grad_norm": 1023.0885620117188, - "learning_rate": 3.436516483539781e-05, - "loss": 35.0977, - "step": 110000 - }, - { - "epoch": 0.4444543203093121, - "grad_norm": 787.899658203125, - "learning_rate": 3.4361928261912254e-05, - "loss": 56.4767, - "step": 110010 - }, - { - "epoch": 0.44449472157467973, - "grad_norm": 0.0, - "learning_rate": 3.4358691505911104e-05, - "loss": 44.5502, - "step": 110020 - }, - { - "epoch": 0.44453512284004737, - "grad_norm": 385.0424499511719, - "learning_rate": 3.4355454567457445e-05, - "loss": 70.914, - "step": 110030 - }, - { - "epoch": 0.44457552410541495, - "grad_norm": 967.9429931640625, - "learning_rate": 3.435221744661438e-05, - "loss": 62.6088, - "step": 110040 - }, - { - "epoch": 0.4446159253707826, - "grad_norm": 389.7696533203125, - "learning_rate": 3.434898014344501e-05, - "loss": 48.413, - "step": 110050 - }, - { - "epoch": 0.44465632663615023, - "grad_norm": 1513.93017578125, - "learning_rate": 3.434574265801247e-05, - "loss": 50.9386, - "step": 110060 - }, - { - "epoch": 0.4446967279015179, - "grad_norm": 461.18096923828125, - "learning_rate": 3.4342504990379866e-05, - "loss": 66.7544, - "step": 110070 - }, - { - "epoch": 0.4447371291668855, - "grad_norm": 593.7630615234375, - "learning_rate": 3.433926714061032e-05, - "loss": 60.6121, - "step": 110080 - }, - { - "epoch": 0.44477753043225315, - "grad_norm": 1564.3096923828125, - "learning_rate": 3.433602910876694e-05, - "loss": 70.2792, - "step": 110090 - }, - { - "epoch": 0.4448179316976208, - "grad_norm": 1443.179931640625, - "learning_rate": 3.433279089491288e-05, - "loss": 36.5048, - "step": 110100 - }, - { - "epoch": 0.4448583329629884, - "grad_norm": 822.4376220703125, - "learning_rate": 3.432955249911125e-05, - "loss": 78.1726, - "step": 110110 - }, - { - "epoch": 0.444898734228356, - "grad_norm": 1508.205078125, - "learning_rate": 3.432631392142519e-05, - "loss": 66.1732, - "step": 110120 - }, - { - "epoch": 0.44493913549372366, - "grad_norm": 251.31320190429688, - "learning_rate": 3.432307516191783e-05, - "loss": 34.4932, - "step": 110130 - }, - { - "epoch": 0.4449795367590913, - "grad_norm": 1120.1636962890625, - "learning_rate": 3.4319836220652335e-05, - "loss": 38.0345, - "step": 110140 - }, - { - "epoch": 0.44501993802445894, - "grad_norm": 439.1837158203125, - "learning_rate": 3.431659709769183e-05, - "loss": 56.9619, - "step": 110150 - }, - { - "epoch": 0.4450603392898266, - "grad_norm": 969.8883666992188, - "learning_rate": 3.431335779309947e-05, - "loss": 37.3155, - "step": 110160 - }, - { - "epoch": 0.44510074055519416, - "grad_norm": 813.2791137695312, - "learning_rate": 3.43101183069384e-05, - "loss": 82.1769, - "step": 110170 - }, - { - "epoch": 0.4451411418205618, - "grad_norm": 712.302001953125, - "learning_rate": 3.430687863927178e-05, - "loss": 53.5758, - "step": 110180 - }, - { - "epoch": 0.44518154308592944, - "grad_norm": 730.143310546875, - "learning_rate": 3.4303638790162774e-05, - "loss": 61.2881, - "step": 110190 - }, - { - "epoch": 0.4452219443512971, - "grad_norm": 945.42041015625, - "learning_rate": 3.430039875967454e-05, - "loss": 62.6643, - "step": 110200 - }, - { - "epoch": 0.4452623456166647, - "grad_norm": 887.8884887695312, - "learning_rate": 3.429715854787024e-05, - "loss": 52.4225, - "step": 110210 - }, - { - "epoch": 0.44530274688203236, - "grad_norm": 778.4572143554688, - "learning_rate": 3.429391815481305e-05, - "loss": 80.6638, - "step": 110220 - }, - { - "epoch": 0.4453431481474, - "grad_norm": 701.6489868164062, - "learning_rate": 3.429067758056613e-05, - "loss": 70.6435, - "step": 110230 - }, - { - "epoch": 0.4453835494127676, - "grad_norm": 1139.0123291015625, - "learning_rate": 3.428743682519269e-05, - "loss": 78.8899, - "step": 110240 - }, - { - "epoch": 0.4454239506781352, - "grad_norm": 0.0, - "learning_rate": 3.428419588875588e-05, - "loss": 51.6693, - "step": 110250 - }, - { - "epoch": 0.44546435194350287, - "grad_norm": 919.1795654296875, - "learning_rate": 3.428095477131888e-05, - "loss": 56.4408, - "step": 110260 - }, - { - "epoch": 0.4455047532088705, - "grad_norm": 953.95263671875, - "learning_rate": 3.427771347294489e-05, - "loss": 72.902, - "step": 110270 - }, - { - "epoch": 0.44554515447423815, - "grad_norm": 1921.18359375, - "learning_rate": 3.427447199369711e-05, - "loss": 43.3139, - "step": 110280 - }, - { - "epoch": 0.4455855557396058, - "grad_norm": 790.10791015625, - "learning_rate": 3.4271230333638716e-05, - "loss": 71.2949, - "step": 110290 - }, - { - "epoch": 0.44562595700497337, - "grad_norm": 1008.7927856445312, - "learning_rate": 3.426798849283291e-05, - "loss": 60.3911, - "step": 110300 - }, - { - "epoch": 0.445666358270341, - "grad_norm": 749.1685791015625, - "learning_rate": 3.4264746471342905e-05, - "loss": 78.0302, - "step": 110310 - }, - { - "epoch": 0.44570675953570865, - "grad_norm": 967.6669921875, - "learning_rate": 3.4261504269231904e-05, - "loss": 49.5635, - "step": 110320 - }, - { - "epoch": 0.4457471608010763, - "grad_norm": 1514.6182861328125, - "learning_rate": 3.4258261886563104e-05, - "loss": 76.0732, - "step": 110330 - }, - { - "epoch": 0.44578756206644393, - "grad_norm": 1716.936279296875, - "learning_rate": 3.425501932339971e-05, - "loss": 133.2402, - "step": 110340 - }, - { - "epoch": 0.44582796333181157, - "grad_norm": 959.5086669921875, - "learning_rate": 3.425177657980496e-05, - "loss": 71.294, - "step": 110350 - }, - { - "epoch": 0.44586836459717916, - "grad_norm": 812.0950317382812, - "learning_rate": 3.4248533655842066e-05, - "loss": 57.06, - "step": 110360 - }, - { - "epoch": 0.4459087658625468, - "grad_norm": 267.0820617675781, - "learning_rate": 3.4245290551574237e-05, - "loss": 44.4798, - "step": 110370 - }, - { - "epoch": 0.44594916712791444, - "grad_norm": 1544.18310546875, - "learning_rate": 3.4242047267064715e-05, - "loss": 57.8316, - "step": 110380 - }, - { - "epoch": 0.4459895683932821, - "grad_norm": 695.9027709960938, - "learning_rate": 3.4238803802376716e-05, - "loss": 56.366, - "step": 110390 - }, - { - "epoch": 0.4460299696586497, - "grad_norm": 685.2529296875, - "learning_rate": 3.423556015757349e-05, - "loss": 57.191, - "step": 110400 - }, - { - "epoch": 0.44607037092401736, - "grad_norm": 653.3543090820312, - "learning_rate": 3.423231633271826e-05, - "loss": 61.4118, - "step": 110410 - }, - { - "epoch": 0.446110772189385, - "grad_norm": 1396.4661865234375, - "learning_rate": 3.4229072327874274e-05, - "loss": 53.9518, - "step": 110420 - }, - { - "epoch": 0.4461511734547526, - "grad_norm": 742.2490234375, - "learning_rate": 3.422582814310476e-05, - "loss": 64.2797, - "step": 110430 - }, - { - "epoch": 0.4461915747201202, - "grad_norm": 811.0308837890625, - "learning_rate": 3.4222583778472996e-05, - "loss": 34.3383, - "step": 110440 - }, - { - "epoch": 0.44623197598548786, - "grad_norm": 784.7843017578125, - "learning_rate": 3.421933923404219e-05, - "loss": 83.5336, - "step": 110450 - }, - { - "epoch": 0.4462723772508555, - "grad_norm": 368.3476257324219, - "learning_rate": 3.421609450987563e-05, - "loss": 43.9047, - "step": 110460 - }, - { - "epoch": 0.44631277851622314, - "grad_norm": 507.54205322265625, - "learning_rate": 3.421284960603657e-05, - "loss": 40.9963, - "step": 110470 - }, - { - "epoch": 0.4463531797815908, - "grad_norm": 1140.799560546875, - "learning_rate": 3.4209604522588255e-05, - "loss": 46.2735, - "step": 110480 - }, - { - "epoch": 0.44639358104695837, - "grad_norm": 1338.29541015625, - "learning_rate": 3.4206359259593954e-05, - "loss": 37.1206, - "step": 110490 - }, - { - "epoch": 0.446433982312326, - "grad_norm": 665.2498168945312, - "learning_rate": 3.4203113817116957e-05, - "loss": 57.0931, - "step": 110500 - }, - { - "epoch": 0.44647438357769365, - "grad_norm": 734.5198974609375, - "learning_rate": 3.4199868195220505e-05, - "loss": 71.1633, - "step": 110510 - }, - { - "epoch": 0.4465147848430613, - "grad_norm": 679.3670043945312, - "learning_rate": 3.419662239396789e-05, - "loss": 52.7156, - "step": 110520 - }, - { - "epoch": 0.4465551861084289, - "grad_norm": 538.1387939453125, - "learning_rate": 3.419337641342239e-05, - "loss": 87.353, - "step": 110530 - }, - { - "epoch": 0.44659558737379657, - "grad_norm": 625.4715576171875, - "learning_rate": 3.419013025364727e-05, - "loss": 52.2098, - "step": 110540 - }, - { - "epoch": 0.4466359886391642, - "grad_norm": 1380.083740234375, - "learning_rate": 3.4186883914705835e-05, - "loss": 110.8045, - "step": 110550 - }, - { - "epoch": 0.4466763899045318, - "grad_norm": 664.3757934570312, - "learning_rate": 3.418363739666137e-05, - "loss": 33.5992, - "step": 110560 - }, - { - "epoch": 0.44671679116989943, - "grad_norm": 352.8798522949219, - "learning_rate": 3.418039069957717e-05, - "loss": 54.5655, - "step": 110570 - }, - { - "epoch": 0.44675719243526707, - "grad_norm": 196.41831970214844, - "learning_rate": 3.417714382351652e-05, - "loss": 46.0049, - "step": 110580 - }, - { - "epoch": 0.4467975937006347, - "grad_norm": 1419.017822265625, - "learning_rate": 3.417389676854274e-05, - "loss": 87.4692, - "step": 110590 - }, - { - "epoch": 0.44683799496600235, - "grad_norm": 515.550537109375, - "learning_rate": 3.417064953471911e-05, - "loss": 46.8774, - "step": 110600 - }, - { - "epoch": 0.44687839623137, - "grad_norm": 729.7348022460938, - "learning_rate": 3.416740212210894e-05, - "loss": 47.5663, - "step": 110610 - }, - { - "epoch": 0.4469187974967376, - "grad_norm": 618.8557739257812, - "learning_rate": 3.416415453077555e-05, - "loss": 56.8461, - "step": 110620 - }, - { - "epoch": 0.4469591987621052, - "grad_norm": 1495.1834716796875, - "learning_rate": 3.416090676078225e-05, - "loss": 71.6594, - "step": 110630 - }, - { - "epoch": 0.44699960002747285, - "grad_norm": 537.9067993164062, - "learning_rate": 3.415765881219236e-05, - "loss": 75.8224, - "step": 110640 - }, - { - "epoch": 0.4470400012928405, - "grad_norm": 1021.9192504882812, - "learning_rate": 3.4154410685069196e-05, - "loss": 71.1588, - "step": 110650 - }, - { - "epoch": 0.44708040255820813, - "grad_norm": 1147.8583984375, - "learning_rate": 3.4151162379476075e-05, - "loss": 99.0359, - "step": 110660 - }, - { - "epoch": 0.4471208038235758, - "grad_norm": 951.2720947265625, - "learning_rate": 3.414791389547635e-05, - "loss": 62.2096, - "step": 110670 - }, - { - "epoch": 0.44716120508894336, - "grad_norm": 292.7242736816406, - "learning_rate": 3.414466523313332e-05, - "loss": 40.3493, - "step": 110680 - }, - { - "epoch": 0.447201606354311, - "grad_norm": 1284.24755859375, - "learning_rate": 3.414141639251033e-05, - "loss": 49.9673, - "step": 110690 - }, - { - "epoch": 0.44724200761967864, - "grad_norm": 1174.4100341796875, - "learning_rate": 3.413816737367073e-05, - "loss": 76.3328, - "step": 110700 - }, - { - "epoch": 0.4472824088850463, - "grad_norm": 1334.654052734375, - "learning_rate": 3.4134918176677846e-05, - "loss": 68.6847, - "step": 110710 - }, - { - "epoch": 0.4473228101504139, - "grad_norm": 1329.33203125, - "learning_rate": 3.4131668801595027e-05, - "loss": 60.1202, - "step": 110720 - }, - { - "epoch": 0.44736321141578156, - "grad_norm": 472.8536071777344, - "learning_rate": 3.4128419248485635e-05, - "loss": 55.691, - "step": 110730 - }, - { - "epoch": 0.4474036126811492, - "grad_norm": 1049.84619140625, - "learning_rate": 3.4125169517413e-05, - "loss": 70.7531, - "step": 110740 - }, - { - "epoch": 0.4474440139465168, - "grad_norm": 527.7225952148438, - "learning_rate": 3.412191960844049e-05, - "loss": 48.9243, - "step": 110750 - }, - { - "epoch": 0.4474844152118844, - "grad_norm": 415.2084045410156, - "learning_rate": 3.411866952163146e-05, - "loss": 52.9608, - "step": 110760 - }, - { - "epoch": 0.44752481647725206, - "grad_norm": 1201.3553466796875, - "learning_rate": 3.4115419257049286e-05, - "loss": 71.9567, - "step": 110770 - }, - { - "epoch": 0.4475652177426197, - "grad_norm": 245.35345458984375, - "learning_rate": 3.4112168814757307e-05, - "loss": 69.7229, - "step": 110780 - }, - { - "epoch": 0.44760561900798734, - "grad_norm": 1845.09033203125, - "learning_rate": 3.41089181948189e-05, - "loss": 68.3285, - "step": 110790 - }, - { - "epoch": 0.447646020273355, - "grad_norm": 874.4844360351562, - "learning_rate": 3.410566739729746e-05, - "loss": 42.1089, - "step": 110800 - }, - { - "epoch": 0.44768642153872257, - "grad_norm": 466.1338806152344, - "learning_rate": 3.410241642225633e-05, - "loss": 53.4753, - "step": 110810 - }, - { - "epoch": 0.4477268228040902, - "grad_norm": 391.43475341796875, - "learning_rate": 3.409916526975892e-05, - "loss": 56.1511, - "step": 110820 - }, - { - "epoch": 0.44776722406945785, - "grad_norm": 1110.3125, - "learning_rate": 3.409591393986859e-05, - "loss": 56.4363, - "step": 110830 - }, - { - "epoch": 0.4478076253348255, - "grad_norm": 441.3172302246094, - "learning_rate": 3.409266243264874e-05, - "loss": 54.6422, - "step": 110840 - }, - { - "epoch": 0.4478480266001931, - "grad_norm": 868.8545532226562, - "learning_rate": 3.408941074816275e-05, - "loss": 56.9401, - "step": 110850 - }, - { - "epoch": 0.44788842786556077, - "grad_norm": 988.2617797851562, - "learning_rate": 3.408615888647402e-05, - "loss": 58.8403, - "step": 110860 - }, - { - "epoch": 0.4479288291309284, - "grad_norm": 1164.9776611328125, - "learning_rate": 3.408290684764594e-05, - "loss": 63.0026, - "step": 110870 - }, - { - "epoch": 0.447969230396296, - "grad_norm": 592.5801391601562, - "learning_rate": 3.407965463174192e-05, - "loss": 52.5244, - "step": 110880 - }, - { - "epoch": 0.44800963166166363, - "grad_norm": 647.7515258789062, - "learning_rate": 3.407640223882536e-05, - "loss": 29.65, - "step": 110890 - }, - { - "epoch": 0.44805003292703127, - "grad_norm": 954.7613525390625, - "learning_rate": 3.407314966895966e-05, - "loss": 56.6861, - "step": 110900 - }, - { - "epoch": 0.4480904341923989, - "grad_norm": 997.4801025390625, - "learning_rate": 3.406989692220824e-05, - "loss": 72.3521, - "step": 110910 - }, - { - "epoch": 0.44813083545776655, - "grad_norm": 308.1066589355469, - "learning_rate": 3.4066643998634505e-05, - "loss": 79.9528, - "step": 110920 - }, - { - "epoch": 0.4481712367231342, - "grad_norm": 1413.3211669921875, - "learning_rate": 3.406339089830188e-05, - "loss": 90.6204, - "step": 110930 - }, - { - "epoch": 0.4482116379885018, - "grad_norm": 761.3001098632812, - "learning_rate": 3.406013762127379e-05, - "loss": 59.6376, - "step": 110940 - }, - { - "epoch": 0.4482520392538694, - "grad_norm": 550.8245239257812, - "learning_rate": 3.405688416761364e-05, - "loss": 61.1043, - "step": 110950 - }, - { - "epoch": 0.44829244051923706, - "grad_norm": 1125.8817138671875, - "learning_rate": 3.4053630537384885e-05, - "loss": 54.3687, - "step": 110960 - }, - { - "epoch": 0.4483328417846047, - "grad_norm": 1198.2652587890625, - "learning_rate": 3.4050376730650935e-05, - "loss": 52.4602, - "step": 110970 - }, - { - "epoch": 0.44837324304997234, - "grad_norm": 293.5941162109375, - "learning_rate": 3.4047122747475224e-05, - "loss": 48.189, - "step": 110980 - }, - { - "epoch": 0.44841364431534, - "grad_norm": 1793.2451171875, - "learning_rate": 3.40438685879212e-05, - "loss": 58.882, - "step": 110990 - }, - { - "epoch": 0.44845404558070756, - "grad_norm": 405.7001037597656, - "learning_rate": 3.4040614252052305e-05, - "loss": 87.0919, - "step": 111000 - }, - { - "epoch": 0.4484944468460752, - "grad_norm": 609.4334716796875, - "learning_rate": 3.403735973993198e-05, - "loss": 88.5648, - "step": 111010 - }, - { - "epoch": 0.44853484811144284, - "grad_norm": 476.433837890625, - "learning_rate": 3.403410505162369e-05, - "loss": 72.3557, - "step": 111020 - }, - { - "epoch": 0.4485752493768105, - "grad_norm": 547.7504272460938, - "learning_rate": 3.403085018719085e-05, - "loss": 71.4732, - "step": 111030 - }, - { - "epoch": 0.4486156506421781, - "grad_norm": 618.4702758789062, - "learning_rate": 3.402759514669694e-05, - "loss": 44.4472, - "step": 111040 - }, - { - "epoch": 0.44865605190754576, - "grad_norm": 731.4344482421875, - "learning_rate": 3.4024339930205415e-05, - "loss": 53.5866, - "step": 111050 - }, - { - "epoch": 0.4486964531729134, - "grad_norm": 457.8855895996094, - "learning_rate": 3.402108453777974e-05, - "loss": 58.6352, - "step": 111060 - }, - { - "epoch": 0.448736854438281, - "grad_norm": 612.9066772460938, - "learning_rate": 3.401782896948338e-05, - "loss": 80.7433, - "step": 111070 - }, - { - "epoch": 0.4487772557036486, - "grad_norm": 590.0531616210938, - "learning_rate": 3.401457322537979e-05, - "loss": 49.937, - "step": 111080 - }, - { - "epoch": 0.44881765696901627, - "grad_norm": 579.5253295898438, - "learning_rate": 3.401131730553247e-05, - "loss": 53.8527, - "step": 111090 - }, - { - "epoch": 0.4488580582343839, - "grad_norm": 761.0218505859375, - "learning_rate": 3.400806121000487e-05, - "loss": 57.2909, - "step": 111100 - }, - { - "epoch": 0.44889845949975155, - "grad_norm": 938.6339721679688, - "learning_rate": 3.400480493886048e-05, - "loss": 54.5442, - "step": 111110 - }, - { - "epoch": 0.4489388607651192, - "grad_norm": 800.48486328125, - "learning_rate": 3.400154849216278e-05, - "loss": 50.155, - "step": 111120 - }, - { - "epoch": 0.44897926203048677, - "grad_norm": 1541.491943359375, - "learning_rate": 3.3998291869975266e-05, - "loss": 47.9149, - "step": 111130 - }, - { - "epoch": 0.4490196632958544, - "grad_norm": 631.569580078125, - "learning_rate": 3.399503507236141e-05, - "loss": 60.8179, - "step": 111140 - }, - { - "epoch": 0.44906006456122205, - "grad_norm": 233.70372009277344, - "learning_rate": 3.399177809938472e-05, - "loss": 33.1188, - "step": 111150 - }, - { - "epoch": 0.4491004658265897, - "grad_norm": 492.7761535644531, - "learning_rate": 3.398852095110868e-05, - "loss": 62.9832, - "step": 111160 - }, - { - "epoch": 0.44914086709195733, - "grad_norm": 691.3985595703125, - "learning_rate": 3.398526362759681e-05, - "loss": 58.8685, - "step": 111170 - }, - { - "epoch": 0.44918126835732497, - "grad_norm": 763.817626953125, - "learning_rate": 3.3982006128912584e-05, - "loss": 34.1555, - "step": 111180 - }, - { - "epoch": 0.4492216696226926, - "grad_norm": 794.317626953125, - "learning_rate": 3.3978748455119536e-05, - "loss": 71.4623, - "step": 111190 - }, - { - "epoch": 0.4492620708880602, - "grad_norm": 423.894287109375, - "learning_rate": 3.397549060628116e-05, - "loss": 56.8908, - "step": 111200 - }, - { - "epoch": 0.44930247215342783, - "grad_norm": 2169.064697265625, - "learning_rate": 3.3972232582460974e-05, - "loss": 52.3652, - "step": 111210 - }, - { - "epoch": 0.4493428734187955, - "grad_norm": 363.28289794921875, - "learning_rate": 3.3968974383722495e-05, - "loss": 40.4274, - "step": 111220 - }, - { - "epoch": 0.4493832746841631, - "grad_norm": 973.9365234375, - "learning_rate": 3.3965716010129236e-05, - "loss": 106.0774, - "step": 111230 - }, - { - "epoch": 0.44942367594953075, - "grad_norm": 3008.888671875, - "learning_rate": 3.396245746174473e-05, - "loss": 44.22, - "step": 111240 - }, - { - "epoch": 0.4494640772148984, - "grad_norm": 926.1411743164062, - "learning_rate": 3.39591987386325e-05, - "loss": 46.0268, - "step": 111250 - }, - { - "epoch": 0.449504478480266, - "grad_norm": 177.50221252441406, - "learning_rate": 3.3955939840856096e-05, - "loss": 44.916, - "step": 111260 - }, - { - "epoch": 0.4495448797456336, - "grad_norm": 779.60888671875, - "learning_rate": 3.395268076847902e-05, - "loss": 56.0308, - "step": 111270 - }, - { - "epoch": 0.44958528101100126, - "grad_norm": 323.57818603515625, - "learning_rate": 3.394942152156482e-05, - "loss": 40.499, - "step": 111280 - }, - { - "epoch": 0.4496256822763689, - "grad_norm": 513.7103881835938, - "learning_rate": 3.394616210017705e-05, - "loss": 44.3521, - "step": 111290 - }, - { - "epoch": 0.44966608354173654, - "grad_norm": 657.2388305664062, - "learning_rate": 3.3942902504379235e-05, - "loss": 72.6372, - "step": 111300 - }, - { - "epoch": 0.4497064848071042, - "grad_norm": 513.4599609375, - "learning_rate": 3.3939642734234936e-05, - "loss": 118.4347, - "step": 111310 - }, - { - "epoch": 0.44974688607247176, - "grad_norm": 719.9033203125, - "learning_rate": 3.39363827898077e-05, - "loss": 85.1415, - "step": 111320 - }, - { - "epoch": 0.4497872873378394, - "grad_norm": 1146.169677734375, - "learning_rate": 3.393312267116107e-05, - "loss": 60.6316, - "step": 111330 - }, - { - "epoch": 0.44982768860320704, - "grad_norm": 633.3748168945312, - "learning_rate": 3.392986237835863e-05, - "loss": 60.0308, - "step": 111340 - }, - { - "epoch": 0.4498680898685747, - "grad_norm": 436.7900390625, - "learning_rate": 3.3926601911463915e-05, - "loss": 54.0903, - "step": 111350 - }, - { - "epoch": 0.4499084911339423, - "grad_norm": 425.1293029785156, - "learning_rate": 3.392334127054051e-05, - "loss": 59.7452, - "step": 111360 - }, - { - "epoch": 0.44994889239930996, - "grad_norm": 1201.4161376953125, - "learning_rate": 3.392008045565197e-05, - "loss": 56.7192, - "step": 111370 - }, - { - "epoch": 0.4499892936646776, - "grad_norm": 287.7637939453125, - "learning_rate": 3.391681946686186e-05, - "loss": 67.3147, - "step": 111380 - }, - { - "epoch": 0.4500296949300452, - "grad_norm": 818.6224975585938, - "learning_rate": 3.3913558304233776e-05, - "loss": 53.3463, - "step": 111390 - }, - { - "epoch": 0.4500700961954128, - "grad_norm": 385.80987548828125, - "learning_rate": 3.3910296967831266e-05, - "loss": 66.1127, - "step": 111400 - }, - { - "epoch": 0.45011049746078047, - "grad_norm": 1808.7755126953125, - "learning_rate": 3.3907035457717944e-05, - "loss": 67.1233, - "step": 111410 - }, - { - "epoch": 0.4501508987261481, - "grad_norm": 512.4915771484375, - "learning_rate": 3.390377377395738e-05, - "loss": 52.8904, - "step": 111420 - }, - { - "epoch": 0.45019129999151575, - "grad_norm": 1636.7056884765625, - "learning_rate": 3.3900511916613155e-05, - "loss": 101.467, - "step": 111430 - }, - { - "epoch": 0.4502317012568834, - "grad_norm": 682.1113891601562, - "learning_rate": 3.389724988574887e-05, - "loss": 47.2666, - "step": 111440 - }, - { - "epoch": 0.45027210252225097, - "grad_norm": 401.15771484375, - "learning_rate": 3.389398768142812e-05, - "loss": 40.7815, - "step": 111450 - }, - { - "epoch": 0.4503125037876186, - "grad_norm": 1251.0804443359375, - "learning_rate": 3.389072530371451e-05, - "loss": 49.2346, - "step": 111460 - }, - { - "epoch": 0.45035290505298625, - "grad_norm": 642.7127075195312, - "learning_rate": 3.388746275267162e-05, - "loss": 63.2051, - "step": 111470 - }, - { - "epoch": 0.4503933063183539, - "grad_norm": 605.9111938476562, - "learning_rate": 3.388420002836307e-05, - "loss": 52.5846, - "step": 111480 - }, - { - "epoch": 0.45043370758372153, - "grad_norm": 1575.560302734375, - "learning_rate": 3.3880937130852466e-05, - "loss": 35.7747, - "step": 111490 - }, - { - "epoch": 0.45047410884908917, - "grad_norm": 645.1834716796875, - "learning_rate": 3.387767406020343e-05, - "loss": 55.3005, - "step": 111500 - }, - { - "epoch": 0.4505145101144568, - "grad_norm": 666.5519409179688, - "learning_rate": 3.3874410816479564e-05, - "loss": 48.5718, - "step": 111510 - }, - { - "epoch": 0.4505549113798244, - "grad_norm": 654.6434326171875, - "learning_rate": 3.387114739974448e-05, - "loss": 81.415, - "step": 111520 - }, - { - "epoch": 0.45059531264519204, - "grad_norm": 2661.026123046875, - "learning_rate": 3.3867883810061824e-05, - "loss": 84.8381, - "step": 111530 - }, - { - "epoch": 0.4506357139105597, - "grad_norm": 2447.82470703125, - "learning_rate": 3.38646200474952e-05, - "loss": 48.1841, - "step": 111540 - }, - { - "epoch": 0.4506761151759273, - "grad_norm": 708.4982299804688, - "learning_rate": 3.3861356112108247e-05, - "loss": 56.9643, - "step": 111550 - }, - { - "epoch": 0.45071651644129496, - "grad_norm": 489.48846435546875, - "learning_rate": 3.3858092003964594e-05, - "loss": 57.1001, - "step": 111560 - }, - { - "epoch": 0.4507569177066626, - "grad_norm": 538.01806640625, - "learning_rate": 3.385482772312787e-05, - "loss": 43.918, - "step": 111570 - }, - { - "epoch": 0.4507973189720302, - "grad_norm": 624.0175170898438, - "learning_rate": 3.3851563269661726e-05, - "loss": 95.1447, - "step": 111580 - }, - { - "epoch": 0.4508377202373978, - "grad_norm": 565.3558959960938, - "learning_rate": 3.38482986436298e-05, - "loss": 44.5088, - "step": 111590 - }, - { - "epoch": 0.45087812150276546, - "grad_norm": 746.841796875, - "learning_rate": 3.384503384509574e-05, - "loss": 44.0926, - "step": 111600 - }, - { - "epoch": 0.4509185227681331, - "grad_norm": 969.9041748046875, - "learning_rate": 3.384176887412318e-05, - "loss": 38.6924, - "step": 111610 - }, - { - "epoch": 0.45095892403350074, - "grad_norm": 971.7702026367188, - "learning_rate": 3.38385037307758e-05, - "loss": 56.2464, - "step": 111620 - }, - { - "epoch": 0.4509993252988684, - "grad_norm": 1089.8248291015625, - "learning_rate": 3.383523841511723e-05, - "loss": 89.415, - "step": 111630 - }, - { - "epoch": 0.45103972656423597, - "grad_norm": 535.305419921875, - "learning_rate": 3.3831972927211135e-05, - "loss": 41.9581, - "step": 111640 - }, - { - "epoch": 0.4510801278296036, - "grad_norm": 1719.795166015625, - "learning_rate": 3.382870726712119e-05, - "loss": 74.1624, - "step": 111650 - }, - { - "epoch": 0.45112052909497125, - "grad_norm": 1085.8828125, - "learning_rate": 3.382544143491104e-05, - "loss": 81.0191, - "step": 111660 - }, - { - "epoch": 0.4511609303603389, - "grad_norm": 409.8812561035156, - "learning_rate": 3.382217543064438e-05, - "loss": 40.052, - "step": 111670 - }, - { - "epoch": 0.4512013316257065, - "grad_norm": 1443.2547607421875, - "learning_rate": 3.381890925438486e-05, - "loss": 37.1698, - "step": 111680 - }, - { - "epoch": 0.45124173289107417, - "grad_norm": 1432.521728515625, - "learning_rate": 3.3815642906196156e-05, - "loss": 75.6335, - "step": 111690 - }, - { - "epoch": 0.4512821341564418, - "grad_norm": 1234.38720703125, - "learning_rate": 3.381237638614196e-05, - "loss": 48.0366, - "step": 111700 - }, - { - "epoch": 0.4513225354218094, - "grad_norm": 840.4757690429688, - "learning_rate": 3.380910969428596e-05, - "loss": 58.3024, - "step": 111710 - }, - { - "epoch": 0.45136293668717703, - "grad_norm": 591.4329833984375, - "learning_rate": 3.380584283069183e-05, - "loss": 42.8441, - "step": 111720 - }, - { - "epoch": 0.45140333795254467, - "grad_norm": 579.3867797851562, - "learning_rate": 3.380257579542325e-05, - "loss": 100.9832, - "step": 111730 - }, - { - "epoch": 0.4514437392179123, - "grad_norm": 1018.6928100585938, - "learning_rate": 3.379930858854392e-05, - "loss": 40.5557, - "step": 111740 - }, - { - "epoch": 0.45148414048327995, - "grad_norm": 475.0141906738281, - "learning_rate": 3.3796041210117546e-05, - "loss": 47.8023, - "step": 111750 - }, - { - "epoch": 0.4515245417486476, - "grad_norm": 1127.599853515625, - "learning_rate": 3.379277366020782e-05, - "loss": 56.3643, - "step": 111760 - }, - { - "epoch": 0.4515649430140152, - "grad_norm": 632.768310546875, - "learning_rate": 3.3789505938878443e-05, - "loss": 42.618, - "step": 111770 - }, - { - "epoch": 0.4516053442793828, - "grad_norm": 558.4849853515625, - "learning_rate": 3.378623804619313e-05, - "loss": 38.2455, - "step": 111780 - }, - { - "epoch": 0.45164574554475045, - "grad_norm": 1455.6197509765625, - "learning_rate": 3.378296998221557e-05, - "loss": 54.4919, - "step": 111790 - }, - { - "epoch": 0.4516861468101181, - "grad_norm": 650.8120727539062, - "learning_rate": 3.3779701747009504e-05, - "loss": 68.0886, - "step": 111800 - }, - { - "epoch": 0.45172654807548573, - "grad_norm": 613.3826904296875, - "learning_rate": 3.377643334063862e-05, - "loss": 47.3737, - "step": 111810 - }, - { - "epoch": 0.4517669493408534, - "grad_norm": 814.562744140625, - "learning_rate": 3.3773164763166655e-05, - "loss": 70.561, - "step": 111820 - }, - { - "epoch": 0.451807350606221, - "grad_norm": 785.9385986328125, - "learning_rate": 3.376989601465733e-05, - "loss": 38.8111, - "step": 111830 - }, - { - "epoch": 0.4518477518715886, - "grad_norm": 640.478759765625, - "learning_rate": 3.376662709517435e-05, - "loss": 55.49, - "step": 111840 - }, - { - "epoch": 0.45188815313695624, - "grad_norm": 590.5025024414062, - "learning_rate": 3.3763358004781475e-05, - "loss": 59.1937, - "step": 111850 - }, - { - "epoch": 0.4519285544023239, - "grad_norm": 681.9274291992188, - "learning_rate": 3.3760088743542424e-05, - "loss": 121.1419, - "step": 111860 - }, - { - "epoch": 0.4519689556676915, - "grad_norm": 1086.4017333984375, - "learning_rate": 3.375681931152093e-05, - "loss": 57.0334, - "step": 111870 - }, - { - "epoch": 0.45200935693305916, - "grad_norm": 614.7588500976562, - "learning_rate": 3.375354970878073e-05, - "loss": 51.712, - "step": 111880 - }, - { - "epoch": 0.4520497581984268, - "grad_norm": 1066.3997802734375, - "learning_rate": 3.375027993538559e-05, - "loss": 62.087, - "step": 111890 - }, - { - "epoch": 0.4520901594637944, - "grad_norm": 644.5421142578125, - "learning_rate": 3.374700999139923e-05, - "loss": 58.5303, - "step": 111900 - }, - { - "epoch": 0.452130560729162, - "grad_norm": 840.0023193359375, - "learning_rate": 3.37437398768854e-05, - "loss": 71.8506, - "step": 111910 - }, - { - "epoch": 0.45217096199452966, - "grad_norm": 1441.012451171875, - "learning_rate": 3.374046959190786e-05, - "loss": 92.4958, - "step": 111920 - }, - { - "epoch": 0.4522113632598973, - "grad_norm": 1386.08349609375, - "learning_rate": 3.3737199136530364e-05, - "loss": 51.7875, - "step": 111930 - }, - { - "epoch": 0.45225176452526494, - "grad_norm": 1856.324951171875, - "learning_rate": 3.373392851081668e-05, - "loss": 74.6996, - "step": 111940 - }, - { - "epoch": 0.4522921657906326, - "grad_norm": 371.86566162109375, - "learning_rate": 3.373065771483056e-05, - "loss": 53.6041, - "step": 111950 - }, - { - "epoch": 0.45233256705600017, - "grad_norm": 862.8280639648438, - "learning_rate": 3.372738674863577e-05, - "loss": 51.361, - "step": 111960 - }, - { - "epoch": 0.4523729683213678, - "grad_norm": 923.8464965820312, - "learning_rate": 3.372411561229609e-05, - "loss": 43.7514, - "step": 111970 - }, - { - "epoch": 0.45241336958673545, - "grad_norm": 716.6856079101562, - "learning_rate": 3.372084430587528e-05, - "loss": 39.6764, - "step": 111980 - }, - { - "epoch": 0.4524537708521031, - "grad_norm": 593.4553833007812, - "learning_rate": 3.371757282943712e-05, - "loss": 39.9165, - "step": 111990 - }, - { - "epoch": 0.4524941721174707, - "grad_norm": 496.314208984375, - "learning_rate": 3.3714301183045385e-05, - "loss": 78.6274, - "step": 112000 - }, - { - "epoch": 0.45253457338283837, - "grad_norm": 811.6598510742188, - "learning_rate": 3.3711029366763866e-05, - "loss": 34.8752, - "step": 112010 - }, - { - "epoch": 0.452574974648206, - "grad_norm": 1419.599609375, - "learning_rate": 3.370775738065634e-05, - "loss": 81.8719, - "step": 112020 - }, - { - "epoch": 0.4526153759135736, - "grad_norm": 1418.275146484375, - "learning_rate": 3.370448522478661e-05, - "loss": 65.6506, - "step": 112030 - }, - { - "epoch": 0.45265577717894123, - "grad_norm": 8803.1748046875, - "learning_rate": 3.370121289921845e-05, - "loss": 112.1481, - "step": 112040 - }, - { - "epoch": 0.45269617844430887, - "grad_norm": 328.6112060546875, - "learning_rate": 3.369794040401567e-05, - "loss": 38.9246, - "step": 112050 - }, - { - "epoch": 0.4527365797096765, - "grad_norm": 576.2307739257812, - "learning_rate": 3.3694667739242066e-05, - "loss": 54.7131, - "step": 112060 - }, - { - "epoch": 0.45277698097504415, - "grad_norm": 1210.724365234375, - "learning_rate": 3.369139490496144e-05, - "loss": 60.4555, - "step": 112070 - }, - { - "epoch": 0.4528173822404118, - "grad_norm": 1678.520751953125, - "learning_rate": 3.368812190123759e-05, - "loss": 53.7549, - "step": 112080 - }, - { - "epoch": 0.4528577835057794, - "grad_norm": 754.4735717773438, - "learning_rate": 3.3684848728134334e-05, - "loss": 55.5762, - "step": 112090 - }, - { - "epoch": 0.452898184771147, - "grad_norm": 294.9254455566406, - "learning_rate": 3.368157538571548e-05, - "loss": 62.4506, - "step": 112100 - }, - { - "epoch": 0.45293858603651466, - "grad_norm": 1608.3568115234375, - "learning_rate": 3.367830187404484e-05, - "loss": 55.6337, - "step": 112110 - }, - { - "epoch": 0.4529789873018823, - "grad_norm": 680.6564331054688, - "learning_rate": 3.367502819318624e-05, - "loss": 49.9141, - "step": 112120 - }, - { - "epoch": 0.45301938856724994, - "grad_norm": 1690.9805908203125, - "learning_rate": 3.36717543432035e-05, - "loss": 58.1772, - "step": 112130 - }, - { - "epoch": 0.4530597898326176, - "grad_norm": 564.469482421875, - "learning_rate": 3.366848032416045e-05, - "loss": 59.8349, - "step": 112140 - }, - { - "epoch": 0.4531001910979852, - "grad_norm": 725.934326171875, - "learning_rate": 3.3665206136120906e-05, - "loss": 72.5532, - "step": 112150 - }, - { - "epoch": 0.4531405923633528, - "grad_norm": 949.9281616210938, - "learning_rate": 3.3661931779148707e-05, - "loss": 65.5223, - "step": 112160 - }, - { - "epoch": 0.45318099362872044, - "grad_norm": 1123.207763671875, - "learning_rate": 3.365865725330769e-05, - "loss": 52.4678, - "step": 112170 - }, - { - "epoch": 0.4532213948940881, - "grad_norm": 264.70001220703125, - "learning_rate": 3.3655382558661685e-05, - "loss": 64.133, - "step": 112180 - }, - { - "epoch": 0.4532617961594557, - "grad_norm": 1888.4464111328125, - "learning_rate": 3.3652107695274555e-05, - "loss": 45.2613, - "step": 112190 - }, - { - "epoch": 0.45330219742482336, - "grad_norm": 539.1635131835938, - "learning_rate": 3.3648832663210124e-05, - "loss": 95.8625, - "step": 112200 - }, - { - "epoch": 0.453342598690191, - "grad_norm": 510.55816650390625, - "learning_rate": 3.3645557462532245e-05, - "loss": 39.8649, - "step": 112210 - }, - { - "epoch": 0.4533829999555586, - "grad_norm": 1129.6290283203125, - "learning_rate": 3.364228209330477e-05, - "loss": 54.8464, - "step": 112220 - }, - { - "epoch": 0.4534234012209262, - "grad_norm": 718.903564453125, - "learning_rate": 3.363900655559157e-05, - "loss": 70.0421, - "step": 112230 - }, - { - "epoch": 0.45346380248629387, - "grad_norm": 765.8641967773438, - "learning_rate": 3.363573084945648e-05, - "loss": 43.2255, - "step": 112240 - }, - { - "epoch": 0.4535042037516615, - "grad_norm": 305.63763427734375, - "learning_rate": 3.363245497496337e-05, - "loss": 55.8746, - "step": 112250 - }, - { - "epoch": 0.45354460501702915, - "grad_norm": 359.51715087890625, - "learning_rate": 3.362917893217611e-05, - "loss": 73.9199, - "step": 112260 - }, - { - "epoch": 0.4535850062823968, - "grad_norm": 516.9877319335938, - "learning_rate": 3.362590272115855e-05, - "loss": 67.4331, - "step": 112270 - }, - { - "epoch": 0.45362540754776437, - "grad_norm": 862.2160034179688, - "learning_rate": 3.3622626341974594e-05, - "loss": 57.4161, - "step": 112280 - }, - { - "epoch": 0.453665808813132, - "grad_norm": 1069.54150390625, - "learning_rate": 3.361934979468809e-05, - "loss": 57.9138, - "step": 112290 - }, - { - "epoch": 0.45370621007849965, - "grad_norm": 375.27825927734375, - "learning_rate": 3.3616073079362926e-05, - "loss": 51.6367, - "step": 112300 - }, - { - "epoch": 0.4537466113438673, - "grad_norm": 450.5061950683594, - "learning_rate": 3.361279619606299e-05, - "loss": 61.73, - "step": 112310 - }, - { - "epoch": 0.45378701260923493, - "grad_norm": 1464.5079345703125, - "learning_rate": 3.360951914485215e-05, - "loss": 52.9956, - "step": 112320 - }, - { - "epoch": 0.45382741387460257, - "grad_norm": 324.18194580078125, - "learning_rate": 3.3606241925794295e-05, - "loss": 47.1093, - "step": 112330 - }, - { - "epoch": 0.4538678151399702, - "grad_norm": 836.4578857421875, - "learning_rate": 3.360296453895333e-05, - "loss": 56.2767, - "step": 112340 - }, - { - "epoch": 0.4539082164053378, - "grad_norm": 1074.7415771484375, - "learning_rate": 3.3599686984393134e-05, - "loss": 50.7534, - "step": 112350 - }, - { - "epoch": 0.45394861767070543, - "grad_norm": 225.61444091796875, - "learning_rate": 3.359640926217763e-05, - "loss": 40.0397, - "step": 112360 - }, - { - "epoch": 0.4539890189360731, - "grad_norm": 1213.6234130859375, - "learning_rate": 3.359313137237069e-05, - "loss": 47.421, - "step": 112370 - }, - { - "epoch": 0.4540294202014407, - "grad_norm": 640.165283203125, - "learning_rate": 3.3589853315036225e-05, - "loss": 60.4271, - "step": 112380 - }, - { - "epoch": 0.45406982146680835, - "grad_norm": 354.8425598144531, - "learning_rate": 3.358657509023815e-05, - "loss": 58.7236, - "step": 112390 - }, - { - "epoch": 0.454110222732176, - "grad_norm": 1010.51123046875, - "learning_rate": 3.3583296698040384e-05, - "loss": 67.7665, - "step": 112400 - }, - { - "epoch": 0.4541506239975436, - "grad_norm": 313.4957580566406, - "learning_rate": 3.3580018138506824e-05, - "loss": 73.5613, - "step": 112410 - }, - { - "epoch": 0.4541910252629112, - "grad_norm": 623.6607055664062, - "learning_rate": 3.3576739411701394e-05, - "loss": 58.9651, - "step": 112420 - }, - { - "epoch": 0.45423142652827886, - "grad_norm": 870.5923461914062, - "learning_rate": 3.357346051768801e-05, - "loss": 64.04, - "step": 112430 - }, - { - "epoch": 0.4542718277936465, - "grad_norm": 499.6788330078125, - "learning_rate": 3.35701814565306e-05, - "loss": 59.2575, - "step": 112440 - }, - { - "epoch": 0.45431222905901414, - "grad_norm": 902.0311889648438, - "learning_rate": 3.356690222829309e-05, - "loss": 37.0818, - "step": 112450 - }, - { - "epoch": 0.4543526303243818, - "grad_norm": 713.7149047851562, - "learning_rate": 3.356362283303941e-05, - "loss": 52.5913, - "step": 112460 - }, - { - "epoch": 0.45439303158974936, - "grad_norm": 671.7296142578125, - "learning_rate": 3.3560343270833495e-05, - "loss": 53.5628, - "step": 112470 - }, - { - "epoch": 0.454433432855117, - "grad_norm": 1461.3150634765625, - "learning_rate": 3.355706354173928e-05, - "loss": 78.7695, - "step": 112480 - }, - { - "epoch": 0.45447383412048464, - "grad_norm": 1066.4896240234375, - "learning_rate": 3.3553783645820715e-05, - "loss": 74.929, - "step": 112490 - }, - { - "epoch": 0.4545142353858523, - "grad_norm": 257.38531494140625, - "learning_rate": 3.355050358314172e-05, - "loss": 39.4283, - "step": 112500 - }, - { - "epoch": 0.4545546366512199, - "grad_norm": 685.919677734375, - "learning_rate": 3.354722335376626e-05, - "loss": 51.0791, - "step": 112510 - }, - { - "epoch": 0.45459503791658756, - "grad_norm": 1068.9114990234375, - "learning_rate": 3.354394295775829e-05, - "loss": 44.3589, - "step": 112520 - }, - { - "epoch": 0.4546354391819552, - "grad_norm": 604.07568359375, - "learning_rate": 3.354066239518174e-05, - "loss": 65.293, - "step": 112530 - }, - { - "epoch": 0.4546758404473228, - "grad_norm": 1334.7886962890625, - "learning_rate": 3.353738166610058e-05, - "loss": 52.1476, - "step": 112540 - }, - { - "epoch": 0.4547162417126904, - "grad_norm": 517.1060791015625, - "learning_rate": 3.353410077057877e-05, - "loss": 80.8657, - "step": 112550 - }, - { - "epoch": 0.45475664297805807, - "grad_norm": 479.4462890625, - "learning_rate": 3.3530819708680286e-05, - "loss": 54.3628, - "step": 112560 - }, - { - "epoch": 0.4547970442434257, - "grad_norm": 579.92041015625, - "learning_rate": 3.352753848046907e-05, - "loss": 41.2108, - "step": 112570 - }, - { - "epoch": 0.45483744550879335, - "grad_norm": 557.235107421875, - "learning_rate": 3.3524257086009104e-05, - "loss": 33.0313, - "step": 112580 - }, - { - "epoch": 0.454877846774161, - "grad_norm": 929.7172241210938, - "learning_rate": 3.352097552536435e-05, - "loss": 40.3618, - "step": 112590 - }, - { - "epoch": 0.45491824803952857, - "grad_norm": 1180.1580810546875, - "learning_rate": 3.35176937985988e-05, - "loss": 73.8001, - "step": 112600 - }, - { - "epoch": 0.4549586493048962, - "grad_norm": 931.85009765625, - "learning_rate": 3.351441190577642e-05, - "loss": 45.0227, - "step": 112610 - }, - { - "epoch": 0.45499905057026385, - "grad_norm": 911.3134155273438, - "learning_rate": 3.3511129846961184e-05, - "loss": 59.2229, - "step": 112620 - }, - { - "epoch": 0.4550394518356315, - "grad_norm": 675.170654296875, - "learning_rate": 3.35078476222171e-05, - "loss": 80.4119, - "step": 112630 - }, - { - "epoch": 0.45507985310099913, - "grad_norm": 1176.3988037109375, - "learning_rate": 3.350456523160815e-05, - "loss": 47.3479, - "step": 112640 - }, - { - "epoch": 0.45512025436636677, - "grad_norm": 969.1943359375, - "learning_rate": 3.350128267519832e-05, - "loss": 81.494, - "step": 112650 - }, - { - "epoch": 0.4551606556317344, - "grad_norm": 291.8066711425781, - "learning_rate": 3.349799995305162e-05, - "loss": 41.662, - "step": 112660 - }, - { - "epoch": 0.455201056897102, - "grad_norm": 658.59765625, - "learning_rate": 3.3494717065232016e-05, - "loss": 61.2986, - "step": 112670 - }, - { - "epoch": 0.45524145816246964, - "grad_norm": 917.7549438476562, - "learning_rate": 3.349143401180354e-05, - "loss": 63.2453, - "step": 112680 - }, - { - "epoch": 0.4552818594278373, - "grad_norm": 453.6599426269531, - "learning_rate": 3.348815079283018e-05, - "loss": 59.7746, - "step": 112690 - }, - { - "epoch": 0.4553222606932049, - "grad_norm": 850.1251220703125, - "learning_rate": 3.3484867408375954e-05, - "loss": 57.4509, - "step": 112700 - }, - { - "epoch": 0.45536266195857256, - "grad_norm": 701.5401000976562, - "learning_rate": 3.348158385850487e-05, - "loss": 41.1296, - "step": 112710 - }, - { - "epoch": 0.4554030632239402, - "grad_norm": 819.9852294921875, - "learning_rate": 3.347830014328094e-05, - "loss": 69.8444, - "step": 112720 - }, - { - "epoch": 0.4554434644893078, - "grad_norm": 720.2810668945312, - "learning_rate": 3.347501626276819e-05, - "loss": 75.9919, - "step": 112730 - }, - { - "epoch": 0.4554838657546754, - "grad_norm": 495.089599609375, - "learning_rate": 3.3471732217030625e-05, - "loss": 40.4414, - "step": 112740 - }, - { - "epoch": 0.45552426702004306, - "grad_norm": 573.2144775390625, - "learning_rate": 3.346844800613229e-05, - "loss": 43.111, - "step": 112750 - }, - { - "epoch": 0.4555646682854107, - "grad_norm": 1056.5557861328125, - "learning_rate": 3.346516363013719e-05, - "loss": 92.2572, - "step": 112760 - }, - { - "epoch": 0.45560506955077834, - "grad_norm": 487.2084655761719, - "learning_rate": 3.346187908910938e-05, - "loss": 36.6314, - "step": 112770 - }, - { - "epoch": 0.455645470816146, - "grad_norm": 501.5868225097656, - "learning_rate": 3.345859438311287e-05, - "loss": 56.4939, - "step": 112780 - }, - { - "epoch": 0.45568587208151357, - "grad_norm": 792.0189819335938, - "learning_rate": 3.345530951221171e-05, - "loss": 94.0912, - "step": 112790 - }, - { - "epoch": 0.4557262733468812, - "grad_norm": 778.9989624023438, - "learning_rate": 3.3452024476469934e-05, - "loss": 40.7863, - "step": 112800 - }, - { - "epoch": 0.45576667461224885, - "grad_norm": 1507.0234375, - "learning_rate": 3.3448739275951595e-05, - "loss": 61.6816, - "step": 112810 - }, - { - "epoch": 0.4558070758776165, - "grad_norm": 432.25244140625, - "learning_rate": 3.344545391072073e-05, - "loss": 44.9511, - "step": 112820 - }, - { - "epoch": 0.4558474771429841, - "grad_norm": 655.8712158203125, - "learning_rate": 3.34421683808414e-05, - "loss": 48.9905, - "step": 112830 - }, - { - "epoch": 0.45588787840835177, - "grad_norm": 940.5873413085938, - "learning_rate": 3.343888268637765e-05, - "loss": 59.8554, - "step": 112840 - }, - { - "epoch": 0.4559282796737194, - "grad_norm": 1201.72412109375, - "learning_rate": 3.343559682739353e-05, - "loss": 85.8798, - "step": 112850 - }, - { - "epoch": 0.455968680939087, - "grad_norm": 822.5703735351562, - "learning_rate": 3.343231080395312e-05, - "loss": 54.6579, - "step": 112860 - }, - { - "epoch": 0.45600908220445463, - "grad_norm": 476.291748046875, - "learning_rate": 3.342902461612045e-05, - "loss": 40.1478, - "step": 112870 - }, - { - "epoch": 0.45604948346982227, - "grad_norm": 1140.7523193359375, - "learning_rate": 3.3425738263959615e-05, - "loss": 36.0184, - "step": 112880 - }, - { - "epoch": 0.4560898847351899, - "grad_norm": 627.5087280273438, - "learning_rate": 3.3422451747534684e-05, - "loss": 67.8078, - "step": 112890 - }, - { - "epoch": 0.45613028600055755, - "grad_norm": 525.3790283203125, - "learning_rate": 3.3419165066909705e-05, - "loss": 47.1049, - "step": 112900 - }, - { - "epoch": 0.4561706872659252, - "grad_norm": 361.6174011230469, - "learning_rate": 3.3415878222148776e-05, - "loss": 66.291, - "step": 112910 - }, - { - "epoch": 0.4562110885312928, - "grad_norm": 1267.40966796875, - "learning_rate": 3.341259121331597e-05, - "loss": 39.7781, - "step": 112920 - }, - { - "epoch": 0.4562514897966604, - "grad_norm": 643.3392333984375, - "learning_rate": 3.340930404047537e-05, - "loss": 37.3639, - "step": 112930 - }, - { - "epoch": 0.45629189106202805, - "grad_norm": 481.6955871582031, - "learning_rate": 3.3406016703691055e-05, - "loss": 55.8988, - "step": 112940 - }, - { - "epoch": 0.4563322923273957, - "grad_norm": 650.4725952148438, - "learning_rate": 3.340272920302711e-05, - "loss": 91.6434, - "step": 112950 - }, - { - "epoch": 0.45637269359276333, - "grad_norm": 674.4364013671875, - "learning_rate": 3.339944153854764e-05, - "loss": 46.4506, - "step": 112960 - }, - { - "epoch": 0.456413094858131, - "grad_norm": 542.6135864257812, - "learning_rate": 3.3396153710316736e-05, - "loss": 54.1574, - "step": 112970 - }, - { - "epoch": 0.4564534961234986, - "grad_norm": 1714.6644287109375, - "learning_rate": 3.339286571839848e-05, - "loss": 96.3534, - "step": 112980 - }, - { - "epoch": 0.4564938973888662, - "grad_norm": 525.607177734375, - "learning_rate": 3.338957756285699e-05, - "loss": 67.7952, - "step": 112990 - }, - { - "epoch": 0.45653429865423384, - "grad_norm": 1665.643798828125, - "learning_rate": 3.338628924375638e-05, - "loss": 69.8561, - "step": 113000 - }, - { - "epoch": 0.4565746999196015, - "grad_norm": 1218.552490234375, - "learning_rate": 3.338300076116073e-05, - "loss": 91.2745, - "step": 113010 - }, - { - "epoch": 0.4566151011849691, - "grad_norm": 1521.2366943359375, - "learning_rate": 3.337971211513417e-05, - "loss": 59.6287, - "step": 113020 - }, - { - "epoch": 0.45665550245033676, - "grad_norm": 898.8818969726562, - "learning_rate": 3.337642330574081e-05, - "loss": 54.526, - "step": 113030 - }, - { - "epoch": 0.4566959037157044, - "grad_norm": 390.90435791015625, - "learning_rate": 3.3373134333044756e-05, - "loss": 35.1512, - "step": 113040 - }, - { - "epoch": 0.456736304981072, - "grad_norm": 788.0700073242188, - "learning_rate": 3.336984519711015e-05, - "loss": 46.5823, - "step": 113050 - }, - { - "epoch": 0.4567767062464396, - "grad_norm": 979.811767578125, - "learning_rate": 3.336655589800109e-05, - "loss": 58.1711, - "step": 113060 - }, - { - "epoch": 0.45681710751180726, - "grad_norm": 1041.4178466796875, - "learning_rate": 3.336326643578172e-05, - "loss": 55.3556, - "step": 113070 - }, - { - "epoch": 0.4568575087771749, - "grad_norm": 594.2887573242188, - "learning_rate": 3.3359976810516164e-05, - "loss": 52.3926, - "step": 113080 - }, - { - "epoch": 0.45689791004254254, - "grad_norm": 731.5540771484375, - "learning_rate": 3.335668702226856e-05, - "loss": 107.6374, - "step": 113090 - }, - { - "epoch": 0.4569383113079102, - "grad_norm": 608.2393798828125, - "learning_rate": 3.3353397071103046e-05, - "loss": 57.6193, - "step": 113100 - }, - { - "epoch": 0.45697871257327777, - "grad_norm": 617.5512084960938, - "learning_rate": 3.3350106957083744e-05, - "loss": 72.52, - "step": 113110 - }, - { - "epoch": 0.4570191138386454, - "grad_norm": 658.0664672851562, - "learning_rate": 3.334681668027481e-05, - "loss": 94.5809, - "step": 113120 - }, - { - "epoch": 0.45705951510401305, - "grad_norm": 543.9719848632812, - "learning_rate": 3.334352624074039e-05, - "loss": 41.8438, - "step": 113130 - }, - { - "epoch": 0.4570999163693807, - "grad_norm": 696.774658203125, - "learning_rate": 3.334023563854463e-05, - "loss": 76.3532, - "step": 113140 - }, - { - "epoch": 0.4571403176347483, - "grad_norm": 2398.177734375, - "learning_rate": 3.333694487375168e-05, - "loss": 38.9571, - "step": 113150 - }, - { - "epoch": 0.45718071890011597, - "grad_norm": 455.6088562011719, - "learning_rate": 3.33336539464257e-05, - "loss": 32.256, - "step": 113160 - }, - { - "epoch": 0.4572211201654836, - "grad_norm": 1092.000732421875, - "learning_rate": 3.3330362856630845e-05, - "loss": 59.1251, - "step": 113170 - }, - { - "epoch": 0.4572615214308512, - "grad_norm": 1621.383544921875, - "learning_rate": 3.3327071604431275e-05, - "loss": 43.2729, - "step": 113180 - }, - { - "epoch": 0.45730192269621883, - "grad_norm": 680.7568969726562, - "learning_rate": 3.3323780189891166e-05, - "loss": 49.1451, - "step": 113190 - }, - { - "epoch": 0.45734232396158647, - "grad_norm": 280.8765563964844, - "learning_rate": 3.332048861307467e-05, - "loss": 54.7998, - "step": 113200 - }, - { - "epoch": 0.4573827252269541, - "grad_norm": 457.403564453125, - "learning_rate": 3.331719687404597e-05, - "loss": 91.7487, - "step": 113210 - }, - { - "epoch": 0.45742312649232175, - "grad_norm": 777.8742065429688, - "learning_rate": 3.331390497286922e-05, - "loss": 44.5807, - "step": 113220 - }, - { - "epoch": 0.4574635277576894, - "grad_norm": 1098.077880859375, - "learning_rate": 3.331061290960863e-05, - "loss": 90.1487, - "step": 113230 - }, - { - "epoch": 0.457503929023057, - "grad_norm": 882.6914672851562, - "learning_rate": 3.3307320684328354e-05, - "loss": 68.7481, - "step": 113240 - }, - { - "epoch": 0.4575443302884246, - "grad_norm": 1128.127197265625, - "learning_rate": 3.330402829709258e-05, - "loss": 65.0538, - "step": 113250 - }, - { - "epoch": 0.45758473155379226, - "grad_norm": 937.3806762695312, - "learning_rate": 3.3300735747965505e-05, - "loss": 57.1227, - "step": 113260 - }, - { - "epoch": 0.4576251328191599, - "grad_norm": 911.514404296875, - "learning_rate": 3.329744303701132e-05, - "loss": 85.5697, - "step": 113270 - }, - { - "epoch": 0.45766553408452754, - "grad_norm": 532.0978393554688, - "learning_rate": 3.3294150164294204e-05, - "loss": 41.7875, - "step": 113280 - }, - { - "epoch": 0.4577059353498952, - "grad_norm": 2276.76904296875, - "learning_rate": 3.329085712987836e-05, - "loss": 58.2938, - "step": 113290 - }, - { - "epoch": 0.4577463366152628, - "grad_norm": 633.0645751953125, - "learning_rate": 3.3287563933827995e-05, - "loss": 45.8421, - "step": 113300 - }, - { - "epoch": 0.4577867378806304, - "grad_norm": 884.1083984375, - "learning_rate": 3.328427057620729e-05, - "loss": 69.57, - "step": 113310 - }, - { - "epoch": 0.45782713914599804, - "grad_norm": 832.331298828125, - "learning_rate": 3.328097705708047e-05, - "loss": 36.6988, - "step": 113320 - }, - { - "epoch": 0.4578675404113657, - "grad_norm": 1298.6260986328125, - "learning_rate": 3.3277683376511744e-05, - "loss": 67.1316, - "step": 113330 - }, - { - "epoch": 0.4579079416767333, - "grad_norm": 690.5106811523438, - "learning_rate": 3.327438953456532e-05, - "loss": 43.1363, - "step": 113340 - }, - { - "epoch": 0.45794834294210096, - "grad_norm": 638.0509643554688, - "learning_rate": 3.327109553130541e-05, - "loss": 64.9828, - "step": 113350 - }, - { - "epoch": 0.4579887442074686, - "grad_norm": 712.4091186523438, - "learning_rate": 3.326780136679623e-05, - "loss": 51.8144, - "step": 113360 - }, - { - "epoch": 0.4580291454728362, - "grad_norm": 768.2869873046875, - "learning_rate": 3.326450704110201e-05, - "loss": 43.4623, - "step": 113370 - }, - { - "epoch": 0.4580695467382038, - "grad_norm": 667.932373046875, - "learning_rate": 3.3261212554286975e-05, - "loss": 60.1954, - "step": 113380 - }, - { - "epoch": 0.45810994800357147, - "grad_norm": 797.756591796875, - "learning_rate": 3.3257917906415336e-05, - "loss": 64.3577, - "step": 113390 - }, - { - "epoch": 0.4581503492689391, - "grad_norm": 417.9449462890625, - "learning_rate": 3.325462309755134e-05, - "loss": 52.2687, - "step": 113400 - }, - { - "epoch": 0.45819075053430675, - "grad_norm": 131.72840881347656, - "learning_rate": 3.325132812775922e-05, - "loss": 53.6158, - "step": 113410 - }, - { - "epoch": 0.4582311517996744, - "grad_norm": 1277.050537109375, - "learning_rate": 3.324803299710321e-05, - "loss": 79.7133, - "step": 113420 - }, - { - "epoch": 0.45827155306504197, - "grad_norm": 247.889404296875, - "learning_rate": 3.3244737705647554e-05, - "loss": 79.97, - "step": 113430 - }, - { - "epoch": 0.4583119543304096, - "grad_norm": 1504.002685546875, - "learning_rate": 3.324144225345649e-05, - "loss": 57.3291, - "step": 113440 - }, - { - "epoch": 0.45835235559577725, - "grad_norm": 3024.2744140625, - "learning_rate": 3.3238146640594256e-05, - "loss": 55.5882, - "step": 113450 - }, - { - "epoch": 0.4583927568611449, - "grad_norm": 1827.9388427734375, - "learning_rate": 3.323485086712513e-05, - "loss": 68.6746, - "step": 113460 - }, - { - "epoch": 0.45843315812651253, - "grad_norm": 1997.11279296875, - "learning_rate": 3.323155493311334e-05, - "loss": 60.3449, - "step": 113470 - }, - { - "epoch": 0.45847355939188017, - "grad_norm": 1310.3616943359375, - "learning_rate": 3.322825883862314e-05, - "loss": 58.0651, - "step": 113480 - }, - { - "epoch": 0.4585139606572478, - "grad_norm": 542.6761474609375, - "learning_rate": 3.32249625837188e-05, - "loss": 45.5163, - "step": 113490 - }, - { - "epoch": 0.4585543619226154, - "grad_norm": 109.23764038085938, - "learning_rate": 3.322166616846458e-05, - "loss": 46.8832, - "step": 113500 - }, - { - "epoch": 0.45859476318798303, - "grad_norm": 770.7872314453125, - "learning_rate": 3.321836959292475e-05, - "loss": 54.6829, - "step": 113510 - }, - { - "epoch": 0.4586351644533507, - "grad_norm": 1391.15185546875, - "learning_rate": 3.321507285716357e-05, - "loss": 76.5766, - "step": 113520 - }, - { - "epoch": 0.4586755657187183, - "grad_norm": 1156.2738037109375, - "learning_rate": 3.321177596124532e-05, - "loss": 44.4677, - "step": 113530 - }, - { - "epoch": 0.45871596698408595, - "grad_norm": 882.244140625, - "learning_rate": 3.3208478905234274e-05, - "loss": 64.2383, - "step": 113540 - }, - { - "epoch": 0.4587563682494536, - "grad_norm": 1060.12841796875, - "learning_rate": 3.32051816891947e-05, - "loss": 49.1175, - "step": 113550 - }, - { - "epoch": 0.4587967695148212, - "grad_norm": 1234.741455078125, - "learning_rate": 3.320188431319088e-05, - "loss": 41.2987, - "step": 113560 - }, - { - "epoch": 0.4588371707801888, - "grad_norm": 437.5050048828125, - "learning_rate": 3.31985867772871e-05, - "loss": 63.9297, - "step": 113570 - }, - { - "epoch": 0.45887757204555646, - "grad_norm": 839.0947875976562, - "learning_rate": 3.319528908154766e-05, - "loss": 55.2399, - "step": 113580 - }, - { - "epoch": 0.4589179733109241, - "grad_norm": 1059.20654296875, - "learning_rate": 3.319199122603683e-05, - "loss": 35.0855, - "step": 113590 - }, - { - "epoch": 0.45895837457629174, - "grad_norm": 1006.2760009765625, - "learning_rate": 3.318869321081892e-05, - "loss": 75.6239, - "step": 113600 - }, - { - "epoch": 0.4589987758416594, - "grad_norm": 1427.097412109375, - "learning_rate": 3.3185395035958224e-05, - "loss": 53.2945, - "step": 113610 - }, - { - "epoch": 0.459039177107027, - "grad_norm": 960.8377075195312, - "learning_rate": 3.318209670151904e-05, - "loss": 74.3849, - "step": 113620 - }, - { - "epoch": 0.4590795783723946, - "grad_norm": 780.0336303710938, - "learning_rate": 3.317879820756566e-05, - "loss": 59.6619, - "step": 113630 - }, - { - "epoch": 0.45911997963776224, - "grad_norm": 913.7399291992188, - "learning_rate": 3.31754995541624e-05, - "loss": 54.3696, - "step": 113640 - }, - { - "epoch": 0.4591603809031299, - "grad_norm": 884.738037109375, - "learning_rate": 3.3172200741373563e-05, - "loss": 87.1222, - "step": 113650 - }, - { - "epoch": 0.4592007821684975, - "grad_norm": 495.6034851074219, - "learning_rate": 3.3168901769263474e-05, - "loss": 52.4804, - "step": 113660 - }, - { - "epoch": 0.45924118343386516, - "grad_norm": 1088.6365966796875, - "learning_rate": 3.316560263789643e-05, - "loss": 46.7347, - "step": 113670 - }, - { - "epoch": 0.4592815846992328, - "grad_norm": 724.1251831054688, - "learning_rate": 3.3162303347336764e-05, - "loss": 53.7025, - "step": 113680 - }, - { - "epoch": 0.4593219859646004, - "grad_norm": 982.661376953125, - "learning_rate": 3.315900389764879e-05, - "loss": 47.1129, - "step": 113690 - }, - { - "epoch": 0.459362387229968, - "grad_norm": 662.5863037109375, - "learning_rate": 3.315570428889684e-05, - "loss": 35.4883, - "step": 113700 - }, - { - "epoch": 0.45940278849533567, - "grad_norm": 1490.281005859375, - "learning_rate": 3.315240452114523e-05, - "loss": 52.5399, - "step": 113710 - }, - { - "epoch": 0.4594431897607033, - "grad_norm": 1237.041748046875, - "learning_rate": 3.31491045944583e-05, - "loss": 59.784, - "step": 113720 - }, - { - "epoch": 0.45948359102607095, - "grad_norm": 700.3053588867188, - "learning_rate": 3.314580450890038e-05, - "loss": 70.0428, - "step": 113730 - }, - { - "epoch": 0.4595239922914386, - "grad_norm": 851.7976684570312, - "learning_rate": 3.3142504264535804e-05, - "loss": 50.4964, - "step": 113740 - }, - { - "epoch": 0.45956439355680617, - "grad_norm": 983.1741943359375, - "learning_rate": 3.313920386142892e-05, - "loss": 59.0461, - "step": 113750 - }, - { - "epoch": 0.4596047948221738, - "grad_norm": 1348.2669677734375, - "learning_rate": 3.313590329964406e-05, - "loss": 59.1032, - "step": 113760 - }, - { - "epoch": 0.45964519608754145, - "grad_norm": 487.41680908203125, - "learning_rate": 3.313260257924558e-05, - "loss": 87.3492, - "step": 113770 - }, - { - "epoch": 0.4596855973529091, - "grad_norm": 345.0389099121094, - "learning_rate": 3.312930170029783e-05, - "loss": 52.9883, - "step": 113780 - }, - { - "epoch": 0.45972599861827673, - "grad_norm": 438.7192077636719, - "learning_rate": 3.3126000662865156e-05, - "loss": 49.1873, - "step": 113790 - }, - { - "epoch": 0.45976639988364437, - "grad_norm": 493.15130615234375, - "learning_rate": 3.312269946701191e-05, - "loss": 50.4528, - "step": 113800 - }, - { - "epoch": 0.459806801149012, - "grad_norm": 1520.73974609375, - "learning_rate": 3.311939811280246e-05, - "loss": 42.8416, - "step": 113810 - }, - { - "epoch": 0.4598472024143796, - "grad_norm": 146.88551330566406, - "learning_rate": 3.311609660030117e-05, - "loss": 52.2549, - "step": 113820 - }, - { - "epoch": 0.45988760367974724, - "grad_norm": 610.1900634765625, - "learning_rate": 3.311279492957239e-05, - "loss": 81.4292, - "step": 113830 - }, - { - "epoch": 0.4599280049451149, - "grad_norm": 478.4176330566406, - "learning_rate": 3.31094931006805e-05, - "loss": 59.0037, - "step": 113840 - }, - { - "epoch": 0.4599684062104825, - "grad_norm": 698.8412475585938, - "learning_rate": 3.310619111368986e-05, - "loss": 60.8137, - "step": 113850 - }, - { - "epoch": 0.46000880747585016, - "grad_norm": 719.0433349609375, - "learning_rate": 3.310288896866486e-05, - "loss": 40.1776, - "step": 113860 - }, - { - "epoch": 0.4600492087412178, - "grad_norm": 807.5015258789062, - "learning_rate": 3.309958666566986e-05, - "loss": 87.1863, - "step": 113870 - }, - { - "epoch": 0.4600896100065854, - "grad_norm": 773.2158813476562, - "learning_rate": 3.309628420476926e-05, - "loss": 65.442, - "step": 113880 - }, - { - "epoch": 0.460130011271953, - "grad_norm": 1120.7901611328125, - "learning_rate": 3.309298158602742e-05, - "loss": 46.1566, - "step": 113890 - }, - { - "epoch": 0.46017041253732066, - "grad_norm": 2974.2939453125, - "learning_rate": 3.308967880950874e-05, - "loss": 86.9134, - "step": 113900 - }, - { - "epoch": 0.4602108138026883, - "grad_norm": 1336.4696044921875, - "learning_rate": 3.308637587527761e-05, - "loss": 79.6506, - "step": 113910 - }, - { - "epoch": 0.46025121506805594, - "grad_norm": 1928.9310302734375, - "learning_rate": 3.3083072783398416e-05, - "loss": 96.6911, - "step": 113920 - }, - { - "epoch": 0.4602916163334236, - "grad_norm": 538.5294799804688, - "learning_rate": 3.3079769533935556e-05, - "loss": 49.8037, - "step": 113930 - }, - { - "epoch": 0.4603320175987912, - "grad_norm": 809.7352294921875, - "learning_rate": 3.307646612695343e-05, - "loss": 61.5456, - "step": 113940 - }, - { - "epoch": 0.4603724188641588, - "grad_norm": 550.5420532226562, - "learning_rate": 3.307316256251644e-05, - "loss": 49.7282, - "step": 113950 - }, - { - "epoch": 0.46041282012952645, - "grad_norm": 1422.148193359375, - "learning_rate": 3.3069858840688994e-05, - "loss": 71.9855, - "step": 113960 - }, - { - "epoch": 0.4604532213948941, - "grad_norm": 426.9280090332031, - "learning_rate": 3.3066554961535485e-05, - "loss": 46.9851, - "step": 113970 - }, - { - "epoch": 0.4604936226602617, - "grad_norm": 844.1886596679688, - "learning_rate": 3.3063250925120334e-05, - "loss": 56.5634, - "step": 113980 - }, - { - "epoch": 0.46053402392562937, - "grad_norm": 590.6248779296875, - "learning_rate": 3.305994673150797e-05, - "loss": 58.149, - "step": 113990 - }, - { - "epoch": 0.460574425190997, - "grad_norm": 722.6728515625, - "learning_rate": 3.305664238076278e-05, - "loss": 48.5719, - "step": 114000 - }, - { - "epoch": 0.4606148264563646, - "grad_norm": 761.7833251953125, - "learning_rate": 3.30533378729492e-05, - "loss": 67.1143, - "step": 114010 - }, - { - "epoch": 0.46065522772173223, - "grad_norm": 258.24639892578125, - "learning_rate": 3.3050033208131656e-05, - "loss": 50.417, - "step": 114020 - }, - { - "epoch": 0.46069562898709987, - "grad_norm": 391.4142150878906, - "learning_rate": 3.304672838637457e-05, - "loss": 65.8437, - "step": 114030 - }, - { - "epoch": 0.4607360302524675, - "grad_norm": 432.7169189453125, - "learning_rate": 3.3043423407742375e-05, - "loss": 60.9631, - "step": 114040 - }, - { - "epoch": 0.46077643151783515, - "grad_norm": 893.4138793945312, - "learning_rate": 3.3040118272299495e-05, - "loss": 131.9281, - "step": 114050 - }, - { - "epoch": 0.4608168327832028, - "grad_norm": 1183.3709716796875, - "learning_rate": 3.303681298011037e-05, - "loss": 74.6201, - "step": 114060 - }, - { - "epoch": 0.4608572340485704, - "grad_norm": 327.8617248535156, - "learning_rate": 3.303350753123944e-05, - "loss": 38.2665, - "step": 114070 - }, - { - "epoch": 0.460897635313938, - "grad_norm": 642.1771850585938, - "learning_rate": 3.3030201925751145e-05, - "loss": 50.6443, - "step": 114080 - }, - { - "epoch": 0.46093803657930565, - "grad_norm": 1546.3465576171875, - "learning_rate": 3.302689616370993e-05, - "loss": 37.8828, - "step": 114090 - }, - { - "epoch": 0.4609784378446733, - "grad_norm": 634.73876953125, - "learning_rate": 3.302359024518024e-05, - "loss": 29.7485, - "step": 114100 - }, - { - "epoch": 0.46101883911004093, - "grad_norm": 658.0567626953125, - "learning_rate": 3.302028417022653e-05, - "loss": 45.2375, - "step": 114110 - }, - { - "epoch": 0.4610592403754086, - "grad_norm": 633.6270751953125, - "learning_rate": 3.301697793891324e-05, - "loss": 59.6125, - "step": 114120 - }, - { - "epoch": 0.4610996416407762, - "grad_norm": 1438.4256591796875, - "learning_rate": 3.301367155130485e-05, - "loss": 89.7295, - "step": 114130 - }, - { - "epoch": 0.4611400429061438, - "grad_norm": 1383.4329833984375, - "learning_rate": 3.3010365007465805e-05, - "loss": 48.6195, - "step": 114140 - }, - { - "epoch": 0.46118044417151144, - "grad_norm": 530.7550659179688, - "learning_rate": 3.300705830746057e-05, - "loss": 46.5861, - "step": 114150 - }, - { - "epoch": 0.4612208454368791, - "grad_norm": 633.077880859375, - "learning_rate": 3.300375145135361e-05, - "loss": 60.9828, - "step": 114160 - }, - { - "epoch": 0.4612612467022467, - "grad_norm": 376.91864013671875, - "learning_rate": 3.3000444439209396e-05, - "loss": 62.8593, - "step": 114170 - }, - { - "epoch": 0.46130164796761436, - "grad_norm": 721.732666015625, - "learning_rate": 3.299713727109239e-05, - "loss": 84.59, - "step": 114180 - }, - { - "epoch": 0.461342049232982, - "grad_norm": 473.7373352050781, - "learning_rate": 3.299382994706709e-05, - "loss": 94.0822, - "step": 114190 - }, - { - "epoch": 0.4613824504983496, - "grad_norm": 1528.2044677734375, - "learning_rate": 3.299052246719795e-05, - "loss": 81.7651, - "step": 114200 - }, - { - "epoch": 0.4614228517637172, - "grad_norm": 890.8794555664062, - "learning_rate": 3.298721483154946e-05, - "loss": 40.4416, - "step": 114210 - }, - { - "epoch": 0.46146325302908486, - "grad_norm": 541.0910034179688, - "learning_rate": 3.298390704018611e-05, - "loss": 43.575, - "step": 114220 - }, - { - "epoch": 0.4615036542944525, - "grad_norm": 915.8447875976562, - "learning_rate": 3.298059909317239e-05, - "loss": 70.035, - "step": 114230 - }, - { - "epoch": 0.46154405555982014, - "grad_norm": 1335.07275390625, - "learning_rate": 3.297729099057277e-05, - "loss": 58.3776, - "step": 114240 - }, - { - "epoch": 0.4615844568251878, - "grad_norm": 1571.07373046875, - "learning_rate": 3.2973982732451755e-05, - "loss": 72.0075, - "step": 114250 - }, - { - "epoch": 0.4616248580905554, - "grad_norm": 1263.757080078125, - "learning_rate": 3.297067431887384e-05, - "loss": 53.4101, - "step": 114260 - }, - { - "epoch": 0.461665259355923, - "grad_norm": 916.6146850585938, - "learning_rate": 3.296736574990353e-05, - "loss": 78.4652, - "step": 114270 - }, - { - "epoch": 0.46170566062129065, - "grad_norm": 365.46142578125, - "learning_rate": 3.296405702560532e-05, - "loss": 80.6376, - "step": 114280 - }, - { - "epoch": 0.4617460618866583, - "grad_norm": 944.3591918945312, - "learning_rate": 3.2960748146043716e-05, - "loss": 90.1059, - "step": 114290 - }, - { - "epoch": 0.46178646315202593, - "grad_norm": 679.8099365234375, - "learning_rate": 3.295743911128324e-05, - "loss": 48.7843, - "step": 114300 - }, - { - "epoch": 0.46182686441739357, - "grad_norm": 1261.843017578125, - "learning_rate": 3.295412992138838e-05, - "loss": 78.7052, - "step": 114310 - }, - { - "epoch": 0.4618672656827612, - "grad_norm": 481.5779724121094, - "learning_rate": 3.295082057642367e-05, - "loss": 41.2433, - "step": 114320 - }, - { - "epoch": 0.4619076669481288, - "grad_norm": 903.3895874023438, - "learning_rate": 3.294751107645361e-05, - "loss": 48.583, - "step": 114330 - }, - { - "epoch": 0.46194806821349643, - "grad_norm": 730.4287109375, - "learning_rate": 3.294420142154274e-05, - "loss": 78.3227, - "step": 114340 - }, - { - "epoch": 0.46198846947886407, - "grad_norm": 563.7612915039062, - "learning_rate": 3.2940891611755564e-05, - "loss": 53.6732, - "step": 114350 - }, - { - "epoch": 0.4620288707442317, - "grad_norm": 1336.9444580078125, - "learning_rate": 3.293758164715663e-05, - "loss": 55.2327, - "step": 114360 - }, - { - "epoch": 0.46206927200959935, - "grad_norm": 1316.6781005859375, - "learning_rate": 3.293427152781044e-05, - "loss": 39.9807, - "step": 114370 - }, - { - "epoch": 0.462109673274967, - "grad_norm": 1014.697265625, - "learning_rate": 3.2930961253781554e-05, - "loss": 81.932, - "step": 114380 - }, - { - "epoch": 0.4621500745403346, - "grad_norm": 2144.41064453125, - "learning_rate": 3.292765082513449e-05, - "loss": 80.4216, - "step": 114390 - }, - { - "epoch": 0.4621904758057022, - "grad_norm": 509.5421447753906, - "learning_rate": 3.29243402419338e-05, - "loss": 40.3778, - "step": 114400 - }, - { - "epoch": 0.46223087707106986, - "grad_norm": 432.00933837890625, - "learning_rate": 3.2921029504244004e-05, - "loss": 76.2071, - "step": 114410 - }, - { - "epoch": 0.4622712783364375, - "grad_norm": 722.176025390625, - "learning_rate": 3.2917718612129665e-05, - "loss": 41.5058, - "step": 114420 - }, - { - "epoch": 0.46231167960180514, - "grad_norm": 725.9029541015625, - "learning_rate": 3.291440756565533e-05, - "loss": 53.8693, - "step": 114430 - }, - { - "epoch": 0.4623520808671728, - "grad_norm": 281.3984375, - "learning_rate": 3.2911096364885544e-05, - "loss": 80.6305, - "step": 114440 - }, - { - "epoch": 0.4623924821325404, - "grad_norm": 811.3935546875, - "learning_rate": 3.290778500988485e-05, - "loss": 55.3654, - "step": 114450 - }, - { - "epoch": 0.462432883397908, - "grad_norm": 472.8229064941406, - "learning_rate": 3.2904473500717824e-05, - "loss": 59.1575, - "step": 114460 - }, - { - "epoch": 0.46247328466327564, - "grad_norm": 713.5052490234375, - "learning_rate": 3.290116183744902e-05, - "loss": 50.5053, - "step": 114470 - }, - { - "epoch": 0.4625136859286433, - "grad_norm": 1182.7664794921875, - "learning_rate": 3.2897850020143005e-05, - "loss": 59.1632, - "step": 114480 - }, - { - "epoch": 0.4625540871940109, - "grad_norm": 845.945556640625, - "learning_rate": 3.289453804886433e-05, - "loss": 85.3159, - "step": 114490 - }, - { - "epoch": 0.46259448845937856, - "grad_norm": 406.4460754394531, - "learning_rate": 3.289122592367757e-05, - "loss": 39.6031, - "step": 114500 - }, - { - "epoch": 0.4626348897247462, - "grad_norm": 883.4130249023438, - "learning_rate": 3.288791364464729e-05, - "loss": 44.7248, - "step": 114510 - }, - { - "epoch": 0.4626752909901138, - "grad_norm": 447.2448425292969, - "learning_rate": 3.2884601211838085e-05, - "loss": 62.0368, - "step": 114520 - }, - { - "epoch": 0.4627156922554814, - "grad_norm": 640.0363159179688, - "learning_rate": 3.288128862531452e-05, - "loss": 56.4858, - "step": 114530 - }, - { - "epoch": 0.46275609352084907, - "grad_norm": 891.1563720703125, - "learning_rate": 3.287797588514117e-05, - "loss": 70.3198, - "step": 114540 - }, - { - "epoch": 0.4627964947862167, - "grad_norm": 628.9306030273438, - "learning_rate": 3.287466299138262e-05, - "loss": 53.9219, - "step": 114550 - }, - { - "epoch": 0.46283689605158435, - "grad_norm": 382.36456298828125, - "learning_rate": 3.287134994410347e-05, - "loss": 42.8895, - "step": 114560 - }, - { - "epoch": 0.462877297316952, - "grad_norm": 1126.9713134765625, - "learning_rate": 3.28680367433683e-05, - "loss": 38.392, - "step": 114570 - }, - { - "epoch": 0.4629176985823196, - "grad_norm": 2824.01025390625, - "learning_rate": 3.28647233892417e-05, - "loss": 77.3813, - "step": 114580 - }, - { - "epoch": 0.4629580998476872, - "grad_norm": 413.0010986328125, - "learning_rate": 3.286140988178826e-05, - "loss": 50.3714, - "step": 114590 - }, - { - "epoch": 0.46299850111305485, - "grad_norm": 952.1806030273438, - "learning_rate": 3.28580962210726e-05, - "loss": 55.7656, - "step": 114600 - }, - { - "epoch": 0.4630389023784225, - "grad_norm": 491.023193359375, - "learning_rate": 3.2854782407159305e-05, - "loss": 47.8107, - "step": 114610 - }, - { - "epoch": 0.46307930364379013, - "grad_norm": 413.3840026855469, - "learning_rate": 3.285146844011298e-05, - "loss": 57.6984, - "step": 114620 - }, - { - "epoch": 0.46311970490915777, - "grad_norm": 593.4551391601562, - "learning_rate": 3.2848154319998235e-05, - "loss": 47.3308, - "step": 114630 - }, - { - "epoch": 0.4631601061745254, - "grad_norm": 543.7266845703125, - "learning_rate": 3.2844840046879686e-05, - "loss": 46.1537, - "step": 114640 - }, - { - "epoch": 0.463200507439893, - "grad_norm": 876.9476318359375, - "learning_rate": 3.2841525620821945e-05, - "loss": 39.367, - "step": 114650 - }, - { - "epoch": 0.46324090870526063, - "grad_norm": 470.560791015625, - "learning_rate": 3.2838211041889625e-05, - "loss": 56.22, - "step": 114660 - }, - { - "epoch": 0.4632813099706283, - "grad_norm": 692.4049072265625, - "learning_rate": 3.2834896310147336e-05, - "loss": 46.9886, - "step": 114670 - }, - { - "epoch": 0.4633217112359959, - "grad_norm": 1012.37890625, - "learning_rate": 3.283158142565971e-05, - "loss": 77.0895, - "step": 114680 - }, - { - "epoch": 0.46336211250136355, - "grad_norm": 1028.7650146484375, - "learning_rate": 3.282826638849138e-05, - "loss": 60.6332, - "step": 114690 - }, - { - "epoch": 0.4634025137667312, - "grad_norm": 1001.2539672851562, - "learning_rate": 3.2824951198706954e-05, - "loss": 55.1329, - "step": 114700 - }, - { - "epoch": 0.4634429150320988, - "grad_norm": 707.6488647460938, - "learning_rate": 3.2821635856371086e-05, - "loss": 68.9933, - "step": 114710 - }, - { - "epoch": 0.4634833162974664, - "grad_norm": 640.0159912109375, - "learning_rate": 3.28183203615484e-05, - "loss": 56.8567, - "step": 114720 - }, - { - "epoch": 0.46352371756283406, - "grad_norm": 1112.1865234375, - "learning_rate": 3.281500471430353e-05, - "loss": 63.7654, - "step": 114730 - }, - { - "epoch": 0.4635641188282017, - "grad_norm": 871.5289306640625, - "learning_rate": 3.281168891470112e-05, - "loss": 64.3242, - "step": 114740 - }, - { - "epoch": 0.46360452009356934, - "grad_norm": 232.17013549804688, - "learning_rate": 3.2808372962805816e-05, - "loss": 51.8217, - "step": 114750 - }, - { - "epoch": 0.463644921358937, - "grad_norm": 653.4492797851562, - "learning_rate": 3.280505685868226e-05, - "loss": 32.7491, - "step": 114760 - }, - { - "epoch": 0.4636853226243046, - "grad_norm": 806.7274780273438, - "learning_rate": 3.2801740602395105e-05, - "loss": 60.581, - "step": 114770 - }, - { - "epoch": 0.4637257238896722, - "grad_norm": 698.8681640625, - "learning_rate": 3.279842419400899e-05, - "loss": 81.199, - "step": 114780 - }, - { - "epoch": 0.46376612515503984, - "grad_norm": 1318.1488037109375, - "learning_rate": 3.2795107633588586e-05, - "loss": 61.5727, - "step": 114790 - }, - { - "epoch": 0.4638065264204075, - "grad_norm": 2012.5032958984375, - "learning_rate": 3.279179092119855e-05, - "loss": 63.0752, - "step": 114800 - }, - { - "epoch": 0.4638469276857751, - "grad_norm": 946.7776489257812, - "learning_rate": 3.278847405690353e-05, - "loss": 104.6616, - "step": 114810 - }, - { - "epoch": 0.46388732895114276, - "grad_norm": 332.1543273925781, - "learning_rate": 3.278515704076821e-05, - "loss": 57.2958, - "step": 114820 - }, - { - "epoch": 0.4639277302165104, - "grad_norm": 628.6360473632812, - "learning_rate": 3.278183987285724e-05, - "loss": 49.5415, - "step": 114830 - }, - { - "epoch": 0.463968131481878, - "grad_norm": 407.3933410644531, - "learning_rate": 3.277852255323529e-05, - "loss": 45.7567, - "step": 114840 - }, - { - "epoch": 0.46400853274724563, - "grad_norm": 1091.437255859375, - "learning_rate": 3.277520508196705e-05, - "loss": 76.9394, - "step": 114850 - }, - { - "epoch": 0.46404893401261327, - "grad_norm": 540.4053344726562, - "learning_rate": 3.277188745911717e-05, - "loss": 42.3653, - "step": 114860 - }, - { - "epoch": 0.4640893352779809, - "grad_norm": 1648.4383544921875, - "learning_rate": 3.276856968475035e-05, - "loss": 78.4631, - "step": 114870 - }, - { - "epoch": 0.46412973654334855, - "grad_norm": 469.8823547363281, - "learning_rate": 3.276525175893126e-05, - "loss": 59.8111, - "step": 114880 - }, - { - "epoch": 0.4641701378087162, - "grad_norm": 925.3463745117188, - "learning_rate": 3.27619336817246e-05, - "loss": 49.3538, - "step": 114890 - }, - { - "epoch": 0.46421053907408383, - "grad_norm": 1113.8001708984375, - "learning_rate": 3.2758615453195034e-05, - "loss": 64.5263, - "step": 114900 - }, - { - "epoch": 0.4642509403394514, - "grad_norm": 591.8302612304688, - "learning_rate": 3.275529707340728e-05, - "loss": 71.7116, - "step": 114910 - }, - { - "epoch": 0.46429134160481905, - "grad_norm": 564.6202392578125, - "learning_rate": 3.2751978542425995e-05, - "loss": 58.8196, - "step": 114920 - }, - { - "epoch": 0.4643317428701867, - "grad_norm": 1191.913330078125, - "learning_rate": 3.2748659860315916e-05, - "loss": 42.0783, - "step": 114930 - }, - { - "epoch": 0.46437214413555433, - "grad_norm": 599.6019287109375, - "learning_rate": 3.274534102714172e-05, - "loss": 50.7316, - "step": 114940 - }, - { - "epoch": 0.46441254540092197, - "grad_norm": 527.0614013671875, - "learning_rate": 3.2742022042968104e-05, - "loss": 51.8205, - "step": 114950 - }, - { - "epoch": 0.4644529466662896, - "grad_norm": 665.9529418945312, - "learning_rate": 3.273870290785979e-05, - "loss": 68.3194, - "step": 114960 - }, - { - "epoch": 0.4644933479316572, - "grad_norm": 515.5640258789062, - "learning_rate": 3.2735383621881485e-05, - "loss": 37.5466, - "step": 114970 - }, - { - "epoch": 0.46453374919702484, - "grad_norm": 822.676513671875, - "learning_rate": 3.273206418509788e-05, - "loss": 69.2502, - "step": 114980 - }, - { - "epoch": 0.4645741504623925, - "grad_norm": 1226.153076171875, - "learning_rate": 3.272874459757371e-05, - "loss": 50.1505, - "step": 114990 - }, - { - "epoch": 0.4646145517277601, - "grad_norm": 1813.3848876953125, - "learning_rate": 3.272542485937369e-05, - "loss": 77.2155, - "step": 115000 - }, - { - "epoch": 0.46465495299312776, - "grad_norm": 579.1246337890625, - "learning_rate": 3.2722104970562525e-05, - "loss": 93.8051, - "step": 115010 - }, - { - "epoch": 0.4646953542584954, - "grad_norm": 0.0, - "learning_rate": 3.271878493120496e-05, - "loss": 37.53, - "step": 115020 - }, - { - "epoch": 0.464735755523863, - "grad_norm": 285.748779296875, - "learning_rate": 3.27154647413657e-05, - "loss": 51.3141, - "step": 115030 - }, - { - "epoch": 0.4647761567892306, - "grad_norm": 585.9220581054688, - "learning_rate": 3.271214440110948e-05, - "loss": 66.3542, - "step": 115040 - }, - { - "epoch": 0.46481655805459826, - "grad_norm": 385.7046813964844, - "learning_rate": 3.270882391050104e-05, - "loss": 43.7319, - "step": 115050 - }, - { - "epoch": 0.4648569593199659, - "grad_norm": 799.7272338867188, - "learning_rate": 3.270550326960511e-05, - "loss": 73.2882, - "step": 115060 - }, - { - "epoch": 0.46489736058533354, - "grad_norm": 1642.924560546875, - "learning_rate": 3.270218247848642e-05, - "loss": 90.7406, - "step": 115070 - }, - { - "epoch": 0.4649377618507012, - "grad_norm": 890.0045776367188, - "learning_rate": 3.269886153720972e-05, - "loss": 65.4913, - "step": 115080 - }, - { - "epoch": 0.4649781631160688, - "grad_norm": 1087.22802734375, - "learning_rate": 3.2695540445839764e-05, - "loss": 66.0933, - "step": 115090 - }, - { - "epoch": 0.4650185643814364, - "grad_norm": 436.0012512207031, - "learning_rate": 3.269221920444127e-05, - "loss": 48.0393, - "step": 115100 - }, - { - "epoch": 0.46505896564680405, - "grad_norm": 821.9993286132812, - "learning_rate": 3.2688897813079005e-05, - "loss": 48.355, - "step": 115110 - }, - { - "epoch": 0.4650993669121717, - "grad_norm": 434.6961364746094, - "learning_rate": 3.2685576271817716e-05, - "loss": 54.5126, - "step": 115120 - }, - { - "epoch": 0.4651397681775393, - "grad_norm": 897.4439697265625, - "learning_rate": 3.268225458072217e-05, - "loss": 62.3177, - "step": 115130 - }, - { - "epoch": 0.46518016944290697, - "grad_norm": 2384.05224609375, - "learning_rate": 3.267893273985711e-05, - "loss": 72.2372, - "step": 115140 - }, - { - "epoch": 0.4652205707082746, - "grad_norm": 724.9418334960938, - "learning_rate": 3.26756107492873e-05, - "loss": 56.3772, - "step": 115150 - }, - { - "epoch": 0.4652609719736422, - "grad_norm": 666.811767578125, - "learning_rate": 3.267228860907751e-05, - "loss": 45.4823, - "step": 115160 - }, - { - "epoch": 0.46530137323900983, - "grad_norm": 846.5064086914062, - "learning_rate": 3.266896631929251e-05, - "loss": 49.0491, - "step": 115170 - }, - { - "epoch": 0.46534177450437747, - "grad_norm": 372.02447509765625, - "learning_rate": 3.2665643879997056e-05, - "loss": 78.3602, - "step": 115180 - }, - { - "epoch": 0.4653821757697451, - "grad_norm": 745.8118896484375, - "learning_rate": 3.266232129125593e-05, - "loss": 39.577, - "step": 115190 - }, - { - "epoch": 0.46542257703511275, - "grad_norm": 936.6083984375, - "learning_rate": 3.2658998553133895e-05, - "loss": 43.7622, - "step": 115200 - }, - { - "epoch": 0.4654629783004804, - "grad_norm": 458.98114013671875, - "learning_rate": 3.2655675665695754e-05, - "loss": 49.8332, - "step": 115210 - }, - { - "epoch": 0.46550337956584803, - "grad_norm": 636.2042846679688, - "learning_rate": 3.2652352629006275e-05, - "loss": 63.9273, - "step": 115220 - }, - { - "epoch": 0.4655437808312156, - "grad_norm": 556.1316528320312, - "learning_rate": 3.264902944313023e-05, - "loss": 46.3902, - "step": 115230 - }, - { - "epoch": 0.46558418209658325, - "grad_norm": 724.4549560546875, - "learning_rate": 3.2645706108132424e-05, - "loss": 54.3436, - "step": 115240 - }, - { - "epoch": 0.4656245833619509, - "grad_norm": 546.86767578125, - "learning_rate": 3.264238262407764e-05, - "loss": 50.2506, - "step": 115250 - }, - { - "epoch": 0.46566498462731853, - "grad_norm": 784.9192504882812, - "learning_rate": 3.263905899103068e-05, - "loss": 40.6821, - "step": 115260 - }, - { - "epoch": 0.4657053858926862, - "grad_norm": 686.7406616210938, - "learning_rate": 3.263573520905633e-05, - "loss": 52.2234, - "step": 115270 - }, - { - "epoch": 0.4657457871580538, - "grad_norm": 709.7639770507812, - "learning_rate": 3.263241127821938e-05, - "loss": 57.2129, - "step": 115280 - }, - { - "epoch": 0.4657861884234214, - "grad_norm": 704.1000366210938, - "learning_rate": 3.262908719858466e-05, - "loss": 44.5228, - "step": 115290 - }, - { - "epoch": 0.46582658968878904, - "grad_norm": 444.74029541015625, - "learning_rate": 3.262576297021695e-05, - "loss": 49.4663, - "step": 115300 - }, - { - "epoch": 0.4658669909541567, - "grad_norm": 951.1953125, - "learning_rate": 3.262243859318105e-05, - "loss": 60.8233, - "step": 115310 - }, - { - "epoch": 0.4659073922195243, - "grad_norm": 466.8185119628906, - "learning_rate": 3.2619114067541796e-05, - "loss": 92.2595, - "step": 115320 - }, - { - "epoch": 0.46594779348489196, - "grad_norm": 581.5831298828125, - "learning_rate": 3.2615789393363995e-05, - "loss": 53.4414, - "step": 115330 - }, - { - "epoch": 0.4659881947502596, - "grad_norm": 1562.436767578125, - "learning_rate": 3.261246457071245e-05, - "loss": 61.0114, - "step": 115340 - }, - { - "epoch": 0.4660285960156272, - "grad_norm": 1172.59130859375, - "learning_rate": 3.260913959965201e-05, - "loss": 71.2536, - "step": 115350 - }, - { - "epoch": 0.4660689972809948, - "grad_norm": 751.3859252929688, - "learning_rate": 3.260581448024745e-05, - "loss": 53.4582, - "step": 115360 - }, - { - "epoch": 0.46610939854636246, - "grad_norm": 479.7427673339844, - "learning_rate": 3.260248921256364e-05, - "loss": 38.3002, - "step": 115370 - }, - { - "epoch": 0.4661497998117301, - "grad_norm": 562.0839233398438, - "learning_rate": 3.2599163796665376e-05, - "loss": 74.0789, - "step": 115380 - }, - { - "epoch": 0.46619020107709774, - "grad_norm": 1441.49853515625, - "learning_rate": 3.25958382326175e-05, - "loss": 46.7784, - "step": 115390 - }, - { - "epoch": 0.4662306023424654, - "grad_norm": 586.0812377929688, - "learning_rate": 3.2592512520484856e-05, - "loss": 54.3181, - "step": 115400 - }, - { - "epoch": 0.466271003607833, - "grad_norm": 782.4759521484375, - "learning_rate": 3.2589186660332274e-05, - "loss": 59.9024, - "step": 115410 - }, - { - "epoch": 0.4663114048732006, - "grad_norm": 623.7604370117188, - "learning_rate": 3.2585860652224585e-05, - "loss": 52.6946, - "step": 115420 - }, - { - "epoch": 0.46635180613856825, - "grad_norm": 958.4971313476562, - "learning_rate": 3.2582534496226644e-05, - "loss": 56.1301, - "step": 115430 - }, - { - "epoch": 0.4663922074039359, - "grad_norm": 803.4427490234375, - "learning_rate": 3.257920819240328e-05, - "loss": 56.7052, - "step": 115440 - }, - { - "epoch": 0.46643260866930353, - "grad_norm": 481.8475646972656, - "learning_rate": 3.2575881740819355e-05, - "loss": 53.755, - "step": 115450 - }, - { - "epoch": 0.46647300993467117, - "grad_norm": 542.654052734375, - "learning_rate": 3.257255514153971e-05, - "loss": 66.2002, - "step": 115460 - }, - { - "epoch": 0.4665134112000388, - "grad_norm": 468.3689270019531, - "learning_rate": 3.256922839462921e-05, - "loss": 60.2367, - "step": 115470 - }, - { - "epoch": 0.4665538124654064, - "grad_norm": 1403.767333984375, - "learning_rate": 3.25659015001527e-05, - "loss": 108.456, - "step": 115480 - }, - { - "epoch": 0.46659421373077403, - "grad_norm": 708.3703002929688, - "learning_rate": 3.2562574458175044e-05, - "loss": 46.7288, - "step": 115490 - }, - { - "epoch": 0.4666346149961417, - "grad_norm": 1207.593017578125, - "learning_rate": 3.2559247268761115e-05, - "loss": 63.8019, - "step": 115500 - }, - { - "epoch": 0.4666750162615093, - "grad_norm": 919.4540405273438, - "learning_rate": 3.2555919931975766e-05, - "loss": 71.2822, - "step": 115510 - }, - { - "epoch": 0.46671541752687695, - "grad_norm": 2360.607421875, - "learning_rate": 3.2552592447883865e-05, - "loss": 63.8303, - "step": 115520 - }, - { - "epoch": 0.4667558187922446, - "grad_norm": 1612.3497314453125, - "learning_rate": 3.254926481655028e-05, - "loss": 45.8642, - "step": 115530 - }, - { - "epoch": 0.4667962200576122, - "grad_norm": 1008.3920288085938, - "learning_rate": 3.25459370380399e-05, - "loss": 49.0776, - "step": 115540 - }, - { - "epoch": 0.4668366213229798, - "grad_norm": 2217.866943359375, - "learning_rate": 3.254260911241759e-05, - "loss": 72.4225, - "step": 115550 - }, - { - "epoch": 0.46687702258834746, - "grad_norm": 413.4029235839844, - "learning_rate": 3.253928103974823e-05, - "loss": 66.9536, - "step": 115560 - }, - { - "epoch": 0.4669174238537151, - "grad_norm": 389.1944580078125, - "learning_rate": 3.253595282009671e-05, - "loss": 32.475, - "step": 115570 - }, - { - "epoch": 0.46695782511908274, - "grad_norm": 0.0, - "learning_rate": 3.253262445352791e-05, - "loss": 60.644, - "step": 115580 - }, - { - "epoch": 0.4669982263844504, - "grad_norm": 366.5119934082031, - "learning_rate": 3.252929594010671e-05, - "loss": 59.2907, - "step": 115590 - }, - { - "epoch": 0.467038627649818, - "grad_norm": 882.8370971679688, - "learning_rate": 3.2525967279898015e-05, - "loss": 70.1194, - "step": 115600 - }, - { - "epoch": 0.4670790289151856, - "grad_norm": 955.0043334960938, - "learning_rate": 3.252263847296671e-05, - "loss": 62.6897, - "step": 115610 - }, - { - "epoch": 0.46711943018055324, - "grad_norm": 815.323486328125, - "learning_rate": 3.25193095193777e-05, - "loss": 30.4579, - "step": 115620 - }, - { - "epoch": 0.4671598314459209, - "grad_norm": 721.734130859375, - "learning_rate": 3.251598041919587e-05, - "loss": 66.6569, - "step": 115630 - }, - { - "epoch": 0.4672002327112885, - "grad_norm": 870.7584838867188, - "learning_rate": 3.251265117248614e-05, - "loss": 52.2032, - "step": 115640 - }, - { - "epoch": 0.46724063397665616, - "grad_norm": 4967.96728515625, - "learning_rate": 3.25093217793134e-05, - "loss": 56.7999, - "step": 115650 - }, - { - "epoch": 0.4672810352420238, - "grad_norm": 805.152099609375, - "learning_rate": 3.250599223974258e-05, - "loss": 52.8073, - "step": 115660 - }, - { - "epoch": 0.4673214365073914, - "grad_norm": 614.6864624023438, - "learning_rate": 3.250266255383857e-05, - "loss": 54.9746, - "step": 115670 - }, - { - "epoch": 0.467361837772759, - "grad_norm": 1058.3001708984375, - "learning_rate": 3.249933272166629e-05, - "loss": 92.6986, - "step": 115680 - }, - { - "epoch": 0.46740223903812667, - "grad_norm": 1258.4139404296875, - "learning_rate": 3.249600274329066e-05, - "loss": 54.6076, - "step": 115690 - }, - { - "epoch": 0.4674426403034943, - "grad_norm": 712.8331909179688, - "learning_rate": 3.24926726187766e-05, - "loss": 65.3554, - "step": 115700 - }, - { - "epoch": 0.46748304156886195, - "grad_norm": 826.5805053710938, - "learning_rate": 3.248934234818902e-05, - "loss": 82.2117, - "step": 115710 - }, - { - "epoch": 0.4675234428342296, - "grad_norm": 770.5543212890625, - "learning_rate": 3.248601193159287e-05, - "loss": 73.6685, - "step": 115720 - }, - { - "epoch": 0.4675638440995972, - "grad_norm": 780.5651245117188, - "learning_rate": 3.248268136905304e-05, - "loss": 44.8846, - "step": 115730 - }, - { - "epoch": 0.4676042453649648, - "grad_norm": 1183.1986083984375, - "learning_rate": 3.247935066063451e-05, - "loss": 57.1673, - "step": 115740 - }, - { - "epoch": 0.46764464663033245, - "grad_norm": 461.81573486328125, - "learning_rate": 3.247601980640217e-05, - "loss": 45.7133, - "step": 115750 - }, - { - "epoch": 0.4676850478957001, - "grad_norm": 720.4097290039062, - "learning_rate": 3.247268880642098e-05, - "loss": 44.3927, - "step": 115760 - }, - { - "epoch": 0.46772544916106773, - "grad_norm": 1013.3065795898438, - "learning_rate": 3.246935766075588e-05, - "loss": 52.5788, - "step": 115770 - }, - { - "epoch": 0.46776585042643537, - "grad_norm": 947.147216796875, - "learning_rate": 3.24660263694718e-05, - "loss": 93.3442, - "step": 115780 - }, - { - "epoch": 0.467806251691803, - "grad_norm": 676.0074462890625, - "learning_rate": 3.24626949326337e-05, - "loss": 48.2604, - "step": 115790 - }, - { - "epoch": 0.4678466529571706, - "grad_norm": 433.5167541503906, - "learning_rate": 3.245936335030651e-05, - "loss": 68.8754, - "step": 115800 - }, - { - "epoch": 0.46788705422253823, - "grad_norm": 759.6268920898438, - "learning_rate": 3.2456031622555197e-05, - "loss": 42.3743, - "step": 115810 - }, - { - "epoch": 0.4679274554879059, - "grad_norm": 650.4202880859375, - "learning_rate": 3.245269974944471e-05, - "loss": 83.6153, - "step": 115820 - }, - { - "epoch": 0.4679678567532735, - "grad_norm": 963.0789794921875, - "learning_rate": 3.2449367731039996e-05, - "loss": 60.6028, - "step": 115830 - }, - { - "epoch": 0.46800825801864115, - "grad_norm": 2292.56005859375, - "learning_rate": 3.244603556740603e-05, - "loss": 74.8116, - "step": 115840 - }, - { - "epoch": 0.4680486592840088, - "grad_norm": 457.2271423339844, - "learning_rate": 3.2442703258607766e-05, - "loss": 37.3418, - "step": 115850 - }, - { - "epoch": 0.4680890605493764, - "grad_norm": 474.98126220703125, - "learning_rate": 3.243937080471017e-05, - "loss": 61.6, - "step": 115860 - }, - { - "epoch": 0.468129461814744, - "grad_norm": 528.2426147460938, - "learning_rate": 3.243603820577822e-05, - "loss": 37.5654, - "step": 115870 - }, - { - "epoch": 0.46816986308011166, - "grad_norm": 885.8614501953125, - "learning_rate": 3.243270546187687e-05, - "loss": 39.1532, - "step": 115880 - }, - { - "epoch": 0.4682102643454793, - "grad_norm": 2204.4306640625, - "learning_rate": 3.242937257307109e-05, - "loss": 67.4763, - "step": 115890 - }, - { - "epoch": 0.46825066561084694, - "grad_norm": 1517.2777099609375, - "learning_rate": 3.2426039539425876e-05, - "loss": 53.94, - "step": 115900 - }, - { - "epoch": 0.4682910668762146, - "grad_norm": 2055.982666015625, - "learning_rate": 3.2422706361006194e-05, - "loss": 58.1359, - "step": 115910 - }, - { - "epoch": 0.4683314681415822, - "grad_norm": 607.13330078125, - "learning_rate": 3.241937303787703e-05, - "loss": 49.6527, - "step": 115920 - }, - { - "epoch": 0.4683718694069498, - "grad_norm": 899.364990234375, - "learning_rate": 3.2416039570103375e-05, - "loss": 57.977, - "step": 115930 - }, - { - "epoch": 0.46841227067231744, - "grad_norm": 654.2115478515625, - "learning_rate": 3.241270595775021e-05, - "loss": 73.5976, - "step": 115940 - }, - { - "epoch": 0.4684526719376851, - "grad_norm": 786.1331176757812, - "learning_rate": 3.240937220088253e-05, - "loss": 35.1766, - "step": 115950 - }, - { - "epoch": 0.4684930732030527, - "grad_norm": 1251.7491455078125, - "learning_rate": 3.240603829956531e-05, - "loss": 62.6786, - "step": 115960 - }, - { - "epoch": 0.46853347446842036, - "grad_norm": 1378.3673095703125, - "learning_rate": 3.240270425386357e-05, - "loss": 75.6629, - "step": 115970 - }, - { - "epoch": 0.468573875733788, - "grad_norm": 344.47747802734375, - "learning_rate": 3.2399370063842294e-05, - "loss": 46.953, - "step": 115980 - }, - { - "epoch": 0.4686142769991556, - "grad_norm": 383.6866760253906, - "learning_rate": 3.23960357295665e-05, - "loss": 47.923, - "step": 115990 - }, - { - "epoch": 0.46865467826452323, - "grad_norm": 471.9056091308594, - "learning_rate": 3.239270125110117e-05, - "loss": 77.3322, - "step": 116000 - }, - { - "epoch": 0.46869507952989087, - "grad_norm": 879.043701171875, - "learning_rate": 3.238936662851133e-05, - "loss": 48.9826, - "step": 116010 - }, - { - "epoch": 0.4687354807952585, - "grad_norm": 632.9555053710938, - "learning_rate": 3.2386031861861976e-05, - "loss": 31.8865, - "step": 116020 - }, - { - "epoch": 0.46877588206062615, - "grad_norm": 1467.3824462890625, - "learning_rate": 3.2382696951218135e-05, - "loss": 95.1815, - "step": 116030 - }, - { - "epoch": 0.4688162833259938, - "grad_norm": 336.1209411621094, - "learning_rate": 3.2379361896644816e-05, - "loss": 54.7575, - "step": 116040 - }, - { - "epoch": 0.46885668459136143, - "grad_norm": 873.735595703125, - "learning_rate": 3.237602669820704e-05, - "loss": 63.7571, - "step": 116050 - }, - { - "epoch": 0.468897085856729, - "grad_norm": 537.8552856445312, - "learning_rate": 3.2372691355969816e-05, - "loss": 55.9651, - "step": 116060 - }, - { - "epoch": 0.46893748712209665, - "grad_norm": 353.98614501953125, - "learning_rate": 3.2369355869998185e-05, - "loss": 39.4995, - "step": 116070 - }, - { - "epoch": 0.4689778883874643, - "grad_norm": 195.72950744628906, - "learning_rate": 3.236602024035716e-05, - "loss": 53.4221, - "step": 116080 - }, - { - "epoch": 0.46901828965283193, - "grad_norm": 657.9056396484375, - "learning_rate": 3.236268446711179e-05, - "loss": 66.8073, - "step": 116090 - }, - { - "epoch": 0.4690586909181996, - "grad_norm": 483.935546875, - "learning_rate": 3.235934855032709e-05, - "loss": 51.066, - "step": 116100 - }, - { - "epoch": 0.4690990921835672, - "grad_norm": 1542.11865234375, - "learning_rate": 3.23560124900681e-05, - "loss": 89.0544, - "step": 116110 - }, - { - "epoch": 0.4691394934489348, - "grad_norm": 838.8958740234375, - "learning_rate": 3.235267628639987e-05, - "loss": 60.8755, - "step": 116120 - }, - { - "epoch": 0.46917989471430244, - "grad_norm": 861.8223266601562, - "learning_rate": 3.234933993938742e-05, - "loss": 47.4594, - "step": 116130 - }, - { - "epoch": 0.4692202959796701, - "grad_norm": 550.157958984375, - "learning_rate": 3.2346003449095805e-05, - "loss": 53.5828, - "step": 116140 - }, - { - "epoch": 0.4692606972450377, - "grad_norm": 503.9146728515625, - "learning_rate": 3.234266681559007e-05, - "loss": 78.4864, - "step": 116150 - }, - { - "epoch": 0.46930109851040536, - "grad_norm": 333.4444885253906, - "learning_rate": 3.2339330038935265e-05, - "loss": 54.1187, - "step": 116160 - }, - { - "epoch": 0.469341499775773, - "grad_norm": 666.8385620117188, - "learning_rate": 3.233599311919644e-05, - "loss": 60.4023, - "step": 116170 - }, - { - "epoch": 0.4693819010411406, - "grad_norm": 641.484619140625, - "learning_rate": 3.233265605643866e-05, - "loss": 63.9357, - "step": 116180 - }, - { - "epoch": 0.4694223023065082, - "grad_norm": 1227.126953125, - "learning_rate": 3.232931885072697e-05, - "loss": 61.4396, - "step": 116190 - }, - { - "epoch": 0.46946270357187586, - "grad_norm": 1222.384765625, - "learning_rate": 3.2325981502126433e-05, - "loss": 61.0376, - "step": 116200 - }, - { - "epoch": 0.4695031048372435, - "grad_norm": 706.1589965820312, - "learning_rate": 3.232264401070213e-05, - "loss": 58.2779, - "step": 116210 - }, - { - "epoch": 0.46954350610261114, - "grad_norm": 558.5364990234375, - "learning_rate": 3.231930637651909e-05, - "loss": 52.5935, - "step": 116220 - }, - { - "epoch": 0.4695839073679788, - "grad_norm": 928.905029296875, - "learning_rate": 3.231596859964242e-05, - "loss": 61.5201, - "step": 116230 - }, - { - "epoch": 0.4696243086333464, - "grad_norm": 630.4097290039062, - "learning_rate": 3.2312630680137175e-05, - "loss": 37.5664, - "step": 116240 - }, - { - "epoch": 0.469664709898714, - "grad_norm": 1384.9384765625, - "learning_rate": 3.230929261806842e-05, - "loss": 53.7496, - "step": 116250 - }, - { - "epoch": 0.46970511116408165, - "grad_norm": 147.7491912841797, - "learning_rate": 3.230595441350125e-05, - "loss": 55.0247, - "step": 116260 - }, - { - "epoch": 0.4697455124294493, - "grad_norm": 334.8374938964844, - "learning_rate": 3.2302616066500735e-05, - "loss": 46.3881, - "step": 116270 - }, - { - "epoch": 0.4697859136948169, - "grad_norm": 789.24755859375, - "learning_rate": 3.229927757713196e-05, - "loss": 70.6806, - "step": 116280 - }, - { - "epoch": 0.46982631496018457, - "grad_norm": 1363.5418701171875, - "learning_rate": 3.229593894546001e-05, - "loss": 58.1901, - "step": 116290 - }, - { - "epoch": 0.4698667162255522, - "grad_norm": 609.9053344726562, - "learning_rate": 3.229260017154997e-05, - "loss": 98.0226, - "step": 116300 - }, - { - "epoch": 0.4699071174909198, - "grad_norm": 865.0936279296875, - "learning_rate": 3.228926125546695e-05, - "loss": 61.3911, - "step": 116310 - }, - { - "epoch": 0.46994751875628743, - "grad_norm": 590.865234375, - "learning_rate": 3.228592219727602e-05, - "loss": 86.893, - "step": 116320 - }, - { - "epoch": 0.46998792002165507, - "grad_norm": 789.493408203125, - "learning_rate": 3.2282582997042285e-05, - "loss": 49.3227, - "step": 116330 - }, - { - "epoch": 0.4700283212870227, - "grad_norm": 707.34814453125, - "learning_rate": 3.2279243654830836e-05, - "loss": 69.9071, - "step": 116340 - }, - { - "epoch": 0.47006872255239035, - "grad_norm": 1062.26123046875, - "learning_rate": 3.2275904170706797e-05, - "loss": 55.3705, - "step": 116350 - }, - { - "epoch": 0.470109123817758, - "grad_norm": 967.4932861328125, - "learning_rate": 3.227256454473526e-05, - "loss": 42.835, - "step": 116360 - }, - { - "epoch": 0.47014952508312563, - "grad_norm": 645.4476318359375, - "learning_rate": 3.226922477698133e-05, - "loss": 57.326, - "step": 116370 - }, - { - "epoch": 0.4701899263484932, - "grad_norm": 884.6676025390625, - "learning_rate": 3.226588486751012e-05, - "loss": 66.296, - "step": 116380 - }, - { - "epoch": 0.47023032761386085, - "grad_norm": 3511.986083984375, - "learning_rate": 3.2262544816386745e-05, - "loss": 49.9027, - "step": 116390 - }, - { - "epoch": 0.4702707288792285, - "grad_norm": 476.053955078125, - "learning_rate": 3.225920462367632e-05, - "loss": 68.0334, - "step": 116400 - }, - { - "epoch": 0.47031113014459613, - "grad_norm": 367.8993225097656, - "learning_rate": 3.225586428944396e-05, - "loss": 41.9805, - "step": 116410 - }, - { - "epoch": 0.4703515314099638, - "grad_norm": 500.8946533203125, - "learning_rate": 3.225252381375479e-05, - "loss": 65.6448, - "step": 116420 - }, - { - "epoch": 0.4703919326753314, - "grad_norm": 1217.2655029296875, - "learning_rate": 3.224918319667394e-05, - "loss": 83.4919, - "step": 116430 - }, - { - "epoch": 0.470432333940699, - "grad_norm": 662.4497680664062, - "learning_rate": 3.2245842438266526e-05, - "loss": 65.4877, - "step": 116440 - }, - { - "epoch": 0.47047273520606664, - "grad_norm": 725.7619018554688, - "learning_rate": 3.224250153859769e-05, - "loss": 55.515, - "step": 116450 - }, - { - "epoch": 0.4705131364714343, - "grad_norm": 910.7822265625, - "learning_rate": 3.223916049773256e-05, - "loss": 43.4893, - "step": 116460 - }, - { - "epoch": 0.4705535377368019, - "grad_norm": 2010.1138916015625, - "learning_rate": 3.223581931573625e-05, - "loss": 58.6502, - "step": 116470 - }, - { - "epoch": 0.47059393900216956, - "grad_norm": 715.5496826171875, - "learning_rate": 3.223247799267394e-05, - "loss": 84.7735, - "step": 116480 - }, - { - "epoch": 0.4706343402675372, - "grad_norm": 737.7570190429688, - "learning_rate": 3.2229136528610736e-05, - "loss": 39.7872, - "step": 116490 - }, - { - "epoch": 0.4706747415329048, - "grad_norm": 532.3171997070312, - "learning_rate": 3.222579492361179e-05, - "loss": 67.2663, - "step": 116500 - }, - { - "epoch": 0.4707151427982724, - "grad_norm": 1556.9482421875, - "learning_rate": 3.222245317774226e-05, - "loss": 57.7589, - "step": 116510 - }, - { - "epoch": 0.47075554406364006, - "grad_norm": 1564.6119384765625, - "learning_rate": 3.221911129106728e-05, - "loss": 41.327, - "step": 116520 - }, - { - "epoch": 0.4707959453290077, - "grad_norm": 668.7022094726562, - "learning_rate": 3.221576926365202e-05, - "loss": 53.1316, - "step": 116530 - }, - { - "epoch": 0.47083634659437534, - "grad_norm": 940.4009399414062, - "learning_rate": 3.221242709556161e-05, - "loss": 52.2775, - "step": 116540 - }, - { - "epoch": 0.470876747859743, - "grad_norm": 929.6431884765625, - "learning_rate": 3.220908478686123e-05, - "loss": 63.8318, - "step": 116550 - }, - { - "epoch": 0.4709171491251106, - "grad_norm": 770.2941284179688, - "learning_rate": 3.220574233761603e-05, - "loss": 50.6914, - "step": 116560 - }, - { - "epoch": 0.4709575503904782, - "grad_norm": 2690.2001953125, - "learning_rate": 3.220239974789117e-05, - "loss": 103.2043, - "step": 116570 - }, - { - "epoch": 0.47099795165584585, - "grad_norm": 744.5656127929688, - "learning_rate": 3.219905701775182e-05, - "loss": 40.3893, - "step": 116580 - }, - { - "epoch": 0.4710383529212135, - "grad_norm": 2249.509765625, - "learning_rate": 3.219571414726315e-05, - "loss": 55.5555, - "step": 116590 - }, - { - "epoch": 0.47107875418658113, - "grad_norm": 392.30657958984375, - "learning_rate": 3.219237113649032e-05, - "loss": 63.3421, - "step": 116600 - }, - { - "epoch": 0.47111915545194877, - "grad_norm": 0.0, - "learning_rate": 3.2189027985498514e-05, - "loss": 82.4716, - "step": 116610 - }, - { - "epoch": 0.4711595567173164, - "grad_norm": 626.5792846679688, - "learning_rate": 3.2185684694352916e-05, - "loss": 38.248, - "step": 116620 - }, - { - "epoch": 0.471199957982684, - "grad_norm": 0.0, - "learning_rate": 3.218234126311869e-05, - "loss": 39.9608, - "step": 116630 - }, - { - "epoch": 0.47124035924805163, - "grad_norm": 959.5233154296875, - "learning_rate": 3.2178997691861014e-05, - "loss": 56.8771, - "step": 116640 - }, - { - "epoch": 0.4712807605134193, - "grad_norm": 368.611083984375, - "learning_rate": 3.217565398064509e-05, - "loss": 30.2148, - "step": 116650 - }, - { - "epoch": 0.4713211617787869, - "grad_norm": 2147.029296875, - "learning_rate": 3.2172310129536096e-05, - "loss": 56.8126, - "step": 116660 - }, - { - "epoch": 0.47136156304415455, - "grad_norm": 528.3900146484375, - "learning_rate": 3.2168966138599225e-05, - "loss": 93.9726, - "step": 116670 - }, - { - "epoch": 0.4714019643095222, - "grad_norm": 1705.246337890625, - "learning_rate": 3.2165622007899676e-05, - "loss": 42.9779, - "step": 116680 - }, - { - "epoch": 0.47144236557488983, - "grad_norm": 1022.4556884765625, - "learning_rate": 3.216227773750262e-05, - "loss": 40.3776, - "step": 116690 - }, - { - "epoch": 0.4714827668402574, - "grad_norm": 836.2701416015625, - "learning_rate": 3.215893332747328e-05, - "loss": 75.0597, - "step": 116700 - }, - { - "epoch": 0.47152316810562506, - "grad_norm": 493.19110107421875, - "learning_rate": 3.2155588777876856e-05, - "loss": 45.9303, - "step": 116710 - }, - { - "epoch": 0.4715635693709927, - "grad_norm": 800.6790771484375, - "learning_rate": 3.215224408877854e-05, - "loss": 52.3173, - "step": 116720 - }, - { - "epoch": 0.47160397063636034, - "grad_norm": 1031.7352294921875, - "learning_rate": 3.2148899260243545e-05, - "loss": 41.5077, - "step": 116730 - }, - { - "epoch": 0.471644371901728, - "grad_norm": 524.3075561523438, - "learning_rate": 3.214555429233707e-05, - "loss": 43.6509, - "step": 116740 - }, - { - "epoch": 0.4716847731670956, - "grad_norm": 2053.65576171875, - "learning_rate": 3.214220918512434e-05, - "loss": 52.429, - "step": 116750 - }, - { - "epoch": 0.4717251744324632, - "grad_norm": 660.8265380859375, - "learning_rate": 3.213886393867057e-05, - "loss": 59.9484, - "step": 116760 - }, - { - "epoch": 0.47176557569783084, - "grad_norm": 539.3421630859375, - "learning_rate": 3.2135518553040964e-05, - "loss": 56.5854, - "step": 116770 - }, - { - "epoch": 0.4718059769631985, - "grad_norm": 461.3349304199219, - "learning_rate": 3.2132173028300756e-05, - "loss": 56.3017, - "step": 116780 - }, - { - "epoch": 0.4718463782285661, - "grad_norm": 1058.8426513671875, - "learning_rate": 3.212882736451516e-05, - "loss": 47.9099, - "step": 116790 - }, - { - "epoch": 0.47188677949393376, - "grad_norm": 521.5872802734375, - "learning_rate": 3.21254815617494e-05, - "loss": 48.9531, - "step": 116800 - }, - { - "epoch": 0.4719271807593014, - "grad_norm": 789.1767578125, - "learning_rate": 3.212213562006872e-05, - "loss": 77.1051, - "step": 116810 - }, - { - "epoch": 0.471967582024669, - "grad_norm": 710.3841552734375, - "learning_rate": 3.2118789539538335e-05, - "loss": 69.6694, - "step": 116820 - }, - { - "epoch": 0.4720079832900366, - "grad_norm": 1419.693115234375, - "learning_rate": 3.211544332022348e-05, - "loss": 65.4239, - "step": 116830 - }, - { - "epoch": 0.47204838455540427, - "grad_norm": 518.5504760742188, - "learning_rate": 3.21120969621894e-05, - "loss": 60.0513, - "step": 116840 - }, - { - "epoch": 0.4720887858207719, - "grad_norm": 385.5600280761719, - "learning_rate": 3.210875046550132e-05, - "loss": 39.933, - "step": 116850 - }, - { - "epoch": 0.47212918708613955, - "grad_norm": 850.2272338867188, - "learning_rate": 3.210540383022449e-05, - "loss": 81.0095, - "step": 116860 - }, - { - "epoch": 0.4721695883515072, - "grad_norm": 443.19189453125, - "learning_rate": 3.210205705642416e-05, - "loss": 81.4084, - "step": 116870 - }, - { - "epoch": 0.4722099896168748, - "grad_norm": 490.591796875, - "learning_rate": 3.209871014416557e-05, - "loss": 44.2058, - "step": 116880 - }, - { - "epoch": 0.4722503908822424, - "grad_norm": 1418.0242919921875, - "learning_rate": 3.209536309351397e-05, - "loss": 80.6199, - "step": 116890 - }, - { - "epoch": 0.47229079214761005, - "grad_norm": 1384.6348876953125, - "learning_rate": 3.209201590453461e-05, - "loss": 90.8569, - "step": 116900 - }, - { - "epoch": 0.4723311934129777, - "grad_norm": 595.4918823242188, - "learning_rate": 3.208866857729276e-05, - "loss": 63.6417, - "step": 116910 - }, - { - "epoch": 0.47237159467834533, - "grad_norm": 689.3577880859375, - "learning_rate": 3.208532111185365e-05, - "loss": 57.5565, - "step": 116920 - }, - { - "epoch": 0.47241199594371297, - "grad_norm": 611.20751953125, - "learning_rate": 3.208197350828257e-05, - "loss": 80.1349, - "step": 116930 - }, - { - "epoch": 0.4724523972090806, - "grad_norm": 793.7510375976562, - "learning_rate": 3.207862576664477e-05, - "loss": 73.2373, - "step": 116940 - }, - { - "epoch": 0.4724927984744482, - "grad_norm": 777.3289184570312, - "learning_rate": 3.207527788700551e-05, - "loss": 74.5804, - "step": 116950 - }, - { - "epoch": 0.47253319973981583, - "grad_norm": 1934.6875, - "learning_rate": 3.207192986943006e-05, - "loss": 78.4664, - "step": 116960 - }, - { - "epoch": 0.4725736010051835, - "grad_norm": 573.094482421875, - "learning_rate": 3.206858171398371e-05, - "loss": 77.9668, - "step": 116970 - }, - { - "epoch": 0.4726140022705511, - "grad_norm": 290.15185546875, - "learning_rate": 3.206523342073172e-05, - "loss": 56.8567, - "step": 116980 - }, - { - "epoch": 0.47265440353591875, - "grad_norm": 603.3993530273438, - "learning_rate": 3.206188498973935e-05, - "loss": 62.5748, - "step": 116990 - }, - { - "epoch": 0.4726948048012864, - "grad_norm": 459.7473449707031, - "learning_rate": 3.205853642107192e-05, - "loss": 55.5894, - "step": 117000 - }, - { - "epoch": 0.47273520606665403, - "grad_norm": 2034.221923828125, - "learning_rate": 3.2055187714794674e-05, - "loss": 94.7735, - "step": 117010 - }, - { - "epoch": 0.4727756073320216, - "grad_norm": 556.3843383789062, - "learning_rate": 3.205183887097291e-05, - "loss": 49.9171, - "step": 117020 - }, - { - "epoch": 0.47281600859738926, - "grad_norm": 618.3563232421875, - "learning_rate": 3.2048489889671915e-05, - "loss": 64.9501, - "step": 117030 - }, - { - "epoch": 0.4728564098627569, - "grad_norm": 1050.610107421875, - "learning_rate": 3.204514077095699e-05, - "loss": 45.0024, - "step": 117040 - }, - { - "epoch": 0.47289681112812454, - "grad_norm": 676.1912841796875, - "learning_rate": 3.2041791514893416e-05, - "loss": 71.348, - "step": 117050 - }, - { - "epoch": 0.4729372123934922, - "grad_norm": 338.9596252441406, - "learning_rate": 3.2038442121546487e-05, - "loss": 60.3322, - "step": 117060 - }, - { - "epoch": 0.4729776136588598, - "grad_norm": 1104.4334716796875, - "learning_rate": 3.2035092590981514e-05, - "loss": 65.3458, - "step": 117070 - }, - { - "epoch": 0.4730180149242274, - "grad_norm": 977.1143188476562, - "learning_rate": 3.203174292326378e-05, - "loss": 65.3839, - "step": 117080 - }, - { - "epoch": 0.47305841618959504, - "grad_norm": 552.0875854492188, - "learning_rate": 3.20283931184586e-05, - "loss": 52.0979, - "step": 117090 - }, - { - "epoch": 0.4730988174549627, - "grad_norm": 615.712646484375, - "learning_rate": 3.202504317663128e-05, - "loss": 32.2021, - "step": 117100 - }, - { - "epoch": 0.4731392187203303, - "grad_norm": 550.6535034179688, - "learning_rate": 3.2021693097847125e-05, - "loss": 70.6825, - "step": 117110 - }, - { - "epoch": 0.47317961998569796, - "grad_norm": 610.3817749023438, - "learning_rate": 3.2018342882171445e-05, - "loss": 36.3148, - "step": 117120 - }, - { - "epoch": 0.4732200212510656, - "grad_norm": 755.9873657226562, - "learning_rate": 3.2014992529669566e-05, - "loss": 44.023, - "step": 117130 - }, - { - "epoch": 0.4732604225164332, - "grad_norm": 847.390625, - "learning_rate": 3.2011642040406784e-05, - "loss": 49.9786, - "step": 117140 - }, - { - "epoch": 0.47330082378180083, - "grad_norm": 644.954345703125, - "learning_rate": 3.200829141444844e-05, - "loss": 93.2963, - "step": 117150 - }, - { - "epoch": 0.47334122504716847, - "grad_norm": 646.0951538085938, - "learning_rate": 3.2004940651859844e-05, - "loss": 55.8603, - "step": 117160 - }, - { - "epoch": 0.4733816263125361, - "grad_norm": 1527.83447265625, - "learning_rate": 3.200158975270633e-05, - "loss": 69.3903, - "step": 117170 - }, - { - "epoch": 0.47342202757790375, - "grad_norm": 1161.49658203125, - "learning_rate": 3.1998238717053206e-05, - "loss": 63.7593, - "step": 117180 - }, - { - "epoch": 0.4734624288432714, - "grad_norm": 1293.619140625, - "learning_rate": 3.199488754496582e-05, - "loss": 63.1005, - "step": 117190 - }, - { - "epoch": 0.47350283010863903, - "grad_norm": 505.3819580078125, - "learning_rate": 3.19915362365095e-05, - "loss": 50.2402, - "step": 117200 - }, - { - "epoch": 0.4735432313740066, - "grad_norm": 648.815185546875, - "learning_rate": 3.198818479174959e-05, - "loss": 53.0908, - "step": 117210 - }, - { - "epoch": 0.47358363263937425, - "grad_norm": 601.2451171875, - "learning_rate": 3.198483321075141e-05, - "loss": 56.0472, - "step": 117220 - }, - { - "epoch": 0.4736240339047419, - "grad_norm": 401.19842529296875, - "learning_rate": 3.198148149358031e-05, - "loss": 58.7673, - "step": 117230 - }, - { - "epoch": 0.47366443517010953, - "grad_norm": 942.248046875, - "learning_rate": 3.197812964030164e-05, - "loss": 42.9004, - "step": 117240 - }, - { - "epoch": 0.4737048364354772, - "grad_norm": 1621.0218505859375, - "learning_rate": 3.1974777650980735e-05, - "loss": 59.7592, - "step": 117250 - }, - { - "epoch": 0.4737452377008448, - "grad_norm": 1157.42431640625, - "learning_rate": 3.197142552568295e-05, - "loss": 53.8315, - "step": 117260 - }, - { - "epoch": 0.4737856389662124, - "grad_norm": 2135.51611328125, - "learning_rate": 3.196807326447363e-05, - "loss": 74.3025, - "step": 117270 - }, - { - "epoch": 0.47382604023158004, - "grad_norm": 982.652587890625, - "learning_rate": 3.196472086741815e-05, - "loss": 56.9442, - "step": 117280 - }, - { - "epoch": 0.4738664414969477, - "grad_norm": 303.63623046875, - "learning_rate": 3.1961368334581844e-05, - "loss": 44.213, - "step": 117290 - }, - { - "epoch": 0.4739068427623153, - "grad_norm": 555.4284057617188, - "learning_rate": 3.195801566603007e-05, - "loss": 53.6509, - "step": 117300 - }, - { - "epoch": 0.47394724402768296, - "grad_norm": 946.455078125, - "learning_rate": 3.1954662861828204e-05, - "loss": 82.5156, - "step": 117310 - }, - { - "epoch": 0.4739876452930506, - "grad_norm": 1169.52685546875, - "learning_rate": 3.195130992204161e-05, - "loss": 57.4077, - "step": 117320 - }, - { - "epoch": 0.47402804655841824, - "grad_norm": 595.98291015625, - "learning_rate": 3.1947956846735645e-05, - "loss": 76.41, - "step": 117330 - }, - { - "epoch": 0.4740684478237858, - "grad_norm": 463.3590393066406, - "learning_rate": 3.194460363597569e-05, - "loss": 44.9645, - "step": 117340 - }, - { - "epoch": 0.47410884908915346, - "grad_norm": 443.7630615234375, - "learning_rate": 3.1941250289827104e-05, - "loss": 49.6183, - "step": 117350 - }, - { - "epoch": 0.4741492503545211, - "grad_norm": 2856.91259765625, - "learning_rate": 3.193789680835527e-05, - "loss": 59.4347, - "step": 117360 - }, - { - "epoch": 0.47418965161988874, - "grad_norm": 502.811279296875, - "learning_rate": 3.193454319162557e-05, - "loss": 52.4119, - "step": 117370 - }, - { - "epoch": 0.4742300528852564, - "grad_norm": 867.6959228515625, - "learning_rate": 3.193118943970338e-05, - "loss": 90.0065, - "step": 117380 - }, - { - "epoch": 0.474270454150624, - "grad_norm": 1995.4403076171875, - "learning_rate": 3.192783555265408e-05, - "loss": 68.9316, - "step": 117390 - }, - { - "epoch": 0.4743108554159916, - "grad_norm": 522.066162109375, - "learning_rate": 3.192448153054306e-05, - "loss": 41.8321, - "step": 117400 - }, - { - "epoch": 0.47435125668135925, - "grad_norm": 835.8606567382812, - "learning_rate": 3.1921127373435714e-05, - "loss": 67.2701, - "step": 117410 - }, - { - "epoch": 0.4743916579467269, - "grad_norm": 628.2836303710938, - "learning_rate": 3.191777308139742e-05, - "loss": 57.3309, - "step": 117420 - }, - { - "epoch": 0.4744320592120945, - "grad_norm": 582.8585205078125, - "learning_rate": 3.1914418654493586e-05, - "loss": 79.4639, - "step": 117430 - }, - { - "epoch": 0.47447246047746217, - "grad_norm": 452.00830078125, - "learning_rate": 3.191106409278959e-05, - "loss": 57.4894, - "step": 117440 - }, - { - "epoch": 0.4745128617428298, - "grad_norm": 867.4202270507812, - "learning_rate": 3.1907709396350844e-05, - "loss": 54.852, - "step": 117450 - }, - { - "epoch": 0.4745532630081974, - "grad_norm": 375.7869567871094, - "learning_rate": 3.190435456524275e-05, - "loss": 42.05, - "step": 117460 - }, - { - "epoch": 0.47459366427356503, - "grad_norm": 453.1134033203125, - "learning_rate": 3.190099959953071e-05, - "loss": 34.9451, - "step": 117470 - }, - { - "epoch": 0.47463406553893267, - "grad_norm": 614.6144409179688, - "learning_rate": 3.189764449928012e-05, - "loss": 86.9492, - "step": 117480 - }, - { - "epoch": 0.4746744668043003, - "grad_norm": 673.782470703125, - "learning_rate": 3.1894289264556417e-05, - "loss": 69.0601, - "step": 117490 - }, - { - "epoch": 0.47471486806966795, - "grad_norm": 541.9735107421875, - "learning_rate": 3.1890933895424976e-05, - "loss": 46.3959, - "step": 117500 - }, - { - "epoch": 0.4747552693350356, - "grad_norm": 379.82891845703125, - "learning_rate": 3.188757839195125e-05, - "loss": 44.2336, - "step": 117510 - }, - { - "epoch": 0.47479567060040323, - "grad_norm": 680.2620849609375, - "learning_rate": 3.1884222754200625e-05, - "loss": 54.9478, - "step": 117520 - }, - { - "epoch": 0.4748360718657708, - "grad_norm": 1045.8995361328125, - "learning_rate": 3.188086698223853e-05, - "loss": 62.2926, - "step": 117530 - }, - { - "epoch": 0.47487647313113845, - "grad_norm": 1066.854248046875, - "learning_rate": 3.1877511076130404e-05, - "loss": 52.8988, - "step": 117540 - }, - { - "epoch": 0.4749168743965061, - "grad_norm": 1133.3275146484375, - "learning_rate": 3.187415503594166e-05, - "loss": 50.2685, - "step": 117550 - }, - { - "epoch": 0.47495727566187373, - "grad_norm": 561.2869873046875, - "learning_rate": 3.1870798861737705e-05, - "loss": 71.2999, - "step": 117560 - }, - { - "epoch": 0.4749976769272414, - "grad_norm": 905.6627197265625, - "learning_rate": 3.1867442553584e-05, - "loss": 35.9215, - "step": 117570 - }, - { - "epoch": 0.475038078192609, - "grad_norm": 2765.510009765625, - "learning_rate": 3.186408611154597e-05, - "loss": 62.86, - "step": 117580 - }, - { - "epoch": 0.4750784794579766, - "grad_norm": 3249.31298828125, - "learning_rate": 3.186072953568905e-05, - "loss": 89.4793, - "step": 117590 - }, - { - "epoch": 0.47511888072334424, - "grad_norm": 1029.83642578125, - "learning_rate": 3.185737282607867e-05, - "loss": 52.9404, - "step": 117600 - }, - { - "epoch": 0.4751592819887119, - "grad_norm": 638.6604614257812, - "learning_rate": 3.1854015982780275e-05, - "loss": 70.1836, - "step": 117610 - }, - { - "epoch": 0.4751996832540795, - "grad_norm": 866.2382202148438, - "learning_rate": 3.185065900585931e-05, - "loss": 60.6042, - "step": 117620 - }, - { - "epoch": 0.47524008451944716, - "grad_norm": 967.8104858398438, - "learning_rate": 3.184730189538122e-05, - "loss": 67.1052, - "step": 117630 - }, - { - "epoch": 0.4752804857848148, - "grad_norm": 990.4212646484375, - "learning_rate": 3.1843944651411456e-05, - "loss": 44.4866, - "step": 117640 - }, - { - "epoch": 0.47532088705018244, - "grad_norm": 528.0682983398438, - "learning_rate": 3.184058727401546e-05, - "loss": 51.6501, - "step": 117650 - }, - { - "epoch": 0.47536128831555, - "grad_norm": 936.557373046875, - "learning_rate": 3.1837229763258705e-05, - "loss": 97.2483, - "step": 117660 - }, - { - "epoch": 0.47540168958091766, - "grad_norm": 870.8411865234375, - "learning_rate": 3.183387211920663e-05, - "loss": 81.8832, - "step": 117670 - }, - { - "epoch": 0.4754420908462853, - "grad_norm": 628.0894165039062, - "learning_rate": 3.183051434192471e-05, - "loss": 44.0012, - "step": 117680 - }, - { - "epoch": 0.47548249211165294, - "grad_norm": 423.3321228027344, - "learning_rate": 3.1827156431478386e-05, - "loss": 77.5057, - "step": 117690 - }, - { - "epoch": 0.4755228933770206, - "grad_norm": 483.94091796875, - "learning_rate": 3.1823798387933134e-05, - "loss": 39.6376, - "step": 117700 - }, - { - "epoch": 0.4755632946423882, - "grad_norm": 791.345703125, - "learning_rate": 3.182044021135442e-05, - "loss": 55.5527, - "step": 117710 - }, - { - "epoch": 0.4756036959077558, - "grad_norm": 498.8603210449219, - "learning_rate": 3.181708190180771e-05, - "loss": 41.6617, - "step": 117720 - }, - { - "epoch": 0.47564409717312345, - "grad_norm": 771.0953979492188, - "learning_rate": 3.181372345935848e-05, - "loss": 48.7883, - "step": 117730 - }, - { - "epoch": 0.4756844984384911, - "grad_norm": 661.4583740234375, - "learning_rate": 3.1810364884072205e-05, - "loss": 72.9009, - "step": 117740 - }, - { - "epoch": 0.47572489970385873, - "grad_norm": 632.1604614257812, - "learning_rate": 3.180700617601436e-05, - "loss": 59.3145, - "step": 117750 - }, - { - "epoch": 0.47576530096922637, - "grad_norm": 806.9335327148438, - "learning_rate": 3.180364733525043e-05, - "loss": 41.7695, - "step": 117760 - }, - { - "epoch": 0.475805702234594, - "grad_norm": 776.6650390625, - "learning_rate": 3.1800288361845883e-05, - "loss": 62.7163, - "step": 117770 - }, - { - "epoch": 0.4758461034999616, - "grad_norm": 601.4182739257812, - "learning_rate": 3.179692925586622e-05, - "loss": 54.4891, - "step": 117780 - }, - { - "epoch": 0.47588650476532923, - "grad_norm": 907.4159545898438, - "learning_rate": 3.179357001737692e-05, - "loss": 54.7433, - "step": 117790 - }, - { - "epoch": 0.4759269060306969, - "grad_norm": 792.1842651367188, - "learning_rate": 3.179021064644347e-05, - "loss": 73.5924, - "step": 117800 - }, - { - "epoch": 0.4759673072960645, - "grad_norm": 641.330810546875, - "learning_rate": 3.178685114313137e-05, - "loss": 35.0172, - "step": 117810 - }, - { - "epoch": 0.47600770856143215, - "grad_norm": 272.5650939941406, - "learning_rate": 3.178349150750612e-05, - "loss": 77.2498, - "step": 117820 - }, - { - "epoch": 0.4760481098267998, - "grad_norm": 917.3323974609375, - "learning_rate": 3.1780131739633204e-05, - "loss": 66.6369, - "step": 117830 - }, - { - "epoch": 0.47608851109216743, - "grad_norm": 924.8410034179688, - "learning_rate": 3.177677183957813e-05, - "loss": 50.9081, - "step": 117840 - }, - { - "epoch": 0.476128912357535, - "grad_norm": 539.0849609375, - "learning_rate": 3.17734118074064e-05, - "loss": 43.3925, - "step": 117850 - }, - { - "epoch": 0.47616931362290266, - "grad_norm": 1520.1893310546875, - "learning_rate": 3.177005164318353e-05, - "loss": 52.2887, - "step": 117860 - }, - { - "epoch": 0.4762097148882703, - "grad_norm": 0.0, - "learning_rate": 3.1766691346974996e-05, - "loss": 31.6033, - "step": 117870 - }, - { - "epoch": 0.47625011615363794, - "grad_norm": 0.0, - "learning_rate": 3.176333091884635e-05, - "loss": 59.7049, - "step": 117880 - }, - { - "epoch": 0.4762905174190056, - "grad_norm": 1278.6094970703125, - "learning_rate": 3.175997035886307e-05, - "loss": 56.4383, - "step": 117890 - }, - { - "epoch": 0.4763309186843732, - "grad_norm": 1077.4521484375, - "learning_rate": 3.1756609667090696e-05, - "loss": 70.2687, - "step": 117900 - }, - { - "epoch": 0.4763713199497408, - "grad_norm": 842.562255859375, - "learning_rate": 3.175324884359474e-05, - "loss": 112.2075, - "step": 117910 - }, - { - "epoch": 0.47641172121510844, - "grad_norm": 310.8381042480469, - "learning_rate": 3.174988788844072e-05, - "loss": 48.3912, - "step": 117920 - }, - { - "epoch": 0.4764521224804761, - "grad_norm": 1312.25244140625, - "learning_rate": 3.1746526801694156e-05, - "loss": 52.8015, - "step": 117930 - }, - { - "epoch": 0.4764925237458437, - "grad_norm": 885.5872802734375, - "learning_rate": 3.174316558342059e-05, - "loss": 78.4678, - "step": 117940 - }, - { - "epoch": 0.47653292501121136, - "grad_norm": 629.9498291015625, - "learning_rate": 3.173980423368553e-05, - "loss": 57.0286, - "step": 117950 - }, - { - "epoch": 0.476573326276579, - "grad_norm": 622.9977416992188, - "learning_rate": 3.173644275255451e-05, - "loss": 61.0304, - "step": 117960 - }, - { - "epoch": 0.47661372754194664, - "grad_norm": 772.329833984375, - "learning_rate": 3.173308114009308e-05, - "loss": 34.8157, - "step": 117970 - }, - { - "epoch": 0.4766541288073142, - "grad_norm": 884.4105834960938, - "learning_rate": 3.1729719396366765e-05, - "loss": 44.0258, - "step": 117980 - }, - { - "epoch": 0.47669453007268187, - "grad_norm": 1267.8201904296875, - "learning_rate": 3.172635752144111e-05, - "loss": 53.9944, - "step": 117990 - }, - { - "epoch": 0.4767349313380495, - "grad_norm": 1093.1011962890625, - "learning_rate": 3.172299551538164e-05, - "loss": 49.2085, - "step": 118000 - }, - { - "epoch": 0.47677533260341715, - "grad_norm": 345.0997009277344, - "learning_rate": 3.1719633378253924e-05, - "loss": 48.0018, - "step": 118010 - }, - { - "epoch": 0.4768157338687848, - "grad_norm": 610.406494140625, - "learning_rate": 3.171627111012349e-05, - "loss": 52.8992, - "step": 118020 - }, - { - "epoch": 0.4768561351341524, - "grad_norm": 1354.5128173828125, - "learning_rate": 3.1712908711055897e-05, - "loss": 72.9081, - "step": 118030 - }, - { - "epoch": 0.47689653639952, - "grad_norm": 446.1787109375, - "learning_rate": 3.170954618111669e-05, - "loss": 63.6848, - "step": 118040 - }, - { - "epoch": 0.47693693766488765, - "grad_norm": 704.4627075195312, - "learning_rate": 3.170618352037142e-05, - "loss": 58.9005, - "step": 118050 - }, - { - "epoch": 0.4769773389302553, - "grad_norm": 481.2560119628906, - "learning_rate": 3.170282072888566e-05, - "loss": 46.0265, - "step": 118060 - }, - { - "epoch": 0.47701774019562293, - "grad_norm": 560.6388549804688, - "learning_rate": 3.169945780672495e-05, - "loss": 42.9609, - "step": 118070 - }, - { - "epoch": 0.47705814146099057, - "grad_norm": 2451.45556640625, - "learning_rate": 3.169609475395486e-05, - "loss": 84.6672, - "step": 118080 - }, - { - "epoch": 0.4770985427263582, - "grad_norm": 458.1382141113281, - "learning_rate": 3.169273157064097e-05, - "loss": 59.9135, - "step": 118090 - }, - { - "epoch": 0.4771389439917258, - "grad_norm": 736.26416015625, - "learning_rate": 3.168936825684882e-05, - "loss": 42.6383, - "step": 118100 - }, - { - "epoch": 0.47717934525709343, - "grad_norm": 3540.630126953125, - "learning_rate": 3.1686004812644e-05, - "loss": 58.3743, - "step": 118110 - }, - { - "epoch": 0.4772197465224611, - "grad_norm": 609.9805908203125, - "learning_rate": 3.1682641238092064e-05, - "loss": 44.9807, - "step": 118120 - }, - { - "epoch": 0.4772601477878287, - "grad_norm": 864.6240234375, - "learning_rate": 3.16792775332586e-05, - "loss": 91.4741, - "step": 118130 - }, - { - "epoch": 0.47730054905319635, - "grad_norm": 804.3956298828125, - "learning_rate": 3.167591369820918e-05, - "loss": 64.7296, - "step": 118140 - }, - { - "epoch": 0.477340950318564, - "grad_norm": 365.2998962402344, - "learning_rate": 3.1672549733009396e-05, - "loss": 50.6288, - "step": 118150 - }, - { - "epoch": 0.47738135158393163, - "grad_norm": 402.1658630371094, - "learning_rate": 3.166918563772481e-05, - "loss": 53.5217, - "step": 118160 - }, - { - "epoch": 0.4774217528492992, - "grad_norm": 668.7144165039062, - "learning_rate": 3.1665821412421015e-05, - "loss": 54.7511, - "step": 118170 - }, - { - "epoch": 0.47746215411466686, - "grad_norm": 919.4798583984375, - "learning_rate": 3.1662457057163604e-05, - "loss": 47.6506, - "step": 118180 - }, - { - "epoch": 0.4775025553800345, - "grad_norm": 1031.98193359375, - "learning_rate": 3.165909257201816e-05, - "loss": 71.545, - "step": 118190 - }, - { - "epoch": 0.47754295664540214, - "grad_norm": 390.0589294433594, - "learning_rate": 3.1655727957050285e-05, - "loss": 49.304, - "step": 118200 - }, - { - "epoch": 0.4775833579107698, - "grad_norm": 1422.1484375, - "learning_rate": 3.165236321232557e-05, - "loss": 52.9677, - "step": 118210 - }, - { - "epoch": 0.4776237591761374, - "grad_norm": 846.0404052734375, - "learning_rate": 3.1648998337909594e-05, - "loss": 62.2265, - "step": 118220 - }, - { - "epoch": 0.477664160441505, - "grad_norm": 408.1695556640625, - "learning_rate": 3.164563333386798e-05, - "loss": 67.1796, - "step": 118230 - }, - { - "epoch": 0.47770456170687264, - "grad_norm": 256.5215759277344, - "learning_rate": 3.1642268200266317e-05, - "loss": 40.5587, - "step": 118240 - }, - { - "epoch": 0.4777449629722403, - "grad_norm": 1751.73486328125, - "learning_rate": 3.163890293717022e-05, - "loss": 52.8397, - "step": 118250 - }, - { - "epoch": 0.4777853642376079, - "grad_norm": 564.2115478515625, - "learning_rate": 3.1635537544645296e-05, - "loss": 67.6077, - "step": 118260 - }, - { - "epoch": 0.47782576550297556, - "grad_norm": 1232.5323486328125, - "learning_rate": 3.163217202275715e-05, - "loss": 49.1063, - "step": 118270 - }, - { - "epoch": 0.4778661667683432, - "grad_norm": 3210.296142578125, - "learning_rate": 3.162880637157139e-05, - "loss": 69.347, - "step": 118280 - }, - { - "epoch": 0.47790656803371084, - "grad_norm": 674.1715087890625, - "learning_rate": 3.1625440591153645e-05, - "loss": 63.0254, - "step": 118290 - }, - { - "epoch": 0.47794696929907843, - "grad_norm": 369.24017333984375, - "learning_rate": 3.162207468156952e-05, - "loss": 44.4211, - "step": 118300 - }, - { - "epoch": 0.47798737056444607, - "grad_norm": 634.0050659179688, - "learning_rate": 3.161870864288464e-05, - "loss": 58.934, - "step": 118310 - }, - { - "epoch": 0.4780277718298137, - "grad_norm": 1877.2578125, - "learning_rate": 3.1615342475164636e-05, - "loss": 62.1043, - "step": 118320 - }, - { - "epoch": 0.47806817309518135, - "grad_norm": 550.0879516601562, - "learning_rate": 3.161197617847511e-05, - "loss": 41.5093, - "step": 118330 - }, - { - "epoch": 0.478108574360549, - "grad_norm": 544.1702270507812, - "learning_rate": 3.160860975288171e-05, - "loss": 59.2241, - "step": 118340 - }, - { - "epoch": 0.47814897562591663, - "grad_norm": 316.4408264160156, - "learning_rate": 3.1605243198450066e-05, - "loss": 60.8573, - "step": 118350 - }, - { - "epoch": 0.4781893768912842, - "grad_norm": 412.7470703125, - "learning_rate": 3.16018765152458e-05, - "loss": 47.1499, - "step": 118360 - }, - { - "epoch": 0.47822977815665185, - "grad_norm": 998.5579833984375, - "learning_rate": 3.159850970333456e-05, - "loss": 62.5579, - "step": 118370 - }, - { - "epoch": 0.4782701794220195, - "grad_norm": 425.9685974121094, - "learning_rate": 3.159514276278197e-05, - "loss": 55.9611, - "step": 118380 - }, - { - "epoch": 0.47831058068738713, - "grad_norm": 393.94635009765625, - "learning_rate": 3.1591775693653674e-05, - "loss": 50.0992, - "step": 118390 - }, - { - "epoch": 0.4783509819527548, - "grad_norm": 1880.3455810546875, - "learning_rate": 3.158840849601532e-05, - "loss": 65.1443, - "step": 118400 - }, - { - "epoch": 0.4783913832181224, - "grad_norm": 503.0107421875, - "learning_rate": 3.1585041169932545e-05, - "loss": 44.3258, - "step": 118410 - }, - { - "epoch": 0.47843178448349, - "grad_norm": 0.0, - "learning_rate": 3.1581673715471006e-05, - "loss": 53.8743, - "step": 118420 - }, - { - "epoch": 0.47847218574885764, - "grad_norm": 608.0613403320312, - "learning_rate": 3.157830613269635e-05, - "loss": 54.61, - "step": 118430 - }, - { - "epoch": 0.4785125870142253, - "grad_norm": 427.8965148925781, - "learning_rate": 3.157493842167423e-05, - "loss": 63.0965, - "step": 118440 - }, - { - "epoch": 0.4785529882795929, - "grad_norm": 507.14190673828125, - "learning_rate": 3.15715705824703e-05, - "loss": 50.7432, - "step": 118450 - }, - { - "epoch": 0.47859338954496056, - "grad_norm": 983.0687255859375, - "learning_rate": 3.156820261515022e-05, - "loss": 87.0616, - "step": 118460 - }, - { - "epoch": 0.4786337908103282, - "grad_norm": 1234.0418701171875, - "learning_rate": 3.1564834519779647e-05, - "loss": 48.0925, - "step": 118470 - }, - { - "epoch": 0.47867419207569584, - "grad_norm": 935.4703369140625, - "learning_rate": 3.156146629642425e-05, - "loss": 53.1847, - "step": 118480 - }, - { - "epoch": 0.4787145933410634, - "grad_norm": 820.0416259765625, - "learning_rate": 3.155809794514968e-05, - "loss": 77.3875, - "step": 118490 - }, - { - "epoch": 0.47875499460643106, - "grad_norm": 877.6055297851562, - "learning_rate": 3.155472946602162e-05, - "loss": 59.0853, - "step": 118500 - }, - { - "epoch": 0.4787953958717987, - "grad_norm": 979.28662109375, - "learning_rate": 3.155136085910573e-05, - "loss": 60.1543, - "step": 118510 - }, - { - "epoch": 0.47883579713716634, - "grad_norm": 702.0006713867188, - "learning_rate": 3.15479921244677e-05, - "loss": 44.0273, - "step": 118520 - }, - { - "epoch": 0.478876198402534, - "grad_norm": 1405.5360107421875, - "learning_rate": 3.1544623262173176e-05, - "loss": 63.5543, - "step": 118530 - }, - { - "epoch": 0.4789165996679016, - "grad_norm": 971.0574340820312, - "learning_rate": 3.1541254272287865e-05, - "loss": 55.8479, - "step": 118540 - }, - { - "epoch": 0.4789570009332692, - "grad_norm": 1137.547119140625, - "learning_rate": 3.153788515487742e-05, - "loss": 66.1714, - "step": 118550 - }, - { - "epoch": 0.47899740219863685, - "grad_norm": 813.9231567382812, - "learning_rate": 3.153451591000756e-05, - "loss": 102.5478, - "step": 118560 - }, - { - "epoch": 0.4790378034640045, - "grad_norm": 870.7437133789062, - "learning_rate": 3.153114653774393e-05, - "loss": 81.2061, - "step": 118570 - }, - { - "epoch": 0.4790782047293721, - "grad_norm": 984.029296875, - "learning_rate": 3.152777703815223e-05, - "loss": 64.9325, - "step": 118580 - }, - { - "epoch": 0.47911860599473977, - "grad_norm": 853.7675170898438, - "learning_rate": 3.152440741129817e-05, - "loss": 69.4327, - "step": 118590 - }, - { - "epoch": 0.4791590072601074, - "grad_norm": 1823.9547119140625, - "learning_rate": 3.152103765724743e-05, - "loss": 51.6635, - "step": 118600 - }, - { - "epoch": 0.479199408525475, - "grad_norm": 1002.6013793945312, - "learning_rate": 3.1517667776065696e-05, - "loss": 53.4132, - "step": 118610 - }, - { - "epoch": 0.47923980979084263, - "grad_norm": 1199.445556640625, - "learning_rate": 3.151429776781868e-05, - "loss": 57.5089, - "step": 118620 - }, - { - "epoch": 0.47928021105621027, - "grad_norm": 545.897705078125, - "learning_rate": 3.151092763257206e-05, - "loss": 66.9297, - "step": 118630 - }, - { - "epoch": 0.4793206123215779, - "grad_norm": 1726.234375, - "learning_rate": 3.150755737039157e-05, - "loss": 53.6566, - "step": 118640 - }, - { - "epoch": 0.47936101358694555, - "grad_norm": 719.9454956054688, - "learning_rate": 3.150418698134289e-05, - "loss": 48.0167, - "step": 118650 - }, - { - "epoch": 0.4794014148523132, - "grad_norm": 864.4921264648438, - "learning_rate": 3.150081646549174e-05, - "loss": 84.6051, - "step": 118660 - }, - { - "epoch": 0.47944181611768083, - "grad_norm": 824.414306640625, - "learning_rate": 3.149744582290383e-05, - "loss": 53.0983, - "step": 118670 - }, - { - "epoch": 0.4794822173830484, - "grad_norm": 471.1961975097656, - "learning_rate": 3.149407505364486e-05, - "loss": 69.1806, - "step": 118680 - }, - { - "epoch": 0.47952261864841605, - "grad_norm": 684.9292602539062, - "learning_rate": 3.149070415778056e-05, - "loss": 43.8025, - "step": 118690 - }, - { - "epoch": 0.4795630199137837, - "grad_norm": 507.1294860839844, - "learning_rate": 3.148733313537664e-05, - "loss": 53.6068, - "step": 118700 - }, - { - "epoch": 0.47960342117915133, - "grad_norm": 1452.51953125, - "learning_rate": 3.148396198649882e-05, - "loss": 62.2304, - "step": 118710 - }, - { - "epoch": 0.479643822444519, - "grad_norm": 568.4696655273438, - "learning_rate": 3.148059071121282e-05, - "loss": 69.3875, - "step": 118720 - }, - { - "epoch": 0.4796842237098866, - "grad_norm": 527.501953125, - "learning_rate": 3.147721930958437e-05, - "loss": 53.4141, - "step": 118730 - }, - { - "epoch": 0.4797246249752542, - "grad_norm": 1219.31640625, - "learning_rate": 3.14738477816792e-05, - "loss": 58.4269, - "step": 118740 - }, - { - "epoch": 0.47976502624062184, - "grad_norm": 1039.8255615234375, - "learning_rate": 3.147047612756302e-05, - "loss": 39.0244, - "step": 118750 - }, - { - "epoch": 0.4798054275059895, - "grad_norm": 1056.8941650390625, - "learning_rate": 3.146710434730159e-05, - "loss": 54.2339, - "step": 118760 - }, - { - "epoch": 0.4798458287713571, - "grad_norm": 756.7150268554688, - "learning_rate": 3.1463732440960625e-05, - "loss": 53.7804, - "step": 118770 - }, - { - "epoch": 0.47988623003672476, - "grad_norm": 990.2318725585938, - "learning_rate": 3.1460360408605866e-05, - "loss": 45.0402, - "step": 118780 - }, - { - "epoch": 0.4799266313020924, - "grad_norm": 782.805908203125, - "learning_rate": 3.145698825030307e-05, - "loss": 52.3285, - "step": 118790 - }, - { - "epoch": 0.47996703256746004, - "grad_norm": 781.7633666992188, - "learning_rate": 3.145361596611795e-05, - "loss": 53.3584, - "step": 118800 - }, - { - "epoch": 0.4800074338328276, - "grad_norm": 429.9420166015625, - "learning_rate": 3.1450243556116266e-05, - "loss": 61.5989, - "step": 118810 - }, - { - "epoch": 0.48004783509819526, - "grad_norm": 1086.17822265625, - "learning_rate": 3.144687102036376e-05, - "loss": 44.8307, - "step": 118820 - }, - { - "epoch": 0.4800882363635629, - "grad_norm": 858.8822631835938, - "learning_rate": 3.1443498358926186e-05, - "loss": 63.265, - "step": 118830 - }, - { - "epoch": 0.48012863762893054, - "grad_norm": 530.9268188476562, - "learning_rate": 3.1440125571869306e-05, - "loss": 45.8561, - "step": 118840 - }, - { - "epoch": 0.4801690388942982, - "grad_norm": 426.1288146972656, - "learning_rate": 3.143675265925885e-05, - "loss": 52.3246, - "step": 118850 - }, - { - "epoch": 0.4802094401596658, - "grad_norm": 374.76849365234375, - "learning_rate": 3.1433379621160586e-05, - "loss": 44.3146, - "step": 118860 - }, - { - "epoch": 0.4802498414250334, - "grad_norm": 727.5894165039062, - "learning_rate": 3.143000645764028e-05, - "loss": 45.2602, - "step": 118870 - }, - { - "epoch": 0.48029024269040105, - "grad_norm": 914.193603515625, - "learning_rate": 3.142663316876368e-05, - "loss": 67.587, - "step": 118880 - }, - { - "epoch": 0.4803306439557687, - "grad_norm": 119.14258575439453, - "learning_rate": 3.1423259754596576e-05, - "loss": 48.5501, - "step": 118890 - }, - { - "epoch": 0.48037104522113633, - "grad_norm": 947.4161376953125, - "learning_rate": 3.1419886215204694e-05, - "loss": 56.8426, - "step": 118900 - }, - { - "epoch": 0.48041144648650397, - "grad_norm": 593.0452270507812, - "learning_rate": 3.1416512550653835e-05, - "loss": 70.3968, - "step": 118910 - }, - { - "epoch": 0.4804518477518716, - "grad_norm": 757.6504516601562, - "learning_rate": 3.141313876100976e-05, - "loss": 59.1005, - "step": 118920 - }, - { - "epoch": 0.4804922490172392, - "grad_norm": 3180.365966796875, - "learning_rate": 3.1409764846338245e-05, - "loss": 64.8498, - "step": 118930 - }, - { - "epoch": 0.48053265028260683, - "grad_norm": 566.63671875, - "learning_rate": 3.140639080670507e-05, - "loss": 64.9276, - "step": 118940 - }, - { - "epoch": 0.4805730515479745, - "grad_norm": 714.4104614257812, - "learning_rate": 3.140301664217599e-05, - "loss": 59.2069, - "step": 118950 - }, - { - "epoch": 0.4806134528133421, - "grad_norm": 575.2021484375, - "learning_rate": 3.139964235281682e-05, - "loss": 43.4193, - "step": 118960 - }, - { - "epoch": 0.48065385407870975, - "grad_norm": 1377.7861328125, - "learning_rate": 3.1396267938693316e-05, - "loss": 63.2737, - "step": 118970 - }, - { - "epoch": 0.4806942553440774, - "grad_norm": 661.6658935546875, - "learning_rate": 3.1392893399871295e-05, - "loss": 60.5852, - "step": 118980 - }, - { - "epoch": 0.48073465660944503, - "grad_norm": 513.0794067382812, - "learning_rate": 3.1389518736416507e-05, - "loss": 50.5535, - "step": 118990 - }, - { - "epoch": 0.4807750578748126, - "grad_norm": 1068.8321533203125, - "learning_rate": 3.138614394839476e-05, - "loss": 56.0899, - "step": 119000 - }, - { - "epoch": 0.48081545914018026, - "grad_norm": 346.4224548339844, - "learning_rate": 3.138276903587186e-05, - "loss": 62.7875, - "step": 119010 - }, - { - "epoch": 0.4808558604055479, - "grad_norm": 591.9736328125, - "learning_rate": 3.137939399891359e-05, - "loss": 50.5829, - "step": 119020 - }, - { - "epoch": 0.48089626167091554, - "grad_norm": 762.9810791015625, - "learning_rate": 3.1376018837585747e-05, - "loss": 66.904, - "step": 119030 - }, - { - "epoch": 0.4809366629362832, - "grad_norm": 700.6834106445312, - "learning_rate": 3.137264355195413e-05, - "loss": 66.1691, - "step": 119040 - }, - { - "epoch": 0.4809770642016508, - "grad_norm": 315.9108581542969, - "learning_rate": 3.1369268142084556e-05, - "loss": 49.7968, - "step": 119050 - }, - { - "epoch": 0.4810174654670184, - "grad_norm": 0.0, - "learning_rate": 3.136589260804282e-05, - "loss": 47.6848, - "step": 119060 - }, - { - "epoch": 0.48105786673238604, - "grad_norm": 395.9146423339844, - "learning_rate": 3.1362516949894725e-05, - "loss": 57.0576, - "step": 119070 - }, - { - "epoch": 0.4810982679977537, - "grad_norm": 476.3524475097656, - "learning_rate": 3.135914116770609e-05, - "loss": 58.1471, - "step": 119080 - }, - { - "epoch": 0.4811386692631213, - "grad_norm": 626.7835083007812, - "learning_rate": 3.135576526154272e-05, - "loss": 39.7096, - "step": 119090 - }, - { - "epoch": 0.48117907052848896, - "grad_norm": 1119.859619140625, - "learning_rate": 3.135238923147043e-05, - "loss": 87.1097, - "step": 119100 - }, - { - "epoch": 0.4812194717938566, - "grad_norm": 804.6915283203125, - "learning_rate": 3.1349013077555045e-05, - "loss": 36.7657, - "step": 119110 - }, - { - "epoch": 0.48125987305922424, - "grad_norm": 769.036865234375, - "learning_rate": 3.134563679986238e-05, - "loss": 40.638, - "step": 119120 - }, - { - "epoch": 0.4813002743245918, - "grad_norm": 191.26966857910156, - "learning_rate": 3.134226039845827e-05, - "loss": 50.2752, - "step": 119130 - }, - { - "epoch": 0.48134067558995947, - "grad_norm": 778.8662109375, - "learning_rate": 3.1338883873408516e-05, - "loss": 58.7929, - "step": 119140 - }, - { - "epoch": 0.4813810768553271, - "grad_norm": 514.7074584960938, - "learning_rate": 3.133550722477896e-05, - "loss": 66.8902, - "step": 119150 - }, - { - "epoch": 0.48142147812069475, - "grad_norm": 355.43096923828125, - "learning_rate": 3.133213045263543e-05, - "loss": 58.968, - "step": 119160 - }, - { - "epoch": 0.4814618793860624, - "grad_norm": 3042.458984375, - "learning_rate": 3.132875355704376e-05, - "loss": 88.0314, - "step": 119170 - }, - { - "epoch": 0.48150228065143, - "grad_norm": 656.7515869140625, - "learning_rate": 3.1325376538069776e-05, - "loss": 68.5067, - "step": 119180 - }, - { - "epoch": 0.4815426819167976, - "grad_norm": 495.6517028808594, - "learning_rate": 3.132199939577932e-05, - "loss": 73.5789, - "step": 119190 - }, - { - "epoch": 0.48158308318216525, - "grad_norm": 1507.7938232421875, - "learning_rate": 3.1318622130238236e-05, - "loss": 55.6776, - "step": 119200 - }, - { - "epoch": 0.4816234844475329, - "grad_norm": 802.6456298828125, - "learning_rate": 3.1315244741512356e-05, - "loss": 49.8764, - "step": 119210 - }, - { - "epoch": 0.48166388571290053, - "grad_norm": 814.6461181640625, - "learning_rate": 3.131186722966753e-05, - "loss": 51.0948, - "step": 119220 - }, - { - "epoch": 0.48170428697826817, - "grad_norm": 826.6341552734375, - "learning_rate": 3.1308489594769605e-05, - "loss": 54.8559, - "step": 119230 - }, - { - "epoch": 0.4817446882436358, - "grad_norm": 536.5158081054688, - "learning_rate": 3.1305111836884425e-05, - "loss": 49.7287, - "step": 119240 - }, - { - "epoch": 0.4817850895090034, - "grad_norm": 1080.1739501953125, - "learning_rate": 3.130173395607785e-05, - "loss": 96.4672, - "step": 119250 - }, - { - "epoch": 0.48182549077437103, - "grad_norm": 875.5479125976562, - "learning_rate": 3.129835595241571e-05, - "loss": 41.3818, - "step": 119260 - }, - { - "epoch": 0.4818658920397387, - "grad_norm": 977.9671020507812, - "learning_rate": 3.129497782596389e-05, - "loss": 44.1907, - "step": 119270 - }, - { - "epoch": 0.4819062933051063, - "grad_norm": 469.0617370605469, - "learning_rate": 3.129159957678824e-05, - "loss": 50.5765, - "step": 119280 - }, - { - "epoch": 0.48194669457047395, - "grad_norm": 851.140869140625, - "learning_rate": 3.128822120495462e-05, - "loss": 50.6099, - "step": 119290 - }, - { - "epoch": 0.4819870958358416, - "grad_norm": 608.3344116210938, - "learning_rate": 3.1284842710528876e-05, - "loss": 72.0118, - "step": 119300 - }, - { - "epoch": 0.48202749710120923, - "grad_norm": 380.56195068359375, - "learning_rate": 3.128146409357689e-05, - "loss": 83.0865, - "step": 119310 - }, - { - "epoch": 0.4820678983665768, - "grad_norm": 601.8255615234375, - "learning_rate": 3.127808535416454e-05, - "loss": 48.3086, - "step": 119320 - }, - { - "epoch": 0.48210829963194446, - "grad_norm": 949.0869140625, - "learning_rate": 3.127470649235768e-05, - "loss": 94.7472, - "step": 119330 - }, - { - "epoch": 0.4821487008973121, - "grad_norm": 398.5122985839844, - "learning_rate": 3.1271327508222174e-05, - "loss": 73.9752, - "step": 119340 - }, - { - "epoch": 0.48218910216267974, - "grad_norm": 683.5873413085938, - "learning_rate": 3.126794840182392e-05, - "loss": 65.0503, - "step": 119350 - }, - { - "epoch": 0.4822295034280474, - "grad_norm": 574.1619873046875, - "learning_rate": 3.126456917322878e-05, - "loss": 55.5617, - "step": 119360 - }, - { - "epoch": 0.482269904693415, - "grad_norm": 918.1862182617188, - "learning_rate": 3.1261189822502644e-05, - "loss": 48.1121, - "step": 119370 - }, - { - "epoch": 0.4823103059587826, - "grad_norm": 1102.67919921875, - "learning_rate": 3.125781034971139e-05, - "loss": 79.4511, - "step": 119380 - }, - { - "epoch": 0.48235070722415024, - "grad_norm": 865.8079223632812, - "learning_rate": 3.125443075492089e-05, - "loss": 58.5068, - "step": 119390 - }, - { - "epoch": 0.4823911084895179, - "grad_norm": 644.5775756835938, - "learning_rate": 3.1251051038197055e-05, - "loss": 53.9188, - "step": 119400 - }, - { - "epoch": 0.4824315097548855, - "grad_norm": 472.7004699707031, - "learning_rate": 3.124767119960576e-05, - "loss": 46.8185, - "step": 119410 - }, - { - "epoch": 0.48247191102025316, - "grad_norm": 988.4273681640625, - "learning_rate": 3.1244291239212896e-05, - "loss": 64.0862, - "step": 119420 - }, - { - "epoch": 0.4825123122856208, - "grad_norm": 320.8127746582031, - "learning_rate": 3.124091115708436e-05, - "loss": 66.8379, - "step": 119430 - }, - { - "epoch": 0.48255271355098844, - "grad_norm": 703.6538696289062, - "learning_rate": 3.123753095328604e-05, - "loss": 74.7884, - "step": 119440 - }, - { - "epoch": 0.48259311481635603, - "grad_norm": 741.8955078125, - "learning_rate": 3.123415062788385e-05, - "loss": 47.4477, - "step": 119450 - }, - { - "epoch": 0.48263351608172367, - "grad_norm": 764.6326904296875, - "learning_rate": 3.123077018094369e-05, - "loss": 48.5175, - "step": 119460 - }, - { - "epoch": 0.4826739173470913, - "grad_norm": 436.60516357421875, - "learning_rate": 3.122738961253145e-05, - "loss": 66.414, - "step": 119470 - }, - { - "epoch": 0.48271431861245895, - "grad_norm": 369.33343505859375, - "learning_rate": 3.1224008922713044e-05, - "loss": 71.549, - "step": 119480 - }, - { - "epoch": 0.4827547198778266, - "grad_norm": 921.5698852539062, - "learning_rate": 3.122062811155438e-05, - "loss": 50.5309, - "step": 119490 - }, - { - "epoch": 0.48279512114319423, - "grad_norm": 597.4305419921875, - "learning_rate": 3.121724717912138e-05, - "loss": 50.5436, - "step": 119500 - }, - { - "epoch": 0.4828355224085618, - "grad_norm": 1035.259765625, - "learning_rate": 3.121386612547993e-05, - "loss": 31.3353, - "step": 119510 - }, - { - "epoch": 0.48287592367392945, - "grad_norm": 1129.0030517578125, - "learning_rate": 3.121048495069596e-05, - "loss": 74.6693, - "step": 119520 - }, - { - "epoch": 0.4829163249392971, - "grad_norm": 1025.2454833984375, - "learning_rate": 3.1207103654835394e-05, - "loss": 102.4706, - "step": 119530 - }, - { - "epoch": 0.48295672620466473, - "grad_norm": 379.8052062988281, - "learning_rate": 3.120372223796415e-05, - "loss": 49.5203, - "step": 119540 - }, - { - "epoch": 0.4829971274700324, - "grad_norm": 607.2550048828125, - "learning_rate": 3.120034070014814e-05, - "loss": 70.4186, - "step": 119550 - }, - { - "epoch": 0.4830375287354, - "grad_norm": 690.1021728515625, - "learning_rate": 3.11969590414533e-05, - "loss": 43.4509, - "step": 119560 - }, - { - "epoch": 0.4830779300007676, - "grad_norm": 1215.8643798828125, - "learning_rate": 3.119357726194556e-05, - "loss": 42.1141, - "step": 119570 - }, - { - "epoch": 0.48311833126613524, - "grad_norm": 259.37872314453125, - "learning_rate": 3.119019536169083e-05, - "loss": 58.9376, - "step": 119580 - }, - { - "epoch": 0.4831587325315029, - "grad_norm": 755.9671020507812, - "learning_rate": 3.118681334075506e-05, - "loss": 45.3444, - "step": 119590 - }, - { - "epoch": 0.4831991337968705, - "grad_norm": 1142.39892578125, - "learning_rate": 3.118343119920418e-05, - "loss": 58.759, - "step": 119600 - }, - { - "epoch": 0.48323953506223816, - "grad_norm": 651.2678833007812, - "learning_rate": 3.1180048937104114e-05, - "loss": 51.0291, - "step": 119610 - }, - { - "epoch": 0.4832799363276058, - "grad_norm": 555.2761840820312, - "learning_rate": 3.117666655452083e-05, - "loss": 48.9106, - "step": 119620 - }, - { - "epoch": 0.48332033759297344, - "grad_norm": 1292.247802734375, - "learning_rate": 3.117328405152024e-05, - "loss": 67.4017, - "step": 119630 - }, - { - "epoch": 0.483360738858341, - "grad_norm": 579.1093139648438, - "learning_rate": 3.11699014281683e-05, - "loss": 56.0868, - "step": 119640 - }, - { - "epoch": 0.48340114012370866, - "grad_norm": 571.940673828125, - "learning_rate": 3.116651868453097e-05, - "loss": 57.993, - "step": 119650 - }, - { - "epoch": 0.4834415413890763, - "grad_norm": 592.5040893554688, - "learning_rate": 3.116313582067416e-05, - "loss": 51.3045, - "step": 119660 - }, - { - "epoch": 0.48348194265444394, - "grad_norm": 820.21337890625, - "learning_rate": 3.115975283666386e-05, - "loss": 50.0175, - "step": 119670 - }, - { - "epoch": 0.4835223439198116, - "grad_norm": 1067.8109130859375, - "learning_rate": 3.1156369732566006e-05, - "loss": 31.4268, - "step": 119680 - }, - { - "epoch": 0.4835627451851792, - "grad_norm": 589.9420776367188, - "learning_rate": 3.115298650844655e-05, - "loss": 85.4477, - "step": 119690 - }, - { - "epoch": 0.4836031464505468, - "grad_norm": 485.7039489746094, - "learning_rate": 3.114960316437145e-05, - "loss": 53.188, - "step": 119700 - }, - { - "epoch": 0.48364354771591445, - "grad_norm": 809.940673828125, - "learning_rate": 3.1146219700406674e-05, - "loss": 73.8831, - "step": 119710 - }, - { - "epoch": 0.4836839489812821, - "grad_norm": 1266.80419921875, - "learning_rate": 3.114283611661818e-05, - "loss": 65.2104, - "step": 119720 - }, - { - "epoch": 0.4837243502466497, - "grad_norm": 1203.2113037109375, - "learning_rate": 3.113945241307194e-05, - "loss": 64.743, - "step": 119730 - }, - { - "epoch": 0.48376475151201737, - "grad_norm": 655.8514404296875, - "learning_rate": 3.1136068589833914e-05, - "loss": 70.6579, - "step": 119740 - }, - { - "epoch": 0.483805152777385, - "grad_norm": 842.0756225585938, - "learning_rate": 3.1132684646970064e-05, - "loss": 41.7421, - "step": 119750 - }, - { - "epoch": 0.48384555404275265, - "grad_norm": 859.2930297851562, - "learning_rate": 3.1129300584546375e-05, - "loss": 61.2274, - "step": 119760 - }, - { - "epoch": 0.48388595530812023, - "grad_norm": 972.3688354492188, - "learning_rate": 3.1125916402628814e-05, - "loss": 58.9962, - "step": 119770 - }, - { - "epoch": 0.48392635657348787, - "grad_norm": 983.5337524414062, - "learning_rate": 3.112253210128336e-05, - "loss": 57.9547, - "step": 119780 - }, - { - "epoch": 0.4839667578388555, - "grad_norm": 997.2120361328125, - "learning_rate": 3.111914768057599e-05, - "loss": 47.62, - "step": 119790 - }, - { - "epoch": 0.48400715910422315, - "grad_norm": 1432.3392333984375, - "learning_rate": 3.111576314057268e-05, - "loss": 48.3912, - "step": 119800 - }, - { - "epoch": 0.4840475603695908, - "grad_norm": 502.978759765625, - "learning_rate": 3.1112378481339425e-05, - "loss": 60.9147, - "step": 119810 - }, - { - "epoch": 0.48408796163495843, - "grad_norm": 881.69580078125, - "learning_rate": 3.1108993702942205e-05, - "loss": 63.0135, - "step": 119820 - }, - { - "epoch": 0.484128362900326, - "grad_norm": 619.3832397460938, - "learning_rate": 3.110560880544701e-05, - "loss": 55.6975, - "step": 119830 - }, - { - "epoch": 0.48416876416569365, - "grad_norm": 933.4210815429688, - "learning_rate": 3.1102223788919824e-05, - "loss": 45.4596, - "step": 119840 - }, - { - "epoch": 0.4842091654310613, - "grad_norm": 1533.591552734375, - "learning_rate": 3.1098838653426645e-05, - "loss": 62.3054, - "step": 119850 - }, - { - "epoch": 0.48424956669642893, - "grad_norm": 815.6768798828125, - "learning_rate": 3.1095453399033466e-05, - "loss": 53.5172, - "step": 119860 - }, - { - "epoch": 0.4842899679617966, - "grad_norm": 1024.9600830078125, - "learning_rate": 3.109206802580629e-05, - "loss": 63.8872, - "step": 119870 - }, - { - "epoch": 0.4843303692271642, - "grad_norm": 0.0, - "learning_rate": 3.10886825338111e-05, - "loss": 48.7491, - "step": 119880 - }, - { - "epoch": 0.4843707704925318, - "grad_norm": 610.7237548828125, - "learning_rate": 3.108529692311391e-05, - "loss": 59.5073, - "step": 119890 - }, - { - "epoch": 0.48441117175789944, - "grad_norm": 999.9380493164062, - "learning_rate": 3.108191119378073e-05, - "loss": 54.3617, - "step": 119900 - }, - { - "epoch": 0.4844515730232671, - "grad_norm": 589.252685546875, - "learning_rate": 3.107852534587756e-05, - "loss": 70.6535, - "step": 119910 - }, - { - "epoch": 0.4844919742886347, - "grad_norm": 1168.3255615234375, - "learning_rate": 3.107513937947041e-05, - "loss": 65.7924, - "step": 119920 - }, - { - "epoch": 0.48453237555400236, - "grad_norm": 452.32415771484375, - "learning_rate": 3.107175329462529e-05, - "loss": 33.2393, - "step": 119930 - }, - { - "epoch": 0.48457277681937, - "grad_norm": 365.3132629394531, - "learning_rate": 3.106836709140821e-05, - "loss": 61.387, - "step": 119940 - }, - { - "epoch": 0.48461317808473764, - "grad_norm": 431.0849914550781, - "learning_rate": 3.1064980769885187e-05, - "loss": 49.9083, - "step": 119950 - }, - { - "epoch": 0.4846535793501052, - "grad_norm": 706.0770874023438, - "learning_rate": 3.1061594330122246e-05, - "loss": 41.8204, - "step": 119960 - }, - { - "epoch": 0.48469398061547286, - "grad_norm": 519.536865234375, - "learning_rate": 3.10582077721854e-05, - "loss": 39.0427, - "step": 119970 - }, - { - "epoch": 0.4847343818808405, - "grad_norm": 1396.991455078125, - "learning_rate": 3.1054821096140676e-05, - "loss": 70.4659, - "step": 119980 - }, - { - "epoch": 0.48477478314620814, - "grad_norm": 1468.1710205078125, - "learning_rate": 3.10514343020541e-05, - "loss": 91.4469, - "step": 119990 - }, - { - "epoch": 0.4848151844115758, - "grad_norm": 483.8679504394531, - "learning_rate": 3.104804738999169e-05, - "loss": 31.8263, - "step": 120000 - }, - { - "epoch": 0.4848555856769434, - "grad_norm": 1103.7783203125, - "learning_rate": 3.10446603600195e-05, - "loss": 48.717, - "step": 120010 - }, - { - "epoch": 0.484895986942311, - "grad_norm": 0.0, - "learning_rate": 3.104127321220353e-05, - "loss": 44.1308, - "step": 120020 - }, - { - "epoch": 0.48493638820767865, - "grad_norm": 2047.26025390625, - "learning_rate": 3.1037885946609824e-05, - "loss": 61.4208, - "step": 120030 - }, - { - "epoch": 0.4849767894730463, - "grad_norm": 2711.773681640625, - "learning_rate": 3.103449856330443e-05, - "loss": 70.1431, - "step": 120040 - }, - { - "epoch": 0.48501719073841393, - "grad_norm": 1064.7301025390625, - "learning_rate": 3.1031111062353373e-05, - "loss": 59.401, - "step": 120050 - }, - { - "epoch": 0.48505759200378157, - "grad_norm": 1243.82763671875, - "learning_rate": 3.102772344382271e-05, - "loss": 78.9579, - "step": 120060 - }, - { - "epoch": 0.4850979932691492, - "grad_norm": 935.9784545898438, - "learning_rate": 3.102433570777847e-05, - "loss": 56.5204, - "step": 120070 - }, - { - "epoch": 0.48513839453451685, - "grad_norm": 583.158935546875, - "learning_rate": 3.102094785428671e-05, - "loss": 63.3833, - "step": 120080 - }, - { - "epoch": 0.48517879579988443, - "grad_norm": 749.0221557617188, - "learning_rate": 3.101755988341347e-05, - "loss": 48.2709, - "step": 120090 - }, - { - "epoch": 0.4852191970652521, - "grad_norm": 876.2115478515625, - "learning_rate": 3.101417179522479e-05, - "loss": 53.4147, - "step": 120100 - }, - { - "epoch": 0.4852595983306197, - "grad_norm": 1196.973388671875, - "learning_rate": 3.101078358978675e-05, - "loss": 56.0996, - "step": 120110 - }, - { - "epoch": 0.48529999959598735, - "grad_norm": 580.7915649414062, - "learning_rate": 3.100739526716538e-05, - "loss": 58.37, - "step": 120120 - }, - { - "epoch": 0.485340400861355, - "grad_norm": 942.7879028320312, - "learning_rate": 3.100400682742675e-05, - "loss": 64.2288, - "step": 120130 - }, - { - "epoch": 0.48538080212672263, - "grad_norm": 627.4921264648438, - "learning_rate": 3.100061827063692e-05, - "loss": 58.1287, - "step": 120140 - }, - { - "epoch": 0.4854212033920902, - "grad_norm": 1478.2410888671875, - "learning_rate": 3.0997229596861944e-05, - "loss": 91.4508, - "step": 120150 - }, - { - "epoch": 0.48546160465745786, - "grad_norm": 1782.7314453125, - "learning_rate": 3.099384080616789e-05, - "loss": 56.9644, - "step": 120160 - }, - { - "epoch": 0.4855020059228255, - "grad_norm": 359.5166320800781, - "learning_rate": 3.099045189862081e-05, - "loss": 43.2427, - "step": 120170 - }, - { - "epoch": 0.48554240718819314, - "grad_norm": 604.357177734375, - "learning_rate": 3.0987062874286804e-05, - "loss": 43.9985, - "step": 120180 - }, - { - "epoch": 0.4855828084535608, - "grad_norm": 536.3892211914062, - "learning_rate": 3.098367373323192e-05, - "loss": 60.2531, - "step": 120190 - }, - { - "epoch": 0.4856232097189284, - "grad_norm": 762.7628784179688, - "learning_rate": 3.098028447552224e-05, - "loss": 45.4865, - "step": 120200 - }, - { - "epoch": 0.485663610984296, - "grad_norm": 621.0496215820312, - "learning_rate": 3.097689510122382e-05, - "loss": 49.9217, - "step": 120210 - }, - { - "epoch": 0.48570401224966364, - "grad_norm": 1028.444580078125, - "learning_rate": 3.0973505610402765e-05, - "loss": 54.5276, - "step": 120220 - }, - { - "epoch": 0.4857444135150313, - "grad_norm": 639.4829711914062, - "learning_rate": 3.0970116003125146e-05, - "loss": 46.6422, - "step": 120230 - }, - { - "epoch": 0.4857848147803989, - "grad_norm": 955.115234375, - "learning_rate": 3.0966726279457034e-05, - "loss": 59.0818, - "step": 120240 - }, - { - "epoch": 0.48582521604576656, - "grad_norm": 1233.369873046875, - "learning_rate": 3.0963336439464526e-05, - "loss": 56.1891, - "step": 120250 - }, - { - "epoch": 0.4858656173111342, - "grad_norm": 3286.7587890625, - "learning_rate": 3.09599464832137e-05, - "loss": 70.8362, - "step": 120260 - }, - { - "epoch": 0.48590601857650184, - "grad_norm": 749.9498291015625, - "learning_rate": 3.0956556410770655e-05, - "loss": 71.6872, - "step": 120270 - }, - { - "epoch": 0.4859464198418694, - "grad_norm": 337.7810974121094, - "learning_rate": 3.0953166222201476e-05, - "loss": 34.0317, - "step": 120280 - }, - { - "epoch": 0.48598682110723707, - "grad_norm": 969.7774047851562, - "learning_rate": 3.094977591757224e-05, - "loss": 39.0394, - "step": 120290 - }, - { - "epoch": 0.4860272223726047, - "grad_norm": 600.6663208007812, - "learning_rate": 3.094638549694908e-05, - "loss": 72.5898, - "step": 120300 - }, - { - "epoch": 0.48606762363797235, - "grad_norm": 388.6318664550781, - "learning_rate": 3.0942994960398064e-05, - "loss": 58.7893, - "step": 120310 - }, - { - "epoch": 0.48610802490334, - "grad_norm": 1114.3638916015625, - "learning_rate": 3.09396043079853e-05, - "loss": 67.1577, - "step": 120320 - }, - { - "epoch": 0.4861484261687076, - "grad_norm": 723.9693603515625, - "learning_rate": 3.0936213539776895e-05, - "loss": 66.5562, - "step": 120330 - }, - { - "epoch": 0.4861888274340752, - "grad_norm": 608.6270141601562, - "learning_rate": 3.093282265583895e-05, - "loss": 61.2591, - "step": 120340 - }, - { - "epoch": 0.48622922869944285, - "grad_norm": 455.3746643066406, - "learning_rate": 3.092943165623758e-05, - "loss": 57.6604, - "step": 120350 - }, - { - "epoch": 0.4862696299648105, - "grad_norm": 494.3013610839844, - "learning_rate": 3.092604054103888e-05, - "loss": 42.1755, - "step": 120360 - }, - { - "epoch": 0.48631003123017813, - "grad_norm": 2547.077392578125, - "learning_rate": 3.092264931030897e-05, - "loss": 86.2226, - "step": 120370 - }, - { - "epoch": 0.48635043249554577, - "grad_norm": 671.0562133789062, - "learning_rate": 3.0919257964113964e-05, - "loss": 60.703, - "step": 120380 - }, - { - "epoch": 0.4863908337609134, - "grad_norm": 471.1807556152344, - "learning_rate": 3.0915866502519975e-05, - "loss": 55.5357, - "step": 120390 - }, - { - "epoch": 0.48643123502628105, - "grad_norm": 656.9327392578125, - "learning_rate": 3.091247492559312e-05, - "loss": 66.8196, - "step": 120400 - }, - { - "epoch": 0.48647163629164863, - "grad_norm": 2174.91064453125, - "learning_rate": 3.090908323339952e-05, - "loss": 62.3699, - "step": 120410 - }, - { - "epoch": 0.4865120375570163, - "grad_norm": 663.234619140625, - "learning_rate": 3.090569142600531e-05, - "loss": 58.3517, - "step": 120420 - }, - { - "epoch": 0.4865524388223839, - "grad_norm": 1044.9910888671875, - "learning_rate": 3.09022995034766e-05, - "loss": 45.2753, - "step": 120430 - }, - { - "epoch": 0.48659284008775155, - "grad_norm": 1259.57666015625, - "learning_rate": 3.089890746587953e-05, - "loss": 37.8244, - "step": 120440 - }, - { - "epoch": 0.4866332413531192, - "grad_norm": 619.0317993164062, - "learning_rate": 3.089551531328021e-05, - "loss": 52.6852, - "step": 120450 - }, - { - "epoch": 0.48667364261848683, - "grad_norm": 547.9066772460938, - "learning_rate": 3.0892123045744785e-05, - "loss": 35.2198, - "step": 120460 - }, - { - "epoch": 0.4867140438838544, - "grad_norm": 538.3446044921875, - "learning_rate": 3.08887306633394e-05, - "loss": 49.3895, - "step": 120470 - }, - { - "epoch": 0.48675444514922206, - "grad_norm": 1102.380859375, - "learning_rate": 3.088533816613017e-05, - "loss": 78.057, - "step": 120480 - }, - { - "epoch": 0.4867948464145897, - "grad_norm": 836.9898071289062, - "learning_rate": 3.0881945554183235e-05, - "loss": 73.6896, - "step": 120490 - }, - { - "epoch": 0.48683524767995734, - "grad_norm": 657.3259887695312, - "learning_rate": 3.087855282756475e-05, - "loss": 45.1531, - "step": 120500 - }, - { - "epoch": 0.486875648945325, - "grad_norm": 440.7289733886719, - "learning_rate": 3.087515998634085e-05, - "loss": 57.9621, - "step": 120510 - }, - { - "epoch": 0.4869160502106926, - "grad_norm": 955.15966796875, - "learning_rate": 3.087176703057769e-05, - "loss": 56.2972, - "step": 120520 - }, - { - "epoch": 0.4869564514760602, - "grad_norm": 293.07012939453125, - "learning_rate": 3.08683739603414e-05, - "loss": 42.8358, - "step": 120530 - }, - { - "epoch": 0.48699685274142784, - "grad_norm": 516.2140502929688, - "learning_rate": 3.0864980775698145e-05, - "loss": 92.3896, - "step": 120540 - }, - { - "epoch": 0.4870372540067955, - "grad_norm": 425.08343505859375, - "learning_rate": 3.086158747671406e-05, - "loss": 90.2946, - "step": 120550 - }, - { - "epoch": 0.4870776552721631, - "grad_norm": 302.85626220703125, - "learning_rate": 3.085819406345532e-05, - "loss": 73.1443, - "step": 120560 - }, - { - "epoch": 0.48711805653753076, - "grad_norm": 910.6437377929688, - "learning_rate": 3.0854800535988064e-05, - "loss": 54.2145, - "step": 120570 - }, - { - "epoch": 0.4871584578028984, - "grad_norm": 1036.3970947265625, - "learning_rate": 3.085140689437846e-05, - "loss": 49.4283, - "step": 120580 - }, - { - "epoch": 0.48719885906826604, - "grad_norm": 1020.8902587890625, - "learning_rate": 3.084801313869266e-05, - "loss": 50.4306, - "step": 120590 - }, - { - "epoch": 0.48723926033363363, - "grad_norm": 683.2181396484375, - "learning_rate": 3.0844619268996845e-05, - "loss": 52.7355, - "step": 120600 - }, - { - "epoch": 0.48727966159900127, - "grad_norm": 712.2384033203125, - "learning_rate": 3.084122528535717e-05, - "loss": 76.377, - "step": 120610 - }, - { - "epoch": 0.4873200628643689, - "grad_norm": 1040.790771484375, - "learning_rate": 3.0837831187839784e-05, - "loss": 66.5839, - "step": 120620 - }, - { - "epoch": 0.48736046412973655, - "grad_norm": 826.3641967773438, - "learning_rate": 3.083443697651088e-05, - "loss": 81.0511, - "step": 120630 - }, - { - "epoch": 0.4874008653951042, - "grad_norm": 1687.8861083984375, - "learning_rate": 3.083104265143663e-05, - "loss": 73.7149, - "step": 120640 - }, - { - "epoch": 0.48744126666047183, - "grad_norm": 700.1195678710938, - "learning_rate": 3.08276482126832e-05, - "loss": 49.3427, - "step": 120650 - }, - { - "epoch": 0.4874816679258394, - "grad_norm": 661.125244140625, - "learning_rate": 3.082425366031676e-05, - "loss": 37.9126, - "step": 120660 - }, - { - "epoch": 0.48752206919120705, - "grad_norm": 773.9281005859375, - "learning_rate": 3.08208589944035e-05, - "loss": 47.0344, - "step": 120670 - }, - { - "epoch": 0.4875624704565747, - "grad_norm": 1276.460205078125, - "learning_rate": 3.08174642150096e-05, - "loss": 53.2406, - "step": 120680 - }, - { - "epoch": 0.48760287172194233, - "grad_norm": 669.7227172851562, - "learning_rate": 3.081406932220123e-05, - "loss": 39.3701, - "step": 120690 - }, - { - "epoch": 0.48764327298731, - "grad_norm": 2131.654541015625, - "learning_rate": 3.08106743160446e-05, - "loss": 64.2498, - "step": 120700 - }, - { - "epoch": 0.4876836742526776, - "grad_norm": 621.3330078125, - "learning_rate": 3.0807279196605876e-05, - "loss": 64.5872, - "step": 120710 - }, - { - "epoch": 0.48772407551804525, - "grad_norm": 565.8031616210938, - "learning_rate": 3.0803883963951255e-05, - "loss": 59.7642, - "step": 120720 - }, - { - "epoch": 0.48776447678341284, - "grad_norm": 936.9571533203125, - "learning_rate": 3.080048861814693e-05, - "loss": 34.2556, - "step": 120730 - }, - { - "epoch": 0.4878048780487805, - "grad_norm": 361.34063720703125, - "learning_rate": 3.0797093159259085e-05, - "loss": 52.7916, - "step": 120740 - }, - { - "epoch": 0.4878452793141481, - "grad_norm": 325.8556213378906, - "learning_rate": 3.079369758735393e-05, - "loss": 41.6967, - "step": 120750 - }, - { - "epoch": 0.48788568057951576, - "grad_norm": 522.2239990234375, - "learning_rate": 3.0790301902497666e-05, - "loss": 44.1217, - "step": 120760 - }, - { - "epoch": 0.4879260818448834, - "grad_norm": 1932.948486328125, - "learning_rate": 3.078690610475647e-05, - "loss": 70.4253, - "step": 120770 - }, - { - "epoch": 0.48796648311025104, - "grad_norm": 1131.1263427734375, - "learning_rate": 3.0783510194196576e-05, - "loss": 101.964, - "step": 120780 - }, - { - "epoch": 0.4880068843756186, - "grad_norm": 1616.0198974609375, - "learning_rate": 3.078011417088416e-05, - "loss": 55.159, - "step": 120790 - }, - { - "epoch": 0.48804728564098626, - "grad_norm": 942.608642578125, - "learning_rate": 3.0776718034885454e-05, - "loss": 47.1413, - "step": 120800 - }, - { - "epoch": 0.4880876869063539, - "grad_norm": 503.5368347167969, - "learning_rate": 3.0773321786266644e-05, - "loss": 36.5475, - "step": 120810 - }, - { - "epoch": 0.48812808817172154, - "grad_norm": 844.5652465820312, - "learning_rate": 3.076992542509396e-05, - "loss": 52.4938, - "step": 120820 - }, - { - "epoch": 0.4881684894370892, - "grad_norm": 856.3523559570312, - "learning_rate": 3.07665289514336e-05, - "loss": 83.7883, - "step": 120830 - }, - { - "epoch": 0.4882088907024568, - "grad_norm": 400.933349609375, - "learning_rate": 3.07631323653518e-05, - "loss": 55.8697, - "step": 120840 - }, - { - "epoch": 0.4882492919678244, - "grad_norm": 604.0474853515625, - "learning_rate": 3.075973566691477e-05, - "loss": 72.4179, - "step": 120850 - }, - { - "epoch": 0.48828969323319205, - "grad_norm": 584.9152221679688, - "learning_rate": 3.0756338856188716e-05, - "loss": 59.9413, - "step": 120860 - }, - { - "epoch": 0.4883300944985597, - "grad_norm": 734.94287109375, - "learning_rate": 3.075294193323988e-05, - "loss": 81.9922, - "step": 120870 - }, - { - "epoch": 0.4883704957639273, - "grad_norm": 775.8441162109375, - "learning_rate": 3.074954489813449e-05, - "loss": 59.0604, - "step": 120880 - }, - { - "epoch": 0.48841089702929497, - "grad_norm": 464.788330078125, - "learning_rate": 3.074614775093874e-05, - "loss": 77.2058, - "step": 120890 - }, - { - "epoch": 0.4884512982946626, - "grad_norm": 250.41958618164062, - "learning_rate": 3.074275049171889e-05, - "loss": 40.2773, - "step": 120900 - }, - { - "epoch": 0.48849169956003025, - "grad_norm": 608.3348388671875, - "learning_rate": 3.073935312054117e-05, - "loss": 34.6437, - "step": 120910 - }, - { - "epoch": 0.48853210082539783, - "grad_norm": 1844.59033203125, - "learning_rate": 3.0735955637471794e-05, - "loss": 71.1501, - "step": 120920 - }, - { - "epoch": 0.48857250209076547, - "grad_norm": 385.8973388671875, - "learning_rate": 3.073255804257702e-05, - "loss": 56.5991, - "step": 120930 - }, - { - "epoch": 0.4886129033561331, - "grad_norm": 503.91558837890625, - "learning_rate": 3.072916033592307e-05, - "loss": 62.6247, - "step": 120940 - }, - { - "epoch": 0.48865330462150075, - "grad_norm": 590.6072387695312, - "learning_rate": 3.0725762517576195e-05, - "loss": 39.7041, - "step": 120950 - }, - { - "epoch": 0.4886937058868684, - "grad_norm": 369.4613952636719, - "learning_rate": 3.072236458760262e-05, - "loss": 37.5118, - "step": 120960 - }, - { - "epoch": 0.48873410715223603, - "grad_norm": 743.9142456054688, - "learning_rate": 3.071896654606862e-05, - "loss": 48.3398, - "step": 120970 - }, - { - "epoch": 0.4887745084176036, - "grad_norm": 611.325439453125, - "learning_rate": 3.0715568393040405e-05, - "loss": 82.7277, - "step": 120980 - }, - { - "epoch": 0.48881490968297125, - "grad_norm": 1614.0037841796875, - "learning_rate": 3.071217012858425e-05, - "loss": 102.5452, - "step": 120990 - }, - { - "epoch": 0.4888553109483389, - "grad_norm": 984.4487915039062, - "learning_rate": 3.0708771752766394e-05, - "loss": 56.9584, - "step": 121000 - }, - { - "epoch": 0.48889571221370653, - "grad_norm": 1377.5753173828125, - "learning_rate": 3.07053732656531e-05, - "loss": 75.7293, - "step": 121010 - }, - { - "epoch": 0.4889361134790742, - "grad_norm": 438.0140686035156, - "learning_rate": 3.070197466731061e-05, - "loss": 43.8571, - "step": 121020 - }, - { - "epoch": 0.4889765147444418, - "grad_norm": 710.8450317382812, - "learning_rate": 3.069857595780519e-05, - "loss": 66.9329, - "step": 121030 - }, - { - "epoch": 0.48901691600980945, - "grad_norm": 762.4956665039062, - "learning_rate": 3.06951771372031e-05, - "loss": 52.6469, - "step": 121040 - }, - { - "epoch": 0.48905731727517704, - "grad_norm": 732.955322265625, - "learning_rate": 3.06917782055706e-05, - "loss": 84.5691, - "step": 121050 - }, - { - "epoch": 0.4890977185405447, - "grad_norm": 899.1510009765625, - "learning_rate": 3.0688379162973955e-05, - "loss": 78.2393, - "step": 121060 - }, - { - "epoch": 0.4891381198059123, - "grad_norm": 798.7470092773438, - "learning_rate": 3.0684980009479424e-05, - "loss": 58.7748, - "step": 121070 - }, - { - "epoch": 0.48917852107127996, - "grad_norm": 897.6027221679688, - "learning_rate": 3.068158074515328e-05, - "loss": 52.4273, - "step": 121080 - }, - { - "epoch": 0.4892189223366476, - "grad_norm": 503.83392333984375, - "learning_rate": 3.0678181370061805e-05, - "loss": 59.4657, - "step": 121090 - }, - { - "epoch": 0.48925932360201524, - "grad_norm": 1317.728759765625, - "learning_rate": 3.0674781884271254e-05, - "loss": 93.741, - "step": 121100 - }, - { - "epoch": 0.4892997248673828, - "grad_norm": 1355.2291259765625, - "learning_rate": 3.067138228784791e-05, - "loss": 84.8171, - "step": 121110 - }, - { - "epoch": 0.48934012613275046, - "grad_norm": 1263.6253662109375, - "learning_rate": 3.0667982580858044e-05, - "loss": 65.6653, - "step": 121120 - }, - { - "epoch": 0.4893805273981181, - "grad_norm": 630.547607421875, - "learning_rate": 3.066458276336794e-05, - "loss": 62.2357, - "step": 121130 - }, - { - "epoch": 0.48942092866348574, - "grad_norm": 1021.1478271484375, - "learning_rate": 3.0661182835443884e-05, - "loss": 57.1426, - "step": 121140 - }, - { - "epoch": 0.4894613299288534, - "grad_norm": 1333.4888916015625, - "learning_rate": 3.065778279715215e-05, - "loss": 49.1796, - "step": 121150 - }, - { - "epoch": 0.489501731194221, - "grad_norm": 481.13165283203125, - "learning_rate": 3.0654382648559026e-05, - "loss": 65.8724, - "step": 121160 - }, - { - "epoch": 0.4895421324595886, - "grad_norm": 359.9427795410156, - "learning_rate": 3.065098238973081e-05, - "loss": 77.4488, - "step": 121170 - }, - { - "epoch": 0.48958253372495625, - "grad_norm": 274.55328369140625, - "learning_rate": 3.064758202073377e-05, - "loss": 54.833, - "step": 121180 - }, - { - "epoch": 0.4896229349903239, - "grad_norm": 489.7810974121094, - "learning_rate": 3.064418154163422e-05, - "loss": 54.8473, - "step": 121190 - }, - { - "epoch": 0.48966333625569153, - "grad_norm": 674.3745727539062, - "learning_rate": 3.064078095249844e-05, - "loss": 51.562, - "step": 121200 - }, - { - "epoch": 0.48970373752105917, - "grad_norm": 443.5577087402344, - "learning_rate": 3.0637380253392736e-05, - "loss": 68.5722, - "step": 121210 - }, - { - "epoch": 0.4897441387864268, - "grad_norm": 824.37158203125, - "learning_rate": 3.06339794443834e-05, - "loss": 46.6327, - "step": 121220 - }, - { - "epoch": 0.48978454005179445, - "grad_norm": 275.5827331542969, - "learning_rate": 3.063057852553674e-05, - "loss": 32.1468, - "step": 121230 - }, - { - "epoch": 0.48982494131716203, - "grad_norm": 573.8633422851562, - "learning_rate": 3.062717749691904e-05, - "loss": 58.9486, - "step": 121240 - }, - { - "epoch": 0.4898653425825297, - "grad_norm": 383.3703918457031, - "learning_rate": 3.062377635859663e-05, - "loss": 39.3934, - "step": 121250 - }, - { - "epoch": 0.4899057438478973, - "grad_norm": 942.3863525390625, - "learning_rate": 3.06203751106358e-05, - "loss": 49.6613, - "step": 121260 - }, - { - "epoch": 0.48994614511326495, - "grad_norm": 0.0, - "learning_rate": 3.0616973753102856e-05, - "loss": 37.8872, - "step": 121270 - }, - { - "epoch": 0.4899865463786326, - "grad_norm": 1657.1973876953125, - "learning_rate": 3.0613572286064125e-05, - "loss": 68.007, - "step": 121280 - }, - { - "epoch": 0.49002694764400023, - "grad_norm": 647.4043579101562, - "learning_rate": 3.061017070958591e-05, - "loss": 92.0626, - "step": 121290 - }, - { - "epoch": 0.4900673489093678, - "grad_norm": 885.7959594726562, - "learning_rate": 3.0606769023734536e-05, - "loss": 30.4313, - "step": 121300 - }, - { - "epoch": 0.49010775017473546, - "grad_norm": 1316.2650146484375, - "learning_rate": 3.060336722857631e-05, - "loss": 83.0374, - "step": 121310 - }, - { - "epoch": 0.4901481514401031, - "grad_norm": 1012.3235473632812, - "learning_rate": 3.059996532417754e-05, - "loss": 41.8788, - "step": 121320 - }, - { - "epoch": 0.49018855270547074, - "grad_norm": 449.5831604003906, - "learning_rate": 3.059656331060458e-05, - "loss": 55.4948, - "step": 121330 - }, - { - "epoch": 0.4902289539708384, - "grad_norm": 723.0786743164062, - "learning_rate": 3.0593161187923736e-05, - "loss": 68.319, - "step": 121340 - }, - { - "epoch": 0.490269355236206, - "grad_norm": 569.5224609375, - "learning_rate": 3.0589758956201327e-05, - "loss": 62.8122, - "step": 121350 - }, - { - "epoch": 0.49030975650157366, - "grad_norm": 572.9276733398438, - "learning_rate": 3.058635661550369e-05, - "loss": 60.2772, - "step": 121360 - }, - { - "epoch": 0.49035015776694124, - "grad_norm": 379.9477844238281, - "learning_rate": 3.058295416589716e-05, - "loss": 69.8202, - "step": 121370 - }, - { - "epoch": 0.4903905590323089, - "grad_norm": 534.4418334960938, - "learning_rate": 3.0579551607448066e-05, - "loss": 51.7835, - "step": 121380 - }, - { - "epoch": 0.4904309602976765, - "grad_norm": 803.1372680664062, - "learning_rate": 3.057614894022274e-05, - "loss": 43.7992, - "step": 121390 - }, - { - "epoch": 0.49047136156304416, - "grad_norm": 2386.29736328125, - "learning_rate": 3.0572746164287514e-05, - "loss": 54.0955, - "step": 121400 - }, - { - "epoch": 0.4905117628284118, - "grad_norm": 997.156005859375, - "learning_rate": 3.0569343279708734e-05, - "loss": 65.676, - "step": 121410 - }, - { - "epoch": 0.49055216409377944, - "grad_norm": 525.251953125, - "learning_rate": 3.056594028655274e-05, - "loss": 52.9306, - "step": 121420 - }, - { - "epoch": 0.490592565359147, - "grad_norm": 1752.92236328125, - "learning_rate": 3.056253718488588e-05, - "loss": 56.5857, - "step": 121430 - }, - { - "epoch": 0.49063296662451467, - "grad_norm": 704.6538696289062, - "learning_rate": 3.055913397477448e-05, - "loss": 68.621, - "step": 121440 - }, - { - "epoch": 0.4906733678898823, - "grad_norm": 615.273681640625, - "learning_rate": 3.0555730656284914e-05, - "loss": 54.718, - "step": 121450 - }, - { - "epoch": 0.49071376915524995, - "grad_norm": 676.6206665039062, - "learning_rate": 3.0552327229483515e-05, - "loss": 60.8578, - "step": 121460 - }, - { - "epoch": 0.4907541704206176, - "grad_norm": 512.4281005859375, - "learning_rate": 3.054892369443663e-05, - "loss": 48.8229, - "step": 121470 - }, - { - "epoch": 0.4907945716859852, - "grad_norm": 885.903076171875, - "learning_rate": 3.054552005121064e-05, - "loss": 83.9689, - "step": 121480 - }, - { - "epoch": 0.4908349729513528, - "grad_norm": 405.67919921875, - "learning_rate": 3.054211629987187e-05, - "loss": 50.5087, - "step": 121490 - }, - { - "epoch": 0.49087537421672045, - "grad_norm": 430.7709045410156, - "learning_rate": 3.053871244048669e-05, - "loss": 54.2436, - "step": 121500 - }, - { - "epoch": 0.4909157754820881, - "grad_norm": 394.2481689453125, - "learning_rate": 3.0535308473121455e-05, - "loss": 37.6645, - "step": 121510 - }, - { - "epoch": 0.49095617674745573, - "grad_norm": 2119.940673828125, - "learning_rate": 3.053190439784253e-05, - "loss": 85.2859, - "step": 121520 - }, - { - "epoch": 0.49099657801282337, - "grad_norm": 509.3744201660156, - "learning_rate": 3.052850021471629e-05, - "loss": 74.4669, - "step": 121530 - }, - { - "epoch": 0.491036979278191, - "grad_norm": 0.0, - "learning_rate": 3.052509592380909e-05, - "loss": 51.207, - "step": 121540 - }, - { - "epoch": 0.49107738054355865, - "grad_norm": 615.7112426757812, - "learning_rate": 3.052169152518729e-05, - "loss": 37.4327, - "step": 121550 - }, - { - "epoch": 0.49111778180892623, - "grad_norm": 624.2453002929688, - "learning_rate": 3.051828701891729e-05, - "loss": 52.5991, - "step": 121560 - }, - { - "epoch": 0.4911581830742939, - "grad_norm": 558.4168090820312, - "learning_rate": 3.0514882405065432e-05, - "loss": 63.3106, - "step": 121570 - }, - { - "epoch": 0.4911985843396615, - "grad_norm": 509.6629333496094, - "learning_rate": 3.0511477683698108e-05, - "loss": 63.4842, - "step": 121580 - }, - { - "epoch": 0.49123898560502915, - "grad_norm": 806.631103515625, - "learning_rate": 3.050807285488168e-05, - "loss": 62.4659, - "step": 121590 - }, - { - "epoch": 0.4912793868703968, - "grad_norm": 814.9642944335938, - "learning_rate": 3.050466791868254e-05, - "loss": 66.1909, - "step": 121600 - }, - { - "epoch": 0.49131978813576443, - "grad_norm": 712.5091552734375, - "learning_rate": 3.0501262875167063e-05, - "loss": 74.5207, - "step": 121610 - }, - { - "epoch": 0.491360189401132, - "grad_norm": 615.8709106445312, - "learning_rate": 3.0497857724401642e-05, - "loss": 79.785, - "step": 121620 - }, - { - "epoch": 0.49140059066649966, - "grad_norm": 729.620361328125, - "learning_rate": 3.0494452466452644e-05, - "loss": 75.2466, - "step": 121630 - }, - { - "epoch": 0.4914409919318673, - "grad_norm": 232.82257080078125, - "learning_rate": 3.049104710138647e-05, - "loss": 40.4371, - "step": 121640 - }, - { - "epoch": 0.49148139319723494, - "grad_norm": 528.4829711914062, - "learning_rate": 3.0487641629269516e-05, - "loss": 49.6708, - "step": 121650 - }, - { - "epoch": 0.4915217944626026, - "grad_norm": 2002.76416015625, - "learning_rate": 3.0484236050168153e-05, - "loss": 52.0956, - "step": 121660 - }, - { - "epoch": 0.4915621957279702, - "grad_norm": 575.5763549804688, - "learning_rate": 3.048083036414878e-05, - "loss": 39.8568, - "step": 121670 - }, - { - "epoch": 0.4916025969933378, - "grad_norm": 0.0, - "learning_rate": 3.0477424571277807e-05, - "loss": 39.302, - "step": 121680 - }, - { - "epoch": 0.49164299825870544, - "grad_norm": 412.2666015625, - "learning_rate": 3.047401867162162e-05, - "loss": 46.3011, - "step": 121690 - }, - { - "epoch": 0.4916833995240731, - "grad_norm": 749.7333374023438, - "learning_rate": 3.0470612665246618e-05, - "loss": 63.5483, - "step": 121700 - }, - { - "epoch": 0.4917238007894407, - "grad_norm": 686.0115966796875, - "learning_rate": 3.0467206552219208e-05, - "loss": 37.7942, - "step": 121710 - }, - { - "epoch": 0.49176420205480836, - "grad_norm": 1040.0323486328125, - "learning_rate": 3.0463800332605784e-05, - "loss": 69.1257, - "step": 121720 - }, - { - "epoch": 0.491804603320176, - "grad_norm": 547.2194213867188, - "learning_rate": 3.046039400647277e-05, - "loss": 28.7517, - "step": 121730 - }, - { - "epoch": 0.49184500458554364, - "grad_norm": 1607.9515380859375, - "learning_rate": 3.0456987573886564e-05, - "loss": 67.1605, - "step": 121740 - }, - { - "epoch": 0.49188540585091123, - "grad_norm": 1343.8623046875, - "learning_rate": 3.045358103491357e-05, - "loss": 61.9395, - "step": 121750 - }, - { - "epoch": 0.49192580711627887, - "grad_norm": 360.9034729003906, - "learning_rate": 3.0450174389620205e-05, - "loss": 60.9107, - "step": 121760 - }, - { - "epoch": 0.4919662083816465, - "grad_norm": 658.2755126953125, - "learning_rate": 3.044676763807288e-05, - "loss": 44.2099, - "step": 121770 - }, - { - "epoch": 0.49200660964701415, - "grad_norm": 463.0184326171875, - "learning_rate": 3.044336078033803e-05, - "loss": 50.352, - "step": 121780 - }, - { - "epoch": 0.4920470109123818, - "grad_norm": 1313.499755859375, - "learning_rate": 3.043995381648205e-05, - "loss": 65.2679, - "step": 121790 - }, - { - "epoch": 0.49208741217774943, - "grad_norm": 662.1671142578125, - "learning_rate": 3.0436546746571372e-05, - "loss": 47.1219, - "step": 121800 - }, - { - "epoch": 0.492127813443117, - "grad_norm": 3354.996826171875, - "learning_rate": 3.0433139570672407e-05, - "loss": 87.668, - "step": 121810 - }, - { - "epoch": 0.49216821470848465, - "grad_norm": 931.5249633789062, - "learning_rate": 3.0429732288851603e-05, - "loss": 44.4428, - "step": 121820 - }, - { - "epoch": 0.4922086159738523, - "grad_norm": 798.1958618164062, - "learning_rate": 3.0426324901175374e-05, - "loss": 51.0042, - "step": 121830 - }, - { - "epoch": 0.49224901723921993, - "grad_norm": 674.7509765625, - "learning_rate": 3.0422917407710137e-05, - "loss": 33.4516, - "step": 121840 - }, - { - "epoch": 0.4922894185045876, - "grad_norm": 734.49169921875, - "learning_rate": 3.0419509808522334e-05, - "loss": 77.9193, - "step": 121850 - }, - { - "epoch": 0.4923298197699552, - "grad_norm": 561.5974731445312, - "learning_rate": 3.0416102103678402e-05, - "loss": 52.3013, - "step": 121860 - }, - { - "epoch": 0.49237022103532285, - "grad_norm": 292.3045959472656, - "learning_rate": 3.041269429324477e-05, - "loss": 31.1926, - "step": 121870 - }, - { - "epoch": 0.49241062230069044, - "grad_norm": 613.70361328125, - "learning_rate": 3.040928637728787e-05, - "loss": 56.5007, - "step": 121880 - }, - { - "epoch": 0.4924510235660581, - "grad_norm": 1232.8988037109375, - "learning_rate": 3.040587835587415e-05, - "loss": 73.1134, - "step": 121890 - }, - { - "epoch": 0.4924914248314257, - "grad_norm": 831.9037475585938, - "learning_rate": 3.0402470229070056e-05, - "loss": 46.9302, - "step": 121900 - }, - { - "epoch": 0.49253182609679336, - "grad_norm": 409.0111389160156, - "learning_rate": 3.039906199694202e-05, - "loss": 39.5951, - "step": 121910 - }, - { - "epoch": 0.492572227362161, - "grad_norm": 646.8606567382812, - "learning_rate": 3.0395653659556488e-05, - "loss": 47.5818, - "step": 121920 - }, - { - "epoch": 0.49261262862752864, - "grad_norm": 363.55609130859375, - "learning_rate": 3.039224521697991e-05, - "loss": 72.8517, - "step": 121930 - }, - { - "epoch": 0.4926530298928962, - "grad_norm": 498.0539855957031, - "learning_rate": 3.0388836669278738e-05, - "loss": 53.5023, - "step": 121940 - }, - { - "epoch": 0.49269343115826386, - "grad_norm": 943.4515991210938, - "learning_rate": 3.038542801651941e-05, - "loss": 59.0278, - "step": 121950 - }, - { - "epoch": 0.4927338324236315, - "grad_norm": 3113.823486328125, - "learning_rate": 3.0382019258768403e-05, - "loss": 72.7263, - "step": 121960 - }, - { - "epoch": 0.49277423368899914, - "grad_norm": 509.8355407714844, - "learning_rate": 3.0378610396092154e-05, - "loss": 51.3322, - "step": 121970 - }, - { - "epoch": 0.4928146349543668, - "grad_norm": 791.736572265625, - "learning_rate": 3.0375201428557132e-05, - "loss": 48.9846, - "step": 121980 - }, - { - "epoch": 0.4928550362197344, - "grad_norm": 532.8504638671875, - "learning_rate": 3.0371792356229783e-05, - "loss": 35.4836, - "step": 121990 - }, - { - "epoch": 0.492895437485102, - "grad_norm": 419.5154113769531, - "learning_rate": 3.0368383179176585e-05, - "loss": 69.5128, - "step": 122000 - }, - { - "epoch": 0.49293583875046965, - "grad_norm": 3489.129150390625, - "learning_rate": 3.036497389746399e-05, - "loss": 68.8055, - "step": 122010 - }, - { - "epoch": 0.4929762400158373, - "grad_norm": 1044.6541748046875, - "learning_rate": 3.0361564511158457e-05, - "loss": 62.8193, - "step": 122020 - }, - { - "epoch": 0.4930166412812049, - "grad_norm": 1858.55615234375, - "learning_rate": 3.0358155020326477e-05, - "loss": 77.7083, - "step": 122030 - }, - { - "epoch": 0.49305704254657257, - "grad_norm": 682.7332153320312, - "learning_rate": 3.0354745425034498e-05, - "loss": 64.1814, - "step": 122040 - }, - { - "epoch": 0.4930974438119402, - "grad_norm": 653.3331298828125, - "learning_rate": 3.0351335725349e-05, - "loss": 49.824, - "step": 122050 - }, - { - "epoch": 0.49313784507730785, - "grad_norm": 859.71337890625, - "learning_rate": 3.0347925921336463e-05, - "loss": 54.182, - "step": 122060 - }, - { - "epoch": 0.49317824634267543, - "grad_norm": 1686.3477783203125, - "learning_rate": 3.0344516013063357e-05, - "loss": 43.8826, - "step": 122070 - }, - { - "epoch": 0.49321864760804307, - "grad_norm": 1858.2529296875, - "learning_rate": 3.034110600059616e-05, - "loss": 42.0115, - "step": 122080 - }, - { - "epoch": 0.4932590488734107, - "grad_norm": 857.298828125, - "learning_rate": 3.0337695884001343e-05, - "loss": 44.892, - "step": 122090 - }, - { - "epoch": 0.49329945013877835, - "grad_norm": 3229.34765625, - "learning_rate": 3.0334285663345404e-05, - "loss": 49.8066, - "step": 122100 - }, - { - "epoch": 0.493339851404146, - "grad_norm": 782.3743896484375, - "learning_rate": 3.033087533869482e-05, - "loss": 46.6393, - "step": 122110 - }, - { - "epoch": 0.49338025266951363, - "grad_norm": 1177.9285888671875, - "learning_rate": 3.032746491011607e-05, - "loss": 54.9906, - "step": 122120 - }, - { - "epoch": 0.4934206539348812, - "grad_norm": 997.4578247070312, - "learning_rate": 3.0324054377675654e-05, - "loss": 47.8525, - "step": 122130 - }, - { - "epoch": 0.49346105520024885, - "grad_norm": 1065.852294921875, - "learning_rate": 3.032064374144005e-05, - "loss": 58.0819, - "step": 122140 - }, - { - "epoch": 0.4935014564656165, - "grad_norm": 2079.648681640625, - "learning_rate": 3.031723300147577e-05, - "loss": 95.1813, - "step": 122150 - }, - { - "epoch": 0.49354185773098413, - "grad_norm": 759.1595458984375, - "learning_rate": 3.0313822157849287e-05, - "loss": 54.4963, - "step": 122160 - }, - { - "epoch": 0.4935822589963518, - "grad_norm": 2073.466064453125, - "learning_rate": 3.031041121062711e-05, - "loss": 70.8082, - "step": 122170 - }, - { - "epoch": 0.4936226602617194, - "grad_norm": 1743.0040283203125, - "learning_rate": 3.030700015987573e-05, - "loss": 57.2004, - "step": 122180 - }, - { - "epoch": 0.49366306152708705, - "grad_norm": 714.07275390625, - "learning_rate": 3.030358900566165e-05, - "loss": 60.7911, - "step": 122190 - }, - { - "epoch": 0.49370346279245464, - "grad_norm": 1210.780029296875, - "learning_rate": 3.0300177748051373e-05, - "loss": 57.1338, - "step": 122200 - }, - { - "epoch": 0.4937438640578223, - "grad_norm": 815.665771484375, - "learning_rate": 3.02967663871114e-05, - "loss": 74.5411, - "step": 122210 - }, - { - "epoch": 0.4937842653231899, - "grad_norm": 1104.837646484375, - "learning_rate": 3.0293354922908235e-05, - "loss": 69.4025, - "step": 122220 - }, - { - "epoch": 0.49382466658855756, - "grad_norm": 693.127197265625, - "learning_rate": 3.0289943355508392e-05, - "loss": 63.0513, - "step": 122230 - }, - { - "epoch": 0.4938650678539252, - "grad_norm": 683.1317749023438, - "learning_rate": 3.028653168497838e-05, - "loss": 46.7121, - "step": 122240 - }, - { - "epoch": 0.49390546911929284, - "grad_norm": 317.39532470703125, - "learning_rate": 3.028311991138472e-05, - "loss": 63.5555, - "step": 122250 - }, - { - "epoch": 0.4939458703846604, - "grad_norm": 466.8677062988281, - "learning_rate": 3.0279708034793907e-05, - "loss": 45.8454, - "step": 122260 - }, - { - "epoch": 0.49398627165002806, - "grad_norm": 1304.4661865234375, - "learning_rate": 3.027629605527248e-05, - "loss": 67.0937, - "step": 122270 - }, - { - "epoch": 0.4940266729153957, - "grad_norm": 341.1806335449219, - "learning_rate": 3.0272883972886935e-05, - "loss": 32.4989, - "step": 122280 - }, - { - "epoch": 0.49406707418076334, - "grad_norm": 668.6641235351562, - "learning_rate": 3.02694717877038e-05, - "loss": 58.9114, - "step": 122290 - }, - { - "epoch": 0.494107475446131, - "grad_norm": 1312.1033935546875, - "learning_rate": 3.02660594997896e-05, - "loss": 52.9595, - "step": 122300 - }, - { - "epoch": 0.4941478767114986, - "grad_norm": 206.2220916748047, - "learning_rate": 3.0262647109210867e-05, - "loss": 42.2671, - "step": 122310 - }, - { - "epoch": 0.4941882779768662, - "grad_norm": 533.5427856445312, - "learning_rate": 3.0259234616034116e-05, - "loss": 54.6035, - "step": 122320 - }, - { - "epoch": 0.49422867924223385, - "grad_norm": 389.654296875, - "learning_rate": 3.0255822020325873e-05, - "loss": 46.6373, - "step": 122330 - }, - { - "epoch": 0.4942690805076015, - "grad_norm": 509.89276123046875, - "learning_rate": 3.025240932215268e-05, - "loss": 65.848, - "step": 122340 - }, - { - "epoch": 0.49430948177296913, - "grad_norm": 738.1375732421875, - "learning_rate": 3.024899652158107e-05, - "loss": 46.7456, - "step": 122350 - }, - { - "epoch": 0.49434988303833677, - "grad_norm": 272.38751220703125, - "learning_rate": 3.0245583618677558e-05, - "loss": 58.7914, - "step": 122360 - }, - { - "epoch": 0.4943902843037044, - "grad_norm": 599.438232421875, - "learning_rate": 3.0242170613508692e-05, - "loss": 57.4107, - "step": 122370 - }, - { - "epoch": 0.49443068556907205, - "grad_norm": 323.33062744140625, - "learning_rate": 3.0238757506141012e-05, - "loss": 48.1619, - "step": 122380 - }, - { - "epoch": 0.49447108683443963, - "grad_norm": 1464.4964599609375, - "learning_rate": 3.0235344296641067e-05, - "loss": 58.9648, - "step": 122390 - }, - { - "epoch": 0.4945114880998073, - "grad_norm": 2316.94873046875, - "learning_rate": 3.023193098507538e-05, - "loss": 80.6663, - "step": 122400 - }, - { - "epoch": 0.4945518893651749, - "grad_norm": 777.03173828125, - "learning_rate": 3.0228517571510507e-05, - "loss": 74.6564, - "step": 122410 - }, - { - "epoch": 0.49459229063054255, - "grad_norm": 832.2376098632812, - "learning_rate": 3.0225104056013e-05, - "loss": 50.8063, - "step": 122420 - }, - { - "epoch": 0.4946326918959102, - "grad_norm": 1374.7584228515625, - "learning_rate": 3.0221690438649386e-05, - "loss": 84.333, - "step": 122430 - }, - { - "epoch": 0.49467309316127783, - "grad_norm": 586.2657470703125, - "learning_rate": 3.0218276719486244e-05, - "loss": 54.8292, - "step": 122440 - }, - { - "epoch": 0.4947134944266454, - "grad_norm": 362.1689147949219, - "learning_rate": 3.0214862898590095e-05, - "loss": 45.3425, - "step": 122450 - }, - { - "epoch": 0.49475389569201306, - "grad_norm": 740.5028076171875, - "learning_rate": 3.021144897602752e-05, - "loss": 65.865, - "step": 122460 - }, - { - "epoch": 0.4947942969573807, - "grad_norm": 776.755615234375, - "learning_rate": 3.020803495186506e-05, - "loss": 45.3244, - "step": 122470 - }, - { - "epoch": 0.49483469822274834, - "grad_norm": 1503.520263671875, - "learning_rate": 3.020462082616928e-05, - "loss": 44.012, - "step": 122480 - }, - { - "epoch": 0.494875099488116, - "grad_norm": 772.3239135742188, - "learning_rate": 3.0201206599006733e-05, - "loss": 50.5993, - "step": 122490 - }, - { - "epoch": 0.4949155007534836, - "grad_norm": 343.9445495605469, - "learning_rate": 3.0197792270443982e-05, - "loss": 43.6323, - "step": 122500 - }, - { - "epoch": 0.49495590201885126, - "grad_norm": 765.3672485351562, - "learning_rate": 3.0194377840547606e-05, - "loss": 48.1465, - "step": 122510 - }, - { - "epoch": 0.49499630328421884, - "grad_norm": 936.2794799804688, - "learning_rate": 3.0190963309384156e-05, - "loss": 58.2882, - "step": 122520 - }, - { - "epoch": 0.4950367045495865, - "grad_norm": 770.3196411132812, - "learning_rate": 3.01875486770202e-05, - "loss": 43.9133, - "step": 122530 - }, - { - "epoch": 0.4950771058149541, - "grad_norm": 411.43255615234375, - "learning_rate": 3.0184133943522314e-05, - "loss": 50.147, - "step": 122540 - }, - { - "epoch": 0.49511750708032176, - "grad_norm": 589.0484619140625, - "learning_rate": 3.0180719108957063e-05, - "loss": 48.991, - "step": 122550 - }, - { - "epoch": 0.4951579083456894, - "grad_norm": 317.48046875, - "learning_rate": 3.0177304173391037e-05, - "loss": 47.1381, - "step": 122560 - }, - { - "epoch": 0.49519830961105704, - "grad_norm": 2199.10888671875, - "learning_rate": 3.0173889136890786e-05, - "loss": 60.0664, - "step": 122570 - }, - { - "epoch": 0.4952387108764246, - "grad_norm": 2371.26025390625, - "learning_rate": 3.0170473999522915e-05, - "loss": 53.2513, - "step": 122580 - }, - { - "epoch": 0.49527911214179227, - "grad_norm": 2204.5810546875, - "learning_rate": 3.016705876135399e-05, - "loss": 75.0456, - "step": 122590 - }, - { - "epoch": 0.4953195134071599, - "grad_norm": 1063.3651123046875, - "learning_rate": 3.016364342245059e-05, - "loss": 85.0364, - "step": 122600 - }, - { - "epoch": 0.49535991467252755, - "grad_norm": 1718.4310302734375, - "learning_rate": 3.016022798287931e-05, - "loss": 66.0194, - "step": 122610 - }, - { - "epoch": 0.4954003159378952, - "grad_norm": 953.0413208007812, - "learning_rate": 3.0156812442706715e-05, - "loss": 54.6552, - "step": 122620 - }, - { - "epoch": 0.4954407172032628, - "grad_norm": 702.347900390625, - "learning_rate": 3.015339680199941e-05, - "loss": 39.405, - "step": 122630 - }, - { - "epoch": 0.4954811184686304, - "grad_norm": 477.78302001953125, - "learning_rate": 3.0149981060823995e-05, - "loss": 39.2307, - "step": 122640 - }, - { - "epoch": 0.49552151973399805, - "grad_norm": 893.2791137695312, - "learning_rate": 3.0146565219247036e-05, - "loss": 70.0523, - "step": 122650 - }, - { - "epoch": 0.4955619209993657, - "grad_norm": 493.2530517578125, - "learning_rate": 3.0143149277335138e-05, - "loss": 59.8175, - "step": 122660 - }, - { - "epoch": 0.49560232226473333, - "grad_norm": 2252.927734375, - "learning_rate": 3.01397332351549e-05, - "loss": 79.7985, - "step": 122670 - }, - { - "epoch": 0.49564272353010097, - "grad_norm": 639.860595703125, - "learning_rate": 3.013631709277292e-05, - "loss": 80.3618, - "step": 122680 - }, - { - "epoch": 0.4956831247954686, - "grad_norm": 542.6490478515625, - "learning_rate": 3.013290085025579e-05, - "loss": 64.4751, - "step": 122690 - }, - { - "epoch": 0.49572352606083625, - "grad_norm": 933.4337158203125, - "learning_rate": 3.0129484507670115e-05, - "loss": 57.9422, - "step": 122700 - }, - { - "epoch": 0.49576392732620383, - "grad_norm": 1414.308837890625, - "learning_rate": 3.0126068065082504e-05, - "loss": 50.3865, - "step": 122710 - }, - { - "epoch": 0.4958043285915715, - "grad_norm": 900.0178833007812, - "learning_rate": 3.0122651522559553e-05, - "loss": 54.273, - "step": 122720 - }, - { - "epoch": 0.4958447298569391, - "grad_norm": 1027.331787109375, - "learning_rate": 3.0119234880167867e-05, - "loss": 46.3332, - "step": 122730 - }, - { - "epoch": 0.49588513112230675, - "grad_norm": 527.3615112304688, - "learning_rate": 3.0115818137974067e-05, - "loss": 98.7442, - "step": 122740 - }, - { - "epoch": 0.4959255323876744, - "grad_norm": 546.8230590820312, - "learning_rate": 3.0112401296044757e-05, - "loss": 68.4313, - "step": 122750 - }, - { - "epoch": 0.49596593365304203, - "grad_norm": 835.651611328125, - "learning_rate": 3.0108984354446556e-05, - "loss": 45.3258, - "step": 122760 - }, - { - "epoch": 0.4960063349184096, - "grad_norm": 459.29144287109375, - "learning_rate": 3.0105567313246074e-05, - "loss": 70.6443, - "step": 122770 - }, - { - "epoch": 0.49604673618377726, - "grad_norm": 1120.5982666015625, - "learning_rate": 3.010215017250993e-05, - "loss": 73.4087, - "step": 122780 - }, - { - "epoch": 0.4960871374491449, - "grad_norm": 815.575927734375, - "learning_rate": 3.0098732932304734e-05, - "loss": 51.7506, - "step": 122790 - }, - { - "epoch": 0.49612753871451254, - "grad_norm": 529.2800903320312, - "learning_rate": 3.0095315592697126e-05, - "loss": 43.3278, - "step": 122800 - }, - { - "epoch": 0.4961679399798802, - "grad_norm": 352.13128662109375, - "learning_rate": 3.0091898153753705e-05, - "loss": 52.9101, - "step": 122810 - }, - { - "epoch": 0.4962083412452478, - "grad_norm": 2164.66064453125, - "learning_rate": 3.0088480615541113e-05, - "loss": 69.647, - "step": 122820 - }, - { - "epoch": 0.49624874251061546, - "grad_norm": 468.87158203125, - "learning_rate": 3.0085062978125967e-05, - "loss": 65.0103, - "step": 122830 - }, - { - "epoch": 0.49628914377598304, - "grad_norm": 771.53857421875, - "learning_rate": 3.008164524157491e-05, - "loss": 47.0131, - "step": 122840 - }, - { - "epoch": 0.4963295450413507, - "grad_norm": 630.9580078125, - "learning_rate": 3.0078227405954557e-05, - "loss": 36.1967, - "step": 122850 - }, - { - "epoch": 0.4963699463067183, - "grad_norm": 632.0338745117188, - "learning_rate": 3.007480947133155e-05, - "loss": 78.9644, - "step": 122860 - }, - { - "epoch": 0.49641034757208596, - "grad_norm": 748.582763671875, - "learning_rate": 3.0071391437772516e-05, - "loss": 52.5742, - "step": 122870 - }, - { - "epoch": 0.4964507488374536, - "grad_norm": 402.97882080078125, - "learning_rate": 3.00679733053441e-05, - "loss": 49.2446, - "step": 122880 - }, - { - "epoch": 0.49649115010282124, - "grad_norm": 814.5822143554688, - "learning_rate": 3.0064555074112927e-05, - "loss": 49.1269, - "step": 122890 - }, - { - "epoch": 0.49653155136818883, - "grad_norm": 484.45751953125, - "learning_rate": 3.0061136744145652e-05, - "loss": 46.5377, - "step": 122900 - }, - { - "epoch": 0.49657195263355647, - "grad_norm": 1381.5684814453125, - "learning_rate": 3.0057718315508905e-05, - "loss": 56.9945, - "step": 122910 - }, - { - "epoch": 0.4966123538989241, - "grad_norm": 1052.2681884765625, - "learning_rate": 3.005429978826934e-05, - "loss": 54.9941, - "step": 122920 - }, - { - "epoch": 0.49665275516429175, - "grad_norm": 1034.386474609375, - "learning_rate": 3.0050881162493593e-05, - "loss": 46.4848, - "step": 122930 - }, - { - "epoch": 0.4966931564296594, - "grad_norm": 919.5922241210938, - "learning_rate": 3.004746243824833e-05, - "loss": 56.9942, - "step": 122940 - }, - { - "epoch": 0.49673355769502703, - "grad_norm": 787.20947265625, - "learning_rate": 3.0044043615600175e-05, - "loss": 65.2977, - "step": 122950 - }, - { - "epoch": 0.4967739589603946, - "grad_norm": 0.0, - "learning_rate": 3.0040624694615803e-05, - "loss": 41.2134, - "step": 122960 - }, - { - "epoch": 0.49681436022576225, - "grad_norm": 683.5494995117188, - "learning_rate": 3.003720567536185e-05, - "loss": 72.0073, - "step": 122970 - }, - { - "epoch": 0.4968547614911299, - "grad_norm": 575.966796875, - "learning_rate": 3.003378655790498e-05, - "loss": 65.4787, - "step": 122980 - }, - { - "epoch": 0.49689516275649753, - "grad_norm": 1353.0836181640625, - "learning_rate": 3.0030367342311848e-05, - "loss": 67.0195, - "step": 122990 - }, - { - "epoch": 0.4969355640218652, - "grad_norm": 590.2861328125, - "learning_rate": 3.002694802864912e-05, - "loss": 53.1483, - "step": 123000 - }, - { - "epoch": 0.4969759652872328, - "grad_norm": 1127.41259765625, - "learning_rate": 3.002352861698345e-05, - "loss": 88.6338, - "step": 123010 - }, - { - "epoch": 0.49701636655260045, - "grad_norm": 902.4027099609375, - "learning_rate": 3.00201091073815e-05, - "loss": 39.8632, - "step": 123020 - }, - { - "epoch": 0.49705676781796804, - "grad_norm": 760.8033447265625, - "learning_rate": 3.0016689499909945e-05, - "loss": 37.9242, - "step": 123030 - }, - { - "epoch": 0.4970971690833357, - "grad_norm": 612.1544189453125, - "learning_rate": 3.0013269794635446e-05, - "loss": 56.6241, - "step": 123040 - }, - { - "epoch": 0.4971375703487033, - "grad_norm": 777.5595092773438, - "learning_rate": 3.0009849991624662e-05, - "loss": 82.5385, - "step": 123050 - }, - { - "epoch": 0.49717797161407096, - "grad_norm": 1702.484619140625, - "learning_rate": 3.0006430090944277e-05, - "loss": 59.1968, - "step": 123060 - }, - { - "epoch": 0.4972183728794386, - "grad_norm": 537.38134765625, - "learning_rate": 3.000301009266096e-05, - "loss": 46.0222, - "step": 123070 - }, - { - "epoch": 0.49725877414480624, - "grad_norm": 305.5016784667969, - "learning_rate": 2.9999589996841386e-05, - "loss": 57.2696, - "step": 123080 - }, - { - "epoch": 0.4972991754101738, - "grad_norm": 844.5637817382812, - "learning_rate": 2.9996169803552233e-05, - "loss": 56.0423, - "step": 123090 - }, - { - "epoch": 0.49733957667554146, - "grad_norm": 883.1146850585938, - "learning_rate": 2.9992749512860173e-05, - "loss": 32.5165, - "step": 123100 - }, - { - "epoch": 0.4973799779409091, - "grad_norm": 1908.025634765625, - "learning_rate": 2.99893291248319e-05, - "loss": 110.2845, - "step": 123110 - }, - { - "epoch": 0.49742037920627674, - "grad_norm": 1058.1068115234375, - "learning_rate": 2.9985908639534075e-05, - "loss": 67.6201, - "step": 123120 - }, - { - "epoch": 0.4974607804716444, - "grad_norm": 1846.614013671875, - "learning_rate": 2.998248805703341e-05, - "loss": 49.1556, - "step": 123130 - }, - { - "epoch": 0.497501181737012, - "grad_norm": 1011.847412109375, - "learning_rate": 2.9979067377396565e-05, - "loss": 25.5965, - "step": 123140 - }, - { - "epoch": 0.49754158300237966, - "grad_norm": 627.6660766601562, - "learning_rate": 2.9975646600690234e-05, - "loss": 34.3208, - "step": 123150 - }, - { - "epoch": 0.49758198426774725, - "grad_norm": 770.2060546875, - "learning_rate": 2.9972225726981113e-05, - "loss": 52.1218, - "step": 123160 - }, - { - "epoch": 0.4976223855331149, - "grad_norm": 398.234130859375, - "learning_rate": 2.99688047563359e-05, - "loss": 60.0683, - "step": 123170 - }, - { - "epoch": 0.4976627867984825, - "grad_norm": 1440.9393310546875, - "learning_rate": 2.996538368882127e-05, - "loss": 94.9321, - "step": 123180 - }, - { - "epoch": 0.49770318806385017, - "grad_norm": 580.39892578125, - "learning_rate": 2.9961962524503927e-05, - "loss": 46.6695, - "step": 123190 - }, - { - "epoch": 0.4977435893292178, - "grad_norm": 402.80419921875, - "learning_rate": 2.9958541263450584e-05, - "loss": 58.4956, - "step": 123200 - }, - { - "epoch": 0.49778399059458545, - "grad_norm": 709.5952758789062, - "learning_rate": 2.9955119905727925e-05, - "loss": 54.5307, - "step": 123210 - }, - { - "epoch": 0.49782439185995303, - "grad_norm": 418.1485290527344, - "learning_rate": 2.995169845140264e-05, - "loss": 33.9692, - "step": 123220 - }, - { - "epoch": 0.49786479312532067, - "grad_norm": 555.658203125, - "learning_rate": 2.994827690054145e-05, - "loss": 57.7686, - "step": 123230 - }, - { - "epoch": 0.4979051943906883, - "grad_norm": 1826.1614990234375, - "learning_rate": 2.9944855253211052e-05, - "loss": 59.2031, - "step": 123240 - }, - { - "epoch": 0.49794559565605595, - "grad_norm": 728.795166015625, - "learning_rate": 2.9941433509478156e-05, - "loss": 64.7671, - "step": 123250 - }, - { - "epoch": 0.4979859969214236, - "grad_norm": 562.8650512695312, - "learning_rate": 2.993801166940947e-05, - "loss": 30.4642, - "step": 123260 - }, - { - "epoch": 0.49802639818679123, - "grad_norm": 718.48681640625, - "learning_rate": 2.9934589733071704e-05, - "loss": 43.2636, - "step": 123270 - }, - { - "epoch": 0.4980667994521588, - "grad_norm": 775.666748046875, - "learning_rate": 2.9931167700531578e-05, - "loss": 64.5946, - "step": 123280 - }, - { - "epoch": 0.49810720071752645, - "grad_norm": 941.3685302734375, - "learning_rate": 2.9927745571855786e-05, - "loss": 68.1941, - "step": 123290 - }, - { - "epoch": 0.4981476019828941, - "grad_norm": 806.8750610351562, - "learning_rate": 2.9924323347111073e-05, - "loss": 75.4404, - "step": 123300 - }, - { - "epoch": 0.49818800324826173, - "grad_norm": 1879.86279296875, - "learning_rate": 2.992090102636413e-05, - "loss": 82.7253, - "step": 123310 - }, - { - "epoch": 0.4982284045136294, - "grad_norm": 500.6525573730469, - "learning_rate": 2.991747860968168e-05, - "loss": 59.617, - "step": 123320 - }, - { - "epoch": 0.498268805778997, - "grad_norm": 576.9788818359375, - "learning_rate": 2.9914056097130473e-05, - "loss": 37.7997, - "step": 123330 - }, - { - "epoch": 0.49830920704436465, - "grad_norm": 1132.8011474609375, - "learning_rate": 2.9910633488777196e-05, - "loss": 60.6151, - "step": 123340 - }, - { - "epoch": 0.49834960830973224, - "grad_norm": 1793.87109375, - "learning_rate": 2.99072107846886e-05, - "loss": 49.606, - "step": 123350 - }, - { - "epoch": 0.4983900095750999, - "grad_norm": 226.36434936523438, - "learning_rate": 2.9903787984931396e-05, - "loss": 29.6053, - "step": 123360 - }, - { - "epoch": 0.4984304108404675, - "grad_norm": 257.8052673339844, - "learning_rate": 2.9900365089572328e-05, - "loss": 42.242, - "step": 123370 - }, - { - "epoch": 0.49847081210583516, - "grad_norm": 719.3456420898438, - "learning_rate": 2.9896942098678122e-05, - "loss": 46.6997, - "step": 123380 - }, - { - "epoch": 0.4985112133712028, - "grad_norm": 641.3980102539062, - "learning_rate": 2.9893519012315503e-05, - "loss": 67.5891, - "step": 123390 - }, - { - "epoch": 0.49855161463657044, - "grad_norm": 685.2548828125, - "learning_rate": 2.9890095830551207e-05, - "loss": 60.4898, - "step": 123400 - }, - { - "epoch": 0.498592015901938, - "grad_norm": 632.763671875, - "learning_rate": 2.9886672553451985e-05, - "loss": 61.0664, - "step": 123410 - }, - { - "epoch": 0.49863241716730566, - "grad_norm": 0.0, - "learning_rate": 2.988324918108456e-05, - "loss": 56.5434, - "step": 123420 - }, - { - "epoch": 0.4986728184326733, - "grad_norm": 1752.1431884765625, - "learning_rate": 2.9879825713515676e-05, - "loss": 105.4006, - "step": 123430 - }, - { - "epoch": 0.49871321969804094, - "grad_norm": 575.599853515625, - "learning_rate": 2.9876402150812078e-05, - "loss": 52.491, - "step": 123440 - }, - { - "epoch": 0.4987536209634086, - "grad_norm": 620.3259887695312, - "learning_rate": 2.9872978493040514e-05, - "loss": 48.9634, - "step": 123450 - }, - { - "epoch": 0.4987940222287762, - "grad_norm": 837.0609741210938, - "learning_rate": 2.9869554740267724e-05, - "loss": 46.9197, - "step": 123460 - }, - { - "epoch": 0.49883442349414386, - "grad_norm": 231.35792541503906, - "learning_rate": 2.986613089256046e-05, - "loss": 62.4719, - "step": 123470 - }, - { - "epoch": 0.49887482475951145, - "grad_norm": 848.2174072265625, - "learning_rate": 2.9862706949985463e-05, - "loss": 63.5692, - "step": 123480 - }, - { - "epoch": 0.4989152260248791, - "grad_norm": 482.8079833984375, - "learning_rate": 2.9859282912609497e-05, - "loss": 48.4904, - "step": 123490 - }, - { - "epoch": 0.49895562729024673, - "grad_norm": 786.4293212890625, - "learning_rate": 2.98558587804993e-05, - "loss": 51.905, - "step": 123500 - }, - { - "epoch": 0.49899602855561437, - "grad_norm": 967.361083984375, - "learning_rate": 2.9852434553721642e-05, - "loss": 55.1196, - "step": 123510 - }, - { - "epoch": 0.499036429820982, - "grad_norm": 691.3804321289062, - "learning_rate": 2.984901023234327e-05, - "loss": 63.2347, - "step": 123520 - }, - { - "epoch": 0.49907683108634965, - "grad_norm": 2461.232421875, - "learning_rate": 2.9845585816430955e-05, - "loss": 86.1027, - "step": 123530 - }, - { - "epoch": 0.49911723235171723, - "grad_norm": 625.51220703125, - "learning_rate": 2.9842161306051446e-05, - "loss": 59.4591, - "step": 123540 - }, - { - "epoch": 0.4991576336170849, - "grad_norm": 937.5235595703125, - "learning_rate": 2.9838736701271514e-05, - "loss": 86.4743, - "step": 123550 - }, - { - "epoch": 0.4991980348824525, - "grad_norm": 719.2523803710938, - "learning_rate": 2.9835312002157913e-05, - "loss": 57.3074, - "step": 123560 - }, - { - "epoch": 0.49923843614782015, - "grad_norm": 1146.9417724609375, - "learning_rate": 2.983188720877741e-05, - "loss": 59.0976, - "step": 123570 - }, - { - "epoch": 0.4992788374131878, - "grad_norm": 643.1884155273438, - "learning_rate": 2.9828462321196788e-05, - "loss": 59.2565, - "step": 123580 - }, - { - "epoch": 0.49931923867855543, - "grad_norm": 717.5197143554688, - "learning_rate": 2.9825037339482804e-05, - "loss": 62.1937, - "step": 123590 - }, - { - "epoch": 0.499359639943923, - "grad_norm": 911.0812377929688, - "learning_rate": 2.9821612263702226e-05, - "loss": 53.7806, - "step": 123600 - }, - { - "epoch": 0.49940004120929066, - "grad_norm": 655.2685546875, - "learning_rate": 2.981818709392184e-05, - "loss": 76.8916, - "step": 123610 - }, - { - "epoch": 0.4994404424746583, - "grad_norm": 694.6813354492188, - "learning_rate": 2.981476183020842e-05, - "loss": 54.2385, - "step": 123620 - }, - { - "epoch": 0.49948084374002594, - "grad_norm": 277.7393798828125, - "learning_rate": 2.9811336472628737e-05, - "loss": 56.6968, - "step": 123630 - }, - { - "epoch": 0.4995212450053936, - "grad_norm": 2142.0654296875, - "learning_rate": 2.9807911021249573e-05, - "loss": 71.4143, - "step": 123640 - }, - { - "epoch": 0.4995616462707612, - "grad_norm": 685.448974609375, - "learning_rate": 2.9804485476137706e-05, - "loss": 41.1674, - "step": 123650 - }, - { - "epoch": 0.49960204753612886, - "grad_norm": 0.0, - "learning_rate": 2.9801059837359925e-05, - "loss": 67.2189, - "step": 123660 - }, - { - "epoch": 0.49964244880149644, - "grad_norm": 563.3724975585938, - "learning_rate": 2.979763410498301e-05, - "loss": 42.097, - "step": 123670 - }, - { - "epoch": 0.4996828500668641, - "grad_norm": 823.16259765625, - "learning_rate": 2.9794208279073743e-05, - "loss": 46.5123, - "step": 123680 - }, - { - "epoch": 0.4997232513322317, - "grad_norm": 396.4298095703125, - "learning_rate": 2.9790782359698914e-05, - "loss": 38.3301, - "step": 123690 - }, - { - "epoch": 0.49976365259759936, - "grad_norm": 487.17205810546875, - "learning_rate": 2.9787356346925327e-05, - "loss": 54.4319, - "step": 123700 - }, - { - "epoch": 0.499804053862967, - "grad_norm": 1401.6353759765625, - "learning_rate": 2.9783930240819758e-05, - "loss": 89.3769, - "step": 123710 - }, - { - "epoch": 0.49984445512833464, - "grad_norm": 401.6158447265625, - "learning_rate": 2.978050404144901e-05, - "loss": 39.3288, - "step": 123720 - }, - { - "epoch": 0.4998848563937022, - "grad_norm": 1638.397216796875, - "learning_rate": 2.977707774887987e-05, - "loss": 85.3148, - "step": 123730 - }, - { - "epoch": 0.49992525765906987, - "grad_norm": 525.0819702148438, - "learning_rate": 2.9773651363179144e-05, - "loss": 62.3121, - "step": 123740 - }, - { - "epoch": 0.4999656589244375, - "grad_norm": 576.019775390625, - "learning_rate": 2.9770224884413623e-05, - "loss": 34.6271, - "step": 123750 - }, - { - "epoch": 0.5000060601898051, - "grad_norm": 596.5224609375, - "learning_rate": 2.9766798312650112e-05, - "loss": 45.5462, - "step": 123760 - }, - { - "epoch": 0.5000464614551727, - "grad_norm": 1210.31005859375, - "learning_rate": 2.976337164795541e-05, - "loss": 43.6167, - "step": 123770 - }, - { - "epoch": 0.5000868627205404, - "grad_norm": 1921.7060546875, - "learning_rate": 2.975994489039634e-05, - "loss": 52.5438, - "step": 123780 - }, - { - "epoch": 0.500127263985908, - "grad_norm": 906.3182373046875, - "learning_rate": 2.9756518040039682e-05, - "loss": 64.2235, - "step": 123790 - }, - { - "epoch": 0.5001676652512757, - "grad_norm": 724.0418701171875, - "learning_rate": 2.9753091096952255e-05, - "loss": 63.5248, - "step": 123800 - }, - { - "epoch": 0.5002080665166433, - "grad_norm": 1490.1466064453125, - "learning_rate": 2.9749664061200877e-05, - "loss": 38.7024, - "step": 123810 - }, - { - "epoch": 0.5002484677820109, - "grad_norm": 262.531494140625, - "learning_rate": 2.9746236932852355e-05, - "loss": 66.6208, - "step": 123820 - }, - { - "epoch": 0.5002888690473786, - "grad_norm": 283.681640625, - "learning_rate": 2.974280971197349e-05, - "loss": 45.1853, - "step": 123830 - }, - { - "epoch": 0.5003292703127462, - "grad_norm": 636.9791259765625, - "learning_rate": 2.973938239863111e-05, - "loss": 54.2482, - "step": 123840 - }, - { - "epoch": 0.5003696715781139, - "grad_norm": 1471.0166015625, - "learning_rate": 2.9735954992892033e-05, - "loss": 73.4811, - "step": 123850 - }, - { - "epoch": 0.5004100728434815, - "grad_norm": 870.6261596679688, - "learning_rate": 2.9732527494823083e-05, - "loss": 69.1818, - "step": 123860 - }, - { - "epoch": 0.5004504741088491, - "grad_norm": 1015.466064453125, - "learning_rate": 2.9729099904491058e-05, - "loss": 65.9109, - "step": 123870 - }, - { - "epoch": 0.5004908753742168, - "grad_norm": 769.5751953125, - "learning_rate": 2.97256722219628e-05, - "loss": 41.2782, - "step": 123880 - }, - { - "epoch": 0.5005312766395843, - "grad_norm": 814.8590087890625, - "learning_rate": 2.9722244447305135e-05, - "loss": 84.8106, - "step": 123890 - }, - { - "epoch": 0.5005716779049519, - "grad_norm": 476.1192932128906, - "learning_rate": 2.9718816580584884e-05, - "loss": 43.7513, - "step": 123900 - }, - { - "epoch": 0.5006120791703196, - "grad_norm": 729.9635620117188, - "learning_rate": 2.9715388621868873e-05, - "loss": 44.9804, - "step": 123910 - }, - { - "epoch": 0.5006524804356872, - "grad_norm": 657.6481323242188, - "learning_rate": 2.971196057122393e-05, - "loss": 68.3173, - "step": 123920 - }, - { - "epoch": 0.5006928817010549, - "grad_norm": 620.4969482421875, - "learning_rate": 2.9708532428716883e-05, - "loss": 50.9084, - "step": 123930 - }, - { - "epoch": 0.5007332829664225, - "grad_norm": 840.12890625, - "learning_rate": 2.9705104194414586e-05, - "loss": 63.0937, - "step": 123940 - }, - { - "epoch": 0.5007736842317901, - "grad_norm": 1180.94970703125, - "learning_rate": 2.9701675868383848e-05, - "loss": 69.3689, - "step": 123950 - }, - { - "epoch": 0.5008140854971578, - "grad_norm": 630.738037109375, - "learning_rate": 2.9698247450691525e-05, - "loss": 56.1486, - "step": 123960 - }, - { - "epoch": 0.5008544867625254, - "grad_norm": 1723.3427734375, - "learning_rate": 2.9694818941404444e-05, - "loss": 52.5067, - "step": 123970 - }, - { - "epoch": 0.5008948880278931, - "grad_norm": 1080.7830810546875, - "learning_rate": 2.9691390340589466e-05, - "loss": 63.3121, - "step": 123980 - }, - { - "epoch": 0.5009352892932607, - "grad_norm": 428.3757629394531, - "learning_rate": 2.9687961648313405e-05, - "loss": 45.4763, - "step": 123990 - }, - { - "epoch": 0.5009756905586283, - "grad_norm": 585.084228515625, - "learning_rate": 2.9684532864643122e-05, - "loss": 47.8538, - "step": 124000 - }, - { - "epoch": 0.501016091823996, - "grad_norm": 951.522705078125, - "learning_rate": 2.9681103989645453e-05, - "loss": 64.0775, - "step": 124010 - }, - { - "epoch": 0.5010564930893635, - "grad_norm": 414.710693359375, - "learning_rate": 2.9677675023387258e-05, - "loss": 35.3562, - "step": 124020 - }, - { - "epoch": 0.5010968943547311, - "grad_norm": 1510.4676513671875, - "learning_rate": 2.9674245965935378e-05, - "loss": 85.7635, - "step": 124030 - }, - { - "epoch": 0.5011372956200988, - "grad_norm": 555.8351440429688, - "learning_rate": 2.9670816817356668e-05, - "loss": 39.8303, - "step": 124040 - }, - { - "epoch": 0.5011776968854664, - "grad_norm": 976.0780639648438, - "learning_rate": 2.9667387577717976e-05, - "loss": 61.9712, - "step": 124050 - }, - { - "epoch": 0.5012180981508341, - "grad_norm": 1387.890869140625, - "learning_rate": 2.9663958247086166e-05, - "loss": 58.7585, - "step": 124060 - }, - { - "epoch": 0.5012584994162017, - "grad_norm": 1036.8333740234375, - "learning_rate": 2.966052882552809e-05, - "loss": 44.4313, - "step": 124070 - }, - { - "epoch": 0.5012989006815693, - "grad_norm": 1022.4268798828125, - "learning_rate": 2.9657099313110593e-05, - "loss": 57.0276, - "step": 124080 - }, - { - "epoch": 0.501339301946937, - "grad_norm": 1398.85546875, - "learning_rate": 2.9653669709900555e-05, - "loss": 58.4346, - "step": 124090 - }, - { - "epoch": 0.5013797032123046, - "grad_norm": 496.8158264160156, - "learning_rate": 2.9650240015964825e-05, - "loss": 62.5083, - "step": 124100 - }, - { - "epoch": 0.5014201044776723, - "grad_norm": 911.7360229492188, - "learning_rate": 2.964681023137028e-05, - "loss": 52.9003, - "step": 124110 - }, - { - "epoch": 0.5014605057430399, - "grad_norm": 932.399658203125, - "learning_rate": 2.9643380356183775e-05, - "loss": 56.112, - "step": 124120 - }, - { - "epoch": 0.5015009070084075, - "grad_norm": 0.0, - "learning_rate": 2.9639950390472177e-05, - "loss": 45.8005, - "step": 124130 - }, - { - "epoch": 0.5015413082737752, - "grad_norm": 789.1591796875, - "learning_rate": 2.9636520334302354e-05, - "loss": 73.2515, - "step": 124140 - }, - { - "epoch": 0.5015817095391427, - "grad_norm": 1213.167236328125, - "learning_rate": 2.9633090187741185e-05, - "loss": 45.1217, - "step": 124150 - }, - { - "epoch": 0.5016221108045104, - "grad_norm": 1115.8275146484375, - "learning_rate": 2.9629659950855544e-05, - "loss": 48.8063, - "step": 124160 - }, - { - "epoch": 0.501662512069878, - "grad_norm": 1056.0667724609375, - "learning_rate": 2.9626229623712288e-05, - "loss": 59.1212, - "step": 124170 - }, - { - "epoch": 0.5017029133352456, - "grad_norm": 269.9258728027344, - "learning_rate": 2.9622799206378305e-05, - "loss": 83.2247, - "step": 124180 - }, - { - "epoch": 0.5017433146006133, - "grad_norm": 539.0936889648438, - "learning_rate": 2.961936869892048e-05, - "loss": 53.4077, - "step": 124190 - }, - { - "epoch": 0.5017837158659809, - "grad_norm": 3030.404052734375, - "learning_rate": 2.9615938101405676e-05, - "loss": 76.7048, - "step": 124200 - }, - { - "epoch": 0.5018241171313486, - "grad_norm": 1794.911376953125, - "learning_rate": 2.961250741390078e-05, - "loss": 60.769, - "step": 124210 - }, - { - "epoch": 0.5018645183967162, - "grad_norm": 1027.421875, - "learning_rate": 2.960907663647268e-05, - "loss": 43.8524, - "step": 124220 - }, - { - "epoch": 0.5019049196620838, - "grad_norm": 726.89697265625, - "learning_rate": 2.9605645769188268e-05, - "loss": 57.9974, - "step": 124230 - }, - { - "epoch": 0.5019453209274515, - "grad_norm": 741.7726440429688, - "learning_rate": 2.9602214812114415e-05, - "loss": 45.0123, - "step": 124240 - }, - { - "epoch": 0.5019857221928191, - "grad_norm": 572.0324096679688, - "learning_rate": 2.9598783765318007e-05, - "loss": 41.7237, - "step": 124250 - }, - { - "epoch": 0.5020261234581868, - "grad_norm": 905.0762329101562, - "learning_rate": 2.9595352628865947e-05, - "loss": 60.8909, - "step": 124260 - }, - { - "epoch": 0.5020665247235544, - "grad_norm": 418.3033447265625, - "learning_rate": 2.9591921402825123e-05, - "loss": 56.9024, - "step": 124270 - }, - { - "epoch": 0.5021069259889219, - "grad_norm": 642.4514770507812, - "learning_rate": 2.958849008726242e-05, - "loss": 56.9975, - "step": 124280 - }, - { - "epoch": 0.5021473272542896, - "grad_norm": 545.2382202148438, - "learning_rate": 2.9585058682244748e-05, - "loss": 54.7912, - "step": 124290 - }, - { - "epoch": 0.5021877285196572, - "grad_norm": 1271.816162109375, - "learning_rate": 2.9581627187838994e-05, - "loss": 67.879, - "step": 124300 - }, - { - "epoch": 0.5022281297850248, - "grad_norm": 404.8785095214844, - "learning_rate": 2.9578195604112064e-05, - "loss": 68.5172, - "step": 124310 - }, - { - "epoch": 0.5022685310503925, - "grad_norm": 1688.64599609375, - "learning_rate": 2.9574763931130843e-05, - "loss": 53.8717, - "step": 124320 - }, - { - "epoch": 0.5023089323157601, - "grad_norm": 574.3252563476562, - "learning_rate": 2.9571332168962256e-05, - "loss": 66.5893, - "step": 124330 - }, - { - "epoch": 0.5023493335811278, - "grad_norm": 418.29864501953125, - "learning_rate": 2.956790031767319e-05, - "loss": 47.6842, - "step": 124340 - }, - { - "epoch": 0.5023897348464954, - "grad_norm": 1550.780029296875, - "learning_rate": 2.9564468377330556e-05, - "loss": 66.8126, - "step": 124350 - }, - { - "epoch": 0.502430136111863, - "grad_norm": 5627.90478515625, - "learning_rate": 2.956103634800126e-05, - "loss": 86.4269, - "step": 124360 - }, - { - "epoch": 0.5024705373772307, - "grad_norm": 2004.539306640625, - "learning_rate": 2.9557604229752212e-05, - "loss": 57.9298, - "step": 124370 - }, - { - "epoch": 0.5025109386425983, - "grad_norm": 531.3135986328125, - "learning_rate": 2.9554172022650317e-05, - "loss": 53.1231, - "step": 124380 - }, - { - "epoch": 0.502551339907966, - "grad_norm": 1106.7730712890625, - "learning_rate": 2.9550739726762507e-05, - "loss": 62.664, - "step": 124390 - }, - { - "epoch": 0.5025917411733335, - "grad_norm": 1226.38818359375, - "learning_rate": 2.9547307342155673e-05, - "loss": 94.7886, - "step": 124400 - }, - { - "epoch": 0.5026321424387011, - "grad_norm": 608.1491088867188, - "learning_rate": 2.9543874868896747e-05, - "loss": 71.896, - "step": 124410 - }, - { - "epoch": 0.5026725437040688, - "grad_norm": 240.57858276367188, - "learning_rate": 2.954044230705264e-05, - "loss": 96.0706, - "step": 124420 - }, - { - "epoch": 0.5027129449694364, - "grad_norm": 596.6292114257812, - "learning_rate": 2.9537009656690275e-05, - "loss": 50.2395, - "step": 124430 - }, - { - "epoch": 0.502753346234804, - "grad_norm": 742.9195556640625, - "learning_rate": 2.953357691787656e-05, - "loss": 39.4977, - "step": 124440 - }, - { - "epoch": 0.5027937475001717, - "grad_norm": 715.0127563476562, - "learning_rate": 2.9530144090678435e-05, - "loss": 40.7906, - "step": 124450 - }, - { - "epoch": 0.5028341487655393, - "grad_norm": 494.5076599121094, - "learning_rate": 2.952671117516282e-05, - "loss": 57.1695, - "step": 124460 - }, - { - "epoch": 0.502874550030907, - "grad_norm": 766.6427001953125, - "learning_rate": 2.952327817139664e-05, - "loss": 62.9475, - "step": 124470 - }, - { - "epoch": 0.5029149512962746, - "grad_norm": 670.2885131835938, - "learning_rate": 2.9519845079446823e-05, - "loss": 47.9745, - "step": 124480 - }, - { - "epoch": 0.5029553525616423, - "grad_norm": 636.1236572265625, - "learning_rate": 2.9516411899380296e-05, - "loss": 57.113, - "step": 124490 - }, - { - "epoch": 0.5029957538270099, - "grad_norm": 434.45477294921875, - "learning_rate": 2.9512978631264006e-05, - "loss": 44.9944, - "step": 124500 - }, - { - "epoch": 0.5030361550923775, - "grad_norm": 687.5653686523438, - "learning_rate": 2.950954527516487e-05, - "loss": 50.9721, - "step": 124510 - }, - { - "epoch": 0.5030765563577452, - "grad_norm": 451.1505126953125, - "learning_rate": 2.9506111831149818e-05, - "loss": 44.7546, - "step": 124520 - }, - { - "epoch": 0.5031169576231127, - "grad_norm": 1212.3389892578125, - "learning_rate": 2.9502678299285798e-05, - "loss": 44.7764, - "step": 124530 - }, - { - "epoch": 0.5031573588884803, - "grad_norm": 572.8779907226562, - "learning_rate": 2.949924467963975e-05, - "loss": 48.557, - "step": 124540 - }, - { - "epoch": 0.503197760153848, - "grad_norm": 520.7388916015625, - "learning_rate": 2.949581097227861e-05, - "loss": 80.5346, - "step": 124550 - }, - { - "epoch": 0.5032381614192156, - "grad_norm": 713.9329833984375, - "learning_rate": 2.9492377177269315e-05, - "loss": 32.9715, - "step": 124560 - }, - { - "epoch": 0.5032785626845833, - "grad_norm": 978.047119140625, - "learning_rate": 2.9488943294678818e-05, - "loss": 52.3525, - "step": 124570 - }, - { - "epoch": 0.5033189639499509, - "grad_norm": 1501.214599609375, - "learning_rate": 2.948550932457407e-05, - "loss": 55.6765, - "step": 124580 - }, - { - "epoch": 0.5033593652153185, - "grad_norm": 625.777587890625, - "learning_rate": 2.9482075267021995e-05, - "loss": 46.8336, - "step": 124590 - }, - { - "epoch": 0.5033997664806862, - "grad_norm": 601.8237915039062, - "learning_rate": 2.9478641122089562e-05, - "loss": 51.4297, - "step": 124600 - }, - { - "epoch": 0.5034401677460538, - "grad_norm": 924.4441528320312, - "learning_rate": 2.947520688984371e-05, - "loss": 60.7378, - "step": 124610 - }, - { - "epoch": 0.5034805690114215, - "grad_norm": 516.689453125, - "learning_rate": 2.9471772570351398e-05, - "loss": 31.4356, - "step": 124620 - }, - { - "epoch": 0.5035209702767891, - "grad_norm": 1193.8798828125, - "learning_rate": 2.9468338163679577e-05, - "loss": 60.812, - "step": 124630 - }, - { - "epoch": 0.5035613715421567, - "grad_norm": 663.6973266601562, - "learning_rate": 2.9464903669895205e-05, - "loss": 35.3694, - "step": 124640 - }, - { - "epoch": 0.5036017728075244, - "grad_norm": 1353.3345947265625, - "learning_rate": 2.9461469089065234e-05, - "loss": 51.9646, - "step": 124650 - }, - { - "epoch": 0.5036421740728919, - "grad_norm": 529.1851806640625, - "learning_rate": 2.945803442125663e-05, - "loss": 48.2301, - "step": 124660 - }, - { - "epoch": 0.5036825753382596, - "grad_norm": 331.9324035644531, - "learning_rate": 2.9454599666536347e-05, - "loss": 54.444, - "step": 124670 - }, - { - "epoch": 0.5037229766036272, - "grad_norm": 620.619384765625, - "learning_rate": 2.9451164824971356e-05, - "loss": 92.8183, - "step": 124680 - }, - { - "epoch": 0.5037633778689948, - "grad_norm": 441.77734375, - "learning_rate": 2.9447729896628612e-05, - "loss": 47.082, - "step": 124690 - }, - { - "epoch": 0.5038037791343625, - "grad_norm": 891.2379150390625, - "learning_rate": 2.944429488157508e-05, - "loss": 70.2383, - "step": 124700 - }, - { - "epoch": 0.5038441803997301, - "grad_norm": 1449.03955078125, - "learning_rate": 2.9440859779877728e-05, - "loss": 57.3963, - "step": 124710 - }, - { - "epoch": 0.5038845816650978, - "grad_norm": 1024.277587890625, - "learning_rate": 2.943742459160354e-05, - "loss": 48.08, - "step": 124720 - }, - { - "epoch": 0.5039249829304654, - "grad_norm": 272.8819885253906, - "learning_rate": 2.9433989316819467e-05, - "loss": 65.6629, - "step": 124730 - }, - { - "epoch": 0.503965384195833, - "grad_norm": 665.0719604492188, - "learning_rate": 2.943055395559249e-05, - "loss": 60.4535, - "step": 124740 - }, - { - "epoch": 0.5040057854612007, - "grad_norm": 1616.3466796875, - "learning_rate": 2.9427118507989586e-05, - "loss": 88.4401, - "step": 124750 - }, - { - "epoch": 0.5040461867265683, - "grad_norm": 1536.4730224609375, - "learning_rate": 2.942368297407772e-05, - "loss": 67.7257, - "step": 124760 - }, - { - "epoch": 0.504086587991936, - "grad_norm": 258.504638671875, - "learning_rate": 2.942024735392389e-05, - "loss": 45.7606, - "step": 124770 - }, - { - "epoch": 0.5041269892573036, - "grad_norm": 1484.1357421875, - "learning_rate": 2.9416811647595048e-05, - "loss": 65.3528, - "step": 124780 - }, - { - "epoch": 0.5041673905226711, - "grad_norm": 933.3783569335938, - "learning_rate": 2.94133758551582e-05, - "loss": 30.6998, - "step": 124790 - }, - { - "epoch": 0.5042077917880388, - "grad_norm": 437.1009216308594, - "learning_rate": 2.9409939976680313e-05, - "loss": 73.0693, - "step": 124800 - }, - { - "epoch": 0.5042481930534064, - "grad_norm": 888.6316528320312, - "learning_rate": 2.9406504012228375e-05, - "loss": 38.9805, - "step": 124810 - }, - { - "epoch": 0.504288594318774, - "grad_norm": 454.36627197265625, - "learning_rate": 2.9403067961869367e-05, - "loss": 32.8442, - "step": 124820 - }, - { - "epoch": 0.5043289955841417, - "grad_norm": 648.9022827148438, - "learning_rate": 2.9399631825670292e-05, - "loss": 40.4288, - "step": 124830 - }, - { - "epoch": 0.5043693968495093, - "grad_norm": 967.1868286132812, - "learning_rate": 2.939619560369813e-05, - "loss": 63.7735, - "step": 124840 - }, - { - "epoch": 0.504409798114877, - "grad_norm": 1663.5084228515625, - "learning_rate": 2.9392759296019867e-05, - "loss": 57.5468, - "step": 124850 - }, - { - "epoch": 0.5044501993802446, - "grad_norm": 1367.6448974609375, - "learning_rate": 2.9389322902702497e-05, - "loss": 61.3237, - "step": 124860 - }, - { - "epoch": 0.5044906006456122, - "grad_norm": 756.0995483398438, - "learning_rate": 2.9385886423813024e-05, - "loss": 65.3766, - "step": 124870 - }, - { - "epoch": 0.5045310019109799, - "grad_norm": 630.2203369140625, - "learning_rate": 2.938244985941844e-05, - "loss": 42.1302, - "step": 124880 - }, - { - "epoch": 0.5045714031763475, - "grad_norm": 451.697998046875, - "learning_rate": 2.9379013209585726e-05, - "loss": 47.7857, - "step": 124890 - }, - { - "epoch": 0.5046118044417152, - "grad_norm": 890.1043701171875, - "learning_rate": 2.9375576474381905e-05, - "loss": 51.9966, - "step": 124900 - }, - { - "epoch": 0.5046522057070827, - "grad_norm": 901.4338989257812, - "learning_rate": 2.9372139653873958e-05, - "loss": 65.1977, - "step": 124910 - }, - { - "epoch": 0.5046926069724503, - "grad_norm": 1069.64208984375, - "learning_rate": 2.9368702748128912e-05, - "loss": 55.9412, - "step": 124920 - }, - { - "epoch": 0.504733008237818, - "grad_norm": 1394.1937255859375, - "learning_rate": 2.9365265757213745e-05, - "loss": 66.5137, - "step": 124930 - }, - { - "epoch": 0.5047734095031856, - "grad_norm": 260.0003967285156, - "learning_rate": 2.9361828681195484e-05, - "loss": 46.3195, - "step": 124940 - }, - { - "epoch": 0.5048138107685533, - "grad_norm": 521.545654296875, - "learning_rate": 2.9358391520141122e-05, - "loss": 54.6765, - "step": 124950 - }, - { - "epoch": 0.5048542120339209, - "grad_norm": 676.0771484375, - "learning_rate": 2.935495427411768e-05, - "loss": 27.8798, - "step": 124960 - }, - { - "epoch": 0.5048946132992885, - "grad_norm": 554.6319580078125, - "learning_rate": 2.9351516943192155e-05, - "loss": 39.7234, - "step": 124970 - }, - { - "epoch": 0.5049350145646562, - "grad_norm": 1820.2799072265625, - "learning_rate": 2.9348079527431567e-05, - "loss": 47.2992, - "step": 124980 - }, - { - "epoch": 0.5049754158300238, - "grad_norm": 730.9981689453125, - "learning_rate": 2.9344642026902924e-05, - "loss": 58.4385, - "step": 124990 - }, - { - "epoch": 0.5050158170953915, - "grad_norm": 232.58773803710938, - "learning_rate": 2.9341204441673266e-05, - "loss": 51.1445, - "step": 125000 - }, - { - "epoch": 0.5050562183607591, - "grad_norm": 744.6976318359375, - "learning_rate": 2.9337766771809577e-05, - "loss": 56.7142, - "step": 125010 - }, - { - "epoch": 0.5050966196261267, - "grad_norm": 807.66748046875, - "learning_rate": 2.9334329017378898e-05, - "loss": 66.2732, - "step": 125020 - }, - { - "epoch": 0.5051370208914944, - "grad_norm": 1156.4183349609375, - "learning_rate": 2.933089117844824e-05, - "loss": 53.764, - "step": 125030 - }, - { - "epoch": 0.5051774221568619, - "grad_norm": 1409.17626953125, - "learning_rate": 2.9327453255084638e-05, - "loss": 51.1774, - "step": 125040 - }, - { - "epoch": 0.5052178234222295, - "grad_norm": 1206.735107421875, - "learning_rate": 2.9324015247355098e-05, - "loss": 50.6902, - "step": 125050 - }, - { - "epoch": 0.5052582246875972, - "grad_norm": 1590.0701904296875, - "learning_rate": 2.932057715532665e-05, - "loss": 61.7914, - "step": 125060 - }, - { - "epoch": 0.5052986259529648, - "grad_norm": 905.3733520507812, - "learning_rate": 2.9317138979066327e-05, - "loss": 44.9613, - "step": 125070 - }, - { - "epoch": 0.5053390272183325, - "grad_norm": 747.811279296875, - "learning_rate": 2.9313700718641167e-05, - "loss": 65.6674, - "step": 125080 - }, - { - "epoch": 0.5053794284837001, - "grad_norm": 0.0, - "learning_rate": 2.9310262374118185e-05, - "loss": 45.0609, - "step": 125090 - }, - { - "epoch": 0.5054198297490677, - "grad_norm": 528.493408203125, - "learning_rate": 2.9306823945564422e-05, - "loss": 58.0582, - "step": 125100 - }, - { - "epoch": 0.5054602310144354, - "grad_norm": 680.7048950195312, - "learning_rate": 2.9303385433046902e-05, - "loss": 36.582, - "step": 125110 - }, - { - "epoch": 0.505500632279803, - "grad_norm": 767.6009521484375, - "learning_rate": 2.9299946836632673e-05, - "loss": 40.1557, - "step": 125120 - }, - { - "epoch": 0.5055410335451707, - "grad_norm": 1023.16015625, - "learning_rate": 2.929650815638877e-05, - "loss": 47.9998, - "step": 125130 - }, - { - "epoch": 0.5055814348105383, - "grad_norm": 588.2609252929688, - "learning_rate": 2.9293069392382224e-05, - "loss": 50.6491, - "step": 125140 - }, - { - "epoch": 0.5056218360759059, - "grad_norm": 831.1854858398438, - "learning_rate": 2.9289630544680075e-05, - "loss": 68.4844, - "step": 125150 - }, - { - "epoch": 0.5056622373412736, - "grad_norm": 1181.1595458984375, - "learning_rate": 2.9286191613349374e-05, - "loss": 84.0284, - "step": 125160 - }, - { - "epoch": 0.5057026386066411, - "grad_norm": 435.8255920410156, - "learning_rate": 2.9282752598457165e-05, - "loss": 58.2437, - "step": 125170 - }, - { - "epoch": 0.5057430398720087, - "grad_norm": 727.278564453125, - "learning_rate": 2.9279313500070483e-05, - "loss": 66.3914, - "step": 125180 - }, - { - "epoch": 0.5057834411373764, - "grad_norm": 302.980224609375, - "learning_rate": 2.927587431825639e-05, - "loss": 44.221, - "step": 125190 - }, - { - "epoch": 0.505823842402744, - "grad_norm": 758.2953491210938, - "learning_rate": 2.9272435053081922e-05, - "loss": 65.575, - "step": 125200 - }, - { - "epoch": 0.5058642436681117, - "grad_norm": 181.88172912597656, - "learning_rate": 2.9268995704614132e-05, - "loss": 40.2345, - "step": 125210 - }, - { - "epoch": 0.5059046449334793, - "grad_norm": 920.4985961914062, - "learning_rate": 2.926555627292007e-05, - "loss": 51.6828, - "step": 125220 - }, - { - "epoch": 0.505945046198847, - "grad_norm": 1029.88330078125, - "learning_rate": 2.9262116758066793e-05, - "loss": 64.6292, - "step": 125230 - }, - { - "epoch": 0.5059854474642146, - "grad_norm": 733.74951171875, - "learning_rate": 2.9258677160121352e-05, - "loss": 53.2531, - "step": 125240 - }, - { - "epoch": 0.5060258487295822, - "grad_norm": 2031.1961669921875, - "learning_rate": 2.9255237479150816e-05, - "loss": 62.1065, - "step": 125250 - }, - { - "epoch": 0.5060662499949499, - "grad_norm": 900.5405883789062, - "learning_rate": 2.925179771522223e-05, - "loss": 52.0671, - "step": 125260 - }, - { - "epoch": 0.5061066512603175, - "grad_norm": 554.5584106445312, - "learning_rate": 2.924835786840266e-05, - "loss": 38.7164, - "step": 125270 - }, - { - "epoch": 0.5061470525256851, - "grad_norm": 386.526123046875, - "learning_rate": 2.9244917938759163e-05, - "loss": 54.9804, - "step": 125280 - }, - { - "epoch": 0.5061874537910528, - "grad_norm": 616.0713500976562, - "learning_rate": 2.9241477926358818e-05, - "loss": 56.0183, - "step": 125290 - }, - { - "epoch": 0.5062278550564203, - "grad_norm": 831.8729858398438, - "learning_rate": 2.923803783126866e-05, - "loss": 46.269, - "step": 125300 - }, - { - "epoch": 0.506268256321788, - "grad_norm": 506.3004455566406, - "learning_rate": 2.923459765355578e-05, - "loss": 44.9364, - "step": 125310 - }, - { - "epoch": 0.5063086575871556, - "grad_norm": 662.8009033203125, - "learning_rate": 2.9231157393287234e-05, - "loss": 44.294, - "step": 125320 - }, - { - "epoch": 0.5063490588525232, - "grad_norm": 1044.3468017578125, - "learning_rate": 2.9227717050530107e-05, - "loss": 59.7929, - "step": 125330 - }, - { - "epoch": 0.5063894601178909, - "grad_norm": 836.830322265625, - "learning_rate": 2.922427662535145e-05, - "loss": 50.6092, - "step": 125340 - }, - { - "epoch": 0.5064298613832585, - "grad_norm": 848.728759765625, - "learning_rate": 2.9220836117818344e-05, - "loss": 46.637, - "step": 125350 - }, - { - "epoch": 0.5064702626486262, - "grad_norm": 456.876953125, - "learning_rate": 2.9217395527997875e-05, - "loss": 45.2742, - "step": 125360 - }, - { - "epoch": 0.5065106639139938, - "grad_norm": 780.4397583007812, - "learning_rate": 2.921395485595711e-05, - "loss": 47.9569, - "step": 125370 - }, - { - "epoch": 0.5065510651793614, - "grad_norm": 851.5180053710938, - "learning_rate": 2.9210514101763113e-05, - "loss": 74.7248, - "step": 125380 - }, - { - "epoch": 0.5065914664447291, - "grad_norm": 776.1827392578125, - "learning_rate": 2.9207073265482982e-05, - "loss": 49.9978, - "step": 125390 - }, - { - "epoch": 0.5066318677100967, - "grad_norm": 528.63330078125, - "learning_rate": 2.920363234718379e-05, - "loss": 73.2618, - "step": 125400 - }, - { - "epoch": 0.5066722689754644, - "grad_norm": 1279.852294921875, - "learning_rate": 2.9200191346932627e-05, - "loss": 48.9025, - "step": 125410 - }, - { - "epoch": 0.506712670240832, - "grad_norm": 1353.2442626953125, - "learning_rate": 2.919675026479656e-05, - "loss": 51.6571, - "step": 125420 - }, - { - "epoch": 0.5067530715061995, - "grad_norm": 1191.01416015625, - "learning_rate": 2.9193309100842693e-05, - "loss": 46.1766, - "step": 125430 - }, - { - "epoch": 0.5067934727715672, - "grad_norm": 1029.6173095703125, - "learning_rate": 2.9189867855138103e-05, - "loss": 58.42, - "step": 125440 - }, - { - "epoch": 0.5068338740369348, - "grad_norm": 575.3421630859375, - "learning_rate": 2.918642652774989e-05, - "loss": 49.3698, - "step": 125450 - }, - { - "epoch": 0.5068742753023024, - "grad_norm": 710.7742919921875, - "learning_rate": 2.9182985118745136e-05, - "loss": 46.0317, - "step": 125460 - }, - { - "epoch": 0.5069146765676701, - "grad_norm": 794.8711547851562, - "learning_rate": 2.9179543628190925e-05, - "loss": 61.0076, - "step": 125470 - }, - { - "epoch": 0.5069550778330377, - "grad_norm": 621.3440551757812, - "learning_rate": 2.9176102056154363e-05, - "loss": 58.3854, - "step": 125480 - }, - { - "epoch": 0.5069954790984054, - "grad_norm": 617.9365844726562, - "learning_rate": 2.9172660402702546e-05, - "loss": 47.3791, - "step": 125490 - }, - { - "epoch": 0.507035880363773, - "grad_norm": 902.1868286132812, - "learning_rate": 2.916921866790256e-05, - "loss": 49.7716, - "step": 125500 - }, - { - "epoch": 0.5070762816291406, - "grad_norm": 882.5235595703125, - "learning_rate": 2.9165776851821508e-05, - "loss": 44.4464, - "step": 125510 - }, - { - "epoch": 0.5071166828945083, - "grad_norm": 893.1301879882812, - "learning_rate": 2.9162334954526493e-05, - "loss": 40.7801, - "step": 125520 - }, - { - "epoch": 0.5071570841598759, - "grad_norm": 457.0363464355469, - "learning_rate": 2.915889297608462e-05, - "loss": 43.9219, - "step": 125530 - }, - { - "epoch": 0.5071974854252436, - "grad_norm": 504.5679626464844, - "learning_rate": 2.9155450916562994e-05, - "loss": 33.9357, - "step": 125540 - }, - { - "epoch": 0.5072378866906111, - "grad_norm": 866.373046875, - "learning_rate": 2.91520087760287e-05, - "loss": 72.3561, - "step": 125550 - }, - { - "epoch": 0.5072782879559787, - "grad_norm": 599.7020874023438, - "learning_rate": 2.9148566554548857e-05, - "loss": 53.9173, - "step": 125560 - }, - { - "epoch": 0.5073186892213464, - "grad_norm": 567.0419921875, - "learning_rate": 2.914512425219058e-05, - "loss": 71.651, - "step": 125570 - }, - { - "epoch": 0.507359090486714, - "grad_norm": 628.0565795898438, - "learning_rate": 2.914168186902097e-05, - "loss": 56.5678, - "step": 125580 - }, - { - "epoch": 0.5073994917520817, - "grad_norm": 528.4154663085938, - "learning_rate": 2.9138239405107136e-05, - "loss": 42.4691, - "step": 125590 - }, - { - "epoch": 0.5074398930174493, - "grad_norm": 829.2301635742188, - "learning_rate": 2.9134796860516194e-05, - "loss": 49.0979, - "step": 125600 - }, - { - "epoch": 0.5074802942828169, - "grad_norm": 1135.746826171875, - "learning_rate": 2.9131354235315268e-05, - "loss": 60.1448, - "step": 125610 - }, - { - "epoch": 0.5075206955481846, - "grad_norm": 402.7625427246094, - "learning_rate": 2.912791152957145e-05, - "loss": 69.6794, - "step": 125620 - }, - { - "epoch": 0.5075610968135522, - "grad_norm": 654.6475219726562, - "learning_rate": 2.9124468743351884e-05, - "loss": 58.2971, - "step": 125630 - }, - { - "epoch": 0.5076014980789199, - "grad_norm": 552.7247924804688, - "learning_rate": 2.9121025876723674e-05, - "loss": 66.4654, - "step": 125640 - }, - { - "epoch": 0.5076418993442875, - "grad_norm": 887.902099609375, - "learning_rate": 2.9117582929753932e-05, - "loss": 76.861, - "step": 125650 - }, - { - "epoch": 0.5076823006096551, - "grad_norm": 966.7318725585938, - "learning_rate": 2.9114139902509807e-05, - "loss": 57.0813, - "step": 125660 - }, - { - "epoch": 0.5077227018750228, - "grad_norm": 1011.2301635742188, - "learning_rate": 2.9110696795058394e-05, - "loss": 45.7711, - "step": 125670 - }, - { - "epoch": 0.5077631031403903, - "grad_norm": 557.7918090820312, - "learning_rate": 2.9107253607466832e-05, - "loss": 55.3871, - "step": 125680 - }, - { - "epoch": 0.5078035044057579, - "grad_norm": 3118.763916015625, - "learning_rate": 2.910381033980225e-05, - "loss": 50.6585, - "step": 125690 - }, - { - "epoch": 0.5078439056711256, - "grad_norm": 863.99658203125, - "learning_rate": 2.910036699213178e-05, - "loss": 68.7242, - "step": 125700 - }, - { - "epoch": 0.5078843069364932, - "grad_norm": 1471.3134765625, - "learning_rate": 2.909692356452254e-05, - "loss": 52.9448, - "step": 125710 - }, - { - "epoch": 0.5079247082018609, - "grad_norm": 553.6021118164062, - "learning_rate": 2.9093480057041662e-05, - "loss": 55.443, - "step": 125720 - }, - { - "epoch": 0.5079651094672285, - "grad_norm": 329.5192565917969, - "learning_rate": 2.9090036469756276e-05, - "loss": 67.455, - "step": 125730 - }, - { - "epoch": 0.5080055107325961, - "grad_norm": 1218.230712890625, - "learning_rate": 2.9086592802733536e-05, - "loss": 84.1644, - "step": 125740 - }, - { - "epoch": 0.5080459119979638, - "grad_norm": 140.51890563964844, - "learning_rate": 2.908314905604056e-05, - "loss": 58.1322, - "step": 125750 - }, - { - "epoch": 0.5080863132633314, - "grad_norm": 557.7669677734375, - "learning_rate": 2.9079705229744493e-05, - "loss": 58.6665, - "step": 125760 - }, - { - "epoch": 0.5081267145286991, - "grad_norm": 357.09234619140625, - "learning_rate": 2.907626132391246e-05, - "loss": 33.4991, - "step": 125770 - }, - { - "epoch": 0.5081671157940667, - "grad_norm": 404.6373596191406, - "learning_rate": 2.9072817338611636e-05, - "loss": 89.8573, - "step": 125780 - }, - { - "epoch": 0.5082075170594343, - "grad_norm": 606.2252807617188, - "learning_rate": 2.9069373273909123e-05, - "loss": 61.2002, - "step": 125790 - }, - { - "epoch": 0.508247918324802, - "grad_norm": 458.4631652832031, - "learning_rate": 2.9065929129872094e-05, - "loss": 46.1727, - "step": 125800 - }, - { - "epoch": 0.5082883195901695, - "grad_norm": 901.8555908203125, - "learning_rate": 2.906248490656768e-05, - "loss": 48.2973, - "step": 125810 - }, - { - "epoch": 0.5083287208555372, - "grad_norm": 676.2325439453125, - "learning_rate": 2.905904060406303e-05, - "loss": 57.2065, - "step": 125820 - }, - { - "epoch": 0.5083691221209048, - "grad_norm": 1410.12158203125, - "learning_rate": 2.905559622242529e-05, - "loss": 45.1312, - "step": 125830 - }, - { - "epoch": 0.5084095233862724, - "grad_norm": 386.5525207519531, - "learning_rate": 2.9052151761721617e-05, - "loss": 73.7721, - "step": 125840 - }, - { - "epoch": 0.5084499246516401, - "grad_norm": 1108.298583984375, - "learning_rate": 2.9048707222019154e-05, - "loss": 66.7324, - "step": 125850 - }, - { - "epoch": 0.5084903259170077, - "grad_norm": 752.5873413085938, - "learning_rate": 2.904526260338507e-05, - "loss": 52.5618, - "step": 125860 - }, - { - "epoch": 0.5085307271823754, - "grad_norm": 393.9751281738281, - "learning_rate": 2.9041817905886504e-05, - "loss": 55.0743, - "step": 125870 - }, - { - "epoch": 0.508571128447743, - "grad_norm": 509.97735595703125, - "learning_rate": 2.9038373129590622e-05, - "loss": 51.6777, - "step": 125880 - }, - { - "epoch": 0.5086115297131106, - "grad_norm": 1160.9713134765625, - "learning_rate": 2.903492827456457e-05, - "loss": 59.5145, - "step": 125890 - }, - { - "epoch": 0.5086519309784783, - "grad_norm": 510.33514404296875, - "learning_rate": 2.903148334087552e-05, - "loss": 34.4883, - "step": 125900 - }, - { - "epoch": 0.5086923322438459, - "grad_norm": 778.451171875, - "learning_rate": 2.9028038328590617e-05, - "loss": 57.813, - "step": 125910 - }, - { - "epoch": 0.5087327335092136, - "grad_norm": 604.14990234375, - "learning_rate": 2.9024593237777037e-05, - "loss": 44.8038, - "step": 125920 - }, - { - "epoch": 0.5087731347745812, - "grad_norm": 1083.8634033203125, - "learning_rate": 2.902114806850194e-05, - "loss": 66.7281, - "step": 125930 - }, - { - "epoch": 0.5088135360399487, - "grad_norm": 626.0941772460938, - "learning_rate": 2.9017702820832498e-05, - "loss": 64.0597, - "step": 125940 - }, - { - "epoch": 0.5088539373053164, - "grad_norm": 2549.89453125, - "learning_rate": 2.9014257494835862e-05, - "loss": 83.0733, - "step": 125950 - }, - { - "epoch": 0.508894338570684, - "grad_norm": 271.8528137207031, - "learning_rate": 2.901081209057921e-05, - "loss": 72.4616, - "step": 125960 - }, - { - "epoch": 0.5089347398360516, - "grad_norm": 418.9285583496094, - "learning_rate": 2.900736660812972e-05, - "loss": 32.0242, - "step": 125970 - }, - { - "epoch": 0.5089751411014193, - "grad_norm": 748.1174926757812, - "learning_rate": 2.900392104755455e-05, - "loss": 62.5196, - "step": 125980 - }, - { - "epoch": 0.5090155423667869, - "grad_norm": 1057.87060546875, - "learning_rate": 2.900047540892088e-05, - "loss": 66.0844, - "step": 125990 - }, - { - "epoch": 0.5090559436321546, - "grad_norm": 1053.06689453125, - "learning_rate": 2.8997029692295874e-05, - "loss": 59.8133, - "step": 126000 - }, - { - "epoch": 0.5090963448975222, - "grad_norm": 1008.4521484375, - "learning_rate": 2.8993583897746717e-05, - "loss": 67.719, - "step": 126010 - }, - { - "epoch": 0.5091367461628898, - "grad_norm": 400.8494567871094, - "learning_rate": 2.8990138025340596e-05, - "loss": 55.9787, - "step": 126020 - }, - { - "epoch": 0.5091771474282575, - "grad_norm": 776.619140625, - "learning_rate": 2.8986692075144673e-05, - "loss": 53.733, - "step": 126030 - }, - { - "epoch": 0.5092175486936251, - "grad_norm": 344.3357238769531, - "learning_rate": 2.8983246047226135e-05, - "loss": 49.7655, - "step": 126040 - }, - { - "epoch": 0.5092579499589928, - "grad_norm": 461.5896911621094, - "learning_rate": 2.897979994165217e-05, - "loss": 66.4076, - "step": 126050 - }, - { - "epoch": 0.5092983512243604, - "grad_norm": 0.0, - "learning_rate": 2.8976353758489955e-05, - "loss": 44.4132, - "step": 126060 - }, - { - "epoch": 0.5093387524897279, - "grad_norm": 446.7651062011719, - "learning_rate": 2.897290749780667e-05, - "loss": 48.4824, - "step": 126070 - }, - { - "epoch": 0.5093791537550956, - "grad_norm": 1131.740966796875, - "learning_rate": 2.8969461159669513e-05, - "loss": 45.0098, - "step": 126080 - }, - { - "epoch": 0.5094195550204632, - "grad_norm": 1675.897705078125, - "learning_rate": 2.8966014744145663e-05, - "loss": 93.9438, - "step": 126090 - }, - { - "epoch": 0.5094599562858309, - "grad_norm": 665.18994140625, - "learning_rate": 2.8962568251302324e-05, - "loss": 52.6255, - "step": 126100 - }, - { - "epoch": 0.5095003575511985, - "grad_norm": 404.408203125, - "learning_rate": 2.895912168120667e-05, - "loss": 57.1605, - "step": 126110 - }, - { - "epoch": 0.5095407588165661, - "grad_norm": 690.7555541992188, - "learning_rate": 2.8955675033925895e-05, - "loss": 63.0719, - "step": 126120 - }, - { - "epoch": 0.5095811600819338, - "grad_norm": 937.3909912109375, - "learning_rate": 2.89522283095272e-05, - "loss": 61.4134, - "step": 126130 - }, - { - "epoch": 0.5096215613473014, - "grad_norm": 1157.58642578125, - "learning_rate": 2.8948781508077786e-05, - "loss": 43.2224, - "step": 126140 - }, - { - "epoch": 0.509661962612669, - "grad_norm": 766.99609375, - "learning_rate": 2.894533462964485e-05, - "loss": 45.9795, - "step": 126150 - }, - { - "epoch": 0.5097023638780367, - "grad_norm": 1816.978515625, - "learning_rate": 2.894188767429557e-05, - "loss": 53.0026, - "step": 126160 - }, - { - "epoch": 0.5097427651434043, - "grad_norm": 813.2989501953125, - "learning_rate": 2.8938440642097164e-05, - "loss": 37.4644, - "step": 126170 - }, - { - "epoch": 0.509783166408772, - "grad_norm": 691.785888671875, - "learning_rate": 2.893499353311683e-05, - "loss": 51.4019, - "step": 126180 - }, - { - "epoch": 0.5098235676741395, - "grad_norm": 1490.9388427734375, - "learning_rate": 2.8931546347421773e-05, - "loss": 45.6698, - "step": 126190 - }, - { - "epoch": 0.5098639689395071, - "grad_norm": 548.1686401367188, - "learning_rate": 2.8928099085079197e-05, - "loss": 41.4219, - "step": 126200 - }, - { - "epoch": 0.5099043702048748, - "grad_norm": 1110.064697265625, - "learning_rate": 2.89246517461563e-05, - "loss": 71.6463, - "step": 126210 - }, - { - "epoch": 0.5099447714702424, - "grad_norm": 549.9130859375, - "learning_rate": 2.892120433072031e-05, - "loss": 45.8994, - "step": 126220 - }, - { - "epoch": 0.5099851727356101, - "grad_norm": 4628.6025390625, - "learning_rate": 2.8917756838838418e-05, - "loss": 75.2251, - "step": 126230 - }, - { - "epoch": 0.5100255740009777, - "grad_norm": 518.2777099609375, - "learning_rate": 2.8914309270577834e-05, - "loss": 48.5844, - "step": 126240 - }, - { - "epoch": 0.5100659752663453, - "grad_norm": 677.185302734375, - "learning_rate": 2.8910861626005776e-05, - "loss": 61.1778, - "step": 126250 - }, - { - "epoch": 0.510106376531713, - "grad_norm": 1070.945556640625, - "learning_rate": 2.8907413905189456e-05, - "loss": 43.0625, - "step": 126260 - }, - { - "epoch": 0.5101467777970806, - "grad_norm": 1414.0343017578125, - "learning_rate": 2.8903966108196096e-05, - "loss": 69.5534, - "step": 126270 - }, - { - "epoch": 0.5101871790624483, - "grad_norm": 1134.17041015625, - "learning_rate": 2.8900518235092905e-05, - "loss": 73.4212, - "step": 126280 - }, - { - "epoch": 0.5102275803278159, - "grad_norm": 970.3851318359375, - "learning_rate": 2.8897070285947098e-05, - "loss": 58.0343, - "step": 126290 - }, - { - "epoch": 0.5102679815931835, - "grad_norm": 340.81231689453125, - "learning_rate": 2.8893622260825904e-05, - "loss": 76.9128, - "step": 126300 - }, - { - "epoch": 0.5103083828585512, - "grad_norm": 637.6676025390625, - "learning_rate": 2.889017415979654e-05, - "loss": 68.674, - "step": 126310 - }, - { - "epoch": 0.5103487841239187, - "grad_norm": 643.8228759765625, - "learning_rate": 2.8886725982926232e-05, - "loss": 81.6261, - "step": 126320 - }, - { - "epoch": 0.5103891853892863, - "grad_norm": 378.8822021484375, - "learning_rate": 2.8883277730282194e-05, - "loss": 46.2597, - "step": 126330 - }, - { - "epoch": 0.510429586654654, - "grad_norm": 1168.7510986328125, - "learning_rate": 2.8879829401931652e-05, - "loss": 71.7926, - "step": 126340 - }, - { - "epoch": 0.5104699879200216, - "grad_norm": 522.0606689453125, - "learning_rate": 2.8876380997941847e-05, - "loss": 54.6862, - "step": 126350 - }, - { - "epoch": 0.5105103891853893, - "grad_norm": 396.95263671875, - "learning_rate": 2.8872932518379997e-05, - "loss": 44.9638, - "step": 126360 - }, - { - "epoch": 0.5105507904507569, - "grad_norm": 794.6138305664062, - "learning_rate": 2.886948396331333e-05, - "loss": 76.7646, - "step": 126370 - }, - { - "epoch": 0.5105911917161245, - "grad_norm": 1046.785888671875, - "learning_rate": 2.8866035332809084e-05, - "loss": 53.4936, - "step": 126380 - }, - { - "epoch": 0.5106315929814922, - "grad_norm": 869.3177490234375, - "learning_rate": 2.886258662693449e-05, - "loss": 43.9115, - "step": 126390 - }, - { - "epoch": 0.5106719942468598, - "grad_norm": 724.6814575195312, - "learning_rate": 2.8859137845756784e-05, - "loss": 45.5164, - "step": 126400 - }, - { - "epoch": 0.5107123955122275, - "grad_norm": 411.1709289550781, - "learning_rate": 2.8855688989343193e-05, - "loss": 57.5156, - "step": 126410 - }, - { - "epoch": 0.5107527967775951, - "grad_norm": 326.8053283691406, - "learning_rate": 2.885224005776096e-05, - "loss": 51.5223, - "step": 126420 - }, - { - "epoch": 0.5107931980429627, - "grad_norm": 843.7996215820312, - "learning_rate": 2.884879105107733e-05, - "loss": 44.6353, - "step": 126430 - }, - { - "epoch": 0.5108335993083304, - "grad_norm": 1049.7962646484375, - "learning_rate": 2.884534196935953e-05, - "loss": 58.139, - "step": 126440 - }, - { - "epoch": 0.5108740005736979, - "grad_norm": 2668.488037109375, - "learning_rate": 2.8841892812674808e-05, - "loss": 37.7585, - "step": 126450 - }, - { - "epoch": 0.5109144018390656, - "grad_norm": 1897.6781005859375, - "learning_rate": 2.8838443581090412e-05, - "loss": 50.773, - "step": 126460 - }, - { - "epoch": 0.5109548031044332, - "grad_norm": 556.2504272460938, - "learning_rate": 2.8834994274673582e-05, - "loss": 56.0956, - "step": 126470 - }, - { - "epoch": 0.5109952043698008, - "grad_norm": 292.6050109863281, - "learning_rate": 2.8831544893491563e-05, - "loss": 49.7071, - "step": 126480 - }, - { - "epoch": 0.5110356056351685, - "grad_norm": 442.0137939453125, - "learning_rate": 2.882809543761161e-05, - "loss": 51.5451, - "step": 126490 - }, - { - "epoch": 0.5110760069005361, - "grad_norm": 810.0142211914062, - "learning_rate": 2.8824645907100954e-05, - "loss": 55.3643, - "step": 126500 - }, - { - "epoch": 0.5111164081659038, - "grad_norm": 1782.1546630859375, - "learning_rate": 2.8821196302026863e-05, - "loss": 68.3655, - "step": 126510 - }, - { - "epoch": 0.5111568094312714, - "grad_norm": 900.7822875976562, - "learning_rate": 2.881774662245658e-05, - "loss": 48.6357, - "step": 126520 - }, - { - "epoch": 0.511197210696639, - "grad_norm": 648.9537963867188, - "learning_rate": 2.8814296868457364e-05, - "loss": 51.299, - "step": 126530 - }, - { - "epoch": 0.5112376119620067, - "grad_norm": 949.2380981445312, - "learning_rate": 2.8810847040096467e-05, - "loss": 66.241, - "step": 126540 - }, - { - "epoch": 0.5112780132273743, - "grad_norm": 227.76019287109375, - "learning_rate": 2.8807397137441145e-05, - "loss": 32.5676, - "step": 126550 - }, - { - "epoch": 0.511318414492742, - "grad_norm": 545.3878173828125, - "learning_rate": 2.8803947160558652e-05, - "loss": 77.758, - "step": 126560 - }, - { - "epoch": 0.5113588157581096, - "grad_norm": 621.6892700195312, - "learning_rate": 2.8800497109516263e-05, - "loss": 58.1789, - "step": 126570 - }, - { - "epoch": 0.5113992170234771, - "grad_norm": 1468.47509765625, - "learning_rate": 2.8797046984381208e-05, - "loss": 72.7109, - "step": 126580 - }, - { - "epoch": 0.5114396182888448, - "grad_norm": 406.4592590332031, - "learning_rate": 2.8793596785220783e-05, - "loss": 64.9677, - "step": 126590 - }, - { - "epoch": 0.5114800195542124, - "grad_norm": 290.8427429199219, - "learning_rate": 2.879014651210223e-05, - "loss": 47.5232, - "step": 126600 - }, - { - "epoch": 0.51152042081958, - "grad_norm": 1523.126220703125, - "learning_rate": 2.8786696165092812e-05, - "loss": 42.1534, - "step": 126610 - }, - { - "epoch": 0.5115608220849477, - "grad_norm": 1009.9312133789062, - "learning_rate": 2.8783245744259806e-05, - "loss": 61.5785, - "step": 126620 - }, - { - "epoch": 0.5116012233503153, - "grad_norm": 527.927490234375, - "learning_rate": 2.877979524967048e-05, - "loss": 61.9816, - "step": 126630 - }, - { - "epoch": 0.511641624615683, - "grad_norm": 575.1068725585938, - "learning_rate": 2.8776344681392105e-05, - "loss": 60.9859, - "step": 126640 - }, - { - "epoch": 0.5116820258810506, - "grad_norm": 338.4177551269531, - "learning_rate": 2.877289403949194e-05, - "loss": 54.107, - "step": 126650 - }, - { - "epoch": 0.5117224271464182, - "grad_norm": 354.56005859375, - "learning_rate": 2.876944332403726e-05, - "loss": 54.2842, - "step": 126660 - }, - { - "epoch": 0.5117628284117859, - "grad_norm": 989.342041015625, - "learning_rate": 2.8765992535095345e-05, - "loss": 100.1021, - "step": 126670 - }, - { - "epoch": 0.5118032296771535, - "grad_norm": 547.06982421875, - "learning_rate": 2.8762541672733472e-05, - "loss": 48.5291, - "step": 126680 - }, - { - "epoch": 0.5118436309425212, - "grad_norm": 1068.9014892578125, - "learning_rate": 2.8759090737018902e-05, - "loss": 58.7899, - "step": 126690 - }, - { - "epoch": 0.5118840322078888, - "grad_norm": 1199.883544921875, - "learning_rate": 2.875563972801893e-05, - "loss": 43.4314, - "step": 126700 - }, - { - "epoch": 0.5119244334732563, - "grad_norm": 616.8649291992188, - "learning_rate": 2.8752188645800822e-05, - "loss": 46.0097, - "step": 126710 - }, - { - "epoch": 0.511964834738624, - "grad_norm": 2177.439697265625, - "learning_rate": 2.874873749043187e-05, - "loss": 33.2443, - "step": 126720 - }, - { - "epoch": 0.5120052360039916, - "grad_norm": 3227.6328125, - "learning_rate": 2.8745286261979348e-05, - "loss": 64.0699, - "step": 126730 - }, - { - "epoch": 0.5120456372693593, - "grad_norm": 754.943115234375, - "learning_rate": 2.874183496051055e-05, - "loss": 40.789, - "step": 126740 - }, - { - "epoch": 0.5120860385347269, - "grad_norm": 560.1809692382812, - "learning_rate": 2.8738383586092745e-05, - "loss": 42.9098, - "step": 126750 - }, - { - "epoch": 0.5121264398000945, - "grad_norm": 1200.2210693359375, - "learning_rate": 2.8734932138793225e-05, - "loss": 58.1281, - "step": 126760 - }, - { - "epoch": 0.5121668410654622, - "grad_norm": 569.3571166992188, - "learning_rate": 2.8731480618679285e-05, - "loss": 71.4217, - "step": 126770 - }, - { - "epoch": 0.5122072423308298, - "grad_norm": 1209.4700927734375, - "learning_rate": 2.8728029025818204e-05, - "loss": 51.1106, - "step": 126780 - }, - { - "epoch": 0.5122476435961975, - "grad_norm": 541.9495849609375, - "learning_rate": 2.872457736027728e-05, - "loss": 42.31, - "step": 126790 - }, - { - "epoch": 0.5122880448615651, - "grad_norm": 1008.5130004882812, - "learning_rate": 2.8721125622123806e-05, - "loss": 101.0686, - "step": 126800 - }, - { - "epoch": 0.5123284461269327, - "grad_norm": 794.3250732421875, - "learning_rate": 2.8717673811425072e-05, - "loss": 56.3601, - "step": 126810 - }, - { - "epoch": 0.5123688473923004, - "grad_norm": 796.3021240234375, - "learning_rate": 2.8714221928248368e-05, - "loss": 50.3433, - "step": 126820 - }, - { - "epoch": 0.5124092486576679, - "grad_norm": 536.2626953125, - "learning_rate": 2.8710769972661e-05, - "loss": 53.0116, - "step": 126830 - }, - { - "epoch": 0.5124496499230355, - "grad_norm": 816.8209228515625, - "learning_rate": 2.8707317944730268e-05, - "loss": 42.8337, - "step": 126840 - }, - { - "epoch": 0.5124900511884032, - "grad_norm": 1031.8271484375, - "learning_rate": 2.8703865844523452e-05, - "loss": 47.6963, - "step": 126850 - }, - { - "epoch": 0.5125304524537708, - "grad_norm": 1931.7418212890625, - "learning_rate": 2.8700413672107866e-05, - "loss": 52.6271, - "step": 126860 - }, - { - "epoch": 0.5125708537191385, - "grad_norm": 747.9002075195312, - "learning_rate": 2.869696142755081e-05, - "loss": 57.4997, - "step": 126870 - }, - { - "epoch": 0.5126112549845061, - "grad_norm": 460.6169128417969, - "learning_rate": 2.8693509110919598e-05, - "loss": 49.3172, - "step": 126880 - }, - { - "epoch": 0.5126516562498737, - "grad_norm": 395.9313659667969, - "learning_rate": 2.8690056722281513e-05, - "loss": 50.4048, - "step": 126890 - }, - { - "epoch": 0.5126920575152414, - "grad_norm": 673.880859375, - "learning_rate": 2.8686604261703875e-05, - "loss": 44.8503, - "step": 126900 - }, - { - "epoch": 0.512732458780609, - "grad_norm": 745.4364624023438, - "learning_rate": 2.8683151729253994e-05, - "loss": 50.778, - "step": 126910 - }, - { - "epoch": 0.5127728600459767, - "grad_norm": 887.1217041015625, - "learning_rate": 2.8679699124999166e-05, - "loss": 44.0796, - "step": 126920 - }, - { - "epoch": 0.5128132613113443, - "grad_norm": 565.948486328125, - "learning_rate": 2.8676246449006715e-05, - "loss": 79.4067, - "step": 126930 - }, - { - "epoch": 0.5128536625767119, - "grad_norm": 602.2057495117188, - "learning_rate": 2.8672793701343946e-05, - "loss": 53.1035, - "step": 126940 - }, - { - "epoch": 0.5128940638420796, - "grad_norm": 670.5474243164062, - "learning_rate": 2.8669340882078166e-05, - "loss": 60.5767, - "step": 126950 - }, - { - "epoch": 0.5129344651074471, - "grad_norm": 447.6339111328125, - "learning_rate": 2.866588799127671e-05, - "loss": 67.3426, - "step": 126960 - }, - { - "epoch": 0.5129748663728148, - "grad_norm": 728.9623413085938, - "learning_rate": 2.8662435029006868e-05, - "loss": 72.0813, - "step": 126970 - }, - { - "epoch": 0.5130152676381824, - "grad_norm": 600.4077758789062, - "learning_rate": 2.865898199533597e-05, - "loss": 54.8805, - "step": 126980 - }, - { - "epoch": 0.51305566890355, - "grad_norm": 580.8352661132812, - "learning_rate": 2.865552889033134e-05, - "loss": 55.582, - "step": 126990 - }, - { - "epoch": 0.5130960701689177, - "grad_norm": 1138.0733642578125, - "learning_rate": 2.8652075714060295e-05, - "loss": 64.3382, - "step": 127000 - }, - { - "epoch": 0.5131364714342853, - "grad_norm": 281.50640869140625, - "learning_rate": 2.864862246659015e-05, - "loss": 45.8135, - "step": 127010 - }, - { - "epoch": 0.513176872699653, - "grad_norm": 1133.2386474609375, - "learning_rate": 2.8645169147988226e-05, - "loss": 56.7795, - "step": 127020 - }, - { - "epoch": 0.5132172739650206, - "grad_norm": 846.5953979492188, - "learning_rate": 2.8641715758321857e-05, - "loss": 31.9575, - "step": 127030 - }, - { - "epoch": 0.5132576752303882, - "grad_norm": 3238.64013671875, - "learning_rate": 2.8638262297658368e-05, - "loss": 78.6456, - "step": 127040 - }, - { - "epoch": 0.5132980764957559, - "grad_norm": 1550.1759033203125, - "learning_rate": 2.863480876606508e-05, - "loss": 56.8611, - "step": 127050 - }, - { - "epoch": 0.5133384777611235, - "grad_norm": 442.9187927246094, - "learning_rate": 2.863135516360932e-05, - "loss": 41.2573, - "step": 127060 - }, - { - "epoch": 0.5133788790264912, - "grad_norm": 1137.1116943359375, - "learning_rate": 2.8627901490358422e-05, - "loss": 46.5267, - "step": 127070 - }, - { - "epoch": 0.5134192802918588, - "grad_norm": 1043.6861572265625, - "learning_rate": 2.8624447746379722e-05, - "loss": 60.2835, - "step": 127080 - }, - { - "epoch": 0.5134596815572263, - "grad_norm": 739.7015380859375, - "learning_rate": 2.862099393174055e-05, - "loss": 48.624, - "step": 127090 - }, - { - "epoch": 0.513500082822594, - "grad_norm": 945.224853515625, - "learning_rate": 2.861754004650823e-05, - "loss": 63.0741, - "step": 127100 - }, - { - "epoch": 0.5135404840879616, - "grad_norm": 425.7527160644531, - "learning_rate": 2.8614086090750103e-05, - "loss": 46.5734, - "step": 127110 - }, - { - "epoch": 0.5135808853533292, - "grad_norm": 1409.0302734375, - "learning_rate": 2.8610632064533517e-05, - "loss": 47.2388, - "step": 127120 - }, - { - "epoch": 0.5136212866186969, - "grad_norm": 581.88916015625, - "learning_rate": 2.8607177967925792e-05, - "loss": 69.2831, - "step": 127130 - }, - { - "epoch": 0.5136616878840645, - "grad_norm": 478.259765625, - "learning_rate": 2.8603723800994275e-05, - "loss": 60.1264, - "step": 127140 - }, - { - "epoch": 0.5137020891494322, - "grad_norm": 2480.73095703125, - "learning_rate": 2.8600269563806302e-05, - "loss": 68.6383, - "step": 127150 - }, - { - "epoch": 0.5137424904147998, - "grad_norm": 673.7091064453125, - "learning_rate": 2.859681525642923e-05, - "loss": 65.6581, - "step": 127160 - }, - { - "epoch": 0.5137828916801674, - "grad_norm": 675.0384521484375, - "learning_rate": 2.8593360878930392e-05, - "loss": 57.0633, - "step": 127170 - }, - { - "epoch": 0.5138232929455351, - "grad_norm": 613.3917236328125, - "learning_rate": 2.8589906431377134e-05, - "loss": 50.5051, - "step": 127180 - }, - { - "epoch": 0.5138636942109027, - "grad_norm": 358.6791687011719, - "learning_rate": 2.8586451913836797e-05, - "loss": 48.4163, - "step": 127190 - }, - { - "epoch": 0.5139040954762704, - "grad_norm": 1500.3143310546875, - "learning_rate": 2.858299732637674e-05, - "loss": 69.6514, - "step": 127200 - }, - { - "epoch": 0.513944496741638, - "grad_norm": 303.2386474609375, - "learning_rate": 2.8579542669064296e-05, - "loss": 46.2417, - "step": 127210 - }, - { - "epoch": 0.5139848980070055, - "grad_norm": 936.28662109375, - "learning_rate": 2.8576087941966835e-05, - "loss": 59.2663, - "step": 127220 - }, - { - "epoch": 0.5140252992723732, - "grad_norm": 550.0284423828125, - "learning_rate": 2.857263314515169e-05, - "loss": 46.5445, - "step": 127230 - }, - { - "epoch": 0.5140657005377408, - "grad_norm": 1154.9842529296875, - "learning_rate": 2.856917827868622e-05, - "loss": 48.4179, - "step": 127240 - }, - { - "epoch": 0.5141061018031085, - "grad_norm": 400.9862365722656, - "learning_rate": 2.8565723342637796e-05, - "loss": 84.6768, - "step": 127250 - }, - { - "epoch": 0.5141465030684761, - "grad_norm": 1014.5621948242188, - "learning_rate": 2.856226833707375e-05, - "loss": 60.3505, - "step": 127260 - }, - { - "epoch": 0.5141869043338437, - "grad_norm": 351.1904602050781, - "learning_rate": 2.855881326206145e-05, - "loss": 59.1501, - "step": 127270 - }, - { - "epoch": 0.5142273055992114, - "grad_norm": 520.452392578125, - "learning_rate": 2.855535811766825e-05, - "loss": 60.3091, - "step": 127280 - }, - { - "epoch": 0.514267706864579, - "grad_norm": 717.9920043945312, - "learning_rate": 2.8551902903961526e-05, - "loss": 40.0301, - "step": 127290 - }, - { - "epoch": 0.5143081081299467, - "grad_norm": 560.2972412109375, - "learning_rate": 2.854844762100861e-05, - "loss": 50.7043, - "step": 127300 - }, - { - "epoch": 0.5143485093953143, - "grad_norm": 1860.7342529296875, - "learning_rate": 2.854499226887689e-05, - "loss": 63.0763, - "step": 127310 - }, - { - "epoch": 0.5143889106606819, - "grad_norm": 135.43519592285156, - "learning_rate": 2.8541536847633717e-05, - "loss": 45.6506, - "step": 127320 - }, - { - "epoch": 0.5144293119260496, - "grad_norm": 766.4346313476562, - "learning_rate": 2.8538081357346465e-05, - "loss": 72.0964, - "step": 127330 - }, - { - "epoch": 0.5144697131914172, - "grad_norm": 778.751220703125, - "learning_rate": 2.8534625798082488e-05, - "loss": 65.8823, - "step": 127340 - }, - { - "epoch": 0.5145101144567847, - "grad_norm": 490.54461669921875, - "learning_rate": 2.853117016990917e-05, - "loss": 41.0561, - "step": 127350 - }, - { - "epoch": 0.5145505157221524, - "grad_norm": 450.0224914550781, - "learning_rate": 2.8527714472893862e-05, - "loss": 61.406, - "step": 127360 - }, - { - "epoch": 0.51459091698752, - "grad_norm": 298.4858093261719, - "learning_rate": 2.8524258707103957e-05, - "loss": 88.5857, - "step": 127370 - }, - { - "epoch": 0.5146313182528877, - "grad_norm": 1860.1826171875, - "learning_rate": 2.85208028726068e-05, - "loss": 108.9013, - "step": 127380 - }, - { - "epoch": 0.5146717195182553, - "grad_norm": 948.806396484375, - "learning_rate": 2.8517346969469782e-05, - "loss": 47.6132, - "step": 127390 - }, - { - "epoch": 0.5147121207836229, - "grad_norm": 1325.517578125, - "learning_rate": 2.8513890997760272e-05, - "loss": 45.2896, - "step": 127400 - }, - { - "epoch": 0.5147525220489906, - "grad_norm": 605.529052734375, - "learning_rate": 2.851043495754566e-05, - "loss": 43.9705, - "step": 127410 - }, - { - "epoch": 0.5147929233143582, - "grad_norm": 426.92193603515625, - "learning_rate": 2.8506978848893302e-05, - "loss": 39.5349, - "step": 127420 - }, - { - "epoch": 0.5148333245797259, - "grad_norm": 1157.5235595703125, - "learning_rate": 2.8503522671870585e-05, - "loss": 56.4158, - "step": 127430 - }, - { - "epoch": 0.5148737258450935, - "grad_norm": 782.9813232421875, - "learning_rate": 2.8500066426544896e-05, - "loss": 85.5858, - "step": 127440 - }, - { - "epoch": 0.5149141271104611, - "grad_norm": 1053.0633544921875, - "learning_rate": 2.849661011298361e-05, - "loss": 64.2638, - "step": 127450 - }, - { - "epoch": 0.5149545283758288, - "grad_norm": 652.576171875, - "learning_rate": 2.8493153731254102e-05, - "loss": 43.2801, - "step": 127460 - }, - { - "epoch": 0.5149949296411963, - "grad_norm": 1399.2457275390625, - "learning_rate": 2.8489697281423767e-05, - "loss": 57.9567, - "step": 127470 - }, - { - "epoch": 0.515035330906564, - "grad_norm": 1636.6573486328125, - "learning_rate": 2.8486240763559986e-05, - "loss": 87.6768, - "step": 127480 - }, - { - "epoch": 0.5150757321719316, - "grad_norm": 0.0, - "learning_rate": 2.848278417773015e-05, - "loss": 68.869, - "step": 127490 - }, - { - "epoch": 0.5151161334372992, - "grad_norm": 499.08416748046875, - "learning_rate": 2.8479327524001636e-05, - "loss": 44.9667, - "step": 127500 - }, - { - "epoch": 0.5151565347026669, - "grad_norm": 905.2075805664062, - "learning_rate": 2.8475870802441844e-05, - "loss": 51.4555, - "step": 127510 - }, - { - "epoch": 0.5151969359680345, - "grad_norm": 567.349365234375, - "learning_rate": 2.847241401311817e-05, - "loss": 37.061, - "step": 127520 - }, - { - "epoch": 0.5152373372334021, - "grad_norm": 499.4542541503906, - "learning_rate": 2.846895715609799e-05, - "loss": 46.9687, - "step": 127530 - }, - { - "epoch": 0.5152777384987698, - "grad_norm": 1374.189208984375, - "learning_rate": 2.8465500231448704e-05, - "loss": 74.0274, - "step": 127540 - }, - { - "epoch": 0.5153181397641374, - "grad_norm": 1301.90673828125, - "learning_rate": 2.8462043239237707e-05, - "loss": 51.4661, - "step": 127550 - }, - { - "epoch": 0.5153585410295051, - "grad_norm": 290.5340576171875, - "learning_rate": 2.845858617953239e-05, - "loss": 49.4092, - "step": 127560 - }, - { - "epoch": 0.5153989422948727, - "grad_norm": 617.203857421875, - "learning_rate": 2.8455129052400166e-05, - "loss": 48.6922, - "step": 127570 - }, - { - "epoch": 0.5154393435602403, - "grad_norm": 877.1514282226562, - "learning_rate": 2.8451671857908415e-05, - "loss": 57.4519, - "step": 127580 - }, - { - "epoch": 0.515479744825608, - "grad_norm": 1293.734375, - "learning_rate": 2.844821459612454e-05, - "loss": 97.9196, - "step": 127590 - }, - { - "epoch": 0.5155201460909755, - "grad_norm": 628.9718017578125, - "learning_rate": 2.844475726711595e-05, - "loss": 44.7918, - "step": 127600 - }, - { - "epoch": 0.5155605473563432, - "grad_norm": 645.7540283203125, - "learning_rate": 2.844129987095005e-05, - "loss": 42.1978, - "step": 127610 - }, - { - "epoch": 0.5156009486217108, - "grad_norm": 470.0179138183594, - "learning_rate": 2.8437842407694236e-05, - "loss": 59.6109, - "step": 127620 - }, - { - "epoch": 0.5156413498870784, - "grad_norm": 548.3775024414062, - "learning_rate": 2.843438487741591e-05, - "loss": 53.7288, - "step": 127630 - }, - { - "epoch": 0.5156817511524461, - "grad_norm": 294.67889404296875, - "learning_rate": 2.843092728018248e-05, - "loss": 49.3608, - "step": 127640 - }, - { - "epoch": 0.5157221524178137, - "grad_norm": 2212.821533203125, - "learning_rate": 2.8427469616061364e-05, - "loss": 59.0455, - "step": 127650 - }, - { - "epoch": 0.5157625536831814, - "grad_norm": 849.8643798828125, - "learning_rate": 2.8424011885119954e-05, - "loss": 44.9502, - "step": 127660 - }, - { - "epoch": 0.515802954948549, - "grad_norm": 565.5281372070312, - "learning_rate": 2.842055408742567e-05, - "loss": 53.8755, - "step": 127670 - }, - { - "epoch": 0.5158433562139166, - "grad_norm": 874.5379638671875, - "learning_rate": 2.8417096223045925e-05, - "loss": 53.4064, - "step": 127680 - }, - { - "epoch": 0.5158837574792843, - "grad_norm": 761.3839111328125, - "learning_rate": 2.841363829204814e-05, - "loss": 40.8466, - "step": 127690 - }, - { - "epoch": 0.5159241587446519, - "grad_norm": 409.4591369628906, - "learning_rate": 2.841018029449971e-05, - "loss": 40.9471, - "step": 127700 - }, - { - "epoch": 0.5159645600100196, - "grad_norm": 896.9284057617188, - "learning_rate": 2.8406722230468063e-05, - "loss": 69.344, - "step": 127710 - }, - { - "epoch": 0.5160049612753872, - "grad_norm": 248.2589569091797, - "learning_rate": 2.840326410002061e-05, - "loss": 53.1701, - "step": 127720 - }, - { - "epoch": 0.5160453625407547, - "grad_norm": 770.6051025390625, - "learning_rate": 2.839980590322477e-05, - "loss": 37.5784, - "step": 127730 - }, - { - "epoch": 0.5160857638061224, - "grad_norm": 1029.7786865234375, - "learning_rate": 2.8396347640147962e-05, - "loss": 57.3108, - "step": 127740 - }, - { - "epoch": 0.51612616507149, - "grad_norm": 450.9689636230469, - "learning_rate": 2.8392889310857612e-05, - "loss": 47.8311, - "step": 127750 - }, - { - "epoch": 0.5161665663368576, - "grad_norm": 471.0567932128906, - "learning_rate": 2.8389430915421132e-05, - "loss": 58.49, - "step": 127760 - }, - { - "epoch": 0.5162069676022253, - "grad_norm": 991.8955688476562, - "learning_rate": 2.8385972453905958e-05, - "loss": 43.4331, - "step": 127770 - }, - { - "epoch": 0.5162473688675929, - "grad_norm": 0.0, - "learning_rate": 2.8382513926379504e-05, - "loss": 66.81, - "step": 127780 - }, - { - "epoch": 0.5162877701329606, - "grad_norm": 1275.402587890625, - "learning_rate": 2.837905533290921e-05, - "loss": 33.7486, - "step": 127790 - }, - { - "epoch": 0.5163281713983282, - "grad_norm": 921.3496704101562, - "learning_rate": 2.8375596673562482e-05, - "loss": 66.0726, - "step": 127800 - }, - { - "epoch": 0.5163685726636958, - "grad_norm": 1107.1654052734375, - "learning_rate": 2.8372137948406762e-05, - "loss": 64.4679, - "step": 127810 - }, - { - "epoch": 0.5164089739290635, - "grad_norm": 978.436279296875, - "learning_rate": 2.8368679157509477e-05, - "loss": 55.1411, - "step": 127820 - }, - { - "epoch": 0.5164493751944311, - "grad_norm": 794.9774169921875, - "learning_rate": 2.8365220300938055e-05, - "loss": 44.3256, - "step": 127830 - }, - { - "epoch": 0.5164897764597988, - "grad_norm": 2009.2308349609375, - "learning_rate": 2.8361761378759934e-05, - "loss": 52.7043, - "step": 127840 - }, - { - "epoch": 0.5165301777251664, - "grad_norm": 989.152099609375, - "learning_rate": 2.8358302391042536e-05, - "loss": 37.631, - "step": 127850 - }, - { - "epoch": 0.5165705789905339, - "grad_norm": 356.483642578125, - "learning_rate": 2.8354843337853314e-05, - "loss": 77.6594, - "step": 127860 - }, - { - "epoch": 0.5166109802559016, - "grad_norm": 1363.230224609375, - "learning_rate": 2.835138421925969e-05, - "loss": 79.6859, - "step": 127870 - }, - { - "epoch": 0.5166513815212692, - "grad_norm": 811.1392211914062, - "learning_rate": 2.834792503532911e-05, - "loss": 75.7948, - "step": 127880 - }, - { - "epoch": 0.5166917827866369, - "grad_norm": 1689.7584228515625, - "learning_rate": 2.8344465786129e-05, - "loss": 79.6354, - "step": 127890 - }, - { - "epoch": 0.5167321840520045, - "grad_norm": 251.05508422851562, - "learning_rate": 2.8341006471726816e-05, - "loss": 38.7622, - "step": 127900 - }, - { - "epoch": 0.5167725853173721, - "grad_norm": 1307.02880859375, - "learning_rate": 2.833754709218998e-05, - "loss": 75.8891, - "step": 127910 - }, - { - "epoch": 0.5168129865827398, - "grad_norm": 743.4976196289062, - "learning_rate": 2.833408764758595e-05, - "loss": 37.3798, - "step": 127920 - }, - { - "epoch": 0.5168533878481074, - "grad_norm": 395.01214599609375, - "learning_rate": 2.833062813798216e-05, - "loss": 53.7051, - "step": 127930 - }, - { - "epoch": 0.516893789113475, - "grad_norm": 1006.5745849609375, - "learning_rate": 2.832716856344607e-05, - "loss": 53.2251, - "step": 127940 - }, - { - "epoch": 0.5169341903788427, - "grad_norm": 649.6722412109375, - "learning_rate": 2.832370892404511e-05, - "loss": 47.6598, - "step": 127950 - }, - { - "epoch": 0.5169745916442103, - "grad_norm": 952.0791625976562, - "learning_rate": 2.832024921984674e-05, - "loss": 46.0533, - "step": 127960 - }, - { - "epoch": 0.517014992909578, - "grad_norm": 663.2807006835938, - "learning_rate": 2.8316789450918396e-05, - "loss": 60.6055, - "step": 127970 - }, - { - "epoch": 0.5170553941749455, - "grad_norm": 1854.5562744140625, - "learning_rate": 2.8313329617327537e-05, - "loss": 104.9597, - "step": 127980 - }, - { - "epoch": 0.5170957954403131, - "grad_norm": 399.9706726074219, - "learning_rate": 2.8309869719141608e-05, - "loss": 73.0087, - "step": 127990 - }, - { - "epoch": 0.5171361967056808, - "grad_norm": 742.0582275390625, - "learning_rate": 2.8306409756428064e-05, - "loss": 63.6711, - "step": 128000 - }, - { - "epoch": 0.5171765979710484, - "grad_norm": 426.78814697265625, - "learning_rate": 2.8302949729254358e-05, - "loss": 44.5783, - "step": 128010 - }, - { - "epoch": 0.5172169992364161, - "grad_norm": 537.81494140625, - "learning_rate": 2.8299489637687954e-05, - "loss": 46.2207, - "step": 128020 - }, - { - "epoch": 0.5172574005017837, - "grad_norm": 545.3671264648438, - "learning_rate": 2.8296029481796292e-05, - "loss": 52.6649, - "step": 128030 - }, - { - "epoch": 0.5172978017671513, - "grad_norm": 1167.154541015625, - "learning_rate": 2.829256926164685e-05, - "loss": 68.5006, - "step": 128040 - }, - { - "epoch": 0.517338203032519, - "grad_norm": 743.8500366210938, - "learning_rate": 2.8289108977307067e-05, - "loss": 49.9936, - "step": 128050 - }, - { - "epoch": 0.5173786042978866, - "grad_norm": 708.7972412109375, - "learning_rate": 2.8285648628844413e-05, - "loss": 58.3343, - "step": 128060 - }, - { - "epoch": 0.5174190055632543, - "grad_norm": 952.4853515625, - "learning_rate": 2.8282188216326345e-05, - "loss": 51.3421, - "step": 128070 - }, - { - "epoch": 0.5174594068286219, - "grad_norm": 761.9249267578125, - "learning_rate": 2.8278727739820333e-05, - "loss": 47.8939, - "step": 128080 - }, - { - "epoch": 0.5174998080939895, - "grad_norm": 1164.4027099609375, - "learning_rate": 2.827526719939383e-05, - "loss": 112.3727, - "step": 128090 - }, - { - "epoch": 0.5175402093593572, - "grad_norm": 1486.0279541015625, - "learning_rate": 2.827180659511431e-05, - "loss": 54.5274, - "step": 128100 - }, - { - "epoch": 0.5175806106247247, - "grad_norm": 1370.9776611328125, - "learning_rate": 2.8268345927049234e-05, - "loss": 65.6802, - "step": 128110 - }, - { - "epoch": 0.5176210118900924, - "grad_norm": 796.0413818359375, - "learning_rate": 2.8264885195266065e-05, - "loss": 58.2697, - "step": 128120 - }, - { - "epoch": 0.51766141315546, - "grad_norm": 703.8768310546875, - "learning_rate": 2.8261424399832293e-05, - "loss": 49.1636, - "step": 128130 - }, - { - "epoch": 0.5177018144208276, - "grad_norm": 419.2386779785156, - "learning_rate": 2.825796354081537e-05, - "loss": 39.2348, - "step": 128140 - }, - { - "epoch": 0.5177422156861953, - "grad_norm": 687.80859375, - "learning_rate": 2.8254502618282763e-05, - "loss": 33.7805, - "step": 128150 - }, - { - "epoch": 0.5177826169515629, - "grad_norm": 0.0, - "learning_rate": 2.8251041632301957e-05, - "loss": 54.0625, - "step": 128160 - }, - { - "epoch": 0.5178230182169306, - "grad_norm": 458.79937744140625, - "learning_rate": 2.8247580582940413e-05, - "loss": 60.1953, - "step": 128170 - }, - { - "epoch": 0.5178634194822982, - "grad_norm": 739.254638671875, - "learning_rate": 2.824411947026563e-05, - "loss": 67.9315, - "step": 128180 - }, - { - "epoch": 0.5179038207476658, - "grad_norm": 998.814208984375, - "learning_rate": 2.824065829434505e-05, - "loss": 52.8274, - "step": 128190 - }, - { - "epoch": 0.5179442220130335, - "grad_norm": 862.947265625, - "learning_rate": 2.8237197055246172e-05, - "loss": 47.5776, - "step": 128200 - }, - { - "epoch": 0.5179846232784011, - "grad_norm": 0.0, - "learning_rate": 2.8233735753036484e-05, - "loss": 58.8695, - "step": 128210 - }, - { - "epoch": 0.5180250245437688, - "grad_norm": 1318.5653076171875, - "learning_rate": 2.823027438778344e-05, - "loss": 46.4017, - "step": 128220 - }, - { - "epoch": 0.5180654258091364, - "grad_norm": 1210.0972900390625, - "learning_rate": 2.8226812959554537e-05, - "loss": 61.901, - "step": 128230 - }, - { - "epoch": 0.5181058270745039, - "grad_norm": 756.2396850585938, - "learning_rate": 2.8223351468417254e-05, - "loss": 49.0368, - "step": 128240 - }, - { - "epoch": 0.5181462283398716, - "grad_norm": 419.0620422363281, - "learning_rate": 2.8219889914439074e-05, - "loss": 52.058, - "step": 128250 - }, - { - "epoch": 0.5181866296052392, - "grad_norm": 868.4646606445312, - "learning_rate": 2.821642829768748e-05, - "loss": 78.654, - "step": 128260 - }, - { - "epoch": 0.5182270308706068, - "grad_norm": 424.6618957519531, - "learning_rate": 2.8212966618229964e-05, - "loss": 94.6811, - "step": 128270 - }, - { - "epoch": 0.5182674321359745, - "grad_norm": 535.59228515625, - "learning_rate": 2.8209504876134007e-05, - "loss": 44.2588, - "step": 128280 - }, - { - "epoch": 0.5183078334013421, - "grad_norm": 578.2286987304688, - "learning_rate": 2.8206043071467102e-05, - "loss": 38.9842, - "step": 128290 - }, - { - "epoch": 0.5183482346667098, - "grad_norm": 948.8009033203125, - "learning_rate": 2.8202581204296742e-05, - "loss": 66.6254, - "step": 128300 - }, - { - "epoch": 0.5183886359320774, - "grad_norm": 956.4691772460938, - "learning_rate": 2.819911927469041e-05, - "loss": 83.3204, - "step": 128310 - }, - { - "epoch": 0.518429037197445, - "grad_norm": 662.3033447265625, - "learning_rate": 2.8195657282715594e-05, - "loss": 33.0041, - "step": 128320 - }, - { - "epoch": 0.5184694384628127, - "grad_norm": 841.9630737304688, - "learning_rate": 2.81921952284398e-05, - "loss": 40.0045, - "step": 128330 - }, - { - "epoch": 0.5185098397281803, - "grad_norm": 644.8792114257812, - "learning_rate": 2.818873311193051e-05, - "loss": 49.6458, - "step": 128340 - }, - { - "epoch": 0.518550240993548, - "grad_norm": 1007.34765625, - "learning_rate": 2.8185270933255237e-05, - "loss": 45.5391, - "step": 128350 - }, - { - "epoch": 0.5185906422589156, - "grad_norm": 412.73883056640625, - "learning_rate": 2.8181808692481453e-05, - "loss": 45.3399, - "step": 128360 - }, - { - "epoch": 0.5186310435242831, - "grad_norm": 775.2849731445312, - "learning_rate": 2.817834638967668e-05, - "loss": 56.8275, - "step": 128370 - }, - { - "epoch": 0.5186714447896508, - "grad_norm": 880.3590698242188, - "learning_rate": 2.817488402490841e-05, - "loss": 62.0417, - "step": 128380 - }, - { - "epoch": 0.5187118460550184, - "grad_norm": 0.0, - "learning_rate": 2.8171421598244134e-05, - "loss": 100.0851, - "step": 128390 - }, - { - "epoch": 0.518752247320386, - "grad_norm": 757.4847412109375, - "learning_rate": 2.816795910975137e-05, - "loss": 81.3571, - "step": 128400 - }, - { - "epoch": 0.5187926485857537, - "grad_norm": 986.7409057617188, - "learning_rate": 2.8164496559497605e-05, - "loss": 99.9801, - "step": 128410 - }, - { - "epoch": 0.5188330498511213, - "grad_norm": 1231.8388671875, - "learning_rate": 2.816103394755035e-05, - "loss": 36.7881, - "step": 128420 - }, - { - "epoch": 0.518873451116489, - "grad_norm": 551.5934448242188, - "learning_rate": 2.8157571273977117e-05, - "loss": 48.0908, - "step": 128430 - }, - { - "epoch": 0.5189138523818566, - "grad_norm": 957.9308471679688, - "learning_rate": 2.8154108538845404e-05, - "loss": 54.7105, - "step": 128440 - }, - { - "epoch": 0.5189542536472243, - "grad_norm": 746.921875, - "learning_rate": 2.8150645742222714e-05, - "loss": 47.9935, - "step": 128450 - }, - { - "epoch": 0.5189946549125919, - "grad_norm": 817.0953369140625, - "learning_rate": 2.814718288417657e-05, - "loss": 34.7214, - "step": 128460 - }, - { - "epoch": 0.5190350561779595, - "grad_norm": 652.5723876953125, - "learning_rate": 2.814371996477448e-05, - "loss": 51.5591, - "step": 128470 - }, - { - "epoch": 0.5190754574433272, - "grad_norm": 1426.8663330078125, - "learning_rate": 2.8140256984083947e-05, - "loss": 53.6987, - "step": 128480 - }, - { - "epoch": 0.5191158587086948, - "grad_norm": 1349.2745361328125, - "learning_rate": 2.8136793942172483e-05, - "loss": 67.8195, - "step": 128490 - }, - { - "epoch": 0.5191562599740623, - "grad_norm": 1255.9248046875, - "learning_rate": 2.8133330839107608e-05, - "loss": 89.4671, - "step": 128500 - }, - { - "epoch": 0.51919666123943, - "grad_norm": 772.8133544921875, - "learning_rate": 2.8129867674956838e-05, - "loss": 99.116, - "step": 128510 - }, - { - "epoch": 0.5192370625047976, - "grad_norm": 799.4025268554688, - "learning_rate": 2.8126404449787685e-05, - "loss": 37.4048, - "step": 128520 - }, - { - "epoch": 0.5192774637701653, - "grad_norm": 767.2107543945312, - "learning_rate": 2.8122941163667667e-05, - "loss": 53.6676, - "step": 128530 - }, - { - "epoch": 0.5193178650355329, - "grad_norm": 717.8088989257812, - "learning_rate": 2.8119477816664296e-05, - "loss": 53.9604, - "step": 128540 - }, - { - "epoch": 0.5193582663009005, - "grad_norm": 568.8512573242188, - "learning_rate": 2.8116014408845116e-05, - "loss": 52.336, - "step": 128550 - }, - { - "epoch": 0.5193986675662682, - "grad_norm": 1033.583984375, - "learning_rate": 2.8112550940277616e-05, - "loss": 60.4758, - "step": 128560 - }, - { - "epoch": 0.5194390688316358, - "grad_norm": 267.0958251953125, - "learning_rate": 2.810908741102934e-05, - "loss": 87.3894, - "step": 128570 - }, - { - "epoch": 0.5194794700970035, - "grad_norm": 966.0076293945312, - "learning_rate": 2.8105623821167804e-05, - "loss": 64.1264, - "step": 128580 - }, - { - "epoch": 0.5195198713623711, - "grad_norm": 888.2479858398438, - "learning_rate": 2.810216017076053e-05, - "loss": 37.3439, - "step": 128590 - }, - { - "epoch": 0.5195602726277387, - "grad_norm": 1010.2241821289062, - "learning_rate": 2.8098696459875046e-05, - "loss": 59.4499, - "step": 128600 - }, - { - "epoch": 0.5196006738931064, - "grad_norm": 672.6810302734375, - "learning_rate": 2.8095232688578883e-05, - "loss": 59.5572, - "step": 128610 - }, - { - "epoch": 0.5196410751584739, - "grad_norm": 1322.0794677734375, - "learning_rate": 2.809176885693956e-05, - "loss": 75.077, - "step": 128620 - }, - { - "epoch": 0.5196814764238415, - "grad_norm": 647.8616943359375, - "learning_rate": 2.8088304965024614e-05, - "loss": 54.4772, - "step": 128630 - }, - { - "epoch": 0.5197218776892092, - "grad_norm": 895.9517211914062, - "learning_rate": 2.8084841012901574e-05, - "loss": 32.6104, - "step": 128640 - }, - { - "epoch": 0.5197622789545768, - "grad_norm": 346.7025451660156, - "learning_rate": 2.808137700063797e-05, - "loss": 39.3208, - "step": 128650 - }, - { - "epoch": 0.5198026802199445, - "grad_norm": 911.7136840820312, - "learning_rate": 2.807791292830133e-05, - "loss": 58.0953, - "step": 128660 - }, - { - "epoch": 0.5198430814853121, - "grad_norm": 522.1300659179688, - "learning_rate": 2.8074448795959203e-05, - "loss": 73.4923, - "step": 128670 - }, - { - "epoch": 0.5198834827506797, - "grad_norm": 657.18505859375, - "learning_rate": 2.8070984603679107e-05, - "loss": 42.6445, - "step": 128680 - }, - { - "epoch": 0.5199238840160474, - "grad_norm": 389.8962707519531, - "learning_rate": 2.8067520351528587e-05, - "loss": 43.4139, - "step": 128690 - }, - { - "epoch": 0.519964285281415, - "grad_norm": 851.1239013671875, - "learning_rate": 2.806405603957517e-05, - "loss": 72.5371, - "step": 128700 - }, - { - "epoch": 0.5200046865467827, - "grad_norm": 3148.775634765625, - "learning_rate": 2.8060591667886416e-05, - "loss": 56.5808, - "step": 128710 - }, - { - "epoch": 0.5200450878121503, - "grad_norm": 1202.496826171875, - "learning_rate": 2.8057127236529844e-05, - "loss": 46.4309, - "step": 128720 - }, - { - "epoch": 0.520085489077518, - "grad_norm": 962.0496826171875, - "learning_rate": 2.805366274557301e-05, - "loss": 50.884, - "step": 128730 - }, - { - "epoch": 0.5201258903428856, - "grad_norm": 453.456298828125, - "learning_rate": 2.8050198195083444e-05, - "loss": 59.2252, - "step": 128740 - }, - { - "epoch": 0.5201662916082531, - "grad_norm": 239.76718139648438, - "learning_rate": 2.8046733585128687e-05, - "loss": 70.7534, - "step": 128750 - }, - { - "epoch": 0.5202066928736208, - "grad_norm": 457.1228942871094, - "learning_rate": 2.80432689157763e-05, - "loss": 52.9828, - "step": 128760 - }, - { - "epoch": 0.5202470941389884, - "grad_norm": 877.8587036132812, - "learning_rate": 2.8039804187093816e-05, - "loss": 52.8952, - "step": 128770 - }, - { - "epoch": 0.520287495404356, - "grad_norm": 1153.672119140625, - "learning_rate": 2.803633939914878e-05, - "loss": 60.0604, - "step": 128780 - }, - { - "epoch": 0.5203278966697237, - "grad_norm": 120.61048126220703, - "learning_rate": 2.803287455200875e-05, - "loss": 47.4182, - "step": 128790 - }, - { - "epoch": 0.5203682979350913, - "grad_norm": 1153.82080078125, - "learning_rate": 2.8029409645741267e-05, - "loss": 59.4981, - "step": 128800 - }, - { - "epoch": 0.520408699200459, - "grad_norm": 418.24920654296875, - "learning_rate": 2.8025944680413878e-05, - "loss": 37.4401, - "step": 128810 - }, - { - "epoch": 0.5204491004658266, - "grad_norm": 781.8113403320312, - "learning_rate": 2.8022479656094154e-05, - "loss": 48.7074, - "step": 128820 - }, - { - "epoch": 0.5204895017311942, - "grad_norm": 836.5515747070312, - "learning_rate": 2.801901457284962e-05, - "loss": 51.3323, - "step": 128830 - }, - { - "epoch": 0.5205299029965619, - "grad_norm": 1007.6831665039062, - "learning_rate": 2.8015549430747852e-05, - "loss": 63.1766, - "step": 128840 - }, - { - "epoch": 0.5205703042619295, - "grad_norm": 1059.2291259765625, - "learning_rate": 2.8012084229856382e-05, - "loss": 48.2031, - "step": 128850 - }, - { - "epoch": 0.5206107055272972, - "grad_norm": 435.08837890625, - "learning_rate": 2.800861897024279e-05, - "loss": 61.0167, - "step": 128860 - }, - { - "epoch": 0.5206511067926648, - "grad_norm": 520.9664916992188, - "learning_rate": 2.8005153651974614e-05, - "loss": 68.8364, - "step": 128870 - }, - { - "epoch": 0.5206915080580323, - "grad_norm": 1375.3497314453125, - "learning_rate": 2.8001688275119432e-05, - "loss": 73.5849, - "step": 128880 - }, - { - "epoch": 0.5207319093234, - "grad_norm": 570.526611328125, - "learning_rate": 2.799822283974478e-05, - "loss": 45.2284, - "step": 128890 - }, - { - "epoch": 0.5207723105887676, - "grad_norm": 3453.25927734375, - "learning_rate": 2.7994757345918244e-05, - "loss": 65.3527, - "step": 128900 - }, - { - "epoch": 0.5208127118541352, - "grad_norm": 807.7914428710938, - "learning_rate": 2.7991291793707357e-05, - "loss": 37.759, - "step": 128910 - }, - { - "epoch": 0.5208531131195029, - "grad_norm": 2692.131103515625, - "learning_rate": 2.7987826183179712e-05, - "loss": 70.0045, - "step": 128920 - }, - { - "epoch": 0.5208935143848705, - "grad_norm": 316.426025390625, - "learning_rate": 2.798436051440284e-05, - "loss": 50.6802, - "step": 128930 - }, - { - "epoch": 0.5209339156502382, - "grad_norm": 1145.9749755859375, - "learning_rate": 2.7980894787444334e-05, - "loss": 44.7317, - "step": 128940 - }, - { - "epoch": 0.5209743169156058, - "grad_norm": 571.5775146484375, - "learning_rate": 2.7977429002371747e-05, - "loss": 39.6738, - "step": 128950 - }, - { - "epoch": 0.5210147181809734, - "grad_norm": 625.548828125, - "learning_rate": 2.797396315925265e-05, - "loss": 34.7972, - "step": 128960 - }, - { - "epoch": 0.5210551194463411, - "grad_norm": 733.4710083007812, - "learning_rate": 2.7970497258154603e-05, - "loss": 38.4996, - "step": 128970 - }, - { - "epoch": 0.5210955207117087, - "grad_norm": 1038.670654296875, - "learning_rate": 2.7967031299145193e-05, - "loss": 49.1648, - "step": 128980 - }, - { - "epoch": 0.5211359219770764, - "grad_norm": 545.0449829101562, - "learning_rate": 2.7963565282291977e-05, - "loss": 48.0368, - "step": 128990 - }, - { - "epoch": 0.521176323242444, - "grad_norm": 428.3600769042969, - "learning_rate": 2.7960099207662532e-05, - "loss": 60.4256, - "step": 129000 - }, - { - "epoch": 0.5212167245078115, - "grad_norm": 714.7028198242188, - "learning_rate": 2.7956633075324424e-05, - "loss": 60.8093, - "step": 129010 - }, - { - "epoch": 0.5212571257731792, - "grad_norm": 1208.1380615234375, - "learning_rate": 2.795316688534523e-05, - "loss": 74.678, - "step": 129020 - }, - { - "epoch": 0.5212975270385468, - "grad_norm": 534.9729614257812, - "learning_rate": 2.794970063779253e-05, - "loss": 51.2989, - "step": 129030 - }, - { - "epoch": 0.5213379283039145, - "grad_norm": 756.8164672851562, - "learning_rate": 2.79462343327339e-05, - "loss": 66.086, - "step": 129040 - }, - { - "epoch": 0.5213783295692821, - "grad_norm": 575.9217529296875, - "learning_rate": 2.794276797023691e-05, - "loss": 49.4426, - "step": 129050 - }, - { - "epoch": 0.5214187308346497, - "grad_norm": 498.71441650390625, - "learning_rate": 2.7939301550369146e-05, - "loss": 48.8029, - "step": 129060 - }, - { - "epoch": 0.5214591321000174, - "grad_norm": 594.4951782226562, - "learning_rate": 2.7935835073198192e-05, - "loss": 34.5576, - "step": 129070 - }, - { - "epoch": 0.521499533365385, - "grad_norm": 1014.3102416992188, - "learning_rate": 2.793236853879161e-05, - "loss": 58.8379, - "step": 129080 - }, - { - "epoch": 0.5215399346307527, - "grad_norm": 560.2127685546875, - "learning_rate": 2.7928901947217008e-05, - "loss": 63.1494, - "step": 129090 - }, - { - "epoch": 0.5215803358961203, - "grad_norm": 666.1370239257812, - "learning_rate": 2.792543529854194e-05, - "loss": 53.1794, - "step": 129100 - }, - { - "epoch": 0.5216207371614879, - "grad_norm": 636.662353515625, - "learning_rate": 2.7921968592834006e-05, - "loss": 25.9828, - "step": 129110 - }, - { - "epoch": 0.5216611384268556, - "grad_norm": 835.5521850585938, - "learning_rate": 2.79185018301608e-05, - "loss": 57.4081, - "step": 129120 - }, - { - "epoch": 0.5217015396922232, - "grad_norm": 992.4476928710938, - "learning_rate": 2.791503501058989e-05, - "loss": 52.8603, - "step": 129130 - }, - { - "epoch": 0.5217419409575907, - "grad_norm": 328.57794189453125, - "learning_rate": 2.7911568134188875e-05, - "loss": 88.1504, - "step": 129140 - }, - { - "epoch": 0.5217823422229584, - "grad_norm": 1140.0035400390625, - "learning_rate": 2.7908101201025337e-05, - "loss": 48.8337, - "step": 129150 - }, - { - "epoch": 0.521822743488326, - "grad_norm": 745.549072265625, - "learning_rate": 2.7904634211166876e-05, - "loss": 55.3195, - "step": 129160 - }, - { - "epoch": 0.5218631447536937, - "grad_norm": 969.3555297851562, - "learning_rate": 2.7901167164681073e-05, - "loss": 55.3426, - "step": 129170 - }, - { - "epoch": 0.5219035460190613, - "grad_norm": 726.4353637695312, - "learning_rate": 2.7897700061635517e-05, - "loss": 49.0015, - "step": 129180 - }, - { - "epoch": 0.5219439472844289, - "grad_norm": 374.95843505859375, - "learning_rate": 2.7894232902097813e-05, - "loss": 46.3873, - "step": 129190 - }, - { - "epoch": 0.5219843485497966, - "grad_norm": 511.00885009765625, - "learning_rate": 2.7890765686135544e-05, - "loss": 47.9072, - "step": 129200 - }, - { - "epoch": 0.5220247498151642, - "grad_norm": 512.494873046875, - "learning_rate": 2.788729841381631e-05, - "loss": 29.6466, - "step": 129210 - }, - { - "epoch": 0.5220651510805319, - "grad_norm": 551.0260620117188, - "learning_rate": 2.7883831085207707e-05, - "loss": 65.4076, - "step": 129220 - }, - { - "epoch": 0.5221055523458995, - "grad_norm": 621.8154907226562, - "learning_rate": 2.788036370037733e-05, - "loss": 55.3526, - "step": 129230 - }, - { - "epoch": 0.5221459536112671, - "grad_norm": 477.7518310546875, - "learning_rate": 2.7876896259392788e-05, - "loss": 70.7479, - "step": 129240 - }, - { - "epoch": 0.5221863548766348, - "grad_norm": 228.69644165039062, - "learning_rate": 2.787342876232167e-05, - "loss": 45.7491, - "step": 129250 - }, - { - "epoch": 0.5222267561420023, - "grad_norm": 406.46026611328125, - "learning_rate": 2.7869961209231577e-05, - "loss": 40.5636, - "step": 129260 - }, - { - "epoch": 0.52226715740737, - "grad_norm": 1248.6951904296875, - "learning_rate": 2.7866493600190107e-05, - "loss": 43.6552, - "step": 129270 - }, - { - "epoch": 0.5223075586727376, - "grad_norm": 780.5540161132812, - "learning_rate": 2.7863025935264875e-05, - "loss": 42.3198, - "step": 129280 - }, - { - "epoch": 0.5223479599381052, - "grad_norm": 830.3927612304688, - "learning_rate": 2.785955821452348e-05, - "loss": 50.129, - "step": 129290 - }, - { - "epoch": 0.5223883612034729, - "grad_norm": 162.7823028564453, - "learning_rate": 2.7856090438033522e-05, - "loss": 35.8086, - "step": 129300 - }, - { - "epoch": 0.5224287624688405, - "grad_norm": 577.8681030273438, - "learning_rate": 2.785262260586261e-05, - "loss": 48.4552, - "step": 129310 - }, - { - "epoch": 0.5224691637342082, - "grad_norm": 495.5697021484375, - "learning_rate": 2.7849154718078346e-05, - "loss": 41.2047, - "step": 129320 - }, - { - "epoch": 0.5225095649995758, - "grad_norm": 650.5645141601562, - "learning_rate": 2.784568677474836e-05, - "loss": 65.8662, - "step": 129330 - }, - { - "epoch": 0.5225499662649434, - "grad_norm": 276.66241455078125, - "learning_rate": 2.7842218775940237e-05, - "loss": 33.7441, - "step": 129340 - }, - { - "epoch": 0.5225903675303111, - "grad_norm": 840.7539672851562, - "learning_rate": 2.783875072172159e-05, - "loss": 49.3532, - "step": 129350 - }, - { - "epoch": 0.5226307687956787, - "grad_norm": 315.2930603027344, - "learning_rate": 2.783528261216004e-05, - "loss": 37.2258, - "step": 129360 - }, - { - "epoch": 0.5226711700610464, - "grad_norm": 398.0323791503906, - "learning_rate": 2.78318144473232e-05, - "loss": 85.9904, - "step": 129370 - }, - { - "epoch": 0.522711571326414, - "grad_norm": 779.0153198242188, - "learning_rate": 2.7828346227278674e-05, - "loss": 53.5858, - "step": 129380 - }, - { - "epoch": 0.5227519725917815, - "grad_norm": 941.3084106445312, - "learning_rate": 2.782487795209408e-05, - "loss": 77.1186, - "step": 129390 - }, - { - "epoch": 0.5227923738571492, - "grad_norm": 1087.8790283203125, - "learning_rate": 2.782140962183704e-05, - "loss": 75.8238, - "step": 129400 - }, - { - "epoch": 0.5228327751225168, - "grad_norm": 836.9957275390625, - "learning_rate": 2.7817941236575173e-05, - "loss": 60.6787, - "step": 129410 - }, - { - "epoch": 0.5228731763878844, - "grad_norm": 542.893798828125, - "learning_rate": 2.781447279637608e-05, - "loss": 54.1359, - "step": 129420 - }, - { - "epoch": 0.5229135776532521, - "grad_norm": 706.9591064453125, - "learning_rate": 2.7811004301307403e-05, - "loss": 42.0238, - "step": 129430 - }, - { - "epoch": 0.5229539789186197, - "grad_norm": 975.5703735351562, - "learning_rate": 2.7807535751436738e-05, - "loss": 42.8671, - "step": 129440 - }, - { - "epoch": 0.5229943801839874, - "grad_norm": 1424.2943115234375, - "learning_rate": 2.7804067146831725e-05, - "loss": 69.0784, - "step": 129450 - }, - { - "epoch": 0.523034781449355, - "grad_norm": 1137.7862548828125, - "learning_rate": 2.7800598487559975e-05, - "loss": 46.7197, - "step": 129460 - }, - { - "epoch": 0.5230751827147226, - "grad_norm": 195.2952423095703, - "learning_rate": 2.7797129773689118e-05, - "loss": 55.5766, - "step": 129470 - }, - { - "epoch": 0.5231155839800903, - "grad_norm": 810.8615112304688, - "learning_rate": 2.7793661005286774e-05, - "loss": 42.1326, - "step": 129480 - }, - { - "epoch": 0.5231559852454579, - "grad_norm": 259.7433166503906, - "learning_rate": 2.7790192182420578e-05, - "loss": 40.0814, - "step": 129490 - }, - { - "epoch": 0.5231963865108256, - "grad_norm": 465.7834777832031, - "learning_rate": 2.7786723305158136e-05, - "loss": 41.8416, - "step": 129500 - }, - { - "epoch": 0.5232367877761932, - "grad_norm": 309.6441650390625, - "learning_rate": 2.7783254373567103e-05, - "loss": 92.4435, - "step": 129510 - }, - { - "epoch": 0.5232771890415607, - "grad_norm": 689.2532348632812, - "learning_rate": 2.7779785387715078e-05, - "loss": 50.8395, - "step": 129520 - }, - { - "epoch": 0.5233175903069284, - "grad_norm": 551.0718994140625, - "learning_rate": 2.7776316347669722e-05, - "loss": 53.9811, - "step": 129530 - }, - { - "epoch": 0.523357991572296, - "grad_norm": 792.376953125, - "learning_rate": 2.7772847253498636e-05, - "loss": 38.2251, - "step": 129540 - }, - { - "epoch": 0.5233983928376637, - "grad_norm": 687.2542724609375, - "learning_rate": 2.7769378105269467e-05, - "loss": 72.9509, - "step": 129550 - }, - { - "epoch": 0.5234387941030313, - "grad_norm": 856.1763916015625, - "learning_rate": 2.7765908903049848e-05, - "loss": 42.2359, - "step": 129560 - }, - { - "epoch": 0.5234791953683989, - "grad_norm": 865.5512084960938, - "learning_rate": 2.7762439646907417e-05, - "loss": 46.5808, - "step": 129570 - }, - { - "epoch": 0.5235195966337666, - "grad_norm": 218.27801513671875, - "learning_rate": 2.7758970336909795e-05, - "loss": 58.7573, - "step": 129580 - }, - { - "epoch": 0.5235599978991342, - "grad_norm": 797.5692749023438, - "learning_rate": 2.7755500973124625e-05, - "loss": 64.8211, - "step": 129590 - }, - { - "epoch": 0.5236003991645019, - "grad_norm": 1161.782470703125, - "learning_rate": 2.7752031555619555e-05, - "loss": 62.3158, - "step": 129600 - }, - { - "epoch": 0.5236408004298695, - "grad_norm": 495.8021545410156, - "learning_rate": 2.774856208446221e-05, - "loss": 34.1338, - "step": 129610 - }, - { - "epoch": 0.5236812016952371, - "grad_norm": 538.5377807617188, - "learning_rate": 2.7745092559720227e-05, - "loss": 84.3193, - "step": 129620 - }, - { - "epoch": 0.5237216029606048, - "grad_norm": 472.7118225097656, - "learning_rate": 2.7741622981461253e-05, - "loss": 88.2121, - "step": 129630 - }, - { - "epoch": 0.5237620042259724, - "grad_norm": 680.1412353515625, - "learning_rate": 2.773815334975292e-05, - "loss": 61.683, - "step": 129640 - }, - { - "epoch": 0.5238024054913399, - "grad_norm": 194.98385620117188, - "learning_rate": 2.7734683664662892e-05, - "loss": 35.9615, - "step": 129650 - }, - { - "epoch": 0.5238428067567076, - "grad_norm": 664.1810913085938, - "learning_rate": 2.7731213926258794e-05, - "loss": 76.9602, - "step": 129660 - }, - { - "epoch": 0.5238832080220752, - "grad_norm": 675.6226806640625, - "learning_rate": 2.7727744134608263e-05, - "loss": 49.6265, - "step": 129670 - }, - { - "epoch": 0.5239236092874429, - "grad_norm": 424.91912841796875, - "learning_rate": 2.7724274289778974e-05, - "loss": 49.0146, - "step": 129680 - }, - { - "epoch": 0.5239640105528105, - "grad_norm": 1368.484619140625, - "learning_rate": 2.7720804391838544e-05, - "loss": 53.2469, - "step": 129690 - }, - { - "epoch": 0.5240044118181781, - "grad_norm": 988.5595092773438, - "learning_rate": 2.771733444085463e-05, - "loss": 72.9639, - "step": 129700 - }, - { - "epoch": 0.5240448130835458, - "grad_norm": 1024.283447265625, - "learning_rate": 2.771386443689489e-05, - "loss": 35.8563, - "step": 129710 - }, - { - "epoch": 0.5240852143489134, - "grad_norm": 2501.96142578125, - "learning_rate": 2.7710394380026954e-05, - "loss": 93.8633, - "step": 129720 - }, - { - "epoch": 0.5241256156142811, - "grad_norm": 397.55413818359375, - "learning_rate": 2.7706924270318496e-05, - "loss": 51.6667, - "step": 129730 - }, - { - "epoch": 0.5241660168796487, - "grad_norm": 401.48687744140625, - "learning_rate": 2.770345410783715e-05, - "loss": 53.5041, - "step": 129740 - }, - { - "epoch": 0.5242064181450163, - "grad_norm": 561.6734619140625, - "learning_rate": 2.7699983892650573e-05, - "loss": 60.5054, - "step": 129750 - }, - { - "epoch": 0.524246819410384, - "grad_norm": 1459.1732177734375, - "learning_rate": 2.769651362482642e-05, - "loss": 80.3514, - "step": 129760 - }, - { - "epoch": 0.5242872206757516, - "grad_norm": 211.4439239501953, - "learning_rate": 2.7693043304432354e-05, - "loss": 53.1344, - "step": 129770 - }, - { - "epoch": 0.5243276219411191, - "grad_norm": 535.6337890625, - "learning_rate": 2.7689572931536017e-05, - "loss": 59.1036, - "step": 129780 - }, - { - "epoch": 0.5243680232064868, - "grad_norm": 212.34646606445312, - "learning_rate": 2.7686102506205068e-05, - "loss": 50.72, - "step": 129790 - }, - { - "epoch": 0.5244084244718544, - "grad_norm": 1038.571044921875, - "learning_rate": 2.7682632028507167e-05, - "loss": 43.0176, - "step": 129800 - }, - { - "epoch": 0.5244488257372221, - "grad_norm": 621.1820678710938, - "learning_rate": 2.7679161498509976e-05, - "loss": 25.86, - "step": 129810 - }, - { - "epoch": 0.5244892270025897, - "grad_norm": 441.5834045410156, - "learning_rate": 2.7675690916281156e-05, - "loss": 54.9453, - "step": 129820 - }, - { - "epoch": 0.5245296282679573, - "grad_norm": 317.9010925292969, - "learning_rate": 2.7672220281888357e-05, - "loss": 95.5096, - "step": 129830 - }, - { - "epoch": 0.524570029533325, - "grad_norm": 862.2459106445312, - "learning_rate": 2.766874959539925e-05, - "loss": 69.8221, - "step": 129840 - }, - { - "epoch": 0.5246104307986926, - "grad_norm": 531.95263671875, - "learning_rate": 2.76652788568815e-05, - "loss": 49.4422, - "step": 129850 - }, - { - "epoch": 0.5246508320640603, - "grad_norm": 694.7820434570312, - "learning_rate": 2.7661808066402767e-05, - "loss": 46.5116, - "step": 129860 - }, - { - "epoch": 0.5246912333294279, - "grad_norm": 1010.5025024414062, - "learning_rate": 2.765833722403071e-05, - "loss": 71.7905, - "step": 129870 - }, - { - "epoch": 0.5247316345947955, - "grad_norm": 285.8402099609375, - "learning_rate": 2.7654866329833002e-05, - "loss": 37.7266, - "step": 129880 - }, - { - "epoch": 0.5247720358601632, - "grad_norm": 556.6085205078125, - "learning_rate": 2.7651395383877304e-05, - "loss": 54.9936, - "step": 129890 - }, - { - "epoch": 0.5248124371255307, - "grad_norm": 1335.543701171875, - "learning_rate": 2.76479243862313e-05, - "loss": 71.0658, - "step": 129900 - }, - { - "epoch": 0.5248528383908984, - "grad_norm": 510.7142639160156, - "learning_rate": 2.7644453336962633e-05, - "loss": 86.5262, - "step": 129910 - }, - { - "epoch": 0.524893239656266, - "grad_norm": 629.845458984375, - "learning_rate": 2.7640982236138992e-05, - "loss": 47.5349, - "step": 129920 - }, - { - "epoch": 0.5249336409216336, - "grad_norm": 1113.2122802734375, - "learning_rate": 2.7637511083828043e-05, - "loss": 66.3521, - "step": 129930 - }, - { - "epoch": 0.5249740421870013, - "grad_norm": 365.0244140625, - "learning_rate": 2.763403988009746e-05, - "loss": 54.905, - "step": 129940 - }, - { - "epoch": 0.5250144434523689, - "grad_norm": 543.3900756835938, - "learning_rate": 2.7630568625014917e-05, - "loss": 48.9247, - "step": 129950 - }, - { - "epoch": 0.5250548447177366, - "grad_norm": 381.6798400878906, - "learning_rate": 2.7627097318648076e-05, - "loss": 39.9984, - "step": 129960 - }, - { - "epoch": 0.5250952459831042, - "grad_norm": 1278.4371337890625, - "learning_rate": 2.7623625961064618e-05, - "loss": 60.6886, - "step": 129970 - }, - { - "epoch": 0.5251356472484718, - "grad_norm": 500.2679443359375, - "learning_rate": 2.7620154552332232e-05, - "loss": 54.9953, - "step": 129980 - }, - { - "epoch": 0.5251760485138395, - "grad_norm": 482.51458740234375, - "learning_rate": 2.7616683092518576e-05, - "loss": 55.6544, - "step": 129990 - }, - { - "epoch": 0.5252164497792071, - "grad_norm": 359.9608154296875, - "learning_rate": 2.761321158169134e-05, - "loss": 64.5541, - "step": 130000 - }, - { - "epoch": 0.5252568510445748, - "grad_norm": 398.5904541015625, - "learning_rate": 2.7609740019918197e-05, - "loss": 53.5339, - "step": 130010 - }, - { - "epoch": 0.5252972523099424, - "grad_norm": 698.3560791015625, - "learning_rate": 2.7606268407266827e-05, - "loss": 61.4191, - "step": 130020 - }, - { - "epoch": 0.5253376535753099, - "grad_norm": 768.4830932617188, - "learning_rate": 2.7602796743804922e-05, - "loss": 47.3355, - "step": 130030 - }, - { - "epoch": 0.5253780548406776, - "grad_norm": 775.7195434570312, - "learning_rate": 2.7599325029600143e-05, - "loss": 25.0439, - "step": 130040 - }, - { - "epoch": 0.5254184561060452, - "grad_norm": 1195.4205322265625, - "learning_rate": 2.7595853264720184e-05, - "loss": 70.0329, - "step": 130050 - }, - { - "epoch": 0.5254588573714128, - "grad_norm": 606.2637329101562, - "learning_rate": 2.759238144923274e-05, - "loss": 53.4816, - "step": 130060 - }, - { - "epoch": 0.5254992586367805, - "grad_norm": 469.8358459472656, - "learning_rate": 2.7588909583205475e-05, - "loss": 49.5405, - "step": 130070 - }, - { - "epoch": 0.5255396599021481, - "grad_norm": 648.6143188476562, - "learning_rate": 2.7585437666706087e-05, - "loss": 69.9888, - "step": 130080 - }, - { - "epoch": 0.5255800611675158, - "grad_norm": 612.1096801757812, - "learning_rate": 2.758196569980226e-05, - "loss": 51.9348, - "step": 130090 - }, - { - "epoch": 0.5256204624328834, - "grad_norm": 1045.827392578125, - "learning_rate": 2.7578493682561685e-05, - "loss": 75.3227, - "step": 130100 - }, - { - "epoch": 0.525660863698251, - "grad_norm": 990.9442138671875, - "learning_rate": 2.757502161505205e-05, - "loss": 73.4528, - "step": 130110 - }, - { - "epoch": 0.5257012649636187, - "grad_norm": 510.2374572753906, - "learning_rate": 2.7571549497341042e-05, - "loss": 60.0831, - "step": 130120 - }, - { - "epoch": 0.5257416662289863, - "grad_norm": 1088.753662109375, - "learning_rate": 2.756807732949635e-05, - "loss": 63.8334, - "step": 130130 - }, - { - "epoch": 0.525782067494354, - "grad_norm": 1749.695068359375, - "learning_rate": 2.756460511158567e-05, - "loss": 54.9062, - "step": 130140 - }, - { - "epoch": 0.5258224687597216, - "grad_norm": 992.9537963867188, - "learning_rate": 2.756113284367669e-05, - "loss": 55.7644, - "step": 130150 - }, - { - "epoch": 0.5258628700250891, - "grad_norm": 679.5419311523438, - "learning_rate": 2.7557660525837108e-05, - "loss": 51.0159, - "step": 130160 - }, - { - "epoch": 0.5259032712904568, - "grad_norm": 444.13031005859375, - "learning_rate": 2.7554188158134616e-05, - "loss": 19.1531, - "step": 130170 - }, - { - "epoch": 0.5259436725558244, - "grad_norm": 416.7135314941406, - "learning_rate": 2.7550715740636917e-05, - "loss": 46.4561, - "step": 130180 - }, - { - "epoch": 0.525984073821192, - "grad_norm": 933.5376586914062, - "learning_rate": 2.7547243273411695e-05, - "loss": 66.3492, - "step": 130190 - }, - { - "epoch": 0.5260244750865597, - "grad_norm": 444.9652404785156, - "learning_rate": 2.754377075652666e-05, - "loss": 50.9244, - "step": 130200 - }, - { - "epoch": 0.5260648763519273, - "grad_norm": 652.8956298828125, - "learning_rate": 2.7540298190049503e-05, - "loss": 59.4898, - "step": 130210 - }, - { - "epoch": 0.526105277617295, - "grad_norm": 645.0548095703125, - "learning_rate": 2.7536825574047925e-05, - "loss": 35.8312, - "step": 130220 - }, - { - "epoch": 0.5261456788826626, - "grad_norm": 534.1641845703125, - "learning_rate": 2.7533352908589622e-05, - "loss": 40.8292, - "step": 130230 - }, - { - "epoch": 0.5261860801480303, - "grad_norm": 420.173583984375, - "learning_rate": 2.7529880193742297e-05, - "loss": 30.2268, - "step": 130240 - }, - { - "epoch": 0.5262264814133979, - "grad_norm": 465.4990234375, - "learning_rate": 2.7526407429573657e-05, - "loss": 64.3663, - "step": 130250 - }, - { - "epoch": 0.5262668826787655, - "grad_norm": 511.3827819824219, - "learning_rate": 2.7522934616151414e-05, - "loss": 56.9852, - "step": 130260 - }, - { - "epoch": 0.5263072839441332, - "grad_norm": 624.65087890625, - "learning_rate": 2.751946175354325e-05, - "loss": 63.1493, - "step": 130270 - }, - { - "epoch": 0.5263476852095008, - "grad_norm": 751.966064453125, - "learning_rate": 2.7515988841816887e-05, - "loss": 69.2688, - "step": 130280 - }, - { - "epoch": 0.5263880864748683, - "grad_norm": 907.3563842773438, - "learning_rate": 2.7512515881040028e-05, - "loss": 54.0088, - "step": 130290 - }, - { - "epoch": 0.526428487740236, - "grad_norm": 893.8932495117188, - "learning_rate": 2.7509042871280372e-05, - "loss": 54.6191, - "step": 130300 - }, - { - "epoch": 0.5264688890056036, - "grad_norm": 1132.3380126953125, - "learning_rate": 2.750556981260564e-05, - "loss": 51.4256, - "step": 130310 - }, - { - "epoch": 0.5265092902709713, - "grad_norm": 408.7311096191406, - "learning_rate": 2.7502096705083535e-05, - "loss": 44.0828, - "step": 130320 - }, - { - "epoch": 0.5265496915363389, - "grad_norm": 569.1919555664062, - "learning_rate": 2.749862354878176e-05, - "loss": 34.8458, - "step": 130330 - }, - { - "epoch": 0.5265900928017065, - "grad_norm": 759.1968383789062, - "learning_rate": 2.7495150343768034e-05, - "loss": 59.1251, - "step": 130340 - }, - { - "epoch": 0.5266304940670742, - "grad_norm": 563.669677734375, - "learning_rate": 2.7491677090110076e-05, - "loss": 48.0987, - "step": 130350 - }, - { - "epoch": 0.5266708953324418, - "grad_norm": 1401.8712158203125, - "learning_rate": 2.7488203787875577e-05, - "loss": 67.7033, - "step": 130360 - }, - { - "epoch": 0.5267112965978095, - "grad_norm": 496.8099060058594, - "learning_rate": 2.7484730437132278e-05, - "loss": 44.0139, - "step": 130370 - }, - { - "epoch": 0.5267516978631771, - "grad_norm": 533.447998046875, - "learning_rate": 2.7481257037947872e-05, - "loss": 47.0373, - "step": 130380 - }, - { - "epoch": 0.5267920991285447, - "grad_norm": 825.5570068359375, - "learning_rate": 2.7477783590390082e-05, - "loss": 63.7042, - "step": 130390 - }, - { - "epoch": 0.5268325003939124, - "grad_norm": 577.7359619140625, - "learning_rate": 2.747431009452663e-05, - "loss": 43.8473, - "step": 130400 - }, - { - "epoch": 0.52687290165928, - "grad_norm": 2243.215087890625, - "learning_rate": 2.747083655042522e-05, - "loss": 58.3573, - "step": 130410 - }, - { - "epoch": 0.5269133029246476, - "grad_norm": 897.6618041992188, - "learning_rate": 2.7467362958153587e-05, - "loss": 85.0911, - "step": 130420 - }, - { - "epoch": 0.5269537041900152, - "grad_norm": 976.666259765625, - "learning_rate": 2.7463889317779446e-05, - "loss": 55.5755, - "step": 130430 - }, - { - "epoch": 0.5269941054553828, - "grad_norm": 1133.8804931640625, - "learning_rate": 2.7460415629370508e-05, - "loss": 59.8554, - "step": 130440 - }, - { - "epoch": 0.5270345067207505, - "grad_norm": 757.4534912109375, - "learning_rate": 2.7456941892994497e-05, - "loss": 43.9233, - "step": 130450 - }, - { - "epoch": 0.5270749079861181, - "grad_norm": 962.9010009765625, - "learning_rate": 2.7453468108719145e-05, - "loss": 65.6795, - "step": 130460 - }, - { - "epoch": 0.5271153092514858, - "grad_norm": 394.7574157714844, - "learning_rate": 2.744999427661217e-05, - "loss": 69.4007, - "step": 130470 - }, - { - "epoch": 0.5271557105168534, - "grad_norm": 762.9472045898438, - "learning_rate": 2.744652039674129e-05, - "loss": 49.1858, - "step": 130480 - }, - { - "epoch": 0.527196111782221, - "grad_norm": 2199.121337890625, - "learning_rate": 2.7443046469174237e-05, - "loss": 77.0622, - "step": 130490 - }, - { - "epoch": 0.5272365130475887, - "grad_norm": 370.2516784667969, - "learning_rate": 2.7439572493978736e-05, - "loss": 51.3727, - "step": 130500 - }, - { - "epoch": 0.5272769143129563, - "grad_norm": 1177.574951171875, - "learning_rate": 2.7436098471222522e-05, - "loss": 47.053, - "step": 130510 - }, - { - "epoch": 0.527317315578324, - "grad_norm": 1359.2659912109375, - "learning_rate": 2.74326244009733e-05, - "loss": 61.0058, - "step": 130520 - }, - { - "epoch": 0.5273577168436916, - "grad_norm": 1003.060546875, - "learning_rate": 2.7429150283298817e-05, - "loss": 52.1394, - "step": 130530 - }, - { - "epoch": 0.5273981181090591, - "grad_norm": 1656.4512939453125, - "learning_rate": 2.7425676118266808e-05, - "loss": 46.5512, - "step": 130540 - }, - { - "epoch": 0.5274385193744268, - "grad_norm": 509.01220703125, - "learning_rate": 2.7422201905944982e-05, - "loss": 52.2351, - "step": 130550 - }, - { - "epoch": 0.5274789206397944, - "grad_norm": 1386.0987548828125, - "learning_rate": 2.7418727646401094e-05, - "loss": 44.8062, - "step": 130560 - }, - { - "epoch": 0.527519321905162, - "grad_norm": 708.5176391601562, - "learning_rate": 2.741525333970285e-05, - "loss": 36.5751, - "step": 130570 - }, - { - "epoch": 0.5275597231705297, - "grad_norm": 3067.193115234375, - "learning_rate": 2.7411778985918006e-05, - "loss": 98.0832, - "step": 130580 - }, - { - "epoch": 0.5276001244358973, - "grad_norm": 598.675537109375, - "learning_rate": 2.7408304585114298e-05, - "loss": 60.4569, - "step": 130590 - }, - { - "epoch": 0.527640525701265, - "grad_norm": 821.6907348632812, - "learning_rate": 2.7404830137359444e-05, - "loss": 71.1968, - "step": 130600 - }, - { - "epoch": 0.5276809269666326, - "grad_norm": 796.5547485351562, - "learning_rate": 2.740135564272119e-05, - "loss": 64.8786, - "step": 130610 - }, - { - "epoch": 0.5277213282320002, - "grad_norm": 587.248291015625, - "learning_rate": 2.7397881101267263e-05, - "loss": 61.1327, - "step": 130620 - }, - { - "epoch": 0.5277617294973679, - "grad_norm": 603.5186157226562, - "learning_rate": 2.7394406513065423e-05, - "loss": 54.6609, - "step": 130630 - }, - { - "epoch": 0.5278021307627355, - "grad_norm": 1664.0540771484375, - "learning_rate": 2.739093187818339e-05, - "loss": 69.9045, - "step": 130640 - }, - { - "epoch": 0.5278425320281032, - "grad_norm": 847.181640625, - "learning_rate": 2.7387457196688908e-05, - "loss": 60.1619, - "step": 130650 - }, - { - "epoch": 0.5278829332934708, - "grad_norm": 1435.385498046875, - "learning_rate": 2.7383982468649714e-05, - "loss": 48.167, - "step": 130660 - }, - { - "epoch": 0.5279233345588383, - "grad_norm": 884.9189453125, - "learning_rate": 2.738050769413357e-05, - "loss": 57.8182, - "step": 130670 - }, - { - "epoch": 0.527963735824206, - "grad_norm": 568.2256469726562, - "learning_rate": 2.7377032873208186e-05, - "loss": 66.162, - "step": 130680 - }, - { - "epoch": 0.5280041370895736, - "grad_norm": 744.9805908203125, - "learning_rate": 2.737355800594133e-05, - "loss": 60.4696, - "step": 130690 - }, - { - "epoch": 0.5280445383549413, - "grad_norm": 907.050537109375, - "learning_rate": 2.7370083092400735e-05, - "loss": 43.196, - "step": 130700 - }, - { - "epoch": 0.5280849396203089, - "grad_norm": 778.4954833984375, - "learning_rate": 2.7366608132654154e-05, - "loss": 51.7738, - "step": 130710 - }, - { - "epoch": 0.5281253408856765, - "grad_norm": 405.15338134765625, - "learning_rate": 2.7363133126769325e-05, - "loss": 64.0481, - "step": 130720 - }, - { - "epoch": 0.5281657421510442, - "grad_norm": 591.9571533203125, - "learning_rate": 2.735965807481401e-05, - "loss": 45.1595, - "step": 130730 - }, - { - "epoch": 0.5282061434164118, - "grad_norm": 985.9591064453125, - "learning_rate": 2.7356182976855934e-05, - "loss": 35.7408, - "step": 130740 - }, - { - "epoch": 0.5282465446817795, - "grad_norm": 460.90887451171875, - "learning_rate": 2.7352707832962865e-05, - "loss": 52.3368, - "step": 130750 - }, - { - "epoch": 0.5282869459471471, - "grad_norm": 900.5870971679688, - "learning_rate": 2.734923264320254e-05, - "loss": 31.7685, - "step": 130760 - }, - { - "epoch": 0.5283273472125147, - "grad_norm": 851.9434814453125, - "learning_rate": 2.7345757407642714e-05, - "loss": 41.7662, - "step": 130770 - }, - { - "epoch": 0.5283677484778824, - "grad_norm": 2293.946533203125, - "learning_rate": 2.7342282126351144e-05, - "loss": 96.9833, - "step": 130780 - }, - { - "epoch": 0.52840814974325, - "grad_norm": 837.9186401367188, - "learning_rate": 2.7338806799395577e-05, - "loss": 53.9366, - "step": 130790 - }, - { - "epoch": 0.5284485510086175, - "grad_norm": 314.82135009765625, - "learning_rate": 2.733533142684377e-05, - "loss": 56.3716, - "step": 130800 - }, - { - "epoch": 0.5284889522739852, - "grad_norm": 918.69287109375, - "learning_rate": 2.7331856008763472e-05, - "loss": 50.4389, - "step": 130810 - }, - { - "epoch": 0.5285293535393528, - "grad_norm": 775.286865234375, - "learning_rate": 2.7328380545222436e-05, - "loss": 64.7548, - "step": 130820 - }, - { - "epoch": 0.5285697548047205, - "grad_norm": 461.6344909667969, - "learning_rate": 2.732490503628843e-05, - "loss": 53.2063, - "step": 130830 - }, - { - "epoch": 0.5286101560700881, - "grad_norm": 415.2874755859375, - "learning_rate": 2.73214294820292e-05, - "loss": 94.1432, - "step": 130840 - }, - { - "epoch": 0.5286505573354557, - "grad_norm": 718.537109375, - "learning_rate": 2.7317953882512504e-05, - "loss": 47.0089, - "step": 130850 - }, - { - "epoch": 0.5286909586008234, - "grad_norm": 524.0968017578125, - "learning_rate": 2.7314478237806107e-05, - "loss": 50.3915, - "step": 130860 - }, - { - "epoch": 0.528731359866191, - "grad_norm": 685.7463989257812, - "learning_rate": 2.7311002547977766e-05, - "loss": 47.7177, - "step": 130870 - }, - { - "epoch": 0.5287717611315587, - "grad_norm": 658.6242065429688, - "learning_rate": 2.730752681309524e-05, - "loss": 44.9115, - "step": 130880 - }, - { - "epoch": 0.5288121623969263, - "grad_norm": 831.5072021484375, - "learning_rate": 2.730405103322629e-05, - "loss": 36.1585, - "step": 130890 - }, - { - "epoch": 0.5288525636622939, - "grad_norm": 2113.5791015625, - "learning_rate": 2.7300575208438683e-05, - "loss": 64.7953, - "step": 130900 - }, - { - "epoch": 0.5288929649276616, - "grad_norm": 1694.7403564453125, - "learning_rate": 2.729709933880017e-05, - "loss": 64.9504, - "step": 130910 - }, - { - "epoch": 0.5289333661930292, - "grad_norm": 959.0403442382812, - "learning_rate": 2.7293623424378535e-05, - "loss": 62.3716, - "step": 130920 - }, - { - "epoch": 0.5289737674583967, - "grad_norm": 2491.924072265625, - "learning_rate": 2.7290147465241517e-05, - "loss": 52.1772, - "step": 130930 - }, - { - "epoch": 0.5290141687237644, - "grad_norm": 243.4221954345703, - "learning_rate": 2.7286671461456897e-05, - "loss": 38.3009, - "step": 130940 - }, - { - "epoch": 0.529054569989132, - "grad_norm": 1660.8577880859375, - "learning_rate": 2.7283195413092445e-05, - "loss": 60.126, - "step": 130950 - }, - { - "epoch": 0.5290949712544997, - "grad_norm": 609.1519165039062, - "learning_rate": 2.7279719320215924e-05, - "loss": 59.0399, - "step": 130960 - }, - { - "epoch": 0.5291353725198673, - "grad_norm": 379.8237609863281, - "learning_rate": 2.7276243182895094e-05, - "loss": 64.3225, - "step": 130970 - }, - { - "epoch": 0.529175773785235, - "grad_norm": 535.1643676757812, - "learning_rate": 2.7272767001197742e-05, - "loss": 37.2136, - "step": 130980 - }, - { - "epoch": 0.5292161750506026, - "grad_norm": 587.9727783203125, - "learning_rate": 2.726929077519162e-05, - "loss": 41.9734, - "step": 130990 - }, - { - "epoch": 0.5292565763159702, - "grad_norm": 652.3465576171875, - "learning_rate": 2.726581450494451e-05, - "loss": 83.3366, - "step": 131000 - }, - { - "epoch": 0.5292969775813379, - "grad_norm": 395.1925964355469, - "learning_rate": 2.7262338190524173e-05, - "loss": 70.538, - "step": 131010 - }, - { - "epoch": 0.5293373788467055, - "grad_norm": 602.4820556640625, - "learning_rate": 2.7258861831998388e-05, - "loss": 55.8564, - "step": 131020 - }, - { - "epoch": 0.5293777801120731, - "grad_norm": 2387.6884765625, - "learning_rate": 2.7255385429434932e-05, - "loss": 77.701, - "step": 131030 - }, - { - "epoch": 0.5294181813774408, - "grad_norm": 626.4151000976562, - "learning_rate": 2.725190898290158e-05, - "loss": 71.4265, - "step": 131040 - }, - { - "epoch": 0.5294585826428083, - "grad_norm": 1227.336669921875, - "learning_rate": 2.7248432492466096e-05, - "loss": 63.7168, - "step": 131050 - }, - { - "epoch": 0.529498983908176, - "grad_norm": 506.81475830078125, - "learning_rate": 2.7244955958196265e-05, - "loss": 38.2177, - "step": 131060 - }, - { - "epoch": 0.5295393851735436, - "grad_norm": 550.5950317382812, - "learning_rate": 2.7241479380159868e-05, - "loss": 69.3767, - "step": 131070 - }, - { - "epoch": 0.5295797864389112, - "grad_norm": 751.3677368164062, - "learning_rate": 2.723800275842468e-05, - "loss": 45.0376, - "step": 131080 - }, - { - "epoch": 0.5296201877042789, - "grad_norm": 459.6380310058594, - "learning_rate": 2.7234526093058464e-05, - "loss": 59.1021, - "step": 131090 - }, - { - "epoch": 0.5296605889696465, - "grad_norm": 463.3095703125, - "learning_rate": 2.7231049384129016e-05, - "loss": 37.5911, - "step": 131100 - }, - { - "epoch": 0.5297009902350142, - "grad_norm": 0.0, - "learning_rate": 2.7227572631704107e-05, - "loss": 52.0239, - "step": 131110 - }, - { - "epoch": 0.5297413915003818, - "grad_norm": 1287.0667724609375, - "learning_rate": 2.7224095835851525e-05, - "loss": 85.9998, - "step": 131120 - }, - { - "epoch": 0.5297817927657494, - "grad_norm": 710.130615234375, - "learning_rate": 2.722061899663905e-05, - "loss": 41.3392, - "step": 131130 - }, - { - "epoch": 0.5298221940311171, - "grad_norm": 241.11277770996094, - "learning_rate": 2.7217142114134463e-05, - "loss": 75.5654, - "step": 131140 - }, - { - "epoch": 0.5298625952964847, - "grad_norm": 2309.976806640625, - "learning_rate": 2.7213665188405556e-05, - "loss": 62.305, - "step": 131150 - }, - { - "epoch": 0.5299029965618524, - "grad_norm": 592.4625244140625, - "learning_rate": 2.721018821952011e-05, - "loss": 49.6338, - "step": 131160 - }, - { - "epoch": 0.52994339782722, - "grad_norm": 3443.605224609375, - "learning_rate": 2.7206711207545893e-05, - "loss": 65.5017, - "step": 131170 - }, - { - "epoch": 0.5299837990925875, - "grad_norm": 924.45654296875, - "learning_rate": 2.7203234152550712e-05, - "loss": 52.5658, - "step": 131180 - }, - { - "epoch": 0.5300242003579552, - "grad_norm": 385.39910888671875, - "learning_rate": 2.719975705460234e-05, - "loss": 35.2366, - "step": 131190 - }, - { - "epoch": 0.5300646016233228, - "grad_norm": 709.4951171875, - "learning_rate": 2.7196279913768584e-05, - "loss": 56.7616, - "step": 131200 - }, - { - "epoch": 0.5301050028886904, - "grad_norm": 849.27294921875, - "learning_rate": 2.719280273011721e-05, - "loss": 62.0372, - "step": 131210 - }, - { - "epoch": 0.5301454041540581, - "grad_norm": 943.2574462890625, - "learning_rate": 2.7189325503716022e-05, - "loss": 81.1754, - "step": 131220 - }, - { - "epoch": 0.5301858054194257, - "grad_norm": 839.4765014648438, - "learning_rate": 2.7185848234632803e-05, - "loss": 65.2041, - "step": 131230 - }, - { - "epoch": 0.5302262066847934, - "grad_norm": 879.865966796875, - "learning_rate": 2.7182370922935353e-05, - "loss": 57.2173, - "step": 131240 - }, - { - "epoch": 0.530266607950161, - "grad_norm": 980.9358520507812, - "learning_rate": 2.717889356869146e-05, - "loss": 47.6376, - "step": 131250 - }, - { - "epoch": 0.5303070092155286, - "grad_norm": 680.2994384765625, - "learning_rate": 2.717541617196891e-05, - "loss": 35.0096, - "step": 131260 - }, - { - "epoch": 0.5303474104808963, - "grad_norm": 870.2879638671875, - "learning_rate": 2.71719387328355e-05, - "loss": 54.6478, - "step": 131270 - }, - { - "epoch": 0.5303878117462639, - "grad_norm": 456.5863952636719, - "learning_rate": 2.716846125135903e-05, - "loss": 66.5173, - "step": 131280 - }, - { - "epoch": 0.5304282130116316, - "grad_norm": 1249.5938720703125, - "learning_rate": 2.716498372760729e-05, - "loss": 44.1854, - "step": 131290 - }, - { - "epoch": 0.5304686142769992, - "grad_norm": 364.5703430175781, - "learning_rate": 2.7161506161648076e-05, - "loss": 58.9766, - "step": 131300 - }, - { - "epoch": 0.5305090155423667, - "grad_norm": 890.297119140625, - "learning_rate": 2.7158028553549187e-05, - "loss": 56.6682, - "step": 131310 - }, - { - "epoch": 0.5305494168077344, - "grad_norm": 925.9620361328125, - "learning_rate": 2.715455090337842e-05, - "loss": 43.9666, - "step": 131320 - }, - { - "epoch": 0.530589818073102, - "grad_norm": 511.55609130859375, - "learning_rate": 2.715107321120358e-05, - "loss": 51.9629, - "step": 131330 - }, - { - "epoch": 0.5306302193384697, - "grad_norm": 547.1328125, - "learning_rate": 2.7147595477092457e-05, - "loss": 43.9675, - "step": 131340 - }, - { - "epoch": 0.5306706206038373, - "grad_norm": 784.9970703125, - "learning_rate": 2.7144117701112846e-05, - "loss": 65.4148, - "step": 131350 - }, - { - "epoch": 0.5307110218692049, - "grad_norm": 653.8527221679688, - "learning_rate": 2.7140639883332564e-05, - "loss": 64.9822, - "step": 131360 - }, - { - "epoch": 0.5307514231345726, - "grad_norm": 480.8045959472656, - "learning_rate": 2.713716202381941e-05, - "loss": 53.8297, - "step": 131370 - }, - { - "epoch": 0.5307918243999402, - "grad_norm": 251.93240356445312, - "learning_rate": 2.713368412264118e-05, - "loss": 55.8464, - "step": 131380 - }, - { - "epoch": 0.5308322256653079, - "grad_norm": 609.9501953125, - "learning_rate": 2.713020617986567e-05, - "loss": 38.5014, - "step": 131390 - }, - { - "epoch": 0.5308726269306755, - "grad_norm": 921.5916748046875, - "learning_rate": 2.7126728195560702e-05, - "loss": 52.8066, - "step": 131400 - }, - { - "epoch": 0.5309130281960431, - "grad_norm": 912.7449951171875, - "learning_rate": 2.7123250169794075e-05, - "loss": 54.2252, - "step": 131410 - }, - { - "epoch": 0.5309534294614108, - "grad_norm": 718.9556274414062, - "learning_rate": 2.711977210263359e-05, - "loss": 57.7856, - "step": 131420 - }, - { - "epoch": 0.5309938307267784, - "grad_norm": 457.2162170410156, - "learning_rate": 2.7116293994147053e-05, - "loss": 64.9568, - "step": 131430 - }, - { - "epoch": 0.5310342319921459, - "grad_norm": 621.4869995117188, - "learning_rate": 2.711281584440228e-05, - "loss": 62.9255, - "step": 131440 - }, - { - "epoch": 0.5310746332575136, - "grad_norm": 792.2979125976562, - "learning_rate": 2.710933765346707e-05, - "loss": 47.9132, - "step": 131450 - }, - { - "epoch": 0.5311150345228812, - "grad_norm": 539.8186645507812, - "learning_rate": 2.710585942140924e-05, - "loss": 54.2183, - "step": 131460 - }, - { - "epoch": 0.5311554357882489, - "grad_norm": 558.9265747070312, - "learning_rate": 2.710238114829659e-05, - "loss": 49.2721, - "step": 131470 - }, - { - "epoch": 0.5311958370536165, - "grad_norm": 709.4071655273438, - "learning_rate": 2.7098902834196943e-05, - "loss": 58.9437, - "step": 131480 - }, - { - "epoch": 0.5312362383189841, - "grad_norm": 654.2186889648438, - "learning_rate": 2.7095424479178106e-05, - "loss": 36.4761, - "step": 131490 - }, - { - "epoch": 0.5312766395843518, - "grad_norm": 889.7582397460938, - "learning_rate": 2.7091946083307896e-05, - "loss": 68.6755, - "step": 131500 - }, - { - "epoch": 0.5313170408497194, - "grad_norm": 1404.284912109375, - "learning_rate": 2.708846764665411e-05, - "loss": 67.0801, - "step": 131510 - }, - { - "epoch": 0.5313574421150871, - "grad_norm": 774.0541381835938, - "learning_rate": 2.7084989169284568e-05, - "loss": 70.2885, - "step": 131520 - }, - { - "epoch": 0.5313978433804547, - "grad_norm": 676.1631469726562, - "learning_rate": 2.70815106512671e-05, - "loss": 57.3858, - "step": 131530 - }, - { - "epoch": 0.5314382446458223, - "grad_norm": 288.2684631347656, - "learning_rate": 2.7078032092669502e-05, - "loss": 57.0647, - "step": 131540 - }, - { - "epoch": 0.53147864591119, - "grad_norm": 692.8833618164062, - "learning_rate": 2.70745534935596e-05, - "loss": 52.4784, - "step": 131550 - }, - { - "epoch": 0.5315190471765576, - "grad_norm": 360.534912109375, - "learning_rate": 2.707107485400521e-05, - "loss": 76.2931, - "step": 131560 - }, - { - "epoch": 0.5315594484419252, - "grad_norm": 902.7174682617188, - "learning_rate": 2.7067596174074155e-05, - "loss": 66.0437, - "step": 131570 - }, - { - "epoch": 0.5315998497072928, - "grad_norm": 1684.62451171875, - "learning_rate": 2.7064117453834243e-05, - "loss": 48.1203, - "step": 131580 - }, - { - "epoch": 0.5316402509726604, - "grad_norm": 74.62276458740234, - "learning_rate": 2.70606386933533e-05, - "loss": 69.9313, - "step": 131590 - }, - { - "epoch": 0.5316806522380281, - "grad_norm": 347.76605224609375, - "learning_rate": 2.705715989269914e-05, - "loss": 67.9091, - "step": 131600 - }, - { - "epoch": 0.5317210535033957, - "grad_norm": 2573.411865234375, - "learning_rate": 2.70536810519396e-05, - "loss": 58.865, - "step": 131610 - }, - { - "epoch": 0.5317614547687634, - "grad_norm": 265.0232849121094, - "learning_rate": 2.705020217114248e-05, - "loss": 47.0783, - "step": 131620 - }, - { - "epoch": 0.531801856034131, - "grad_norm": 801.228515625, - "learning_rate": 2.7046723250375617e-05, - "loss": 51.892, - "step": 131630 - }, - { - "epoch": 0.5318422572994986, - "grad_norm": 833.2240600585938, - "learning_rate": 2.7043244289706826e-05, - "loss": 49.0188, - "step": 131640 - }, - { - "epoch": 0.5318826585648663, - "grad_norm": 984.8556518554688, - "learning_rate": 2.7039765289203946e-05, - "loss": 55.2805, - "step": 131650 - }, - { - "epoch": 0.5319230598302339, - "grad_norm": 779.8922119140625, - "learning_rate": 2.703628624893478e-05, - "loss": 40.7662, - "step": 131660 - }, - { - "epoch": 0.5319634610956016, - "grad_norm": 687.851318359375, - "learning_rate": 2.703280716896717e-05, - "loss": 71.506, - "step": 131670 - }, - { - "epoch": 0.5320038623609692, - "grad_norm": 561.60888671875, - "learning_rate": 2.702932804936894e-05, - "loss": 76.0556, - "step": 131680 - }, - { - "epoch": 0.5320442636263367, - "grad_norm": 597.3526611328125, - "learning_rate": 2.7025848890207917e-05, - "loss": 51.0563, - "step": 131690 - }, - { - "epoch": 0.5320846648917044, - "grad_norm": 1162.3173828125, - "learning_rate": 2.7022369691551917e-05, - "loss": 67.7418, - "step": 131700 - }, - { - "epoch": 0.532125066157072, - "grad_norm": 1007.927734375, - "learning_rate": 2.7018890453468788e-05, - "loss": 55.7545, - "step": 131710 - }, - { - "epoch": 0.5321654674224396, - "grad_norm": 583.6637573242188, - "learning_rate": 2.7015411176026344e-05, - "loss": 59.1643, - "step": 131720 - }, - { - "epoch": 0.5322058686878073, - "grad_norm": 503.95025634765625, - "learning_rate": 2.7011931859292427e-05, - "loss": 57.7244, - "step": 131730 - }, - { - "epoch": 0.5322462699531749, - "grad_norm": 545.329345703125, - "learning_rate": 2.7008452503334858e-05, - "loss": 46.0308, - "step": 131740 - }, - { - "epoch": 0.5322866712185426, - "grad_norm": 380.2509765625, - "learning_rate": 2.7004973108221472e-05, - "loss": 40.5332, - "step": 131750 - }, - { - "epoch": 0.5323270724839102, - "grad_norm": 740.1156005859375, - "learning_rate": 2.700149367402011e-05, - "loss": 60.6691, - "step": 131760 - }, - { - "epoch": 0.5323674737492778, - "grad_norm": 1089.05712890625, - "learning_rate": 2.69980142007986e-05, - "loss": 65.5629, - "step": 131770 - }, - { - "epoch": 0.5324078750146455, - "grad_norm": 537.0314331054688, - "learning_rate": 2.699453468862477e-05, - "loss": 55.9601, - "step": 131780 - }, - { - "epoch": 0.5324482762800131, - "grad_norm": 266.5636291503906, - "learning_rate": 2.699105513756645e-05, - "loss": 59.2656, - "step": 131790 - }, - { - "epoch": 0.5324886775453808, - "grad_norm": 889.6873779296875, - "learning_rate": 2.6987575547691497e-05, - "loss": 76.0885, - "step": 131800 - }, - { - "epoch": 0.5325290788107484, - "grad_norm": 1049.7486572265625, - "learning_rate": 2.698409591906773e-05, - "loss": 50.9139, - "step": 131810 - }, - { - "epoch": 0.5325694800761159, - "grad_norm": 820.5618286132812, - "learning_rate": 2.6980616251762997e-05, - "loss": 36.7931, - "step": 131820 - }, - { - "epoch": 0.5326098813414836, - "grad_norm": 643.490966796875, - "learning_rate": 2.6977136545845122e-05, - "loss": 56.8342, - "step": 131830 - }, - { - "epoch": 0.5326502826068512, - "grad_norm": 853.44970703125, - "learning_rate": 2.6973656801381963e-05, - "loss": 60.0717, - "step": 131840 - }, - { - "epoch": 0.5326906838722189, - "grad_norm": 440.1711730957031, - "learning_rate": 2.697017701844134e-05, - "loss": 45.1691, - "step": 131850 - }, - { - "epoch": 0.5327310851375865, - "grad_norm": 468.0743408203125, - "learning_rate": 2.6966697197091108e-05, - "loss": 54.0596, - "step": 131860 - }, - { - "epoch": 0.5327714864029541, - "grad_norm": 1050.264892578125, - "learning_rate": 2.69632173373991e-05, - "loss": 55.2322, - "step": 131870 - }, - { - "epoch": 0.5328118876683218, - "grad_norm": 720.6689453125, - "learning_rate": 2.695973743943315e-05, - "loss": 55.2848, - "step": 131880 - }, - { - "epoch": 0.5328522889336894, - "grad_norm": 1108.028564453125, - "learning_rate": 2.6956257503261116e-05, - "loss": 40.9585, - "step": 131890 - }, - { - "epoch": 0.532892690199057, - "grad_norm": 1092.5072021484375, - "learning_rate": 2.695277752895084e-05, - "loss": 44.4573, - "step": 131900 - }, - { - "epoch": 0.5329330914644247, - "grad_norm": 625.7965087890625, - "learning_rate": 2.6949297516570156e-05, - "loss": 28.5109, - "step": 131910 - }, - { - "epoch": 0.5329734927297923, - "grad_norm": 681.8804931640625, - "learning_rate": 2.6945817466186912e-05, - "loss": 43.6512, - "step": 131920 - }, - { - "epoch": 0.53301389399516, - "grad_norm": 756.3519897460938, - "learning_rate": 2.694233737786896e-05, - "loss": 76.8343, - "step": 131930 - }, - { - "epoch": 0.5330542952605276, - "grad_norm": 1092.2066650390625, - "learning_rate": 2.693885725168414e-05, - "loss": 46.8568, - "step": 131940 - }, - { - "epoch": 0.5330946965258951, - "grad_norm": 988.5387573242188, - "learning_rate": 2.6935377087700297e-05, - "loss": 73.0625, - "step": 131950 - }, - { - "epoch": 0.5331350977912628, - "grad_norm": 940.03076171875, - "learning_rate": 2.693189688598528e-05, - "loss": 63.5997, - "step": 131960 - }, - { - "epoch": 0.5331754990566304, - "grad_norm": 372.99151611328125, - "learning_rate": 2.6928416646606936e-05, - "loss": 57.4221, - "step": 131970 - }, - { - "epoch": 0.5332159003219981, - "grad_norm": 1288.4371337890625, - "learning_rate": 2.6924936369633125e-05, - "loss": 48.3105, - "step": 131980 - }, - { - "epoch": 0.5332563015873657, - "grad_norm": 2535.275634765625, - "learning_rate": 2.6921456055131683e-05, - "loss": 67.3763, - "step": 131990 - }, - { - "epoch": 0.5332967028527333, - "grad_norm": 3868.3837890625, - "learning_rate": 2.6917975703170466e-05, - "loss": 65.5701, - "step": 132000 - }, - { - "epoch": 0.533337104118101, - "grad_norm": 810.7564697265625, - "learning_rate": 2.691449531381733e-05, - "loss": 64.9525, - "step": 132010 - }, - { - "epoch": 0.5333775053834686, - "grad_norm": 1662.32373046875, - "learning_rate": 2.6911014887140122e-05, - "loss": 57.4445, - "step": 132020 - }, - { - "epoch": 0.5334179066488363, - "grad_norm": 528.4577026367188, - "learning_rate": 2.6907534423206692e-05, - "loss": 49.0796, - "step": 132030 - }, - { - "epoch": 0.5334583079142039, - "grad_norm": 798.1748657226562, - "learning_rate": 2.6904053922084895e-05, - "loss": 51.6413, - "step": 132040 - }, - { - "epoch": 0.5334987091795715, - "grad_norm": 589.7070922851562, - "learning_rate": 2.6900573383842583e-05, - "loss": 33.4644, - "step": 132050 - }, - { - "epoch": 0.5335391104449392, - "grad_norm": 757.95458984375, - "learning_rate": 2.689709280854762e-05, - "loss": 52.9057, - "step": 132060 - }, - { - "epoch": 0.5335795117103068, - "grad_norm": 567.9570922851562, - "learning_rate": 2.6893612196267853e-05, - "loss": 46.4571, - "step": 132070 - }, - { - "epoch": 0.5336199129756743, - "grad_norm": 732.6300048828125, - "learning_rate": 2.6890131547071147e-05, - "loss": 66.3549, - "step": 132080 - }, - { - "epoch": 0.533660314241042, - "grad_norm": 1564.1744384765625, - "learning_rate": 2.6886650861025343e-05, - "loss": 82.0941, - "step": 132090 - }, - { - "epoch": 0.5337007155064096, - "grad_norm": 6092.22314453125, - "learning_rate": 2.6883170138198323e-05, - "loss": 73.8271, - "step": 132100 - }, - { - "epoch": 0.5337411167717773, - "grad_norm": 3677.495361328125, - "learning_rate": 2.6879689378657923e-05, - "loss": 83.4729, - "step": 132110 - }, - { - "epoch": 0.5337815180371449, - "grad_norm": 830.318603515625, - "learning_rate": 2.6876208582472012e-05, - "loss": 43.1947, - "step": 132120 - }, - { - "epoch": 0.5338219193025125, - "grad_norm": 861.20849609375, - "learning_rate": 2.687272774970845e-05, - "loss": 79.1685, - "step": 132130 - }, - { - "epoch": 0.5338623205678802, - "grad_norm": 541.7510375976562, - "learning_rate": 2.6869246880435095e-05, - "loss": 50.6851, - "step": 132140 - }, - { - "epoch": 0.5339027218332478, - "grad_norm": 2233.982177734375, - "learning_rate": 2.686576597471981e-05, - "loss": 64.9713, - "step": 132150 - }, - { - "epoch": 0.5339431230986155, - "grad_norm": 801.9125366210938, - "learning_rate": 2.686228503263045e-05, - "loss": 67.6367, - "step": 132160 - }, - { - "epoch": 0.5339835243639831, - "grad_norm": 577.6140747070312, - "learning_rate": 2.685880405423489e-05, - "loss": 56.8992, - "step": 132170 - }, - { - "epoch": 0.5340239256293507, - "grad_norm": 303.9769592285156, - "learning_rate": 2.6855323039601e-05, - "loss": 55.4841, - "step": 132180 - }, - { - "epoch": 0.5340643268947184, - "grad_norm": 0.0, - "learning_rate": 2.685184198879662e-05, - "loss": 79.864, - "step": 132190 - }, - { - "epoch": 0.534104728160086, - "grad_norm": 1271.53759765625, - "learning_rate": 2.684836090188963e-05, - "loss": 53.1909, - "step": 132200 - }, - { - "epoch": 0.5341451294254536, - "grad_norm": 390.48504638671875, - "learning_rate": 2.6844879778947884e-05, - "loss": 45.1817, - "step": 132210 - }, - { - "epoch": 0.5341855306908212, - "grad_norm": 1045.8829345703125, - "learning_rate": 2.6841398620039273e-05, - "loss": 57.4035, - "step": 132220 - }, - { - "epoch": 0.5342259319561888, - "grad_norm": 211.4333038330078, - "learning_rate": 2.6837917425231633e-05, - "loss": 93.9413, - "step": 132230 - }, - { - "epoch": 0.5342663332215565, - "grad_norm": 362.2946472167969, - "learning_rate": 2.6834436194592853e-05, - "loss": 33.1701, - "step": 132240 - }, - { - "epoch": 0.5343067344869241, - "grad_norm": 875.4388427734375, - "learning_rate": 2.6830954928190794e-05, - "loss": 76.5484, - "step": 132250 - }, - { - "epoch": 0.5343471357522918, - "grad_norm": 546.877197265625, - "learning_rate": 2.682747362609333e-05, - "loss": 38.9788, - "step": 132260 - }, - { - "epoch": 0.5343875370176594, - "grad_norm": 674.6320190429688, - "learning_rate": 2.6823992288368322e-05, - "loss": 70.6211, - "step": 132270 - }, - { - "epoch": 0.534427938283027, - "grad_norm": 868.1038818359375, - "learning_rate": 2.6820510915083648e-05, - "loss": 49.9378, - "step": 132280 - }, - { - "epoch": 0.5344683395483947, - "grad_norm": 355.66741943359375, - "learning_rate": 2.681702950630717e-05, - "loss": 34.1495, - "step": 132290 - }, - { - "epoch": 0.5345087408137623, - "grad_norm": 460.1276550292969, - "learning_rate": 2.6813548062106775e-05, - "loss": 64.0979, - "step": 132300 - }, - { - "epoch": 0.53454914207913, - "grad_norm": 0.0, - "learning_rate": 2.6810066582550324e-05, - "loss": 59.0976, - "step": 132310 - }, - { - "epoch": 0.5345895433444976, - "grad_norm": 310.49627685546875, - "learning_rate": 2.6806585067705692e-05, - "loss": 64.1751, - "step": 132320 - }, - { - "epoch": 0.5346299446098651, - "grad_norm": 862.5602416992188, - "learning_rate": 2.680310351764075e-05, - "loss": 63.3662, - "step": 132330 - }, - { - "epoch": 0.5346703458752328, - "grad_norm": 802.3466186523438, - "learning_rate": 2.679962193242338e-05, - "loss": 71.8971, - "step": 132340 - }, - { - "epoch": 0.5347107471406004, - "grad_norm": 1650.000244140625, - "learning_rate": 2.6796140312121458e-05, - "loss": 59.4168, - "step": 132350 - }, - { - "epoch": 0.534751148405968, - "grad_norm": 832.1773071289062, - "learning_rate": 2.6792658656802856e-05, - "loss": 45.569, - "step": 132360 - }, - { - "epoch": 0.5347915496713357, - "grad_norm": 917.97412109375, - "learning_rate": 2.6789176966535444e-05, - "loss": 45.4459, - "step": 132370 - }, - { - "epoch": 0.5348319509367033, - "grad_norm": 583.6998901367188, - "learning_rate": 2.678569524138711e-05, - "loss": 66.7321, - "step": 132380 - }, - { - "epoch": 0.534872352202071, - "grad_norm": 418.53680419921875, - "learning_rate": 2.678221348142573e-05, - "loss": 34.0593, - "step": 132390 - }, - { - "epoch": 0.5349127534674386, - "grad_norm": 402.7685241699219, - "learning_rate": 2.6778731686719178e-05, - "loss": 55.3928, - "step": 132400 - }, - { - "epoch": 0.5349531547328062, - "grad_norm": 372.7942199707031, - "learning_rate": 2.6775249857335333e-05, - "loss": 30.0867, - "step": 132410 - }, - { - "epoch": 0.5349935559981739, - "grad_norm": 1119.2479248046875, - "learning_rate": 2.677176799334208e-05, - "loss": 58.895, - "step": 132420 - }, - { - "epoch": 0.5350339572635415, - "grad_norm": 407.2303771972656, - "learning_rate": 2.6768286094807298e-05, - "loss": 64.0234, - "step": 132430 - }, - { - "epoch": 0.5350743585289092, - "grad_norm": 855.4847412109375, - "learning_rate": 2.6764804161798867e-05, - "loss": 89.9496, - "step": 132440 - }, - { - "epoch": 0.5351147597942768, - "grad_norm": 1015.7791748046875, - "learning_rate": 2.6761322194384674e-05, - "loss": 60.1903, - "step": 132450 - }, - { - "epoch": 0.5351551610596443, - "grad_norm": 1254.9368896484375, - "learning_rate": 2.6757840192632598e-05, - "loss": 61.4563, - "step": 132460 - }, - { - "epoch": 0.535195562325012, - "grad_norm": 299.9359436035156, - "learning_rate": 2.6754358156610525e-05, - "loss": 38.3524, - "step": 132470 - }, - { - "epoch": 0.5352359635903796, - "grad_norm": 811.1012573242188, - "learning_rate": 2.6750876086386328e-05, - "loss": 47.9699, - "step": 132480 - }, - { - "epoch": 0.5352763648557473, - "grad_norm": 893.4951171875, - "learning_rate": 2.6747393982027903e-05, - "loss": 48.4848, - "step": 132490 - }, - { - "epoch": 0.5353167661211149, - "grad_norm": 727.3240966796875, - "learning_rate": 2.674391184360313e-05, - "loss": 34.6639, - "step": 132500 - }, - { - "epoch": 0.5353571673864825, - "grad_norm": 2415.583251953125, - "learning_rate": 2.6740429671179907e-05, - "loss": 73.8259, - "step": 132510 - }, - { - "epoch": 0.5353975686518502, - "grad_norm": 443.5478515625, - "learning_rate": 2.6736947464826108e-05, - "loss": 48.6447, - "step": 132520 - }, - { - "epoch": 0.5354379699172178, - "grad_norm": 1031.6845703125, - "learning_rate": 2.6733465224609622e-05, - "loss": 54.8037, - "step": 132530 - }, - { - "epoch": 0.5354783711825855, - "grad_norm": 564.192138671875, - "learning_rate": 2.6729982950598338e-05, - "loss": 51.8397, - "step": 132540 - }, - { - "epoch": 0.5355187724479531, - "grad_norm": 472.5278625488281, - "learning_rate": 2.6726500642860154e-05, - "loss": 49.6563, - "step": 132550 - }, - { - "epoch": 0.5355591737133207, - "grad_norm": 551.7787475585938, - "learning_rate": 2.6723018301462937e-05, - "loss": 43.9799, - "step": 132560 - }, - { - "epoch": 0.5355995749786884, - "grad_norm": 1193.7701416015625, - "learning_rate": 2.67195359264746e-05, - "loss": 64.9433, - "step": 132570 - }, - { - "epoch": 0.535639976244056, - "grad_norm": 799.9347534179688, - "learning_rate": 2.671605351796302e-05, - "loss": 41.734, - "step": 132580 - }, - { - "epoch": 0.5356803775094235, - "grad_norm": 759.9945068359375, - "learning_rate": 2.67125710759961e-05, - "loss": 76.4194, - "step": 132590 - }, - { - "epoch": 0.5357207787747912, - "grad_norm": 647.0215454101562, - "learning_rate": 2.6709088600641717e-05, - "loss": 43.6254, - "step": 132600 - }, - { - "epoch": 0.5357611800401588, - "grad_norm": 720.9840698242188, - "learning_rate": 2.6705606091967778e-05, - "loss": 49.2521, - "step": 132610 - }, - { - "epoch": 0.5358015813055265, - "grad_norm": 1699.47216796875, - "learning_rate": 2.670212355004217e-05, - "loss": 48.1515, - "step": 132620 - }, - { - "epoch": 0.5358419825708941, - "grad_norm": 912.8600463867188, - "learning_rate": 2.6698640974932793e-05, - "loss": 60.5469, - "step": 132630 - }, - { - "epoch": 0.5358823838362617, - "grad_norm": 562.8499755859375, - "learning_rate": 2.6695158366707522e-05, - "loss": 53.9552, - "step": 132640 - }, - { - "epoch": 0.5359227851016294, - "grad_norm": 778.0060424804688, - "learning_rate": 2.6691675725434272e-05, - "loss": 52.4513, - "step": 132650 - }, - { - "epoch": 0.535963186366997, - "grad_norm": 773.4092407226562, - "learning_rate": 2.6688193051180933e-05, - "loss": 39.1972, - "step": 132660 - }, - { - "epoch": 0.5360035876323647, - "grad_norm": 608.3802490234375, - "learning_rate": 2.66847103440154e-05, - "loss": 37.7895, - "step": 132670 - }, - { - "epoch": 0.5360439888977323, - "grad_norm": 775.6356811523438, - "learning_rate": 2.6681227604005576e-05, - "loss": 41.3886, - "step": 132680 - }, - { - "epoch": 0.5360843901630999, - "grad_norm": 1359.913818359375, - "learning_rate": 2.6677744831219348e-05, - "loss": 69.0232, - "step": 132690 - }, - { - "epoch": 0.5361247914284676, - "grad_norm": 540.3826293945312, - "learning_rate": 2.6674262025724627e-05, - "loss": 32.8396, - "step": 132700 - }, - { - "epoch": 0.5361651926938352, - "grad_norm": 862.3062744140625, - "learning_rate": 2.66707791875893e-05, - "loss": 64.9376, - "step": 132710 - }, - { - "epoch": 0.5362055939592028, - "grad_norm": 557.792236328125, - "learning_rate": 2.666729631688128e-05, - "loss": 53.6906, - "step": 132720 - }, - { - "epoch": 0.5362459952245704, - "grad_norm": 711.6216430664062, - "learning_rate": 2.6663813413668455e-05, - "loss": 55.813, - "step": 132730 - }, - { - "epoch": 0.536286396489938, - "grad_norm": 795.2600708007812, - "learning_rate": 2.6660330478018726e-05, - "loss": 57.3366, - "step": 132740 - }, - { - "epoch": 0.5363267977553057, - "grad_norm": 375.18975830078125, - "learning_rate": 2.6656847510000012e-05, - "loss": 64.0476, - "step": 132750 - }, - { - "epoch": 0.5363671990206733, - "grad_norm": 765.218994140625, - "learning_rate": 2.6653364509680188e-05, - "loss": 52.8319, - "step": 132760 - }, - { - "epoch": 0.536407600286041, - "grad_norm": 609.1416015625, - "learning_rate": 2.664988147712718e-05, - "loss": 34.5201, - "step": 132770 - }, - { - "epoch": 0.5364480015514086, - "grad_norm": 629.1380615234375, - "learning_rate": 2.664639841240888e-05, - "loss": 30.9847, - "step": 132780 - }, - { - "epoch": 0.5364884028167762, - "grad_norm": 0.0, - "learning_rate": 2.6642915315593204e-05, - "loss": 41.8538, - "step": 132790 - }, - { - "epoch": 0.5365288040821439, - "grad_norm": 751.7651977539062, - "learning_rate": 2.6639432186748043e-05, - "loss": 53.989, - "step": 132800 - }, - { - "epoch": 0.5365692053475115, - "grad_norm": 829.0548095703125, - "learning_rate": 2.6635949025941303e-05, - "loss": 55.0998, - "step": 132810 - }, - { - "epoch": 0.5366096066128792, - "grad_norm": 1614.1934814453125, - "learning_rate": 2.6632465833240893e-05, - "loss": 67.0182, - "step": 132820 - }, - { - "epoch": 0.5366500078782468, - "grad_norm": 1317.6927490234375, - "learning_rate": 2.662898260871473e-05, - "loss": 61.6694, - "step": 132830 - }, - { - "epoch": 0.5366904091436144, - "grad_norm": 427.5072937011719, - "learning_rate": 2.662549935243071e-05, - "loss": 50.9423, - "step": 132840 - }, - { - "epoch": 0.536730810408982, - "grad_norm": 1174.3800048828125, - "learning_rate": 2.6622016064456738e-05, - "loss": 72.6584, - "step": 132850 - }, - { - "epoch": 0.5367712116743496, - "grad_norm": 379.9467468261719, - "learning_rate": 2.661853274486073e-05, - "loss": 64.3494, - "step": 132860 - }, - { - "epoch": 0.5368116129397172, - "grad_norm": 1121.682861328125, - "learning_rate": 2.6615049393710596e-05, - "loss": 87.2399, - "step": 132870 - }, - { - "epoch": 0.5368520142050849, - "grad_norm": 1015.9825439453125, - "learning_rate": 2.661156601107424e-05, - "loss": 76.0801, - "step": 132880 - }, - { - "epoch": 0.5368924154704525, - "grad_norm": 364.2757568359375, - "learning_rate": 2.660808259701958e-05, - "loss": 44.1075, - "step": 132890 - }, - { - "epoch": 0.5369328167358202, - "grad_norm": 350.0528259277344, - "learning_rate": 2.6604599151614513e-05, - "loss": 41.6351, - "step": 132900 - }, - { - "epoch": 0.5369732180011878, - "grad_norm": 742.4920654296875, - "learning_rate": 2.660111567492696e-05, - "loss": 47.2528, - "step": 132910 - }, - { - "epoch": 0.5370136192665554, - "grad_norm": 588.64599609375, - "learning_rate": 2.6597632167024843e-05, - "loss": 49.0769, - "step": 132920 - }, - { - "epoch": 0.5370540205319231, - "grad_norm": 519.329345703125, - "learning_rate": 2.6594148627976056e-05, - "loss": 27.8517, - "step": 132930 - }, - { - "epoch": 0.5370944217972907, - "grad_norm": 620.1698608398438, - "learning_rate": 2.659066505784852e-05, - "loss": 30.6305, - "step": 132940 - }, - { - "epoch": 0.5371348230626584, - "grad_norm": 615.6215209960938, - "learning_rate": 2.6587181456710153e-05, - "loss": 70.2314, - "step": 132950 - }, - { - "epoch": 0.537175224328026, - "grad_norm": 766.043212890625, - "learning_rate": 2.6583697824628868e-05, - "loss": 59.5871, - "step": 132960 - }, - { - "epoch": 0.5372156255933935, - "grad_norm": 719.8123168945312, - "learning_rate": 2.6580214161672577e-05, - "loss": 92.2357, - "step": 132970 - }, - { - "epoch": 0.5372560268587612, - "grad_norm": 0.0, - "learning_rate": 2.65767304679092e-05, - "loss": 56.5093, - "step": 132980 - }, - { - "epoch": 0.5372964281241288, - "grad_norm": 615.7906494140625, - "learning_rate": 2.6573246743406643e-05, - "loss": 82.8496, - "step": 132990 - }, - { - "epoch": 0.5373368293894965, - "grad_norm": 377.28985595703125, - "learning_rate": 2.656976298823284e-05, - "loss": 61.0799, - "step": 133000 - }, - { - "epoch": 0.5373772306548641, - "grad_norm": 839.7172241210938, - "learning_rate": 2.656627920245569e-05, - "loss": 57.0245, - "step": 133010 - }, - { - "epoch": 0.5374176319202317, - "grad_norm": 296.6985168457031, - "learning_rate": 2.6562795386143126e-05, - "loss": 83.4302, - "step": 133020 - }, - { - "epoch": 0.5374580331855994, - "grad_norm": 615.6727294921875, - "learning_rate": 2.6559311539363057e-05, - "loss": 83.2225, - "step": 133030 - }, - { - "epoch": 0.537498434450967, - "grad_norm": 518.0780029296875, - "learning_rate": 2.6555827662183414e-05, - "loss": 61.7653, - "step": 133040 - }, - { - "epoch": 0.5375388357163347, - "grad_norm": 585.662841796875, - "learning_rate": 2.6552343754672103e-05, - "loss": 30.4691, - "step": 133050 - }, - { - "epoch": 0.5375792369817023, - "grad_norm": 763.933349609375, - "learning_rate": 2.654885981689706e-05, - "loss": 47.9403, - "step": 133060 - }, - { - "epoch": 0.5376196382470699, - "grad_norm": 4562.193359375, - "learning_rate": 2.654537584892619e-05, - "loss": 94.3141, - "step": 133070 - }, - { - "epoch": 0.5376600395124376, - "grad_norm": 2078.553466796875, - "learning_rate": 2.6541891850827427e-05, - "loss": 59.5855, - "step": 133080 - }, - { - "epoch": 0.5377004407778052, - "grad_norm": 882.0210571289062, - "learning_rate": 2.653840782266868e-05, - "loss": 48.3455, - "step": 133090 - }, - { - "epoch": 0.5377408420431727, - "grad_norm": 643.2105712890625, - "learning_rate": 2.653492376451789e-05, - "loss": 50.6237, - "step": 133100 - }, - { - "epoch": 0.5377812433085404, - "grad_norm": 468.3760986328125, - "learning_rate": 2.6531439676442966e-05, - "loss": 50.7116, - "step": 133110 - }, - { - "epoch": 0.537821644573908, - "grad_norm": 763.0037841796875, - "learning_rate": 2.6527955558511842e-05, - "loss": 78.7773, - "step": 133120 - }, - { - "epoch": 0.5378620458392757, - "grad_norm": 289.5901184082031, - "learning_rate": 2.652447141079243e-05, - "loss": 46.9934, - "step": 133130 - }, - { - "epoch": 0.5379024471046433, - "grad_norm": 776.5145874023438, - "learning_rate": 2.6520987233352668e-05, - "loss": 65.4828, - "step": 133140 - }, - { - "epoch": 0.5379428483700109, - "grad_norm": 2137.632568359375, - "learning_rate": 2.6517503026260477e-05, - "loss": 51.402, - "step": 133150 - }, - { - "epoch": 0.5379832496353786, - "grad_norm": 1670.868896484375, - "learning_rate": 2.6514018789583784e-05, - "loss": 58.1956, - "step": 133160 - }, - { - "epoch": 0.5380236509007462, - "grad_norm": 451.3445739746094, - "learning_rate": 2.651053452339051e-05, - "loss": 47.8556, - "step": 133170 - }, - { - "epoch": 0.5380640521661139, - "grad_norm": 993.9606323242188, - "learning_rate": 2.650705022774859e-05, - "loss": 53.2049, - "step": 133180 - }, - { - "epoch": 0.5381044534314815, - "grad_norm": 836.609130859375, - "learning_rate": 2.6503565902725945e-05, - "loss": 43.5089, - "step": 133190 - }, - { - "epoch": 0.5381448546968491, - "grad_norm": 999.381103515625, - "learning_rate": 2.650008154839052e-05, - "loss": 61.5361, - "step": 133200 - }, - { - "epoch": 0.5381852559622168, - "grad_norm": 806.6781616210938, - "learning_rate": 2.6496597164810228e-05, - "loss": 82.2622, - "step": 133210 - }, - { - "epoch": 0.5382256572275844, - "grad_norm": 576.73974609375, - "learning_rate": 2.6493112752053e-05, - "loss": 32.6687, - "step": 133220 - }, - { - "epoch": 0.538266058492952, - "grad_norm": 362.6260986328125, - "learning_rate": 2.6489628310186777e-05, - "loss": 27.9188, - "step": 133230 - }, - { - "epoch": 0.5383064597583196, - "grad_norm": 422.7489013671875, - "learning_rate": 2.6486143839279487e-05, - "loss": 54.0248, - "step": 133240 - }, - { - "epoch": 0.5383468610236872, - "grad_norm": 578.5451049804688, - "learning_rate": 2.6482659339399045e-05, - "loss": 60.191, - "step": 133250 - }, - { - "epoch": 0.5383872622890549, - "grad_norm": 2537.281005859375, - "learning_rate": 2.64791748106134e-05, - "loss": 58.6423, - "step": 133260 - }, - { - "epoch": 0.5384276635544225, - "grad_norm": 537.8567504882812, - "learning_rate": 2.647569025299048e-05, - "loss": 39.9818, - "step": 133270 - }, - { - "epoch": 0.5384680648197901, - "grad_norm": 415.552001953125, - "learning_rate": 2.647220566659822e-05, - "loss": 41.9125, - "step": 133280 - }, - { - "epoch": 0.5385084660851578, - "grad_norm": 603.8895874023438, - "learning_rate": 2.6468721051504554e-05, - "loss": 74.081, - "step": 133290 - }, - { - "epoch": 0.5385488673505254, - "grad_norm": 918.3076171875, - "learning_rate": 2.646523640777741e-05, - "loss": 67.8838, - "step": 133300 - }, - { - "epoch": 0.5385892686158931, - "grad_norm": 644.3486938476562, - "learning_rate": 2.646175173548474e-05, - "loss": 39.718, - "step": 133310 - }, - { - "epoch": 0.5386296698812607, - "grad_norm": 1127.198486328125, - "learning_rate": 2.6458267034694463e-05, - "loss": 50.1396, - "step": 133320 - }, - { - "epoch": 0.5386700711466283, - "grad_norm": 1032.7508544921875, - "learning_rate": 2.645478230547451e-05, - "loss": 60.5964, - "step": 133330 - }, - { - "epoch": 0.538710472411996, - "grad_norm": 2308.70654296875, - "learning_rate": 2.6451297547892834e-05, - "loss": 86.4414, - "step": 133340 - }, - { - "epoch": 0.5387508736773636, - "grad_norm": 250.09999084472656, - "learning_rate": 2.644781276201736e-05, - "loss": 38.5809, - "step": 133350 - }, - { - "epoch": 0.5387912749427312, - "grad_norm": 892.7601318359375, - "learning_rate": 2.6444327947916036e-05, - "loss": 54.9328, - "step": 133360 - }, - { - "epoch": 0.5388316762080988, - "grad_norm": 742.9862670898438, - "learning_rate": 2.6440843105656793e-05, - "loss": 53.7666, - "step": 133370 - }, - { - "epoch": 0.5388720774734664, - "grad_norm": 580.2630004882812, - "learning_rate": 2.6437358235307576e-05, - "loss": 42.9191, - "step": 133380 - }, - { - "epoch": 0.5389124787388341, - "grad_norm": 657.9290161132812, - "learning_rate": 2.643387333693631e-05, - "loss": 48.353, - "step": 133390 - }, - { - "epoch": 0.5389528800042017, - "grad_norm": 278.8210144042969, - "learning_rate": 2.6430388410610955e-05, - "loss": 28.9319, - "step": 133400 - }, - { - "epoch": 0.5389932812695694, - "grad_norm": 4367.41015625, - "learning_rate": 2.6426903456399442e-05, - "loss": 80.4687, - "step": 133410 - }, - { - "epoch": 0.539033682534937, - "grad_norm": 192.4734649658203, - "learning_rate": 2.6423418474369704e-05, - "loss": 34.9511, - "step": 133420 - }, - { - "epoch": 0.5390740838003046, - "grad_norm": 2244.14892578125, - "learning_rate": 2.6419933464589695e-05, - "loss": 88.4328, - "step": 133430 - }, - { - "epoch": 0.5391144850656723, - "grad_norm": 520.8280639648438, - "learning_rate": 2.641644842712735e-05, - "loss": 60.3455, - "step": 133440 - }, - { - "epoch": 0.5391548863310399, - "grad_norm": 356.8963623046875, - "learning_rate": 2.6412963362050618e-05, - "loss": 42.0201, - "step": 133450 - }, - { - "epoch": 0.5391952875964076, - "grad_norm": 578.19873046875, - "learning_rate": 2.640947826942743e-05, - "loss": 80.3492, - "step": 133460 - }, - { - "epoch": 0.5392356888617752, - "grad_norm": 855.69677734375, - "learning_rate": 2.640599314932574e-05, - "loss": 64.0816, - "step": 133470 - }, - { - "epoch": 0.5392760901271428, - "grad_norm": 1241.7418212890625, - "learning_rate": 2.6402508001813496e-05, - "loss": 59.2113, - "step": 133480 - }, - { - "epoch": 0.5393164913925104, - "grad_norm": 245.85995483398438, - "learning_rate": 2.6399022826958635e-05, - "loss": 40.4469, - "step": 133490 - }, - { - "epoch": 0.539356892657878, - "grad_norm": 1086.880615234375, - "learning_rate": 2.6395537624829096e-05, - "loss": 47.158, - "step": 133500 - }, - { - "epoch": 0.5393972939232456, - "grad_norm": 1283.5257568359375, - "learning_rate": 2.639205239549284e-05, - "loss": 75.8677, - "step": 133510 - }, - { - "epoch": 0.5394376951886133, - "grad_norm": 973.451904296875, - "learning_rate": 2.63885671390178e-05, - "loss": 60.6037, - "step": 133520 - }, - { - "epoch": 0.5394780964539809, - "grad_norm": 743.983642578125, - "learning_rate": 2.6385081855471937e-05, - "loss": 46.7652, - "step": 133530 - }, - { - "epoch": 0.5395184977193486, - "grad_norm": 338.3520812988281, - "learning_rate": 2.638159654492318e-05, - "loss": 84.5076, - "step": 133540 - }, - { - "epoch": 0.5395588989847162, - "grad_norm": 460.87103271484375, - "learning_rate": 2.6378111207439494e-05, - "loss": 51.0171, - "step": 133550 - }, - { - "epoch": 0.5395993002500838, - "grad_norm": 0.0, - "learning_rate": 2.637462584308881e-05, - "loss": 49.7129, - "step": 133560 - }, - { - "epoch": 0.5396397015154515, - "grad_norm": 1022.263427734375, - "learning_rate": 2.6371140451939103e-05, - "loss": 69.8698, - "step": 133570 - }, - { - "epoch": 0.5396801027808191, - "grad_norm": 1827.470947265625, - "learning_rate": 2.6367655034058302e-05, - "loss": 79.5532, - "step": 133580 - }, - { - "epoch": 0.5397205040461868, - "grad_norm": 608.9415893554688, - "learning_rate": 2.6364169589514358e-05, - "loss": 61.6891, - "step": 133590 - }, - { - "epoch": 0.5397609053115544, - "grad_norm": 1445.496826171875, - "learning_rate": 2.636068411837523e-05, - "loss": 67.8332, - "step": 133600 - }, - { - "epoch": 0.5398013065769219, - "grad_norm": 633.453369140625, - "learning_rate": 2.6357198620708868e-05, - "loss": 41.0135, - "step": 133610 - }, - { - "epoch": 0.5398417078422896, - "grad_norm": 1123.755615234375, - "learning_rate": 2.635371309658321e-05, - "loss": 65.9337, - "step": 133620 - }, - { - "epoch": 0.5398821091076572, - "grad_norm": 412.05029296875, - "learning_rate": 2.6350227546066218e-05, - "loss": 57.8581, - "step": 133630 - }, - { - "epoch": 0.5399225103730249, - "grad_norm": 794.2384643554688, - "learning_rate": 2.634674196922585e-05, - "loss": 47.7789, - "step": 133640 - }, - { - "epoch": 0.5399629116383925, - "grad_norm": 839.2406616210938, - "learning_rate": 2.6343256366130054e-05, - "loss": 49.6203, - "step": 133650 - }, - { - "epoch": 0.5400033129037601, - "grad_norm": 474.2958068847656, - "learning_rate": 2.633977073684679e-05, - "loss": 73.9534, - "step": 133660 - }, - { - "epoch": 0.5400437141691278, - "grad_norm": 611.3136596679688, - "learning_rate": 2.6336285081443996e-05, - "loss": 90.5097, - "step": 133670 - }, - { - "epoch": 0.5400841154344954, - "grad_norm": 1800.31884765625, - "learning_rate": 2.633279939998964e-05, - "loss": 66.3581, - "step": 133680 - }, - { - "epoch": 0.540124516699863, - "grad_norm": 437.4395751953125, - "learning_rate": 2.6329313692551672e-05, - "loss": 54.8113, - "step": 133690 - }, - { - "epoch": 0.5401649179652307, - "grad_norm": 891.3457641601562, - "learning_rate": 2.6325827959198045e-05, - "loss": 59.0138, - "step": 133700 - }, - { - "epoch": 0.5402053192305983, - "grad_norm": 971.3360595703125, - "learning_rate": 2.6322342199996726e-05, - "loss": 42.0446, - "step": 133710 - }, - { - "epoch": 0.540245720495966, - "grad_norm": 1470.095703125, - "learning_rate": 2.6318856415015664e-05, - "loss": 67.5125, - "step": 133720 - }, - { - "epoch": 0.5402861217613336, - "grad_norm": 553.43408203125, - "learning_rate": 2.631537060432282e-05, - "loss": 45.4572, - "step": 133730 - }, - { - "epoch": 0.5403265230267011, - "grad_norm": 1151.4344482421875, - "learning_rate": 2.631188476798614e-05, - "loss": 41.0355, - "step": 133740 - }, - { - "epoch": 0.5403669242920688, - "grad_norm": 1145.021728515625, - "learning_rate": 2.63083989060736e-05, - "loss": 48.7941, - "step": 133750 - }, - { - "epoch": 0.5404073255574364, - "grad_norm": 640.4161987304688, - "learning_rate": 2.6304913018653144e-05, - "loss": 58.756, - "step": 133760 - }, - { - "epoch": 0.5404477268228041, - "grad_norm": 887.5857543945312, - "learning_rate": 2.630142710579274e-05, - "loss": 54.0097, - "step": 133770 - }, - { - "epoch": 0.5404881280881717, - "grad_norm": 817.3486328125, - "learning_rate": 2.6297941167560346e-05, - "loss": 45.1971, - "step": 133780 - }, - { - "epoch": 0.5405285293535393, - "grad_norm": 959.50732421875, - "learning_rate": 2.6294455204023915e-05, - "loss": 43.2953, - "step": 133790 - }, - { - "epoch": 0.540568930618907, - "grad_norm": 315.968017578125, - "learning_rate": 2.6290969215251416e-05, - "loss": 76.2952, - "step": 133800 - }, - { - "epoch": 0.5406093318842746, - "grad_norm": 1908.3179931640625, - "learning_rate": 2.628748320131081e-05, - "loss": 71.9718, - "step": 133810 - }, - { - "epoch": 0.5406497331496423, - "grad_norm": 869.3490600585938, - "learning_rate": 2.6283997162270052e-05, - "loss": 48.5477, - "step": 133820 - }, - { - "epoch": 0.5406901344150099, - "grad_norm": 1390.5584716796875, - "learning_rate": 2.6280511098197113e-05, - "loss": 57.5297, - "step": 133830 - }, - { - "epoch": 0.5407305356803775, - "grad_norm": 1634.8409423828125, - "learning_rate": 2.627702500915995e-05, - "loss": 56.8467, - "step": 133840 - }, - { - "epoch": 0.5407709369457452, - "grad_norm": 796.09716796875, - "learning_rate": 2.6273538895226522e-05, - "loss": 43.6418, - "step": 133850 - }, - { - "epoch": 0.5408113382111128, - "grad_norm": 676.2175903320312, - "learning_rate": 2.6270052756464803e-05, - "loss": 61.2231, - "step": 133860 - }, - { - "epoch": 0.5408517394764804, - "grad_norm": 843.5305786132812, - "learning_rate": 2.626656659294275e-05, - "loss": 39.5511, - "step": 133870 - }, - { - "epoch": 0.540892140741848, - "grad_norm": 584.4107055664062, - "learning_rate": 2.6263080404728325e-05, - "loss": 40.8211, - "step": 133880 - }, - { - "epoch": 0.5409325420072156, - "grad_norm": 617.6050415039062, - "learning_rate": 2.62595941918895e-05, - "loss": 46.0637, - "step": 133890 - }, - { - "epoch": 0.5409729432725833, - "grad_norm": 323.1057434082031, - "learning_rate": 2.6256107954494242e-05, - "loss": 59.272, - "step": 133900 - }, - { - "epoch": 0.5410133445379509, - "grad_norm": 534.6497802734375, - "learning_rate": 2.6252621692610507e-05, - "loss": 47.1891, - "step": 133910 - }, - { - "epoch": 0.5410537458033186, - "grad_norm": 1699.9815673828125, - "learning_rate": 2.6249135406306273e-05, - "loss": 34.6852, - "step": 133920 - }, - { - "epoch": 0.5410941470686862, - "grad_norm": 595.4637451171875, - "learning_rate": 2.6245649095649494e-05, - "loss": 54.051, - "step": 133930 - }, - { - "epoch": 0.5411345483340538, - "grad_norm": 538.2552490234375, - "learning_rate": 2.6242162760708154e-05, - "loss": 35.2021, - "step": 133940 - }, - { - "epoch": 0.5411749495994215, - "grad_norm": 1009.6656494140625, - "learning_rate": 2.6238676401550207e-05, - "loss": 40.208, - "step": 133950 - }, - { - "epoch": 0.5412153508647891, - "grad_norm": 941.228515625, - "learning_rate": 2.623519001824362e-05, - "loss": 48.1016, - "step": 133960 - }, - { - "epoch": 0.5412557521301568, - "grad_norm": 884.5780029296875, - "learning_rate": 2.6231703610856373e-05, - "loss": 57.1634, - "step": 133970 - }, - { - "epoch": 0.5412961533955244, - "grad_norm": 670.623291015625, - "learning_rate": 2.6228217179456433e-05, - "loss": 56.2344, - "step": 133980 - }, - { - "epoch": 0.541336554660892, - "grad_norm": 432.82928466796875, - "learning_rate": 2.6224730724111758e-05, - "loss": 37.2939, - "step": 133990 - }, - { - "epoch": 0.5413769559262596, - "grad_norm": 505.16387939453125, - "learning_rate": 2.6221244244890336e-05, - "loss": 44.5701, - "step": 134000 - }, - { - "epoch": 0.5414173571916272, - "grad_norm": 661.7396240234375, - "learning_rate": 2.6217757741860123e-05, - "loss": 52.0883, - "step": 134010 - }, - { - "epoch": 0.5414577584569948, - "grad_norm": 419.75286865234375, - "learning_rate": 2.6214271215089104e-05, - "loss": 51.607, - "step": 134020 - }, - { - "epoch": 0.5414981597223625, - "grad_norm": 303.2333984375, - "learning_rate": 2.621078466464523e-05, - "loss": 23.728, - "step": 134030 - }, - { - "epoch": 0.5415385609877301, - "grad_norm": 1355.118408203125, - "learning_rate": 2.6207298090596493e-05, - "loss": 61.6765, - "step": 134040 - }, - { - "epoch": 0.5415789622530978, - "grad_norm": 1755.49853515625, - "learning_rate": 2.6203811493010856e-05, - "loss": 44.694, - "step": 134050 - }, - { - "epoch": 0.5416193635184654, - "grad_norm": 396.1255798339844, - "learning_rate": 2.6200324871956295e-05, - "loss": 76.6947, - "step": 134060 - }, - { - "epoch": 0.541659764783833, - "grad_norm": 1570.49072265625, - "learning_rate": 2.619683822750078e-05, - "loss": 64.72, - "step": 134070 - }, - { - "epoch": 0.5417001660492007, - "grad_norm": 632.6025390625, - "learning_rate": 2.6193351559712292e-05, - "loss": 69.9561, - "step": 134080 - }, - { - "epoch": 0.5417405673145683, - "grad_norm": 579.8509521484375, - "learning_rate": 2.6189864868658803e-05, - "loss": 69.4796, - "step": 134090 - }, - { - "epoch": 0.541780968579936, - "grad_norm": 274.0668640136719, - "learning_rate": 2.6186378154408286e-05, - "loss": 46.6143, - "step": 134100 - }, - { - "epoch": 0.5418213698453036, - "grad_norm": 633.9044189453125, - "learning_rate": 2.618289141702871e-05, - "loss": 52.109, - "step": 134110 - }, - { - "epoch": 0.5418617711106711, - "grad_norm": 948.8427124023438, - "learning_rate": 2.6179404656588058e-05, - "loss": 71.229, - "step": 134120 - }, - { - "epoch": 0.5419021723760388, - "grad_norm": 1424.39208984375, - "learning_rate": 2.6175917873154303e-05, - "loss": 61.8993, - "step": 134130 - }, - { - "epoch": 0.5419425736414064, - "grad_norm": 464.7524108886719, - "learning_rate": 2.6172431066795428e-05, - "loss": 60.2696, - "step": 134140 - }, - { - "epoch": 0.541982974906774, - "grad_norm": 422.7701721191406, - "learning_rate": 2.6168944237579406e-05, - "loss": 57.3853, - "step": 134150 - }, - { - "epoch": 0.5420233761721417, - "grad_norm": 1085.177001953125, - "learning_rate": 2.616545738557421e-05, - "loss": 81.7053, - "step": 134160 - }, - { - "epoch": 0.5420637774375093, - "grad_norm": 518.3070068359375, - "learning_rate": 2.6161970510847826e-05, - "loss": 62.6892, - "step": 134170 - }, - { - "epoch": 0.542104178702877, - "grad_norm": 746.0762939453125, - "learning_rate": 2.6158483613468227e-05, - "loss": 48.352, - "step": 134180 - }, - { - "epoch": 0.5421445799682446, - "grad_norm": 437.72576904296875, - "learning_rate": 2.6154996693503396e-05, - "loss": 47.7975, - "step": 134190 - }, - { - "epoch": 0.5421849812336123, - "grad_norm": 1170.718505859375, - "learning_rate": 2.615150975102131e-05, - "loss": 53.6286, - "step": 134200 - }, - { - "epoch": 0.5422253824989799, - "grad_norm": 435.82586669921875, - "learning_rate": 2.6148022786089944e-05, - "loss": 47.8883, - "step": 134210 - }, - { - "epoch": 0.5422657837643475, - "grad_norm": 860.9259643554688, - "learning_rate": 2.6144535798777286e-05, - "loss": 53.4346, - "step": 134220 - }, - { - "epoch": 0.5423061850297152, - "grad_norm": 888.4954223632812, - "learning_rate": 2.6141048789151314e-05, - "loss": 52.0218, - "step": 134230 - }, - { - "epoch": 0.5423465862950828, - "grad_norm": 998.63623046875, - "learning_rate": 2.6137561757280003e-05, - "loss": 33.0164, - "step": 134240 - }, - { - "epoch": 0.5423869875604503, - "grad_norm": 950.3190307617188, - "learning_rate": 2.6134074703231344e-05, - "loss": 58.7313, - "step": 134250 - }, - { - "epoch": 0.542427388825818, - "grad_norm": 1152.3165283203125, - "learning_rate": 2.6130587627073315e-05, - "loss": 65.6751, - "step": 134260 - }, - { - "epoch": 0.5424677900911856, - "grad_norm": 518.1432495117188, - "learning_rate": 2.6127100528873904e-05, - "loss": 42.0353, - "step": 134270 - }, - { - "epoch": 0.5425081913565533, - "grad_norm": 858.6574096679688, - "learning_rate": 2.6123613408701082e-05, - "loss": 64.6581, - "step": 134280 - }, - { - "epoch": 0.5425485926219209, - "grad_norm": 709.0899658203125, - "learning_rate": 2.6120126266622836e-05, - "loss": 41.2389, - "step": 134290 - }, - { - "epoch": 0.5425889938872885, - "grad_norm": 1239.24365234375, - "learning_rate": 2.6116639102707156e-05, - "loss": 35.4557, - "step": 134300 - }, - { - "epoch": 0.5426293951526562, - "grad_norm": 486.78656005859375, - "learning_rate": 2.6113151917022018e-05, - "loss": 28.9147, - "step": 134310 - }, - { - "epoch": 0.5426697964180238, - "grad_norm": 784.9171142578125, - "learning_rate": 2.610966470963541e-05, - "loss": 31.0144, - "step": 134320 - }, - { - "epoch": 0.5427101976833915, - "grad_norm": 698.8657836914062, - "learning_rate": 2.6106177480615318e-05, - "loss": 67.4756, - "step": 134330 - }, - { - "epoch": 0.5427505989487591, - "grad_norm": 612.57958984375, - "learning_rate": 2.610269023002973e-05, - "loss": 34.029, - "step": 134340 - }, - { - "epoch": 0.5427910002141267, - "grad_norm": 658.6572265625, - "learning_rate": 2.6099202957946624e-05, - "loss": 44.2772, - "step": 134350 - }, - { - "epoch": 0.5428314014794944, - "grad_norm": 1145.214111328125, - "learning_rate": 2.6095715664433995e-05, - "loss": 62.3332, - "step": 134360 - }, - { - "epoch": 0.542871802744862, - "grad_norm": 735.1185913085938, - "learning_rate": 2.6092228349559822e-05, - "loss": 43.975, - "step": 134370 - }, - { - "epoch": 0.5429122040102295, - "grad_norm": 706.6088256835938, - "learning_rate": 2.6088741013392098e-05, - "loss": 65.3609, - "step": 134380 - }, - { - "epoch": 0.5429526052755972, - "grad_norm": 851.6122436523438, - "learning_rate": 2.6085253655998805e-05, - "loss": 52.3161, - "step": 134390 - }, - { - "epoch": 0.5429930065409648, - "grad_norm": 286.50323486328125, - "learning_rate": 2.6081766277447927e-05, - "loss": 57.4317, - "step": 134400 - }, - { - "epoch": 0.5430334078063325, - "grad_norm": 781.5896606445312, - "learning_rate": 2.6078278877807467e-05, - "loss": 39.7626, - "step": 134410 - }, - { - "epoch": 0.5430738090717001, - "grad_norm": 825.2921142578125, - "learning_rate": 2.6074791457145398e-05, - "loss": 48.5062, - "step": 134420 - }, - { - "epoch": 0.5431142103370677, - "grad_norm": 396.2430114746094, - "learning_rate": 2.6071304015529726e-05, - "loss": 53.2996, - "step": 134430 - }, - { - "epoch": 0.5431546116024354, - "grad_norm": 781.8477172851562, - "learning_rate": 2.606781655302843e-05, - "loss": 43.2636, - "step": 134440 - }, - { - "epoch": 0.543195012867803, - "grad_norm": 1481.7069091796875, - "learning_rate": 2.6064329069709493e-05, - "loss": 64.267, - "step": 134450 - }, - { - "epoch": 0.5432354141331707, - "grad_norm": 272.7846984863281, - "learning_rate": 2.6060841565640913e-05, - "loss": 41.5866, - "step": 134460 - }, - { - "epoch": 0.5432758153985383, - "grad_norm": 696.9764404296875, - "learning_rate": 2.6057354040890685e-05, - "loss": 53.8912, - "step": 134470 - }, - { - "epoch": 0.543316216663906, - "grad_norm": 426.2699279785156, - "learning_rate": 2.605386649552679e-05, - "loss": 39.0158, - "step": 134480 - }, - { - "epoch": 0.5433566179292736, - "grad_norm": 325.85888671875, - "learning_rate": 2.605037892961723e-05, - "loss": 55.8296, - "step": 134490 - }, - { - "epoch": 0.5433970191946412, - "grad_norm": 485.76611328125, - "learning_rate": 2.604689134322999e-05, - "loss": 85.8357, - "step": 134500 - }, - { - "epoch": 0.5434374204600088, - "grad_norm": 769.1482543945312, - "learning_rate": 2.604340373643307e-05, - "loss": 47.1949, - "step": 134510 - }, - { - "epoch": 0.5434778217253764, - "grad_norm": 2402.829833984375, - "learning_rate": 2.603991610929445e-05, - "loss": 55.1604, - "step": 134520 - }, - { - "epoch": 0.543518222990744, - "grad_norm": 572.4566040039062, - "learning_rate": 2.6036428461882133e-05, - "loss": 46.891, - "step": 134530 - }, - { - "epoch": 0.5435586242561117, - "grad_norm": 387.22119140625, - "learning_rate": 2.603294079426411e-05, - "loss": 61.2975, - "step": 134540 - }, - { - "epoch": 0.5435990255214793, - "grad_norm": 427.8232116699219, - "learning_rate": 2.602945310650838e-05, - "loss": 30.9293, - "step": 134550 - }, - { - "epoch": 0.543639426786847, - "grad_norm": 595.3522338867188, - "learning_rate": 2.6025965398682916e-05, - "loss": 42.5073, - "step": 134560 - }, - { - "epoch": 0.5436798280522146, - "grad_norm": 946.1700439453125, - "learning_rate": 2.602247767085574e-05, - "loss": 60.556, - "step": 134570 - }, - { - "epoch": 0.5437202293175822, - "grad_norm": 1055.9263916015625, - "learning_rate": 2.6018989923094828e-05, - "loss": 50.0367, - "step": 134580 - }, - { - "epoch": 0.5437606305829499, - "grad_norm": 1703.814453125, - "learning_rate": 2.6015502155468192e-05, - "loss": 41.7789, - "step": 134590 - }, - { - "epoch": 0.5438010318483175, - "grad_norm": 632.1807250976562, - "learning_rate": 2.6012014368043814e-05, - "loss": 72.0978, - "step": 134600 - }, - { - "epoch": 0.5438414331136852, - "grad_norm": 1356.0809326171875, - "learning_rate": 2.6008526560889694e-05, - "loss": 66.3761, - "step": 134610 - }, - { - "epoch": 0.5438818343790528, - "grad_norm": 691.0104370117188, - "learning_rate": 2.6005038734073833e-05, - "loss": 52.5915, - "step": 134620 - }, - { - "epoch": 0.5439222356444204, - "grad_norm": 687.1522216796875, - "learning_rate": 2.600155088766423e-05, - "loss": 40.2666, - "step": 134630 - }, - { - "epoch": 0.543962636909788, - "grad_norm": 682.845947265625, - "learning_rate": 2.5998063021728865e-05, - "loss": 49.6002, - "step": 134640 - }, - { - "epoch": 0.5440030381751556, - "grad_norm": 1438.562255859375, - "learning_rate": 2.599457513633575e-05, - "loss": 60.5318, - "step": 134650 - }, - { - "epoch": 0.5440434394405232, - "grad_norm": 388.2558288574219, - "learning_rate": 2.599108723155288e-05, - "loss": 37.3879, - "step": 134660 - }, - { - "epoch": 0.5440838407058909, - "grad_norm": 543.9114379882812, - "learning_rate": 2.5987599307448256e-05, - "loss": 63.803, - "step": 134670 - }, - { - "epoch": 0.5441242419712585, - "grad_norm": 2017.16796875, - "learning_rate": 2.5984111364089876e-05, - "loss": 64.5238, - "step": 134680 - }, - { - "epoch": 0.5441646432366262, - "grad_norm": 572.1366577148438, - "learning_rate": 2.598062340154574e-05, - "loss": 50.1777, - "step": 134690 - }, - { - "epoch": 0.5442050445019938, - "grad_norm": 289.9861145019531, - "learning_rate": 2.5977135419883842e-05, - "loss": 53.5399, - "step": 134700 - }, - { - "epoch": 0.5442454457673614, - "grad_norm": 889.2007446289062, - "learning_rate": 2.597364741917219e-05, - "loss": 72.6208, - "step": 134710 - }, - { - "epoch": 0.5442858470327291, - "grad_norm": 1161.2286376953125, - "learning_rate": 2.597015939947878e-05, - "loss": 59.4138, - "step": 134720 - }, - { - "epoch": 0.5443262482980967, - "grad_norm": 737.5440063476562, - "learning_rate": 2.59666713608716e-05, - "loss": 67.8319, - "step": 134730 - }, - { - "epoch": 0.5443666495634644, - "grad_norm": 346.41070556640625, - "learning_rate": 2.596318330341868e-05, - "loss": 59.8536, - "step": 134740 - }, - { - "epoch": 0.544407050828832, - "grad_norm": 390.0060119628906, - "learning_rate": 2.5959695227188004e-05, - "loss": 38.0195, - "step": 134750 - }, - { - "epoch": 0.5444474520941995, - "grad_norm": 577.295166015625, - "learning_rate": 2.595620713224757e-05, - "loss": 59.1009, - "step": 134760 - }, - { - "epoch": 0.5444878533595672, - "grad_norm": 509.3887939453125, - "learning_rate": 2.5952719018665382e-05, - "loss": 47.7892, - "step": 134770 - }, - { - "epoch": 0.5445282546249348, - "grad_norm": 715.0154418945312, - "learning_rate": 2.5949230886509457e-05, - "loss": 66.1531, - "step": 134780 - }, - { - "epoch": 0.5445686558903025, - "grad_norm": 1432.1541748046875, - "learning_rate": 2.594574273584779e-05, - "loss": 52.1664, - "step": 134790 - }, - { - "epoch": 0.5446090571556701, - "grad_norm": 882.5562133789062, - "learning_rate": 2.594225456674837e-05, - "loss": 72.5866, - "step": 134800 - }, - { - "epoch": 0.5446494584210377, - "grad_norm": 658.062744140625, - "learning_rate": 2.5938766379279212e-05, - "loss": 62.3709, - "step": 134810 - }, - { - "epoch": 0.5446898596864054, - "grad_norm": 738.4263916015625, - "learning_rate": 2.593527817350832e-05, - "loss": 64.2618, - "step": 134820 - }, - { - "epoch": 0.544730260951773, - "grad_norm": 459.119140625, - "learning_rate": 2.593178994950371e-05, - "loss": 47.4513, - "step": 134830 - }, - { - "epoch": 0.5447706622171407, - "grad_norm": 598.9742431640625, - "learning_rate": 2.5928301707333365e-05, - "loss": 37.9745, - "step": 134840 - }, - { - "epoch": 0.5448110634825083, - "grad_norm": 628.7976684570312, - "learning_rate": 2.59248134470653e-05, - "loss": 29.9893, - "step": 134850 - }, - { - "epoch": 0.5448514647478759, - "grad_norm": 485.0265808105469, - "learning_rate": 2.592132516876753e-05, - "loss": 62.2994, - "step": 134860 - }, - { - "epoch": 0.5448918660132436, - "grad_norm": 485.9800720214844, - "learning_rate": 2.591783687250804e-05, - "loss": 63.8224, - "step": 134870 - }, - { - "epoch": 0.5449322672786112, - "grad_norm": 473.7646484375, - "learning_rate": 2.5914348558354857e-05, - "loss": 37.5881, - "step": 134880 - }, - { - "epoch": 0.5449726685439787, - "grad_norm": 760.0999755859375, - "learning_rate": 2.5910860226375972e-05, - "loss": 70.4306, - "step": 134890 - }, - { - "epoch": 0.5450130698093464, - "grad_norm": 886.9143676757812, - "learning_rate": 2.5907371876639398e-05, - "loss": 59.3818, - "step": 134900 - }, - { - "epoch": 0.545053471074714, - "grad_norm": 765.5799560546875, - "learning_rate": 2.5903883509213146e-05, - "loss": 26.8561, - "step": 134910 - }, - { - "epoch": 0.5450938723400817, - "grad_norm": 939.5110473632812, - "learning_rate": 2.5900395124165218e-05, - "loss": 54.5256, - "step": 134920 - }, - { - "epoch": 0.5451342736054493, - "grad_norm": 1170.457275390625, - "learning_rate": 2.589690672156362e-05, - "loss": 57.3966, - "step": 134930 - }, - { - "epoch": 0.5451746748708169, - "grad_norm": 346.5357666015625, - "learning_rate": 2.5893418301476364e-05, - "loss": 63.307, - "step": 134940 - }, - { - "epoch": 0.5452150761361846, - "grad_norm": 643.980224609375, - "learning_rate": 2.5889929863971462e-05, - "loss": 49.9001, - "step": 134950 - }, - { - "epoch": 0.5452554774015522, - "grad_norm": 1780.751220703125, - "learning_rate": 2.5886441409116923e-05, - "loss": 78.8434, - "step": 134960 - }, - { - "epoch": 0.5452958786669199, - "grad_norm": 705.0413208007812, - "learning_rate": 2.5882952936980746e-05, - "loss": 74.3941, - "step": 134970 - }, - { - "epoch": 0.5453362799322875, - "grad_norm": 1106.158935546875, - "learning_rate": 2.5879464447630946e-05, - "loss": 45.0385, - "step": 134980 - }, - { - "epoch": 0.5453766811976551, - "grad_norm": 502.71795654296875, - "learning_rate": 2.587597594113554e-05, - "loss": 59.7987, - "step": 134990 - }, - { - "epoch": 0.5454170824630228, - "grad_norm": 651.170654296875, - "learning_rate": 2.587248741756253e-05, - "loss": 51.0282, - "step": 135000 - }, - { - "epoch": 0.5454574837283904, - "grad_norm": 539.2788696289062, - "learning_rate": 2.5868998876979923e-05, - "loss": 86.3156, - "step": 135010 - }, - { - "epoch": 0.545497884993758, - "grad_norm": 836.768310546875, - "learning_rate": 2.5865510319455737e-05, - "loss": 73.081, - "step": 135020 - }, - { - "epoch": 0.5455382862591256, - "grad_norm": 370.10723876953125, - "learning_rate": 2.586202174505799e-05, - "loss": 62.4055, - "step": 135030 - }, - { - "epoch": 0.5455786875244932, - "grad_norm": 620.5315551757812, - "learning_rate": 2.5858533153854675e-05, - "loss": 49.814, - "step": 135040 - }, - { - "epoch": 0.5456190887898609, - "grad_norm": 597.864501953125, - "learning_rate": 2.5855044545913825e-05, - "loss": 35.0934, - "step": 135050 - }, - { - "epoch": 0.5456594900552285, - "grad_norm": 2643.165283203125, - "learning_rate": 2.5851555921303434e-05, - "loss": 59.2937, - "step": 135060 - }, - { - "epoch": 0.5456998913205962, - "grad_norm": 0.0, - "learning_rate": 2.584806728009152e-05, - "loss": 39.2349, - "step": 135070 - }, - { - "epoch": 0.5457402925859638, - "grad_norm": 1795.3609619140625, - "learning_rate": 2.58445786223461e-05, - "loss": 59.5827, - "step": 135080 - }, - { - "epoch": 0.5457806938513314, - "grad_norm": 647.521484375, - "learning_rate": 2.584108994813518e-05, - "loss": 39.9268, - "step": 135090 - }, - { - "epoch": 0.5458210951166991, - "grad_norm": 280.2913818359375, - "learning_rate": 2.583760125752679e-05, - "loss": 34.3207, - "step": 135100 - }, - { - "epoch": 0.5458614963820667, - "grad_norm": 788.9907836914062, - "learning_rate": 2.5834112550588922e-05, - "loss": 70.1502, - "step": 135110 - }, - { - "epoch": 0.5459018976474344, - "grad_norm": 1395.0435791015625, - "learning_rate": 2.5830623827389612e-05, - "loss": 68.4634, - "step": 135120 - }, - { - "epoch": 0.545942298912802, - "grad_norm": 507.60723876953125, - "learning_rate": 2.5827135087996858e-05, - "loss": 60.2177, - "step": 135130 - }, - { - "epoch": 0.5459827001781696, - "grad_norm": 657.4443969726562, - "learning_rate": 2.5823646332478674e-05, - "loss": 115.6672, - "step": 135140 - }, - { - "epoch": 0.5460231014435372, - "grad_norm": 0.0, - "learning_rate": 2.582015756090308e-05, - "loss": 91.5032, - "step": 135150 - }, - { - "epoch": 0.5460635027089048, - "grad_norm": 770.5996704101562, - "learning_rate": 2.5816668773338098e-05, - "loss": 49.6053, - "step": 135160 - }, - { - "epoch": 0.5461039039742724, - "grad_norm": 915.6427612304688, - "learning_rate": 2.5813179969851737e-05, - "loss": 74.4677, - "step": 135170 - }, - { - "epoch": 0.5461443052396401, - "grad_norm": 642.6882934570312, - "learning_rate": 2.5809691150512012e-05, - "loss": 39.5173, - "step": 135180 - }, - { - "epoch": 0.5461847065050077, - "grad_norm": 1886.410888671875, - "learning_rate": 2.5806202315386934e-05, - "loss": 62.8264, - "step": 135190 - }, - { - "epoch": 0.5462251077703754, - "grad_norm": 546.2448120117188, - "learning_rate": 2.5802713464544542e-05, - "loss": 45.3419, - "step": 135200 - }, - { - "epoch": 0.546265509035743, - "grad_norm": 10494.7939453125, - "learning_rate": 2.5799224598052828e-05, - "loss": 71.9984, - "step": 135210 - }, - { - "epoch": 0.5463059103011106, - "grad_norm": 660.2236938476562, - "learning_rate": 2.5795735715979823e-05, - "loss": 71.6231, - "step": 135220 - }, - { - "epoch": 0.5463463115664783, - "grad_norm": 1856.3646240234375, - "learning_rate": 2.579224681839354e-05, - "loss": 60.5392, - "step": 135230 - }, - { - "epoch": 0.5463867128318459, - "grad_norm": 680.1478881835938, - "learning_rate": 2.5788757905362e-05, - "loss": 38.0885, - "step": 135240 - }, - { - "epoch": 0.5464271140972136, - "grad_norm": 603.8659057617188, - "learning_rate": 2.578526897695321e-05, - "loss": 76.2062, - "step": 135250 - }, - { - "epoch": 0.5464675153625812, - "grad_norm": 587.8827514648438, - "learning_rate": 2.5781780033235204e-05, - "loss": 47.7149, - "step": 135260 - }, - { - "epoch": 0.5465079166279488, - "grad_norm": 704.3698120117188, - "learning_rate": 2.5778291074275983e-05, - "loss": 38.3459, - "step": 135270 - }, - { - "epoch": 0.5465483178933164, - "grad_norm": 840.8921508789062, - "learning_rate": 2.5774802100143592e-05, - "loss": 47.8546, - "step": 135280 - }, - { - "epoch": 0.546588719158684, - "grad_norm": 855.8980102539062, - "learning_rate": 2.5771313110906026e-05, - "loss": 59.4118, - "step": 135290 - }, - { - "epoch": 0.5466291204240517, - "grad_norm": 342.91058349609375, - "learning_rate": 2.576782410663132e-05, - "loss": 68.2463, - "step": 135300 - }, - { - "epoch": 0.5466695216894193, - "grad_norm": 579.0650634765625, - "learning_rate": 2.576433508738748e-05, - "loss": 44.4399, - "step": 135310 - }, - { - "epoch": 0.5467099229547869, - "grad_norm": 1014.5051879882812, - "learning_rate": 2.5760846053242544e-05, - "loss": 61.9376, - "step": 135320 - }, - { - "epoch": 0.5467503242201546, - "grad_norm": 702.8917236328125, - "learning_rate": 2.5757357004264514e-05, - "loss": 84.4671, - "step": 135330 - }, - { - "epoch": 0.5467907254855222, - "grad_norm": 780.0779418945312, - "learning_rate": 2.575386794052142e-05, - "loss": 59.8046, - "step": 135340 - }, - { - "epoch": 0.5468311267508899, - "grad_norm": 1222.5645751953125, - "learning_rate": 2.575037886208128e-05, - "loss": 53.4124, - "step": 135350 - }, - { - "epoch": 0.5468715280162575, - "grad_norm": 512.1873779296875, - "learning_rate": 2.5746889769012128e-05, - "loss": 50.8429, - "step": 135360 - }, - { - "epoch": 0.5469119292816251, - "grad_norm": 329.0351257324219, - "learning_rate": 2.5743400661381968e-05, - "loss": 33.7403, - "step": 135370 - }, - { - "epoch": 0.5469523305469928, - "grad_norm": 1148.149169921875, - "learning_rate": 2.573991153925883e-05, - "loss": 71.7041, - "step": 135380 - }, - { - "epoch": 0.5469927318123604, - "grad_norm": 1018.9400634765625, - "learning_rate": 2.5736422402710742e-05, - "loss": 63.8832, - "step": 135390 - }, - { - "epoch": 0.5470331330777279, - "grad_norm": 339.0023193359375, - "learning_rate": 2.5732933251805713e-05, - "loss": 63.8842, - "step": 135400 - }, - { - "epoch": 0.5470735343430956, - "grad_norm": 316.14666748046875, - "learning_rate": 2.5729444086611777e-05, - "loss": 35.9186, - "step": 135410 - }, - { - "epoch": 0.5471139356084632, - "grad_norm": 304.40130615234375, - "learning_rate": 2.5725954907196947e-05, - "loss": 32.5816, - "step": 135420 - }, - { - "epoch": 0.5471543368738309, - "grad_norm": 640.4146118164062, - "learning_rate": 2.572246571362925e-05, - "loss": 56.8944, - "step": 135430 - }, - { - "epoch": 0.5471947381391985, - "grad_norm": 1118.9725341796875, - "learning_rate": 2.5718976505976717e-05, - "loss": 42.6715, - "step": 135440 - }, - { - "epoch": 0.5472351394045661, - "grad_norm": 3967.9638671875, - "learning_rate": 2.571548728430737e-05, - "loss": 68.9002, - "step": 135450 - }, - { - "epoch": 0.5472755406699338, - "grad_norm": 647.3598022460938, - "learning_rate": 2.5711998048689227e-05, - "loss": 69.0035, - "step": 135460 - }, - { - "epoch": 0.5473159419353014, - "grad_norm": 724.192138671875, - "learning_rate": 2.5708508799190322e-05, - "loss": 59.7795, - "step": 135470 - }, - { - "epoch": 0.5473563432006691, - "grad_norm": 251.8446807861328, - "learning_rate": 2.5705019535878668e-05, - "loss": 53.0616, - "step": 135480 - }, - { - "epoch": 0.5473967444660367, - "grad_norm": 777.9346923828125, - "learning_rate": 2.5701530258822294e-05, - "loss": 75.1509, - "step": 135490 - }, - { - "epoch": 0.5474371457314043, - "grad_norm": 483.5498352050781, - "learning_rate": 2.5698040968089225e-05, - "loss": 53.8124, - "step": 135500 - }, - { - "epoch": 0.547477546996772, - "grad_norm": 1032.5994873046875, - "learning_rate": 2.569455166374749e-05, - "loss": 44.2035, - "step": 135510 - }, - { - "epoch": 0.5475179482621396, - "grad_norm": 527.7037353515625, - "learning_rate": 2.569106234586511e-05, - "loss": 67.0334, - "step": 135520 - }, - { - "epoch": 0.5475583495275071, - "grad_norm": 494.0249328613281, - "learning_rate": 2.568757301451012e-05, - "loss": 34.6893, - "step": 135530 - }, - { - "epoch": 0.5475987507928748, - "grad_norm": 351.9414978027344, - "learning_rate": 2.5684083669750537e-05, - "loss": 59.7537, - "step": 135540 - }, - { - "epoch": 0.5476391520582424, - "grad_norm": 922.2720336914062, - "learning_rate": 2.568059431165438e-05, - "loss": 64.9178, - "step": 135550 - }, - { - "epoch": 0.5476795533236101, - "grad_norm": 570.0916137695312, - "learning_rate": 2.5677104940289702e-05, - "loss": 40.3571, - "step": 135560 - }, - { - "epoch": 0.5477199545889777, - "grad_norm": 652.1029663085938, - "learning_rate": 2.567361555572452e-05, - "loss": 49.9205, - "step": 135570 - }, - { - "epoch": 0.5477603558543453, - "grad_norm": 826.6941528320312, - "learning_rate": 2.5670126158026842e-05, - "loss": 54.1507, - "step": 135580 - }, - { - "epoch": 0.547800757119713, - "grad_norm": 547.8671875, - "learning_rate": 2.5666636747264705e-05, - "loss": 59.982, - "step": 135590 - }, - { - "epoch": 0.5478411583850806, - "grad_norm": 757.22998046875, - "learning_rate": 2.566314732350615e-05, - "loss": 56.2098, - "step": 135600 - }, - { - "epoch": 0.5478815596504483, - "grad_norm": 351.811767578125, - "learning_rate": 2.56596578868192e-05, - "loss": 65.6569, - "step": 135610 - }, - { - "epoch": 0.5479219609158159, - "grad_norm": 1225.1767578125, - "learning_rate": 2.565616843727187e-05, - "loss": 53.0704, - "step": 135620 - }, - { - "epoch": 0.5479623621811835, - "grad_norm": 1242.2264404296875, - "learning_rate": 2.5652678974932204e-05, - "loss": 71.0538, - "step": 135630 - }, - { - "epoch": 0.5480027634465512, - "grad_norm": 195.21212768554688, - "learning_rate": 2.5649189499868232e-05, - "loss": 52.8797, - "step": 135640 - }, - { - "epoch": 0.5480431647119188, - "grad_norm": 1128.5284423828125, - "learning_rate": 2.564570001214797e-05, - "loss": 51.5018, - "step": 135650 - }, - { - "epoch": 0.5480835659772864, - "grad_norm": 694.2415161132812, - "learning_rate": 2.5642210511839452e-05, - "loss": 61.8394, - "step": 135660 - }, - { - "epoch": 0.548123967242654, - "grad_norm": 901.8828735351562, - "learning_rate": 2.5638720999010713e-05, - "loss": 48.585, - "step": 135670 - }, - { - "epoch": 0.5481643685080216, - "grad_norm": 640.2681274414062, - "learning_rate": 2.5635231473729772e-05, - "loss": 43.3395, - "step": 135680 - }, - { - "epoch": 0.5482047697733893, - "grad_norm": 672.07421875, - "learning_rate": 2.5631741936064675e-05, - "loss": 46.9696, - "step": 135690 - }, - { - "epoch": 0.5482451710387569, - "grad_norm": 883.359130859375, - "learning_rate": 2.562825238608344e-05, - "loss": 43.4834, - "step": 135700 - }, - { - "epoch": 0.5482855723041246, - "grad_norm": 1151.069091796875, - "learning_rate": 2.56247628238541e-05, - "loss": 52.7021, - "step": 135710 - }, - { - "epoch": 0.5483259735694922, - "grad_norm": 1026.870361328125, - "learning_rate": 2.562127324944469e-05, - "loss": 67.7404, - "step": 135720 - }, - { - "epoch": 0.5483663748348598, - "grad_norm": 348.47821044921875, - "learning_rate": 2.561778366292324e-05, - "loss": 36.8606, - "step": 135730 - }, - { - "epoch": 0.5484067761002275, - "grad_norm": 1066.57666015625, - "learning_rate": 2.5614294064357773e-05, - "loss": 95.5207, - "step": 135740 - }, - { - "epoch": 0.5484471773655951, - "grad_norm": 504.4042053222656, - "learning_rate": 2.5610804453816333e-05, - "loss": 50.8941, - "step": 135750 - }, - { - "epoch": 0.5484875786309628, - "grad_norm": 811.9468383789062, - "learning_rate": 2.560731483136694e-05, - "loss": 39.8252, - "step": 135760 - }, - { - "epoch": 0.5485279798963304, - "grad_norm": 0.0, - "learning_rate": 2.560382519707764e-05, - "loss": 78.3535, - "step": 135770 - }, - { - "epoch": 0.548568381161698, - "grad_norm": 522.0183715820312, - "learning_rate": 2.5600335551016445e-05, - "loss": 57.2499, - "step": 135780 - }, - { - "epoch": 0.5486087824270656, - "grad_norm": 385.5660705566406, - "learning_rate": 2.559684589325141e-05, - "loss": 64.1449, - "step": 135790 - }, - { - "epoch": 0.5486491836924332, - "grad_norm": 1208.748779296875, - "learning_rate": 2.559335622385055e-05, - "loss": 71.5, - "step": 135800 - }, - { - "epoch": 0.5486895849578008, - "grad_norm": 794.9710083007812, - "learning_rate": 2.5589866542881912e-05, - "loss": 72.0629, - "step": 135810 - }, - { - "epoch": 0.5487299862231685, - "grad_norm": 589.7503662109375, - "learning_rate": 2.5586376850413517e-05, - "loss": 50.9222, - "step": 135820 - }, - { - "epoch": 0.5487703874885361, - "grad_norm": 743.7083129882812, - "learning_rate": 2.5582887146513406e-05, - "loss": 36.8839, - "step": 135830 - }, - { - "epoch": 0.5488107887539038, - "grad_norm": 826.81787109375, - "learning_rate": 2.5579397431249606e-05, - "loss": 52.2726, - "step": 135840 - }, - { - "epoch": 0.5488511900192714, - "grad_norm": 924.5673828125, - "learning_rate": 2.557590770469016e-05, - "loss": 45.644, - "step": 135850 - }, - { - "epoch": 0.548891591284639, - "grad_norm": 1268.26123046875, - "learning_rate": 2.5572417966903094e-05, - "loss": 78.6158, - "step": 135860 - }, - { - "epoch": 0.5489319925500067, - "grad_norm": 822.70703125, - "learning_rate": 2.5568928217956446e-05, - "loss": 36.7586, - "step": 135870 - }, - { - "epoch": 0.5489723938153743, - "grad_norm": 612.1898193359375, - "learning_rate": 2.5565438457918244e-05, - "loss": 35.3985, - "step": 135880 - }, - { - "epoch": 0.549012795080742, - "grad_norm": 3193.71435546875, - "learning_rate": 2.5561948686856535e-05, - "loss": 47.6024, - "step": 135890 - }, - { - "epoch": 0.5490531963461096, - "grad_norm": 145.80606079101562, - "learning_rate": 2.5558458904839345e-05, - "loss": 25.6671, - "step": 135900 - }, - { - "epoch": 0.5490935976114772, - "grad_norm": 1195.0611572265625, - "learning_rate": 2.5554969111934713e-05, - "loss": 62.6013, - "step": 135910 - }, - { - "epoch": 0.5491339988768448, - "grad_norm": 651.7173461914062, - "learning_rate": 2.5551479308210668e-05, - "loss": 56.0175, - "step": 135920 - }, - { - "epoch": 0.5491744001422124, - "grad_norm": 658.3231201171875, - "learning_rate": 2.5547989493735258e-05, - "loss": 45.2132, - "step": 135930 - }, - { - "epoch": 0.54921480140758, - "grad_norm": 629.660400390625, - "learning_rate": 2.5544499668576505e-05, - "loss": 56.837, - "step": 135940 - }, - { - "epoch": 0.5492552026729477, - "grad_norm": 558.2518920898438, - "learning_rate": 2.5541009832802448e-05, - "loss": 47.7196, - "step": 135950 - }, - { - "epoch": 0.5492956039383153, - "grad_norm": 552.365478515625, - "learning_rate": 2.5537519986481122e-05, - "loss": 78.8988, - "step": 135960 - }, - { - "epoch": 0.549336005203683, - "grad_norm": 1013.0728149414062, - "learning_rate": 2.5534030129680577e-05, - "loss": 47.0896, - "step": 135970 - }, - { - "epoch": 0.5493764064690506, - "grad_norm": 625.7086181640625, - "learning_rate": 2.5530540262468837e-05, - "loss": 45.671, - "step": 135980 - }, - { - "epoch": 0.5494168077344183, - "grad_norm": 4387.12646484375, - "learning_rate": 2.552705038491394e-05, - "loss": 62.9142, - "step": 135990 - }, - { - "epoch": 0.5494572089997859, - "grad_norm": 943.5322875976562, - "learning_rate": 2.5523560497083926e-05, - "loss": 55.8262, - "step": 136000 - }, - { - "epoch": 0.5494976102651535, - "grad_norm": 213.67581176757812, - "learning_rate": 2.552007059904683e-05, - "loss": 48.1262, - "step": 136010 - }, - { - "epoch": 0.5495380115305212, - "grad_norm": 1800.0438232421875, - "learning_rate": 2.5516580690870695e-05, - "loss": 46.4066, - "step": 136020 - }, - { - "epoch": 0.5495784127958888, - "grad_norm": 747.0396728515625, - "learning_rate": 2.5513090772623543e-05, - "loss": 72.1643, - "step": 136030 - }, - { - "epoch": 0.5496188140612563, - "grad_norm": 852.1857299804688, - "learning_rate": 2.5509600844373427e-05, - "loss": 48.1462, - "step": 136040 - }, - { - "epoch": 0.549659215326624, - "grad_norm": 584.1296997070312, - "learning_rate": 2.550611090618838e-05, - "loss": 64.4517, - "step": 136050 - }, - { - "epoch": 0.5496996165919916, - "grad_norm": 639.3566284179688, - "learning_rate": 2.5502620958136443e-05, - "loss": 55.9694, - "step": 136060 - }, - { - "epoch": 0.5497400178573593, - "grad_norm": 634.5128173828125, - "learning_rate": 2.5499131000285646e-05, - "loss": 44.5816, - "step": 136070 - }, - { - "epoch": 0.5497804191227269, - "grad_norm": 403.07159423828125, - "learning_rate": 2.5495641032704043e-05, - "loss": 63.8988, - "step": 136080 - }, - { - "epoch": 0.5498208203880945, - "grad_norm": 770.9432373046875, - "learning_rate": 2.549215105545965e-05, - "loss": 61.3341, - "step": 136090 - }, - { - "epoch": 0.5498612216534622, - "grad_norm": 0.0, - "learning_rate": 2.5488661068620533e-05, - "loss": 37.3569, - "step": 136100 - }, - { - "epoch": 0.5499016229188298, - "grad_norm": 493.512939453125, - "learning_rate": 2.5485171072254704e-05, - "loss": 54.7347, - "step": 136110 - }, - { - "epoch": 0.5499420241841975, - "grad_norm": 0.0, - "learning_rate": 2.5481681066430217e-05, - "loss": 82.7196, - "step": 136120 - }, - { - "epoch": 0.5499824254495651, - "grad_norm": 815.2086791992188, - "learning_rate": 2.5478191051215117e-05, - "loss": 39.4943, - "step": 136130 - }, - { - "epoch": 0.5500228267149327, - "grad_norm": 294.3896789550781, - "learning_rate": 2.5474701026677433e-05, - "loss": 37.5106, - "step": 136140 - }, - { - "epoch": 0.5500632279803004, - "grad_norm": 645.8425903320312, - "learning_rate": 2.547121099288521e-05, - "loss": 51.6582, - "step": 136150 - }, - { - "epoch": 0.550103629245668, - "grad_norm": 259.2788391113281, - "learning_rate": 2.5467720949906483e-05, - "loss": 63.0887, - "step": 136160 - }, - { - "epoch": 0.5501440305110356, - "grad_norm": 341.1513977050781, - "learning_rate": 2.5464230897809294e-05, - "loss": 37.9722, - "step": 136170 - }, - { - "epoch": 0.5501844317764032, - "grad_norm": 1073.13427734375, - "learning_rate": 2.546074083666169e-05, - "loss": 63.717, - "step": 136180 - }, - { - "epoch": 0.5502248330417708, - "grad_norm": 564.9509887695312, - "learning_rate": 2.54572507665317e-05, - "loss": 50.2804, - "step": 136190 - }, - { - "epoch": 0.5502652343071385, - "grad_norm": 547.4306640625, - "learning_rate": 2.545376068748737e-05, - "loss": 52.0443, - "step": 136200 - }, - { - "epoch": 0.5503056355725061, - "grad_norm": 2279.78662109375, - "learning_rate": 2.545027059959674e-05, - "loss": 59.4126, - "step": 136210 - }, - { - "epoch": 0.5503460368378738, - "grad_norm": 826.3348388671875, - "learning_rate": 2.544678050292787e-05, - "loss": 47.2906, - "step": 136220 - }, - { - "epoch": 0.5503864381032414, - "grad_norm": 681.75927734375, - "learning_rate": 2.5443290397548768e-05, - "loss": 44.3834, - "step": 136230 - }, - { - "epoch": 0.550426839368609, - "grad_norm": 677.0407104492188, - "learning_rate": 2.5439800283527494e-05, - "loss": 78.2347, - "step": 136240 - }, - { - "epoch": 0.5504672406339767, - "grad_norm": 516.7835693359375, - "learning_rate": 2.5436310160932092e-05, - "loss": 52.4863, - "step": 136250 - }, - { - "epoch": 0.5505076418993443, - "grad_norm": 830.3655395507812, - "learning_rate": 2.54328200298306e-05, - "loss": 44.6782, - "step": 136260 - }, - { - "epoch": 0.550548043164712, - "grad_norm": 475.61456298828125, - "learning_rate": 2.542932989029105e-05, - "loss": 47.868, - "step": 136270 - }, - { - "epoch": 0.5505884444300796, - "grad_norm": 869.2997436523438, - "learning_rate": 2.5425839742381498e-05, - "loss": 58.4036, - "step": 136280 - }, - { - "epoch": 0.5506288456954472, - "grad_norm": 452.5263671875, - "learning_rate": 2.5422349586169974e-05, - "loss": 54.134, - "step": 136290 - }, - { - "epoch": 0.5506692469608148, - "grad_norm": 624.853759765625, - "learning_rate": 2.5418859421724538e-05, - "loss": 67.5689, - "step": 136300 - }, - { - "epoch": 0.5507096482261824, - "grad_norm": 680.9571533203125, - "learning_rate": 2.5415369249113212e-05, - "loss": 68.5904, - "step": 136310 - }, - { - "epoch": 0.55075004949155, - "grad_norm": 326.5666198730469, - "learning_rate": 2.5411879068404056e-05, - "loss": 84.7915, - "step": 136320 - }, - { - "epoch": 0.5507904507569177, - "grad_norm": 239.2113494873047, - "learning_rate": 2.5408388879665108e-05, - "loss": 37.1209, - "step": 136330 - }, - { - "epoch": 0.5508308520222853, - "grad_norm": 844.7891845703125, - "learning_rate": 2.54048986829644e-05, - "loss": 43.2222, - "step": 136340 - }, - { - "epoch": 0.550871253287653, - "grad_norm": 908.1921997070312, - "learning_rate": 2.540140847836999e-05, - "loss": 35.1715, - "step": 136350 - }, - { - "epoch": 0.5509116545530206, - "grad_norm": 649.4369506835938, - "learning_rate": 2.539791826594991e-05, - "loss": 35.0707, - "step": 136360 - }, - { - "epoch": 0.5509520558183882, - "grad_norm": 816.3155517578125, - "learning_rate": 2.5394428045772212e-05, - "loss": 46.9021, - "step": 136370 - }, - { - "epoch": 0.5509924570837559, - "grad_norm": 1066.6396484375, - "learning_rate": 2.539093781790494e-05, - "loss": 49.8077, - "step": 136380 - }, - { - "epoch": 0.5510328583491235, - "grad_norm": 581.2196044921875, - "learning_rate": 2.5387447582416123e-05, - "loss": 113.6221, - "step": 136390 - }, - { - "epoch": 0.5510732596144912, - "grad_norm": 908.7518310546875, - "learning_rate": 2.5383957339373825e-05, - "loss": 55.2377, - "step": 136400 - }, - { - "epoch": 0.5511136608798588, - "grad_norm": 683.6449584960938, - "learning_rate": 2.5380467088846077e-05, - "loss": 51.5513, - "step": 136410 - }, - { - "epoch": 0.5511540621452264, - "grad_norm": 1066.43994140625, - "learning_rate": 2.537697683090093e-05, - "loss": 54.2596, - "step": 136420 - }, - { - "epoch": 0.551194463410594, - "grad_norm": 1037.6993408203125, - "learning_rate": 2.537348656560643e-05, - "loss": 97.0623, - "step": 136430 - }, - { - "epoch": 0.5512348646759616, - "grad_norm": 524.144287109375, - "learning_rate": 2.5369996293030606e-05, - "loss": 64.2785, - "step": 136440 - }, - { - "epoch": 0.5512752659413293, - "grad_norm": 847.4541015625, - "learning_rate": 2.536650601324152e-05, - "loss": 52.7042, - "step": 136450 - }, - { - "epoch": 0.5513156672066969, - "grad_norm": 603.0610961914062, - "learning_rate": 2.536301572630721e-05, - "loss": 36.3441, - "step": 136460 - }, - { - "epoch": 0.5513560684720645, - "grad_norm": 818.6026000976562, - "learning_rate": 2.535952543229572e-05, - "loss": 49.2163, - "step": 136470 - }, - { - "epoch": 0.5513964697374322, - "grad_norm": 1117.1014404296875, - "learning_rate": 2.5356035131275096e-05, - "loss": 54.204, - "step": 136480 - }, - { - "epoch": 0.5514368710027998, - "grad_norm": 1214.039794921875, - "learning_rate": 2.5352544823313384e-05, - "loss": 67.4448, - "step": 136490 - }, - { - "epoch": 0.5514772722681675, - "grad_norm": 671.1990356445312, - "learning_rate": 2.5349054508478637e-05, - "loss": 52.2546, - "step": 136500 - }, - { - "epoch": 0.5515176735335351, - "grad_norm": 832.948974609375, - "learning_rate": 2.534556418683888e-05, - "loss": 52.6752, - "step": 136510 - }, - { - "epoch": 0.5515580747989027, - "grad_norm": 684.7549438476562, - "learning_rate": 2.5342073858462185e-05, - "loss": 67.4169, - "step": 136520 - }, - { - "epoch": 0.5515984760642704, - "grad_norm": 284.89532470703125, - "learning_rate": 2.5338583523416575e-05, - "loss": 66.7467, - "step": 136530 - }, - { - "epoch": 0.551638877329638, - "grad_norm": 538.3557739257812, - "learning_rate": 2.5335093181770104e-05, - "loss": 44.1986, - "step": 136540 - }, - { - "epoch": 0.5516792785950057, - "grad_norm": 1377.850341796875, - "learning_rate": 2.5331602833590824e-05, - "loss": 59.193, - "step": 136550 - }, - { - "epoch": 0.5517196798603732, - "grad_norm": 873.0044555664062, - "learning_rate": 2.532811247894677e-05, - "loss": 55.7959, - "step": 136560 - }, - { - "epoch": 0.5517600811257408, - "grad_norm": 1132.6865234375, - "learning_rate": 2.5324622117906e-05, - "loss": 46.0268, - "step": 136570 - }, - { - "epoch": 0.5518004823911085, - "grad_norm": 4735.5791015625, - "learning_rate": 2.5321131750536547e-05, - "loss": 54.9213, - "step": 136580 - }, - { - "epoch": 0.5518408836564761, - "grad_norm": 715.4901123046875, - "learning_rate": 2.5317641376906475e-05, - "loss": 56.9693, - "step": 136590 - }, - { - "epoch": 0.5518812849218437, - "grad_norm": 998.7701416015625, - "learning_rate": 2.531415099708382e-05, - "loss": 52.3184, - "step": 136600 - }, - { - "epoch": 0.5519216861872114, - "grad_norm": 2349.027099609375, - "learning_rate": 2.5310660611136628e-05, - "loss": 64.0717, - "step": 136610 - }, - { - "epoch": 0.551962087452579, - "grad_norm": 1127.0347900390625, - "learning_rate": 2.530717021913294e-05, - "loss": 43.0908, - "step": 136620 - }, - { - "epoch": 0.5520024887179467, - "grad_norm": 574.6187133789062, - "learning_rate": 2.530367982114082e-05, - "loss": 40.4501, - "step": 136630 - }, - { - "epoch": 0.5520428899833143, - "grad_norm": 2183.83544921875, - "learning_rate": 2.53001894172283e-05, - "loss": 64.5596, - "step": 136640 - }, - { - "epoch": 0.5520832912486819, - "grad_norm": 508.4299621582031, - "learning_rate": 2.5296699007463433e-05, - "loss": 48.9093, - "step": 136650 - }, - { - "epoch": 0.5521236925140496, - "grad_norm": 1008.5830688476562, - "learning_rate": 2.5293208591914263e-05, - "loss": 71.5341, - "step": 136660 - }, - { - "epoch": 0.5521640937794172, - "grad_norm": 573.5001220703125, - "learning_rate": 2.5289718170648853e-05, - "loss": 35.5604, - "step": 136670 - }, - { - "epoch": 0.5522044950447847, - "grad_norm": 1256.3994140625, - "learning_rate": 2.5286227743735225e-05, - "loss": 60.7903, - "step": 136680 - }, - { - "epoch": 0.5522448963101524, - "grad_norm": 427.8146057128906, - "learning_rate": 2.528273731124145e-05, - "loss": 62.5258, - "step": 136690 - }, - { - "epoch": 0.55228529757552, - "grad_norm": 1044.406982421875, - "learning_rate": 2.527924687323556e-05, - "loss": 71.0137, - "step": 136700 - }, - { - "epoch": 0.5523256988408877, - "grad_norm": 916.89599609375, - "learning_rate": 2.5275756429785608e-05, - "loss": 52.149, - "step": 136710 - }, - { - "epoch": 0.5523661001062553, - "grad_norm": 551.5916137695312, - "learning_rate": 2.527226598095964e-05, - "loss": 71.3226, - "step": 136720 - }, - { - "epoch": 0.552406501371623, - "grad_norm": 668.67431640625, - "learning_rate": 2.526877552682571e-05, - "loss": 66.0503, - "step": 136730 - }, - { - "epoch": 0.5524469026369906, - "grad_norm": 1805.9869384765625, - "learning_rate": 2.5265285067451862e-05, - "loss": 63.5316, - "step": 136740 - }, - { - "epoch": 0.5524873039023582, - "grad_norm": 1308.774169921875, - "learning_rate": 2.5261794602906145e-05, - "loss": 45.1674, - "step": 136750 - }, - { - "epoch": 0.5525277051677259, - "grad_norm": 242.80426025390625, - "learning_rate": 2.5258304133256612e-05, - "loss": 59.7236, - "step": 136760 - }, - { - "epoch": 0.5525681064330935, - "grad_norm": 341.7113952636719, - "learning_rate": 2.52548136585713e-05, - "loss": 42.4413, - "step": 136770 - }, - { - "epoch": 0.5526085076984611, - "grad_norm": 571.553955078125, - "learning_rate": 2.5251323178918268e-05, - "loss": 47.814, - "step": 136780 - }, - { - "epoch": 0.5526489089638288, - "grad_norm": 2131.88525390625, - "learning_rate": 2.5247832694365565e-05, - "loss": 65.054, - "step": 136790 - }, - { - "epoch": 0.5526893102291964, - "grad_norm": 412.0196533203125, - "learning_rate": 2.524434220498123e-05, - "loss": 57.1556, - "step": 136800 - }, - { - "epoch": 0.552729711494564, - "grad_norm": 347.2409362792969, - "learning_rate": 2.524085171083332e-05, - "loss": 51.0059, - "step": 136810 - }, - { - "epoch": 0.5527701127599316, - "grad_norm": 1224.644287109375, - "learning_rate": 2.523736121198988e-05, - "loss": 40.1427, - "step": 136820 - }, - { - "epoch": 0.5528105140252992, - "grad_norm": 1251.9407958984375, - "learning_rate": 2.5233870708518964e-05, - "loss": 45.218, - "step": 136830 - }, - { - "epoch": 0.5528509152906669, - "grad_norm": 1171.9188232421875, - "learning_rate": 2.5230380200488613e-05, - "loss": 57.0013, - "step": 136840 - }, - { - "epoch": 0.5528913165560345, - "grad_norm": 820.2431640625, - "learning_rate": 2.5226889687966882e-05, - "loss": 61.3478, - "step": 136850 - }, - { - "epoch": 0.5529317178214022, - "grad_norm": 1104.1824951171875, - "learning_rate": 2.5223399171021827e-05, - "loss": 47.4096, - "step": 136860 - }, - { - "epoch": 0.5529721190867698, - "grad_norm": 1183.5810546875, - "learning_rate": 2.5219908649721492e-05, - "loss": 62.6051, - "step": 136870 - }, - { - "epoch": 0.5530125203521374, - "grad_norm": 736.5234375, - "learning_rate": 2.5216418124133916e-05, - "loss": 48.401, - "step": 136880 - }, - { - "epoch": 0.5530529216175051, - "grad_norm": 710.7758178710938, - "learning_rate": 2.5212927594327157e-05, - "loss": 60.8935, - "step": 136890 - }, - { - "epoch": 0.5530933228828727, - "grad_norm": 590.540771484375, - "learning_rate": 2.520943706036927e-05, - "loss": 46.2608, - "step": 136900 - }, - { - "epoch": 0.5531337241482404, - "grad_norm": 2185.54833984375, - "learning_rate": 2.52059465223283e-05, - "loss": 63.3829, - "step": 136910 - }, - { - "epoch": 0.553174125413608, - "grad_norm": 604.2235107421875, - "learning_rate": 2.520245598027229e-05, - "loss": 40.5135, - "step": 136920 - }, - { - "epoch": 0.5532145266789756, - "grad_norm": 400.40631103515625, - "learning_rate": 2.5198965434269294e-05, - "loss": 40.3837, - "step": 136930 - }, - { - "epoch": 0.5532549279443432, - "grad_norm": 770.1949462890625, - "learning_rate": 2.5195474884387376e-05, - "loss": 59.8826, - "step": 136940 - }, - { - "epoch": 0.5532953292097108, - "grad_norm": 817.746337890625, - "learning_rate": 2.5191984330694573e-05, - "loss": 66.2542, - "step": 136950 - }, - { - "epoch": 0.5533357304750784, - "grad_norm": 758.0955810546875, - "learning_rate": 2.518849377325893e-05, - "loss": 62.4526, - "step": 136960 - }, - { - "epoch": 0.5533761317404461, - "grad_norm": 451.2418518066406, - "learning_rate": 2.5185003212148507e-05, - "loss": 53.2829, - "step": 136970 - }, - { - "epoch": 0.5534165330058137, - "grad_norm": 2254.761474609375, - "learning_rate": 2.518151264743135e-05, - "loss": 50.6571, - "step": 136980 - }, - { - "epoch": 0.5534569342711814, - "grad_norm": 1203.5234375, - "learning_rate": 2.5178022079175506e-05, - "loss": 69.3263, - "step": 136990 - }, - { - "epoch": 0.553497335536549, - "grad_norm": 707.7672119140625, - "learning_rate": 2.517453150744904e-05, - "loss": 65.1655, - "step": 137000 - }, - { - "epoch": 0.5535377368019166, - "grad_norm": 330.6707763671875, - "learning_rate": 2.517104093231999e-05, - "loss": 59.2168, - "step": 137010 - }, - { - "epoch": 0.5535781380672843, - "grad_norm": 910.3479614257812, - "learning_rate": 2.5167550353856402e-05, - "loss": 35.873, - "step": 137020 - }, - { - "epoch": 0.5536185393326519, - "grad_norm": 888.7249755859375, - "learning_rate": 2.516405977212634e-05, - "loss": 48.7685, - "step": 137030 - }, - { - "epoch": 0.5536589405980196, - "grad_norm": 696.96533203125, - "learning_rate": 2.5160569187197852e-05, - "loss": 31.9463, - "step": 137040 - }, - { - "epoch": 0.5536993418633872, - "grad_norm": 561.5834350585938, - "learning_rate": 2.5157078599138977e-05, - "loss": 59.4334, - "step": 137050 - }, - { - "epoch": 0.5537397431287548, - "grad_norm": 636.3404541015625, - "learning_rate": 2.5153588008017776e-05, - "loss": 45.7352, - "step": 137060 - }, - { - "epoch": 0.5537801443941224, - "grad_norm": 775.7879028320312, - "learning_rate": 2.5150097413902297e-05, - "loss": 56.8609, - "step": 137070 - }, - { - "epoch": 0.55382054565949, - "grad_norm": 1065.6773681640625, - "learning_rate": 2.5146606816860597e-05, - "loss": 42.274, - "step": 137080 - }, - { - "epoch": 0.5538609469248577, - "grad_norm": 1046.0736083984375, - "learning_rate": 2.5143116216960717e-05, - "loss": 47.2468, - "step": 137090 - }, - { - "epoch": 0.5539013481902253, - "grad_norm": 508.81201171875, - "learning_rate": 2.5139625614270706e-05, - "loss": 74.2703, - "step": 137100 - }, - { - "epoch": 0.5539417494555929, - "grad_norm": 254.36724853515625, - "learning_rate": 2.5136135008858637e-05, - "loss": 40.8546, - "step": 137110 - }, - { - "epoch": 0.5539821507209606, - "grad_norm": 481.7911682128906, - "learning_rate": 2.513264440079254e-05, - "loss": 61.7229, - "step": 137120 - }, - { - "epoch": 0.5540225519863282, - "grad_norm": 1409.0474853515625, - "learning_rate": 2.5129153790140463e-05, - "loss": 63.4276, - "step": 137130 - }, - { - "epoch": 0.5540629532516959, - "grad_norm": 1046.4349365234375, - "learning_rate": 2.5125663176970476e-05, - "loss": 41.4423, - "step": 137140 - }, - { - "epoch": 0.5541033545170635, - "grad_norm": 860.8171997070312, - "learning_rate": 2.5122172561350616e-05, - "loss": 61.4118, - "step": 137150 - }, - { - "epoch": 0.5541437557824311, - "grad_norm": 733.4225463867188, - "learning_rate": 2.5118681943348944e-05, - "loss": 36.8318, - "step": 137160 - }, - { - "epoch": 0.5541841570477988, - "grad_norm": 582.643310546875, - "learning_rate": 2.51151913230335e-05, - "loss": 37.8533, - "step": 137170 - }, - { - "epoch": 0.5542245583131664, - "grad_norm": 1445.744873046875, - "learning_rate": 2.5111700700472346e-05, - "loss": 70.5182, - "step": 137180 - }, - { - "epoch": 0.5542649595785339, - "grad_norm": 709.0853271484375, - "learning_rate": 2.5108210075733523e-05, - "loss": 45.407, - "step": 137190 - }, - { - "epoch": 0.5543053608439016, - "grad_norm": 413.23663330078125, - "learning_rate": 2.51047194488851e-05, - "loss": 57.8371, - "step": 137200 - }, - { - "epoch": 0.5543457621092692, - "grad_norm": 660.6998901367188, - "learning_rate": 2.5101228819995116e-05, - "loss": 75.2915, - "step": 137210 - }, - { - "epoch": 0.5543861633746369, - "grad_norm": 959.9091796875, - "learning_rate": 2.5097738189131614e-05, - "loss": 69.5816, - "step": 137220 - }, - { - "epoch": 0.5544265646400045, - "grad_norm": 401.2200012207031, - "learning_rate": 2.509424755636266e-05, - "loss": 53.4176, - "step": 137230 - }, - { - "epoch": 0.5544669659053721, - "grad_norm": 673.7756958007812, - "learning_rate": 2.509075692175631e-05, - "loss": 47.1198, - "step": 137240 - }, - { - "epoch": 0.5545073671707398, - "grad_norm": 1097.22998046875, - "learning_rate": 2.5087266285380596e-05, - "loss": 68.0358, - "step": 137250 - }, - { - "epoch": 0.5545477684361074, - "grad_norm": 893.8514404296875, - "learning_rate": 2.508377564730358e-05, - "loss": 75.7999, - "step": 137260 - }, - { - "epoch": 0.5545881697014751, - "grad_norm": 618.3114624023438, - "learning_rate": 2.508028500759332e-05, - "loss": 61.6705, - "step": 137270 - }, - { - "epoch": 0.5546285709668427, - "grad_norm": 694.9136962890625, - "learning_rate": 2.5076794366317867e-05, - "loss": 71.5496, - "step": 137280 - }, - { - "epoch": 0.5546689722322103, - "grad_norm": 558.5074462890625, - "learning_rate": 2.5073303723545265e-05, - "loss": 68.4956, - "step": 137290 - }, - { - "epoch": 0.554709373497578, - "grad_norm": 609.846435546875, - "learning_rate": 2.506981307934357e-05, - "loss": 63.5097, - "step": 137300 - }, - { - "epoch": 0.5547497747629456, - "grad_norm": 332.0170593261719, - "learning_rate": 2.5066322433780827e-05, - "loss": 33.8037, - "step": 137310 - }, - { - "epoch": 0.5547901760283132, - "grad_norm": 1042.158447265625, - "learning_rate": 2.5062831786925102e-05, - "loss": 43.3467, - "step": 137320 - }, - { - "epoch": 0.5548305772936808, - "grad_norm": 1328.8170166015625, - "learning_rate": 2.505934113884443e-05, - "loss": 59.4562, - "step": 137330 - }, - { - "epoch": 0.5548709785590484, - "grad_norm": 1468.5960693359375, - "learning_rate": 2.5055850489606875e-05, - "loss": 46.9289, - "step": 137340 - }, - { - "epoch": 0.5549113798244161, - "grad_norm": 968.5849609375, - "learning_rate": 2.505235983928048e-05, - "loss": 54.1335, - "step": 137350 - }, - { - "epoch": 0.5549517810897837, - "grad_norm": 1135.44091796875, - "learning_rate": 2.5048869187933316e-05, - "loss": 44.6849, - "step": 137360 - }, - { - "epoch": 0.5549921823551514, - "grad_norm": 748.2118530273438, - "learning_rate": 2.504537853563342e-05, - "loss": 50.3875, - "step": 137370 - }, - { - "epoch": 0.555032583620519, - "grad_norm": 301.2623596191406, - "learning_rate": 2.5041887882448844e-05, - "loss": 67.9134, - "step": 137380 - }, - { - "epoch": 0.5550729848858866, - "grad_norm": 1220.4659423828125, - "learning_rate": 2.5038397228447634e-05, - "loss": 26.7308, - "step": 137390 - }, - { - "epoch": 0.5551133861512543, - "grad_norm": 1018.2291259765625, - "learning_rate": 2.5034906573697864e-05, - "loss": 49.0374, - "step": 137400 - }, - { - "epoch": 0.5551537874166219, - "grad_norm": 808.0350952148438, - "learning_rate": 2.5031415918267564e-05, - "loss": 77.4769, - "step": 137410 - }, - { - "epoch": 0.5551941886819896, - "grad_norm": 1768.087646484375, - "learning_rate": 2.5027925262224795e-05, - "loss": 77.0356, - "step": 137420 - }, - { - "epoch": 0.5552345899473572, - "grad_norm": 568.6273803710938, - "learning_rate": 2.5024434605637604e-05, - "loss": 68.0419, - "step": 137430 - }, - { - "epoch": 0.5552749912127248, - "grad_norm": 688.5547485351562, - "learning_rate": 2.5020943948574055e-05, - "loss": 62.4521, - "step": 137440 - }, - { - "epoch": 0.5553153924780924, - "grad_norm": 561.1484985351562, - "learning_rate": 2.501745329110219e-05, - "loss": 47.788, - "step": 137450 - }, - { - "epoch": 0.55535579374346, - "grad_norm": 1244.0697021484375, - "learning_rate": 2.5013962633290072e-05, - "loss": 47.7701, - "step": 137460 - }, - { - "epoch": 0.5553961950088276, - "grad_norm": 647.6009521484375, - "learning_rate": 2.5010471975205733e-05, - "loss": 69.8529, - "step": 137470 - }, - { - "epoch": 0.5554365962741953, - "grad_norm": 2588.730712890625, - "learning_rate": 2.500698131691725e-05, - "loss": 54.0912, - "step": 137480 - }, - { - "epoch": 0.5554769975395629, - "grad_norm": 540.1988525390625, - "learning_rate": 2.500349065849265e-05, - "loss": 78.9863, - "step": 137490 - }, - { - "epoch": 0.5555173988049306, - "grad_norm": 1182.00732421875, - "learning_rate": 2.5e-05, - "loss": 77.0684, - "step": 137500 - }, - { - "epoch": 0.5555578000702982, - "grad_norm": 668.05517578125, - "learning_rate": 2.499650934150736e-05, - "loss": 35.7313, - "step": 137510 - }, - { - "epoch": 0.5555982013356658, - "grad_norm": 337.5390930175781, - "learning_rate": 2.499301868308276e-05, - "loss": 47.2015, - "step": 137520 - }, - { - "epoch": 0.5556386026010335, - "grad_norm": 656.7407836914062, - "learning_rate": 2.498952802479427e-05, - "loss": 43.5451, - "step": 137530 - }, - { - "epoch": 0.5556790038664011, - "grad_norm": 710.9033813476562, - "learning_rate": 2.4986037366709937e-05, - "loss": 49.4604, - "step": 137540 - }, - { - "epoch": 0.5557194051317688, - "grad_norm": 1154.9649658203125, - "learning_rate": 2.4982546708897814e-05, - "loss": 59.851, - "step": 137550 - }, - { - "epoch": 0.5557598063971364, - "grad_norm": 548.6576538085938, - "learning_rate": 2.4979056051425954e-05, - "loss": 92.8117, - "step": 137560 - }, - { - "epoch": 0.555800207662504, - "grad_norm": 622.8199462890625, - "learning_rate": 2.4975565394362395e-05, - "loss": 37.9003, - "step": 137570 - }, - { - "epoch": 0.5558406089278716, - "grad_norm": 559.3489379882812, - "learning_rate": 2.4972074737775214e-05, - "loss": 48.2217, - "step": 137580 - }, - { - "epoch": 0.5558810101932392, - "grad_norm": 1215.4371337890625, - "learning_rate": 2.4968584081732448e-05, - "loss": 83.8556, - "step": 137590 - }, - { - "epoch": 0.5559214114586069, - "grad_norm": 371.1655578613281, - "learning_rate": 2.496509342630214e-05, - "loss": 40.4336, - "step": 137600 - }, - { - "epoch": 0.5559618127239745, - "grad_norm": 3215.786376953125, - "learning_rate": 2.496160277155237e-05, - "loss": 81.3515, - "step": 137610 - }, - { - "epoch": 0.5560022139893421, - "grad_norm": 420.7957458496094, - "learning_rate": 2.4958112117551165e-05, - "loss": 84.0413, - "step": 137620 - }, - { - "epoch": 0.5560426152547098, - "grad_norm": 889.9630737304688, - "learning_rate": 2.495462146436659e-05, - "loss": 46.8617, - "step": 137630 - }, - { - "epoch": 0.5560830165200774, - "grad_norm": 1820.2559814453125, - "learning_rate": 2.4951130812066686e-05, - "loss": 71.0172, - "step": 137640 - }, - { - "epoch": 0.556123417785445, - "grad_norm": 1237.7259521484375, - "learning_rate": 2.4947640160719515e-05, - "loss": 27.0079, - "step": 137650 - }, - { - "epoch": 0.5561638190508127, - "grad_norm": 641.0753784179688, - "learning_rate": 2.4944149510393134e-05, - "loss": 57.1568, - "step": 137660 - }, - { - "epoch": 0.5562042203161803, - "grad_norm": 450.4702453613281, - "learning_rate": 2.4940658861155584e-05, - "loss": 50.1699, - "step": 137670 - }, - { - "epoch": 0.556244621581548, - "grad_norm": 1086.3056640625, - "learning_rate": 2.4937168213074907e-05, - "loss": 46.7683, - "step": 137680 - }, - { - "epoch": 0.5562850228469156, - "grad_norm": 404.1825866699219, - "learning_rate": 2.493367756621918e-05, - "loss": 35.0012, - "step": 137690 - }, - { - "epoch": 0.5563254241122833, - "grad_norm": 943.6931762695312, - "learning_rate": 2.493018692065644e-05, - "loss": 54.1753, - "step": 137700 - }, - { - "epoch": 0.5563658253776508, - "grad_norm": 530.2384033203125, - "learning_rate": 2.4926696276454737e-05, - "loss": 41.3772, - "step": 137710 - }, - { - "epoch": 0.5564062266430184, - "grad_norm": 347.5144348144531, - "learning_rate": 2.492320563368214e-05, - "loss": 44.261, - "step": 137720 - }, - { - "epoch": 0.5564466279083861, - "grad_norm": 996.7993774414062, - "learning_rate": 2.4919714992406677e-05, - "loss": 74.4061, - "step": 137730 - }, - { - "epoch": 0.5564870291737537, - "grad_norm": 922.4503173828125, - "learning_rate": 2.491622435269642e-05, - "loss": 62.489, - "step": 137740 - }, - { - "epoch": 0.5565274304391213, - "grad_norm": 500.00689697265625, - "learning_rate": 2.4912733714619417e-05, - "loss": 28.2802, - "step": 137750 - }, - { - "epoch": 0.556567831704489, - "grad_norm": 333.4855041503906, - "learning_rate": 2.4909243078243696e-05, - "loss": 47.8612, - "step": 137760 - }, - { - "epoch": 0.5566082329698566, - "grad_norm": 898.7628173828125, - "learning_rate": 2.4905752443637345e-05, - "loss": 46.9569, - "step": 137770 - }, - { - "epoch": 0.5566486342352243, - "grad_norm": 303.6540832519531, - "learning_rate": 2.490226181086838e-05, - "loss": 44.0417, - "step": 137780 - }, - { - "epoch": 0.5566890355005919, - "grad_norm": 8116.3369140625, - "learning_rate": 2.489877118000489e-05, - "loss": 56.3838, - "step": 137790 - }, - { - "epoch": 0.5567294367659595, - "grad_norm": 549.66943359375, - "learning_rate": 2.4895280551114907e-05, - "loss": 50.9485, - "step": 137800 - }, - { - "epoch": 0.5567698380313272, - "grad_norm": 622.78564453125, - "learning_rate": 2.4891789924266476e-05, - "loss": 44.1438, - "step": 137810 - }, - { - "epoch": 0.5568102392966948, - "grad_norm": 762.0851440429688, - "learning_rate": 2.4888299299527663e-05, - "loss": 62.1743, - "step": 137820 - }, - { - "epoch": 0.5568506405620623, - "grad_norm": 685.5193481445312, - "learning_rate": 2.488480867696651e-05, - "loss": 33.5156, - "step": 137830 - }, - { - "epoch": 0.55689104182743, - "grad_norm": 534.5140991210938, - "learning_rate": 2.4881318056651062e-05, - "loss": 50.1773, - "step": 137840 - }, - { - "epoch": 0.5569314430927976, - "grad_norm": 592.9160766601562, - "learning_rate": 2.4877827438649393e-05, - "loss": 36.2395, - "step": 137850 - }, - { - "epoch": 0.5569718443581653, - "grad_norm": 296.64471435546875, - "learning_rate": 2.4874336823029526e-05, - "loss": 79.5094, - "step": 137860 - }, - { - "epoch": 0.5570122456235329, - "grad_norm": 933.6554565429688, - "learning_rate": 2.487084620985954e-05, - "loss": 67.565, - "step": 137870 - }, - { - "epoch": 0.5570526468889005, - "grad_norm": 498.0602722167969, - "learning_rate": 2.4867355599207474e-05, - "loss": 55.1443, - "step": 137880 - }, - { - "epoch": 0.5570930481542682, - "grad_norm": 635.6414794921875, - "learning_rate": 2.4863864991141372e-05, - "loss": 50.4595, - "step": 137890 - }, - { - "epoch": 0.5571334494196358, - "grad_norm": 509.6726379394531, - "learning_rate": 2.48603743857293e-05, - "loss": 45.6835, - "step": 137900 - }, - { - "epoch": 0.5571738506850035, - "grad_norm": 458.8603515625, - "learning_rate": 2.48568837830393e-05, - "loss": 52.0281, - "step": 137910 - }, - { - "epoch": 0.5572142519503711, - "grad_norm": 1398.501220703125, - "learning_rate": 2.4853393183139412e-05, - "loss": 74.1148, - "step": 137920 - }, - { - "epoch": 0.5572546532157387, - "grad_norm": 501.7352600097656, - "learning_rate": 2.484990258609771e-05, - "loss": 38.4496, - "step": 137930 - }, - { - "epoch": 0.5572950544811064, - "grad_norm": 354.11016845703125, - "learning_rate": 2.4846411991982223e-05, - "loss": 52.2043, - "step": 137940 - }, - { - "epoch": 0.557335455746474, - "grad_norm": 889.2261352539062, - "learning_rate": 2.484292140086103e-05, - "loss": 52.7851, - "step": 137950 - }, - { - "epoch": 0.5573758570118416, - "grad_norm": 566.2061767578125, - "learning_rate": 2.4839430812802157e-05, - "loss": 75.0353, - "step": 137960 - }, - { - "epoch": 0.5574162582772092, - "grad_norm": 1404.2596435546875, - "learning_rate": 2.4835940227873662e-05, - "loss": 66.9802, - "step": 137970 - }, - { - "epoch": 0.5574566595425768, - "grad_norm": 673.5670166015625, - "learning_rate": 2.4832449646143604e-05, - "loss": 63.724, - "step": 137980 - }, - { - "epoch": 0.5574970608079445, - "grad_norm": 693.6373291015625, - "learning_rate": 2.482895906768002e-05, - "loss": 52.0077, - "step": 137990 - }, - { - "epoch": 0.5575374620733121, - "grad_norm": 672.2484741210938, - "learning_rate": 2.4825468492550964e-05, - "loss": 39.796, - "step": 138000 - }, - { - "epoch": 0.5575778633386798, - "grad_norm": 1082.0235595703125, - "learning_rate": 2.4821977920824497e-05, - "loss": 39.0499, - "step": 138010 - }, - { - "epoch": 0.5576182646040474, - "grad_norm": 1062.3084716796875, - "learning_rate": 2.481848735256865e-05, - "loss": 76.0777, - "step": 138020 - }, - { - "epoch": 0.557658665869415, - "grad_norm": 1710.125732421875, - "learning_rate": 2.4814996787851495e-05, - "loss": 61.158, - "step": 138030 - }, - { - "epoch": 0.5576990671347827, - "grad_norm": 868.0357055664062, - "learning_rate": 2.481150622674108e-05, - "loss": 42.1739, - "step": 138040 - }, - { - "epoch": 0.5577394684001503, - "grad_norm": 746.6460571289062, - "learning_rate": 2.4808015669305433e-05, - "loss": 50.9672, - "step": 138050 - }, - { - "epoch": 0.557779869665518, - "grad_norm": 444.3233337402344, - "learning_rate": 2.480452511561263e-05, - "loss": 37.3236, - "step": 138060 - }, - { - "epoch": 0.5578202709308856, - "grad_norm": 914.9209594726562, - "learning_rate": 2.48010345657307e-05, - "loss": 51.8856, - "step": 138070 - }, - { - "epoch": 0.5578606721962532, - "grad_norm": 663.5612182617188, - "learning_rate": 2.4797544019727717e-05, - "loss": 28.8472, - "step": 138080 - }, - { - "epoch": 0.5579010734616208, - "grad_norm": 731.248046875, - "learning_rate": 2.4794053477671713e-05, - "loss": 43.9837, - "step": 138090 - }, - { - "epoch": 0.5579414747269884, - "grad_norm": 371.97589111328125, - "learning_rate": 2.4790562939630734e-05, - "loss": 35.7051, - "step": 138100 - }, - { - "epoch": 0.557981875992356, - "grad_norm": 582.62353515625, - "learning_rate": 2.478707240567285e-05, - "loss": 46.4093, - "step": 138110 - }, - { - "epoch": 0.5580222772577237, - "grad_norm": 626.8383178710938, - "learning_rate": 2.4783581875866097e-05, - "loss": 62.2491, - "step": 138120 - }, - { - "epoch": 0.5580626785230913, - "grad_norm": 956.4906005859375, - "learning_rate": 2.4780091350278514e-05, - "loss": 45.8993, - "step": 138130 - }, - { - "epoch": 0.558103079788459, - "grad_norm": 1804.482421875, - "learning_rate": 2.477660082897818e-05, - "loss": 69.7101, - "step": 138140 - }, - { - "epoch": 0.5581434810538266, - "grad_norm": 856.2649536132812, - "learning_rate": 2.4773110312033117e-05, - "loss": 50.3831, - "step": 138150 - }, - { - "epoch": 0.5581838823191942, - "grad_norm": 671.4500732421875, - "learning_rate": 2.4769619799511393e-05, - "loss": 64.6485, - "step": 138160 - }, - { - "epoch": 0.5582242835845619, - "grad_norm": 861.5789184570312, - "learning_rate": 2.4766129291481048e-05, - "loss": 77.0027, - "step": 138170 - }, - { - "epoch": 0.5582646848499295, - "grad_norm": 462.4314880371094, - "learning_rate": 2.4762638788010122e-05, - "loss": 60.2575, - "step": 138180 - }, - { - "epoch": 0.5583050861152972, - "grad_norm": 970.7669067382812, - "learning_rate": 2.4759148289166687e-05, - "loss": 98.2652, - "step": 138190 - }, - { - "epoch": 0.5583454873806648, - "grad_norm": 1482.3714599609375, - "learning_rate": 2.475565779501878e-05, - "loss": 41.1083, - "step": 138200 - }, - { - "epoch": 0.5583858886460324, - "grad_norm": 703.8884887695312, - "learning_rate": 2.475216730563444e-05, - "loss": 49.8838, - "step": 138210 - }, - { - "epoch": 0.5584262899114, - "grad_norm": 150.86387634277344, - "learning_rate": 2.4748676821081738e-05, - "loss": 40.4376, - "step": 138220 - }, - { - "epoch": 0.5584666911767676, - "grad_norm": 944.3568725585938, - "learning_rate": 2.47451863414287e-05, - "loss": 57.4413, - "step": 138230 - }, - { - "epoch": 0.5585070924421353, - "grad_norm": 659.5052490234375, - "learning_rate": 2.4741695866743397e-05, - "loss": 47.3132, - "step": 138240 - }, - { - "epoch": 0.5585474937075029, - "grad_norm": 506.6811828613281, - "learning_rate": 2.4738205397093864e-05, - "loss": 78.2513, - "step": 138250 - }, - { - "epoch": 0.5585878949728705, - "grad_norm": 689.1748046875, - "learning_rate": 2.473471493254814e-05, - "loss": 60.5565, - "step": 138260 - }, - { - "epoch": 0.5586282962382382, - "grad_norm": 548.3529052734375, - "learning_rate": 2.4731224473174295e-05, - "loss": 54.7061, - "step": 138270 - }, - { - "epoch": 0.5586686975036058, - "grad_norm": 626.2839965820312, - "learning_rate": 2.472773401904037e-05, - "loss": 59.1142, - "step": 138280 - }, - { - "epoch": 0.5587090987689735, - "grad_norm": 472.8750305175781, - "learning_rate": 2.4724243570214398e-05, - "loss": 56.8832, - "step": 138290 - }, - { - "epoch": 0.5587495000343411, - "grad_norm": 623.64794921875, - "learning_rate": 2.4720753126764447e-05, - "loss": 41.724, - "step": 138300 - }, - { - "epoch": 0.5587899012997087, - "grad_norm": 493.74017333984375, - "learning_rate": 2.4717262688758557e-05, - "loss": 35.2544, - "step": 138310 - }, - { - "epoch": 0.5588303025650764, - "grad_norm": 706.80126953125, - "learning_rate": 2.471377225626478e-05, - "loss": 64.1044, - "step": 138320 - }, - { - "epoch": 0.558870703830444, - "grad_norm": 702.6170043945312, - "learning_rate": 2.4710281829351156e-05, - "loss": 26.0041, - "step": 138330 - }, - { - "epoch": 0.5589111050958117, - "grad_norm": 1678.392333984375, - "learning_rate": 2.4706791408085736e-05, - "loss": 70.0253, - "step": 138340 - }, - { - "epoch": 0.5589515063611792, - "grad_norm": 1137.38818359375, - "learning_rate": 2.4703300992536573e-05, - "loss": 63.3032, - "step": 138350 - }, - { - "epoch": 0.5589919076265468, - "grad_norm": 1046.39990234375, - "learning_rate": 2.4699810582771713e-05, - "loss": 52.2509, - "step": 138360 - }, - { - "epoch": 0.5590323088919145, - "grad_norm": 657.4580078125, - "learning_rate": 2.4696320178859187e-05, - "loss": 52.0369, - "step": 138370 - }, - { - "epoch": 0.5590727101572821, - "grad_norm": 866.4102172851562, - "learning_rate": 2.4692829780867065e-05, - "loss": 56.7212, - "step": 138380 - }, - { - "epoch": 0.5591131114226497, - "grad_norm": 531.6983642578125, - "learning_rate": 2.4689339388863374e-05, - "loss": 41.7753, - "step": 138390 - }, - { - "epoch": 0.5591535126880174, - "grad_norm": 506.9232482910156, - "learning_rate": 2.4685849002916183e-05, - "loss": 34.2688, - "step": 138400 - }, - { - "epoch": 0.559193913953385, - "grad_norm": 803.9929809570312, - "learning_rate": 2.468235862309353e-05, - "loss": 52.5372, - "step": 138410 - }, - { - "epoch": 0.5592343152187527, - "grad_norm": 1219.44970703125, - "learning_rate": 2.4678868249463452e-05, - "loss": 62.4074, - "step": 138420 - }, - { - "epoch": 0.5592747164841203, - "grad_norm": 802.236328125, - "learning_rate": 2.4675377882094007e-05, - "loss": 39.6712, - "step": 138430 - }, - { - "epoch": 0.5593151177494879, - "grad_norm": 560.7493896484375, - "learning_rate": 2.4671887521053237e-05, - "loss": 71.1677, - "step": 138440 - }, - { - "epoch": 0.5593555190148556, - "grad_norm": 443.1597595214844, - "learning_rate": 2.466839716640918e-05, - "loss": 43.8879, - "step": 138450 - }, - { - "epoch": 0.5593959202802232, - "grad_norm": 4036.05810546875, - "learning_rate": 2.46649068182299e-05, - "loss": 84.4937, - "step": 138460 - }, - { - "epoch": 0.5594363215455908, - "grad_norm": 1169.473876953125, - "learning_rate": 2.4661416476583428e-05, - "loss": 52.9824, - "step": 138470 - }, - { - "epoch": 0.5594767228109584, - "grad_norm": 186.98561096191406, - "learning_rate": 2.465792614153782e-05, - "loss": 74.6176, - "step": 138480 - }, - { - "epoch": 0.559517124076326, - "grad_norm": 1336.8724365234375, - "learning_rate": 2.465443581316112e-05, - "loss": 56.0161, - "step": 138490 - }, - { - "epoch": 0.5595575253416937, - "grad_norm": 1130.03564453125, - "learning_rate": 2.4650945491521372e-05, - "loss": 76.923, - "step": 138500 - }, - { - "epoch": 0.5595979266070613, - "grad_norm": 351.6864318847656, - "learning_rate": 2.4647455176686622e-05, - "loss": 51.3765, - "step": 138510 - }, - { - "epoch": 0.559638327872429, - "grad_norm": 799.4815063476562, - "learning_rate": 2.4643964868724914e-05, - "loss": 77.3449, - "step": 138520 - }, - { - "epoch": 0.5596787291377966, - "grad_norm": 691.9085693359375, - "learning_rate": 2.4640474567704285e-05, - "loss": 38.7629, - "step": 138530 - }, - { - "epoch": 0.5597191304031642, - "grad_norm": 446.3203125, - "learning_rate": 2.46369842736928e-05, - "loss": 55.0333, - "step": 138540 - }, - { - "epoch": 0.5597595316685319, - "grad_norm": 991.9611206054688, - "learning_rate": 2.4633493986758484e-05, - "loss": 54.1677, - "step": 138550 - }, - { - "epoch": 0.5597999329338995, - "grad_norm": 1506.22607421875, - "learning_rate": 2.46300037069694e-05, - "loss": 37.269, - "step": 138560 - }, - { - "epoch": 0.5598403341992672, - "grad_norm": 915.8712768554688, - "learning_rate": 2.4626513434393584e-05, - "loss": 71.686, - "step": 138570 - }, - { - "epoch": 0.5598807354646348, - "grad_norm": 507.74920654296875, - "learning_rate": 2.4623023169099073e-05, - "loss": 52.8719, - "step": 138580 - }, - { - "epoch": 0.5599211367300024, - "grad_norm": 471.32745361328125, - "learning_rate": 2.4619532911153932e-05, - "loss": 57.2638, - "step": 138590 - }, - { - "epoch": 0.55996153799537, - "grad_norm": 660.4644775390625, - "learning_rate": 2.4616042660626177e-05, - "loss": 38.7556, - "step": 138600 - }, - { - "epoch": 0.5600019392607376, - "grad_norm": 1198.232421875, - "learning_rate": 2.461255241758388e-05, - "loss": 34.7362, - "step": 138610 - }, - { - "epoch": 0.5600423405261052, - "grad_norm": 1170.1473388671875, - "learning_rate": 2.4609062182095072e-05, - "loss": 56.8247, - "step": 138620 - }, - { - "epoch": 0.5600827417914729, - "grad_norm": 505.6723327636719, - "learning_rate": 2.4605571954227787e-05, - "loss": 62.388, - "step": 138630 - }, - { - "epoch": 0.5601231430568405, - "grad_norm": 983.4881591796875, - "learning_rate": 2.4602081734050093e-05, - "loss": 29.2795, - "step": 138640 - }, - { - "epoch": 0.5601635443222082, - "grad_norm": 346.18719482421875, - "learning_rate": 2.459859152163002e-05, - "loss": 65.6756, - "step": 138650 - }, - { - "epoch": 0.5602039455875758, - "grad_norm": 1048.971923828125, - "learning_rate": 2.4595101317035603e-05, - "loss": 60.1032, - "step": 138660 - }, - { - "epoch": 0.5602443468529434, - "grad_norm": 1085.2611083984375, - "learning_rate": 2.4591611120334898e-05, - "loss": 61.6514, - "step": 138670 - }, - { - "epoch": 0.5602847481183111, - "grad_norm": 320.9248046875, - "learning_rate": 2.4588120931595947e-05, - "loss": 38.1592, - "step": 138680 - }, - { - "epoch": 0.5603251493836787, - "grad_norm": 478.6922302246094, - "learning_rate": 2.458463075088679e-05, - "loss": 59.3947, - "step": 138690 - }, - { - "epoch": 0.5603655506490464, - "grad_norm": 874.609130859375, - "learning_rate": 2.458114057827547e-05, - "loss": 61.9768, - "step": 138700 - }, - { - "epoch": 0.560405951914414, - "grad_norm": 894.510498046875, - "learning_rate": 2.4577650413830025e-05, - "loss": 46.5881, - "step": 138710 - }, - { - "epoch": 0.5604463531797816, - "grad_norm": 621.612548828125, - "learning_rate": 2.4574160257618508e-05, - "loss": 43.6552, - "step": 138720 - }, - { - "epoch": 0.5604867544451492, - "grad_norm": 1093.0335693359375, - "learning_rate": 2.457067010970896e-05, - "loss": 50.9287, - "step": 138730 - }, - { - "epoch": 0.5605271557105168, - "grad_norm": 519.4285278320312, - "learning_rate": 2.4567179970169407e-05, - "loss": 45.5831, - "step": 138740 - }, - { - "epoch": 0.5605675569758845, - "grad_norm": 280.7900695800781, - "learning_rate": 2.4563689839067913e-05, - "loss": 70.7343, - "step": 138750 - }, - { - "epoch": 0.5606079582412521, - "grad_norm": 411.0301513671875, - "learning_rate": 2.4560199716472508e-05, - "loss": 34.1959, - "step": 138760 - }, - { - "epoch": 0.5606483595066197, - "grad_norm": 399.39208984375, - "learning_rate": 2.4556709602451238e-05, - "loss": 54.2914, - "step": 138770 - }, - { - "epoch": 0.5606887607719874, - "grad_norm": 739.2390747070312, - "learning_rate": 2.4553219497072143e-05, - "loss": 58.249, - "step": 138780 - }, - { - "epoch": 0.560729162037355, - "grad_norm": 522.2506713867188, - "learning_rate": 2.4549729400403254e-05, - "loss": 38.7297, - "step": 138790 - }, - { - "epoch": 0.5607695633027227, - "grad_norm": 320.0365295410156, - "learning_rate": 2.4546239312512635e-05, - "loss": 33.2641, - "step": 138800 - }, - { - "epoch": 0.5608099645680903, - "grad_norm": 449.9502258300781, - "learning_rate": 2.454274923346831e-05, - "loss": 29.3585, - "step": 138810 - }, - { - "epoch": 0.5608503658334579, - "grad_norm": 849.8306884765625, - "learning_rate": 2.4539259163338315e-05, - "loss": 35.6265, - "step": 138820 - }, - { - "epoch": 0.5608907670988256, - "grad_norm": 693.4956665039062, - "learning_rate": 2.4535769102190712e-05, - "loss": 39.2643, - "step": 138830 - }, - { - "epoch": 0.5609311683641932, - "grad_norm": 631.84423828125, - "learning_rate": 2.4532279050093523e-05, - "loss": 42.8962, - "step": 138840 - }, - { - "epoch": 0.5609715696295609, - "grad_norm": 846.31298828125, - "learning_rate": 2.45287890071148e-05, - "loss": 58.0521, - "step": 138850 - }, - { - "epoch": 0.5610119708949284, - "grad_norm": 468.14959716796875, - "learning_rate": 2.4525298973322576e-05, - "loss": 60.8873, - "step": 138860 - }, - { - "epoch": 0.561052372160296, - "grad_norm": 1028.050048828125, - "learning_rate": 2.4521808948784886e-05, - "loss": 40.6721, - "step": 138870 - }, - { - "epoch": 0.5610927734256637, - "grad_norm": 2670.3349609375, - "learning_rate": 2.4518318933569785e-05, - "loss": 65.057, - "step": 138880 - }, - { - "epoch": 0.5611331746910313, - "grad_norm": 1479.5867919921875, - "learning_rate": 2.4514828927745305e-05, - "loss": 71.9667, - "step": 138890 - }, - { - "epoch": 0.5611735759563989, - "grad_norm": 1349.51123046875, - "learning_rate": 2.4511338931379473e-05, - "loss": 71.3655, - "step": 138900 - }, - { - "epoch": 0.5612139772217666, - "grad_norm": 686.8629760742188, - "learning_rate": 2.4507848944540355e-05, - "loss": 52.6041, - "step": 138910 - }, - { - "epoch": 0.5612543784871342, - "grad_norm": 901.5208740234375, - "learning_rate": 2.4504358967295966e-05, - "loss": 75.5176, - "step": 138920 - }, - { - "epoch": 0.5612947797525019, - "grad_norm": 729.52197265625, - "learning_rate": 2.450086899971436e-05, - "loss": 47.1792, - "step": 138930 - }, - { - "epoch": 0.5613351810178695, - "grad_norm": 546.037841796875, - "learning_rate": 2.449737904186357e-05, - "loss": 43.953, - "step": 138940 - }, - { - "epoch": 0.5613755822832371, - "grad_norm": 1656.893798828125, - "learning_rate": 2.4493889093811622e-05, - "loss": 48.267, - "step": 138950 - }, - { - "epoch": 0.5614159835486048, - "grad_norm": 861.8642578125, - "learning_rate": 2.449039915562658e-05, - "loss": 74.9551, - "step": 138960 - }, - { - "epoch": 0.5614563848139724, - "grad_norm": 749.0470581054688, - "learning_rate": 2.4486909227376466e-05, - "loss": 65.9108, - "step": 138970 - }, - { - "epoch": 0.5614967860793401, - "grad_norm": 1193.1160888671875, - "learning_rate": 2.4483419309129315e-05, - "loss": 53.8905, - "step": 138980 - }, - { - "epoch": 0.5615371873447076, - "grad_norm": 3202.59521484375, - "learning_rate": 2.4479929400953178e-05, - "loss": 63.3941, - "step": 138990 - }, - { - "epoch": 0.5615775886100752, - "grad_norm": 3087.826171875, - "learning_rate": 2.447643950291608e-05, - "loss": 85.127, - "step": 139000 - }, - { - "epoch": 0.5616179898754429, - "grad_norm": 636.1074829101562, - "learning_rate": 2.447294961508606e-05, - "loss": 50.8728, - "step": 139010 - }, - { - "epoch": 0.5616583911408105, - "grad_norm": 771.0210571289062, - "learning_rate": 2.446945973753117e-05, - "loss": 43.0814, - "step": 139020 - }, - { - "epoch": 0.5616987924061781, - "grad_norm": 304.8282165527344, - "learning_rate": 2.4465969870319426e-05, - "loss": 57.8161, - "step": 139030 - }, - { - "epoch": 0.5617391936715458, - "grad_norm": 921.5098266601562, - "learning_rate": 2.4462480013518883e-05, - "loss": 58.7815, - "step": 139040 - }, - { - "epoch": 0.5617795949369134, - "grad_norm": 516.6439819335938, - "learning_rate": 2.4458990167197555e-05, - "loss": 74.0419, - "step": 139050 - }, - { - "epoch": 0.5618199962022811, - "grad_norm": 490.185546875, - "learning_rate": 2.4455500331423504e-05, - "loss": 43.1365, - "step": 139060 - }, - { - "epoch": 0.5618603974676487, - "grad_norm": 716.62109375, - "learning_rate": 2.4452010506264755e-05, - "loss": 50.1602, - "step": 139070 - }, - { - "epoch": 0.5619007987330163, - "grad_norm": 493.0600891113281, - "learning_rate": 2.444852069178933e-05, - "loss": 54.9955, - "step": 139080 - }, - { - "epoch": 0.561941199998384, - "grad_norm": 525.10205078125, - "learning_rate": 2.4445030888065293e-05, - "loss": 69.9999, - "step": 139090 - }, - { - "epoch": 0.5619816012637516, - "grad_norm": 700.7877197265625, - "learning_rate": 2.444154109516066e-05, - "loss": 60.9567, - "step": 139100 - }, - { - "epoch": 0.5620220025291192, - "grad_norm": 408.17535400390625, - "learning_rate": 2.443805131314347e-05, - "loss": 56.1129, - "step": 139110 - }, - { - "epoch": 0.5620624037944868, - "grad_norm": 700.475341796875, - "learning_rate": 2.4434561542081762e-05, - "loss": 78.0615, - "step": 139120 - }, - { - "epoch": 0.5621028050598544, - "grad_norm": 1132.6651611328125, - "learning_rate": 2.4431071782043556e-05, - "loss": 44.449, - "step": 139130 - }, - { - "epoch": 0.5621432063252221, - "grad_norm": 405.23724365234375, - "learning_rate": 2.442758203309691e-05, - "loss": 65.6864, - "step": 139140 - }, - { - "epoch": 0.5621836075905897, - "grad_norm": 628.817138671875, - "learning_rate": 2.442409229530985e-05, - "loss": 58.2262, - "step": 139150 - }, - { - "epoch": 0.5622240088559574, - "grad_norm": 616.2658081054688, - "learning_rate": 2.4420602568750393e-05, - "loss": 56.0243, - "step": 139160 - }, - { - "epoch": 0.562264410121325, - "grad_norm": 520.2138061523438, - "learning_rate": 2.44171128534866e-05, - "loss": 50.8846, - "step": 139170 - }, - { - "epoch": 0.5623048113866926, - "grad_norm": 615.4083251953125, - "learning_rate": 2.441362314958649e-05, - "loss": 56.1158, - "step": 139180 - }, - { - "epoch": 0.5623452126520603, - "grad_norm": 764.9922485351562, - "learning_rate": 2.4410133457118097e-05, - "loss": 66.4539, - "step": 139190 - }, - { - "epoch": 0.5623856139174279, - "grad_norm": 628.9678344726562, - "learning_rate": 2.4406643776149458e-05, - "loss": 83.326, - "step": 139200 - }, - { - "epoch": 0.5624260151827956, - "grad_norm": 1019.9813232421875, - "learning_rate": 2.4403154106748592e-05, - "loss": 61.2499, - "step": 139210 - }, - { - "epoch": 0.5624664164481632, - "grad_norm": 714.0093994140625, - "learning_rate": 2.4399664448983557e-05, - "loss": 47.1922, - "step": 139220 - }, - { - "epoch": 0.5625068177135308, - "grad_norm": 1170.500732421875, - "learning_rate": 2.4396174802922372e-05, - "loss": 68.0282, - "step": 139230 - }, - { - "epoch": 0.5625472189788984, - "grad_norm": 788.6015014648438, - "learning_rate": 2.439268516863306e-05, - "loss": 47.13, - "step": 139240 - }, - { - "epoch": 0.562587620244266, - "grad_norm": 572.7946166992188, - "learning_rate": 2.4389195546183673e-05, - "loss": 64.8227, - "step": 139250 - }, - { - "epoch": 0.5626280215096336, - "grad_norm": 511.4493713378906, - "learning_rate": 2.4385705935642232e-05, - "loss": 68.7031, - "step": 139260 - }, - { - "epoch": 0.5626684227750013, - "grad_norm": 736.9781494140625, - "learning_rate": 2.4382216337076767e-05, - "loss": 52.4168, - "step": 139270 - }, - { - "epoch": 0.5627088240403689, - "grad_norm": 686.5897827148438, - "learning_rate": 2.437872675055532e-05, - "loss": 33.2072, - "step": 139280 - }, - { - "epoch": 0.5627492253057366, - "grad_norm": 851.3128662109375, - "learning_rate": 2.43752371761459e-05, - "loss": 70.3393, - "step": 139290 - }, - { - "epoch": 0.5627896265711042, - "grad_norm": 363.2201232910156, - "learning_rate": 2.4371747613916566e-05, - "loss": 51.8346, - "step": 139300 - }, - { - "epoch": 0.5628300278364718, - "grad_norm": 997.1961669921875, - "learning_rate": 2.4368258063935335e-05, - "loss": 56.4316, - "step": 139310 - }, - { - "epoch": 0.5628704291018395, - "grad_norm": 683.9410400390625, - "learning_rate": 2.4364768526270227e-05, - "loss": 31.9099, - "step": 139320 - }, - { - "epoch": 0.5629108303672071, - "grad_norm": 339.04949951171875, - "learning_rate": 2.4361279000989296e-05, - "loss": 56.1524, - "step": 139330 - }, - { - "epoch": 0.5629512316325748, - "grad_norm": 1158.9278564453125, - "learning_rate": 2.4357789488160557e-05, - "loss": 67.9605, - "step": 139340 - }, - { - "epoch": 0.5629916328979424, - "grad_norm": 823.2213134765625, - "learning_rate": 2.4354299987852035e-05, - "loss": 77.956, - "step": 139350 - }, - { - "epoch": 0.56303203416331, - "grad_norm": 238.20631408691406, - "learning_rate": 2.4350810500131777e-05, - "loss": 76.2513, - "step": 139360 - }, - { - "epoch": 0.5630724354286776, - "grad_norm": 540.9037475585938, - "learning_rate": 2.4347321025067795e-05, - "loss": 59.1482, - "step": 139370 - }, - { - "epoch": 0.5631128366940452, - "grad_norm": 968.451904296875, - "learning_rate": 2.4343831562728135e-05, - "loss": 58.3164, - "step": 139380 - }, - { - "epoch": 0.5631532379594129, - "grad_norm": 870.2232666015625, - "learning_rate": 2.434034211318081e-05, - "loss": 71.8574, - "step": 139390 - }, - { - "epoch": 0.5631936392247805, - "grad_norm": 329.17626953125, - "learning_rate": 2.4336852676493847e-05, - "loss": 49.5366, - "step": 139400 - }, - { - "epoch": 0.5632340404901481, - "grad_norm": 534.2448120117188, - "learning_rate": 2.4333363252735297e-05, - "loss": 44.9457, - "step": 139410 - }, - { - "epoch": 0.5632744417555158, - "grad_norm": 509.2497863769531, - "learning_rate": 2.432987384197317e-05, - "loss": 75.4781, - "step": 139420 - }, - { - "epoch": 0.5633148430208834, - "grad_norm": 333.1900634765625, - "learning_rate": 2.432638444427549e-05, - "loss": 47.3443, - "step": 139430 - }, - { - "epoch": 0.5633552442862511, - "grad_norm": 676.5568237304688, - "learning_rate": 2.43228950597103e-05, - "loss": 42.6256, - "step": 139440 - }, - { - "epoch": 0.5633956455516187, - "grad_norm": 794.4973754882812, - "learning_rate": 2.4319405688345614e-05, - "loss": 51.409, - "step": 139450 - }, - { - "epoch": 0.5634360468169863, - "grad_norm": 753.9666137695312, - "learning_rate": 2.431591633024947e-05, - "loss": 55.2452, - "step": 139460 - }, - { - "epoch": 0.563476448082354, - "grad_norm": 753.4974975585938, - "learning_rate": 2.431242698548989e-05, - "loss": 51.5466, - "step": 139470 - }, - { - "epoch": 0.5635168493477216, - "grad_norm": 870.044189453125, - "learning_rate": 2.4308937654134893e-05, - "loss": 40.7293, - "step": 139480 - }, - { - "epoch": 0.5635572506130893, - "grad_norm": 375.4700012207031, - "learning_rate": 2.4305448336252518e-05, - "loss": 55.643, - "step": 139490 - }, - { - "epoch": 0.5635976518784568, - "grad_norm": 647.6185913085938, - "learning_rate": 2.4301959031910784e-05, - "loss": 44.7589, - "step": 139500 - }, - { - "epoch": 0.5636380531438244, - "grad_norm": 535.95947265625, - "learning_rate": 2.4298469741177708e-05, - "loss": 40.7305, - "step": 139510 - }, - { - "epoch": 0.5636784544091921, - "grad_norm": 1032.9677734375, - "learning_rate": 2.429498046412134e-05, - "loss": 43.6444, - "step": 139520 - }, - { - "epoch": 0.5637188556745597, - "grad_norm": 699.4422607421875, - "learning_rate": 2.4291491200809684e-05, - "loss": 42.5152, - "step": 139530 - }, - { - "epoch": 0.5637592569399273, - "grad_norm": 357.033935546875, - "learning_rate": 2.4288001951310775e-05, - "loss": 63.6277, - "step": 139540 - }, - { - "epoch": 0.563799658205295, - "grad_norm": 744.2186889648438, - "learning_rate": 2.4284512715692636e-05, - "loss": 50.3803, - "step": 139550 - }, - { - "epoch": 0.5638400594706626, - "grad_norm": 766.5017700195312, - "learning_rate": 2.428102349402328e-05, - "loss": 54.6011, - "step": 139560 - }, - { - "epoch": 0.5638804607360303, - "grad_norm": 793.3067626953125, - "learning_rate": 2.4277534286370752e-05, - "loss": 51.7648, - "step": 139570 - }, - { - "epoch": 0.5639208620013979, - "grad_norm": 1126.599609375, - "learning_rate": 2.4274045092803056e-05, - "loss": 55.7164, - "step": 139580 - }, - { - "epoch": 0.5639612632667655, - "grad_norm": 623.4227905273438, - "learning_rate": 2.427055591338823e-05, - "loss": 47.8686, - "step": 139590 - }, - { - "epoch": 0.5640016645321332, - "grad_norm": 767.97607421875, - "learning_rate": 2.4267066748194296e-05, - "loss": 36.6273, - "step": 139600 - }, - { - "epoch": 0.5640420657975008, - "grad_norm": 803.25927734375, - "learning_rate": 2.4263577597289267e-05, - "loss": 63.7557, - "step": 139610 - }, - { - "epoch": 0.5640824670628685, - "grad_norm": 576.4552001953125, - "learning_rate": 2.4260088460741175e-05, - "loss": 47.2425, - "step": 139620 - }, - { - "epoch": 0.564122868328236, - "grad_norm": 218.04110717773438, - "learning_rate": 2.4256599338618034e-05, - "loss": 56.5604, - "step": 139630 - }, - { - "epoch": 0.5641632695936036, - "grad_norm": 884.3212890625, - "learning_rate": 2.4253110230987878e-05, - "loss": 61.0225, - "step": 139640 - }, - { - "epoch": 0.5642036708589713, - "grad_norm": 1012.44189453125, - "learning_rate": 2.4249621137918722e-05, - "loss": 56.3514, - "step": 139650 - }, - { - "epoch": 0.5642440721243389, - "grad_norm": 636.1995239257812, - "learning_rate": 2.4246132059478578e-05, - "loss": 93.1031, - "step": 139660 - }, - { - "epoch": 0.5642844733897066, - "grad_norm": 1277.3994140625, - "learning_rate": 2.424264299573549e-05, - "loss": 71.3306, - "step": 139670 - }, - { - "epoch": 0.5643248746550742, - "grad_norm": 436.1991882324219, - "learning_rate": 2.4239153946757468e-05, - "loss": 52.4229, - "step": 139680 - }, - { - "epoch": 0.5643652759204418, - "grad_norm": 711.0999755859375, - "learning_rate": 2.4235664912612515e-05, - "loss": 48.8079, - "step": 139690 - }, - { - "epoch": 0.5644056771858095, - "grad_norm": 1402.82568359375, - "learning_rate": 2.423217589336868e-05, - "loss": 54.2262, - "step": 139700 - }, - { - "epoch": 0.5644460784511771, - "grad_norm": 1108.6195068359375, - "learning_rate": 2.422868688909398e-05, - "loss": 62.0169, - "step": 139710 - }, - { - "epoch": 0.5644864797165448, - "grad_norm": 677.495849609375, - "learning_rate": 2.4225197899856414e-05, - "loss": 91.8257, - "step": 139720 - }, - { - "epoch": 0.5645268809819124, - "grad_norm": 770.8883666992188, - "learning_rate": 2.422170892572402e-05, - "loss": 36.889, - "step": 139730 - }, - { - "epoch": 0.56456728224728, - "grad_norm": 1669.2225341796875, - "learning_rate": 2.4218219966764798e-05, - "loss": 69.1814, - "step": 139740 - }, - { - "epoch": 0.5646076835126476, - "grad_norm": 247.09124755859375, - "learning_rate": 2.4214731023046793e-05, - "loss": 50.9243, - "step": 139750 - }, - { - "epoch": 0.5646480847780152, - "grad_norm": 625.1920166015625, - "learning_rate": 2.4211242094638013e-05, - "loss": 85.2901, - "step": 139760 - }, - { - "epoch": 0.5646884860433828, - "grad_norm": 1005.7395629882812, - "learning_rate": 2.420775318160646e-05, - "loss": 67.7253, - "step": 139770 - }, - { - "epoch": 0.5647288873087505, - "grad_norm": 1117.547607421875, - "learning_rate": 2.4204264284020182e-05, - "loss": 71.7562, - "step": 139780 - }, - { - "epoch": 0.5647692885741181, - "grad_norm": 449.2171325683594, - "learning_rate": 2.4200775401947178e-05, - "loss": 50.0482, - "step": 139790 - }, - { - "epoch": 0.5648096898394858, - "grad_norm": 399.94256591796875, - "learning_rate": 2.4197286535455464e-05, - "loss": 49.0233, - "step": 139800 - }, - { - "epoch": 0.5648500911048534, - "grad_norm": 936.6152954101562, - "learning_rate": 2.419379768461307e-05, - "loss": 64.9566, - "step": 139810 - }, - { - "epoch": 0.564890492370221, - "grad_norm": 843.4839477539062, - "learning_rate": 2.419030884948799e-05, - "loss": 81.1626, - "step": 139820 - }, - { - "epoch": 0.5649308936355887, - "grad_norm": 725.7382202148438, - "learning_rate": 2.418682003014827e-05, - "loss": 49.8391, - "step": 139830 - }, - { - "epoch": 0.5649712949009563, - "grad_norm": 691.9060668945312, - "learning_rate": 2.418333122666191e-05, - "loss": 71.7343, - "step": 139840 - }, - { - "epoch": 0.565011696166324, - "grad_norm": 2030.63037109375, - "learning_rate": 2.417984243909692e-05, - "loss": 61.1143, - "step": 139850 - }, - { - "epoch": 0.5650520974316916, - "grad_norm": 636.2049560546875, - "learning_rate": 2.4176353667521335e-05, - "loss": 53.0477, - "step": 139860 - }, - { - "epoch": 0.5650924986970592, - "grad_norm": 443.2159118652344, - "learning_rate": 2.4172864912003158e-05, - "loss": 47.2716, - "step": 139870 - }, - { - "epoch": 0.5651328999624268, - "grad_norm": 727.209228515625, - "learning_rate": 2.4169376172610397e-05, - "loss": 65.8904, - "step": 139880 - }, - { - "epoch": 0.5651733012277944, - "grad_norm": 1317.1082763671875, - "learning_rate": 2.4165887449411084e-05, - "loss": 54.5445, - "step": 139890 - }, - { - "epoch": 0.565213702493162, - "grad_norm": 913.7978515625, - "learning_rate": 2.4162398742473214e-05, - "loss": 52.2337, - "step": 139900 - }, - { - "epoch": 0.5652541037585297, - "grad_norm": 667.3104858398438, - "learning_rate": 2.415891005186482e-05, - "loss": 47.764, - "step": 139910 - }, - { - "epoch": 0.5652945050238973, - "grad_norm": 442.9791259765625, - "learning_rate": 2.415542137765391e-05, - "loss": 53.1622, - "step": 139920 - }, - { - "epoch": 0.565334906289265, - "grad_norm": 817.6068115234375, - "learning_rate": 2.4151932719908483e-05, - "loss": 61.2308, - "step": 139930 - }, - { - "epoch": 0.5653753075546326, - "grad_norm": 594.9141845703125, - "learning_rate": 2.4148444078696575e-05, - "loss": 32.9943, - "step": 139940 - }, - { - "epoch": 0.5654157088200003, - "grad_norm": 689.52197265625, - "learning_rate": 2.4144955454086188e-05, - "loss": 54.7586, - "step": 139950 - }, - { - "epoch": 0.5654561100853679, - "grad_norm": 1552.0382080078125, - "learning_rate": 2.414146684614533e-05, - "loss": 60.1999, - "step": 139960 - }, - { - "epoch": 0.5654965113507355, - "grad_norm": 684.204833984375, - "learning_rate": 2.4137978254942018e-05, - "loss": 45.3223, - "step": 139970 - }, - { - "epoch": 0.5655369126161032, - "grad_norm": 389.4147644042969, - "learning_rate": 2.413448968054426e-05, - "loss": 40.717, - "step": 139980 - }, - { - "epoch": 0.5655773138814708, - "grad_norm": 486.74945068359375, - "learning_rate": 2.4131001123020083e-05, - "loss": 43.9063, - "step": 139990 - }, - { - "epoch": 0.5656177151468385, - "grad_norm": 2609.690673828125, - "learning_rate": 2.4127512582437485e-05, - "loss": 47.5488, - "step": 140000 - }, - { - "epoch": 0.565658116412206, - "grad_norm": 1355.923828125, - "learning_rate": 2.4124024058864464e-05, - "loss": 69.8932, - "step": 140010 - }, - { - "epoch": 0.5656985176775736, - "grad_norm": 567.3590698242188, - "learning_rate": 2.412053555236906e-05, - "loss": 63.0039, - "step": 140020 - }, - { - "epoch": 0.5657389189429413, - "grad_norm": 779.9495849609375, - "learning_rate": 2.4117047063019263e-05, - "loss": 39.6256, - "step": 140030 - }, - { - "epoch": 0.5657793202083089, - "grad_norm": 684.2831420898438, - "learning_rate": 2.411355859088308e-05, - "loss": 42.6509, - "step": 140040 - }, - { - "epoch": 0.5658197214736765, - "grad_norm": 1118.098876953125, - "learning_rate": 2.411007013602854e-05, - "loss": 68.7989, - "step": 140050 - }, - { - "epoch": 0.5658601227390442, - "grad_norm": 652.873046875, - "learning_rate": 2.4106581698523635e-05, - "loss": 61.522, - "step": 140060 - }, - { - "epoch": 0.5659005240044118, - "grad_norm": 1058.7735595703125, - "learning_rate": 2.4103093278436386e-05, - "loss": 60.4243, - "step": 140070 - }, - { - "epoch": 0.5659409252697795, - "grad_norm": 875.0758056640625, - "learning_rate": 2.4099604875834795e-05, - "loss": 60.9868, - "step": 140080 - }, - { - "epoch": 0.5659813265351471, - "grad_norm": 504.15289306640625, - "learning_rate": 2.4096116490786856e-05, - "loss": 53.1008, - "step": 140090 - }, - { - "epoch": 0.5660217278005147, - "grad_norm": 647.9260864257812, - "learning_rate": 2.4092628123360608e-05, - "loss": 44.3043, - "step": 140100 - }, - { - "epoch": 0.5660621290658824, - "grad_norm": 445.2146301269531, - "learning_rate": 2.4089139773624027e-05, - "loss": 32.4979, - "step": 140110 - }, - { - "epoch": 0.56610253033125, - "grad_norm": 583.020751953125, - "learning_rate": 2.408565144164515e-05, - "loss": 65.0225, - "step": 140120 - }, - { - "epoch": 0.5661429315966177, - "grad_norm": 782.4393310546875, - "learning_rate": 2.4082163127491967e-05, - "loss": 49.7005, - "step": 140130 - }, - { - "epoch": 0.5661833328619852, - "grad_norm": 1884.6944580078125, - "learning_rate": 2.407867483123248e-05, - "loss": 82.0714, - "step": 140140 - }, - { - "epoch": 0.5662237341273528, - "grad_norm": 710.6357421875, - "learning_rate": 2.4075186552934707e-05, - "loss": 69.646, - "step": 140150 - }, - { - "epoch": 0.5662641353927205, - "grad_norm": 165.2595672607422, - "learning_rate": 2.4071698292666648e-05, - "loss": 46.273, - "step": 140160 - }, - { - "epoch": 0.5663045366580881, - "grad_norm": 537.0858764648438, - "learning_rate": 2.4068210050496298e-05, - "loss": 76.4967, - "step": 140170 - }, - { - "epoch": 0.5663449379234557, - "grad_norm": 792.4481811523438, - "learning_rate": 2.406472182649168e-05, - "loss": 43.8985, - "step": 140180 - }, - { - "epoch": 0.5663853391888234, - "grad_norm": 611.4661865234375, - "learning_rate": 2.406123362072079e-05, - "loss": 48.2827, - "step": 140190 - }, - { - "epoch": 0.566425740454191, - "grad_norm": 1082.0413818359375, - "learning_rate": 2.4057745433251635e-05, - "loss": 72.3503, - "step": 140200 - }, - { - "epoch": 0.5664661417195587, - "grad_norm": 902.6494750976562, - "learning_rate": 2.4054257264152226e-05, - "loss": 44.0646, - "step": 140210 - }, - { - "epoch": 0.5665065429849263, - "grad_norm": 738.3258666992188, - "learning_rate": 2.405076911349055e-05, - "loss": 44.8827, - "step": 140220 - }, - { - "epoch": 0.566546944250294, - "grad_norm": 540.25732421875, - "learning_rate": 2.404728098133462e-05, - "loss": 55.6525, - "step": 140230 - }, - { - "epoch": 0.5665873455156616, - "grad_norm": 646.385498046875, - "learning_rate": 2.4043792867752444e-05, - "loss": 37.5222, - "step": 140240 - }, - { - "epoch": 0.5666277467810292, - "grad_norm": 320.0557861328125, - "learning_rate": 2.4040304772812002e-05, - "loss": 45.5283, - "step": 140250 - }, - { - "epoch": 0.5666681480463968, - "grad_norm": 398.5991516113281, - "learning_rate": 2.4036816696581326e-05, - "loss": 58.8736, - "step": 140260 - }, - { - "epoch": 0.5667085493117644, - "grad_norm": 558.6793212890625, - "learning_rate": 2.4033328639128394e-05, - "loss": 57.9081, - "step": 140270 - }, - { - "epoch": 0.566748950577132, - "grad_norm": 3701.99072265625, - "learning_rate": 2.4029840600521227e-05, - "loss": 52.849, - "step": 140280 - }, - { - "epoch": 0.5667893518424997, - "grad_norm": 725.6563110351562, - "learning_rate": 2.4026352580827822e-05, - "loss": 49.8771, - "step": 140290 - }, - { - "epoch": 0.5668297531078673, - "grad_norm": 4818.69287109375, - "learning_rate": 2.4022864580116164e-05, - "loss": 81.0483, - "step": 140300 - }, - { - "epoch": 0.566870154373235, - "grad_norm": 231.59661865234375, - "learning_rate": 2.4019376598454266e-05, - "loss": 36.7556, - "step": 140310 - }, - { - "epoch": 0.5669105556386026, - "grad_norm": 1639.919921875, - "learning_rate": 2.401588863591013e-05, - "loss": 38.1198, - "step": 140320 - }, - { - "epoch": 0.5669509569039702, - "grad_norm": 1159.2486572265625, - "learning_rate": 2.4012400692551746e-05, - "loss": 67.2019, - "step": 140330 - }, - { - "epoch": 0.5669913581693379, - "grad_norm": 452.9656982421875, - "learning_rate": 2.4008912768447127e-05, - "loss": 60.1592, - "step": 140340 - }, - { - "epoch": 0.5670317594347055, - "grad_norm": 194.69638061523438, - "learning_rate": 2.4005424863664254e-05, - "loss": 82.0656, - "step": 140350 - }, - { - "epoch": 0.5670721607000732, - "grad_norm": 792.1279907226562, - "learning_rate": 2.4001936978271144e-05, - "loss": 60.3676, - "step": 140360 - }, - { - "epoch": 0.5671125619654408, - "grad_norm": 1072.4195556640625, - "learning_rate": 2.3998449112335784e-05, - "loss": 54.9496, - "step": 140370 - }, - { - "epoch": 0.5671529632308084, - "grad_norm": 667.0706787109375, - "learning_rate": 2.3994961265926166e-05, - "loss": 74.0401, - "step": 140380 - }, - { - "epoch": 0.567193364496176, - "grad_norm": 1137.287109375, - "learning_rate": 2.3991473439110308e-05, - "loss": 47.5522, - "step": 140390 - }, - { - "epoch": 0.5672337657615436, - "grad_norm": 685.4043579101562, - "learning_rate": 2.398798563195619e-05, - "loss": 63.7304, - "step": 140400 - }, - { - "epoch": 0.5672741670269112, - "grad_norm": 426.1322937011719, - "learning_rate": 2.3984497844531814e-05, - "loss": 53.3877, - "step": 140410 - }, - { - "epoch": 0.5673145682922789, - "grad_norm": 226.16949462890625, - "learning_rate": 2.3981010076905174e-05, - "loss": 56.9861, - "step": 140420 - }, - { - "epoch": 0.5673549695576465, - "grad_norm": 736.9807739257812, - "learning_rate": 2.3977522329144263e-05, - "loss": 42.9066, - "step": 140430 - }, - { - "epoch": 0.5673953708230142, - "grad_norm": 768.5642700195312, - "learning_rate": 2.3974034601317086e-05, - "loss": 75.2792, - "step": 140440 - }, - { - "epoch": 0.5674357720883818, - "grad_norm": 561.4569702148438, - "learning_rate": 2.3970546893491637e-05, - "loss": 58.4712, - "step": 140450 - }, - { - "epoch": 0.5674761733537494, - "grad_norm": 2184.9736328125, - "learning_rate": 2.396705920573589e-05, - "loss": 69.7435, - "step": 140460 - }, - { - "epoch": 0.5675165746191171, - "grad_norm": 1226.76904296875, - "learning_rate": 2.3963571538117873e-05, - "loss": 86.3856, - "step": 140470 - }, - { - "epoch": 0.5675569758844847, - "grad_norm": 888.7282104492188, - "learning_rate": 2.3960083890705557e-05, - "loss": 46.3673, - "step": 140480 - }, - { - "epoch": 0.5675973771498524, - "grad_norm": 884.1262817382812, - "learning_rate": 2.395659626356694e-05, - "loss": 54.7737, - "step": 140490 - }, - { - "epoch": 0.56763777841522, - "grad_norm": 250.46376037597656, - "learning_rate": 2.3953108656770016e-05, - "loss": 36.1049, - "step": 140500 - }, - { - "epoch": 0.5676781796805876, - "grad_norm": 977.3258056640625, - "learning_rate": 2.394962107038277e-05, - "loss": 64.6896, - "step": 140510 - }, - { - "epoch": 0.5677185809459552, - "grad_norm": 875.897216796875, - "learning_rate": 2.394613350447321e-05, - "loss": 51.8041, - "step": 140520 - }, - { - "epoch": 0.5677589822113228, - "grad_norm": 382.20147705078125, - "learning_rate": 2.3942645959109324e-05, - "loss": 41.5387, - "step": 140530 - }, - { - "epoch": 0.5677993834766905, - "grad_norm": 0.0, - "learning_rate": 2.393915843435909e-05, - "loss": 51.2521, - "step": 140540 - }, - { - "epoch": 0.5678397847420581, - "grad_norm": 1551.5985107421875, - "learning_rate": 2.3935670930290512e-05, - "loss": 53.0409, - "step": 140550 - }, - { - "epoch": 0.5678801860074257, - "grad_norm": 665.3966674804688, - "learning_rate": 2.3932183446971583e-05, - "loss": 44.8342, - "step": 140560 - }, - { - "epoch": 0.5679205872727934, - "grad_norm": 786.4059448242188, - "learning_rate": 2.392869598447028e-05, - "loss": 54.9068, - "step": 140570 - }, - { - "epoch": 0.567960988538161, - "grad_norm": 685.6342163085938, - "learning_rate": 2.3925208542854608e-05, - "loss": 44.5049, - "step": 140580 - }, - { - "epoch": 0.5680013898035287, - "grad_norm": 780.075927734375, - "learning_rate": 2.3921721122192535e-05, - "loss": 35.4166, - "step": 140590 - }, - { - "epoch": 0.5680417910688963, - "grad_norm": 994.8262329101562, - "learning_rate": 2.391823372255208e-05, - "loss": 35.6253, - "step": 140600 - }, - { - "epoch": 0.5680821923342639, - "grad_norm": 468.07171630859375, - "learning_rate": 2.391474634400121e-05, - "loss": 56.5801, - "step": 140610 - }, - { - "epoch": 0.5681225935996316, - "grad_norm": 423.82196044921875, - "learning_rate": 2.3911258986607908e-05, - "loss": 43.0342, - "step": 140620 - }, - { - "epoch": 0.5681629948649992, - "grad_norm": 627.2518920898438, - "learning_rate": 2.3907771650440187e-05, - "loss": 41.6314, - "step": 140630 - }, - { - "epoch": 0.5682033961303669, - "grad_norm": 609.6311645507812, - "learning_rate": 2.390428433556601e-05, - "loss": 46.9864, - "step": 140640 - }, - { - "epoch": 0.5682437973957344, - "grad_norm": 608.6633911132812, - "learning_rate": 2.3900797042053382e-05, - "loss": 72.6592, - "step": 140650 - }, - { - "epoch": 0.568284198661102, - "grad_norm": 491.6672668457031, - "learning_rate": 2.3897309769970276e-05, - "loss": 37.3915, - "step": 140660 - }, - { - "epoch": 0.5683245999264697, - "grad_norm": 307.1319274902344, - "learning_rate": 2.3893822519384684e-05, - "loss": 51.833, - "step": 140670 - }, - { - "epoch": 0.5683650011918373, - "grad_norm": 899.0912475585938, - "learning_rate": 2.3890335290364595e-05, - "loss": 85.2399, - "step": 140680 - }, - { - "epoch": 0.568405402457205, - "grad_norm": 365.262451171875, - "learning_rate": 2.3886848082977995e-05, - "loss": 41.261, - "step": 140690 - }, - { - "epoch": 0.5684458037225726, - "grad_norm": 606.3248901367188, - "learning_rate": 2.388336089729285e-05, - "loss": 41.9415, - "step": 140700 - }, - { - "epoch": 0.5684862049879402, - "grad_norm": 764.3125, - "learning_rate": 2.387987373337717e-05, - "loss": 56.6378, - "step": 140710 - }, - { - "epoch": 0.5685266062533079, - "grad_norm": 835.7235107421875, - "learning_rate": 2.387638659129892e-05, - "loss": 41.2721, - "step": 140720 - }, - { - "epoch": 0.5685670075186755, - "grad_norm": 497.7888488769531, - "learning_rate": 2.3872899471126102e-05, - "loss": 51.5741, - "step": 140730 - }, - { - "epoch": 0.5686074087840431, - "grad_norm": 536.2766723632812, - "learning_rate": 2.3869412372926687e-05, - "loss": 43.3223, - "step": 140740 - }, - { - "epoch": 0.5686478100494108, - "grad_norm": 869.4346313476562, - "learning_rate": 2.386592529676866e-05, - "loss": 45.3884, - "step": 140750 - }, - { - "epoch": 0.5686882113147784, - "grad_norm": 813.7835083007812, - "learning_rate": 2.386243824272e-05, - "loss": 34.9353, - "step": 140760 - }, - { - "epoch": 0.5687286125801461, - "grad_norm": 553.7960815429688, - "learning_rate": 2.38589512108487e-05, - "loss": 55.3369, - "step": 140770 - }, - { - "epoch": 0.5687690138455136, - "grad_norm": 370.2120666503906, - "learning_rate": 2.3855464201222716e-05, - "loss": 55.3215, - "step": 140780 - }, - { - "epoch": 0.5688094151108812, - "grad_norm": 0.0, - "learning_rate": 2.385197721391006e-05, - "loss": 50.498, - "step": 140790 - }, - { - "epoch": 0.5688498163762489, - "grad_norm": 461.68670654296875, - "learning_rate": 2.384849024897869e-05, - "loss": 57.4703, - "step": 140800 - }, - { - "epoch": 0.5688902176416165, - "grad_norm": 1943.5701904296875, - "learning_rate": 2.3845003306496606e-05, - "loss": 68.1128, - "step": 140810 - }, - { - "epoch": 0.5689306189069842, - "grad_norm": 1182.188720703125, - "learning_rate": 2.384151638653178e-05, - "loss": 59.1778, - "step": 140820 - }, - { - "epoch": 0.5689710201723518, - "grad_norm": 343.8730773925781, - "learning_rate": 2.3838029489152176e-05, - "loss": 57.4862, - "step": 140830 - }, - { - "epoch": 0.5690114214377194, - "grad_norm": 609.5772094726562, - "learning_rate": 2.38345426144258e-05, - "loss": 50.4306, - "step": 140840 - }, - { - "epoch": 0.5690518227030871, - "grad_norm": 1227.27734375, - "learning_rate": 2.3831055762420607e-05, - "loss": 50.9431, - "step": 140850 - }, - { - "epoch": 0.5690922239684547, - "grad_norm": 471.8846435546875, - "learning_rate": 2.3827568933204575e-05, - "loss": 59.7956, - "step": 140860 - }, - { - "epoch": 0.5691326252338224, - "grad_norm": 601.4916381835938, - "learning_rate": 2.3824082126845703e-05, - "loss": 55.0637, - "step": 140870 - }, - { - "epoch": 0.56917302649919, - "grad_norm": 1994.75830078125, - "learning_rate": 2.3820595343411944e-05, - "loss": 69.4372, - "step": 140880 - }, - { - "epoch": 0.5692134277645576, - "grad_norm": 927.0828247070312, - "learning_rate": 2.3817108582971298e-05, - "loss": 60.4966, - "step": 140890 - }, - { - "epoch": 0.5692538290299252, - "grad_norm": 859.7433471679688, - "learning_rate": 2.3813621845591727e-05, - "loss": 42.6294, - "step": 140900 - }, - { - "epoch": 0.5692942302952928, - "grad_norm": 626.2705688476562, - "learning_rate": 2.3810135131341203e-05, - "loss": 91.8611, - "step": 140910 - }, - { - "epoch": 0.5693346315606604, - "grad_norm": 4675.134765625, - "learning_rate": 2.3806648440287714e-05, - "loss": 105.6002, - "step": 140920 - }, - { - "epoch": 0.5693750328260281, - "grad_norm": 1433.595947265625, - "learning_rate": 2.380316177249923e-05, - "loss": 60.9849, - "step": 140930 - }, - { - "epoch": 0.5694154340913957, - "grad_norm": 416.4643859863281, - "learning_rate": 2.3799675128043707e-05, - "loss": 56.4546, - "step": 140940 - }, - { - "epoch": 0.5694558353567634, - "grad_norm": 858.3992919921875, - "learning_rate": 2.3796188506989153e-05, - "loss": 51.5956, - "step": 140950 - }, - { - "epoch": 0.569496236622131, - "grad_norm": 1188.315185546875, - "learning_rate": 2.379270190940351e-05, - "loss": 63.5035, - "step": 140960 - }, - { - "epoch": 0.5695366378874986, - "grad_norm": 919.1412353515625, - "learning_rate": 2.3789215335354774e-05, - "loss": 114.3202, - "step": 140970 - }, - { - "epoch": 0.5695770391528663, - "grad_norm": 594.3109741210938, - "learning_rate": 2.378572878491091e-05, - "loss": 78.9417, - "step": 140980 - }, - { - "epoch": 0.5696174404182339, - "grad_norm": 725.9608154296875, - "learning_rate": 2.3782242258139883e-05, - "loss": 46.3523, - "step": 140990 - }, - { - "epoch": 0.5696578416836016, - "grad_norm": 424.02703857421875, - "learning_rate": 2.377875575510967e-05, - "loss": 50.4055, - "step": 141000 - }, - { - "epoch": 0.5696982429489692, - "grad_norm": 680.8141479492188, - "learning_rate": 2.3775269275888248e-05, - "loss": 51.2761, - "step": 141010 - }, - { - "epoch": 0.5697386442143368, - "grad_norm": 1021.9234008789062, - "learning_rate": 2.3771782820543577e-05, - "loss": 74.4177, - "step": 141020 - }, - { - "epoch": 0.5697790454797044, - "grad_norm": 1398.9102783203125, - "learning_rate": 2.3768296389143636e-05, - "loss": 75.1736, - "step": 141030 - }, - { - "epoch": 0.569819446745072, - "grad_norm": 403.35345458984375, - "learning_rate": 2.376480998175638e-05, - "loss": 36.8003, - "step": 141040 - }, - { - "epoch": 0.5698598480104397, - "grad_norm": 1651.0067138671875, - "learning_rate": 2.3761323598449803e-05, - "loss": 64.2051, - "step": 141050 - }, - { - "epoch": 0.5699002492758073, - "grad_norm": 802.9590454101562, - "learning_rate": 2.375783723929186e-05, - "loss": 64.763, - "step": 141060 - }, - { - "epoch": 0.5699406505411749, - "grad_norm": 294.2562255859375, - "learning_rate": 2.3754350904350502e-05, - "loss": 70.0117, - "step": 141070 - }, - { - "epoch": 0.5699810518065426, - "grad_norm": 493.0975341796875, - "learning_rate": 2.3750864593693732e-05, - "loss": 48.1589, - "step": 141080 - }, - { - "epoch": 0.5700214530719102, - "grad_norm": 789.9282836914062, - "learning_rate": 2.37473783073895e-05, - "loss": 74.4661, - "step": 141090 - }, - { - "epoch": 0.5700618543372779, - "grad_norm": 655.0347900390625, - "learning_rate": 2.3743892045505764e-05, - "loss": 55.9553, - "step": 141100 - }, - { - "epoch": 0.5701022556026455, - "grad_norm": 469.515380859375, - "learning_rate": 2.3740405808110504e-05, - "loss": 44.5552, - "step": 141110 - }, - { - "epoch": 0.5701426568680131, - "grad_norm": 2083.58251953125, - "learning_rate": 2.3736919595271677e-05, - "loss": 63.5822, - "step": 141120 - }, - { - "epoch": 0.5701830581333808, - "grad_norm": 913.0497436523438, - "learning_rate": 2.3733433407057257e-05, - "loss": 43.4769, - "step": 141130 - }, - { - "epoch": 0.5702234593987484, - "grad_norm": 1263.47119140625, - "learning_rate": 2.372994724353521e-05, - "loss": 79.1157, - "step": 141140 - }, - { - "epoch": 0.570263860664116, - "grad_norm": 968.7415771484375, - "learning_rate": 2.372646110477348e-05, - "loss": 53.7039, - "step": 141150 - }, - { - "epoch": 0.5703042619294836, - "grad_norm": 671.2269287109375, - "learning_rate": 2.3722974990840058e-05, - "loss": 48.0864, - "step": 141160 - }, - { - "epoch": 0.5703446631948512, - "grad_norm": 968.9181518554688, - "learning_rate": 2.3719488901802893e-05, - "loss": 39.3307, - "step": 141170 - }, - { - "epoch": 0.5703850644602189, - "grad_norm": 480.0361022949219, - "learning_rate": 2.3716002837729954e-05, - "loss": 68.5519, - "step": 141180 - }, - { - "epoch": 0.5704254657255865, - "grad_norm": 835.8157958984375, - "learning_rate": 2.3712516798689203e-05, - "loss": 57.1123, - "step": 141190 - }, - { - "epoch": 0.5704658669909541, - "grad_norm": 438.4929504394531, - "learning_rate": 2.3709030784748587e-05, - "loss": 38.9969, - "step": 141200 - }, - { - "epoch": 0.5705062682563218, - "grad_norm": 859.7994995117188, - "learning_rate": 2.370554479597609e-05, - "loss": 78.8213, - "step": 141210 - }, - { - "epoch": 0.5705466695216894, - "grad_norm": 1708.9825439453125, - "learning_rate": 2.3702058832439667e-05, - "loss": 62.3029, - "step": 141220 - }, - { - "epoch": 0.5705870707870571, - "grad_norm": 777.35400390625, - "learning_rate": 2.3698572894207262e-05, - "loss": 66.144, - "step": 141230 - }, - { - "epoch": 0.5706274720524247, - "grad_norm": 1250.974853515625, - "learning_rate": 2.369508698134686e-05, - "loss": 59.3208, - "step": 141240 - }, - { - "epoch": 0.5706678733177923, - "grad_norm": 541.2228393554688, - "learning_rate": 2.3691601093926404e-05, - "loss": 43.8948, - "step": 141250 - }, - { - "epoch": 0.57070827458316, - "grad_norm": 1093.102294921875, - "learning_rate": 2.3688115232013866e-05, - "loss": 59.8695, - "step": 141260 - }, - { - "epoch": 0.5707486758485276, - "grad_norm": 675.7007446289062, - "learning_rate": 2.3684629395677194e-05, - "loss": 70.8972, - "step": 141270 - }, - { - "epoch": 0.5707890771138953, - "grad_norm": 751.9618530273438, - "learning_rate": 2.368114358498434e-05, - "loss": 43.2623, - "step": 141280 - }, - { - "epoch": 0.5708294783792628, - "grad_norm": 923.1489868164062, - "learning_rate": 2.367765780000328e-05, - "loss": 68.1855, - "step": 141290 - }, - { - "epoch": 0.5708698796446304, - "grad_norm": 514.2269897460938, - "learning_rate": 2.367417204080196e-05, - "loss": 86.9622, - "step": 141300 - }, - { - "epoch": 0.5709102809099981, - "grad_norm": 423.8339538574219, - "learning_rate": 2.3670686307448333e-05, - "loss": 36.2946, - "step": 141310 - }, - { - "epoch": 0.5709506821753657, - "grad_norm": 883.6669311523438, - "learning_rate": 2.366720060001037e-05, - "loss": 67.147, - "step": 141320 - }, - { - "epoch": 0.5709910834407333, - "grad_norm": 901.2623291015625, - "learning_rate": 2.366371491855601e-05, - "loss": 44.5435, - "step": 141330 - }, - { - "epoch": 0.571031484706101, - "grad_norm": 574.0656127929688, - "learning_rate": 2.3660229263153215e-05, - "loss": 68.7471, - "step": 141340 - }, - { - "epoch": 0.5710718859714686, - "grad_norm": 710.9147338867188, - "learning_rate": 2.3656743633869948e-05, - "loss": 51.1492, - "step": 141350 - }, - { - "epoch": 0.5711122872368363, - "grad_norm": 663.8213500976562, - "learning_rate": 2.365325803077415e-05, - "loss": 38.9658, - "step": 141360 - }, - { - "epoch": 0.5711526885022039, - "grad_norm": 0.0, - "learning_rate": 2.3649772453933784e-05, - "loss": 47.6883, - "step": 141370 - }, - { - "epoch": 0.5711930897675715, - "grad_norm": 1073.9561767578125, - "learning_rate": 2.36462869034168e-05, - "loss": 55.3894, - "step": 141380 - }, - { - "epoch": 0.5712334910329392, - "grad_norm": 463.0362243652344, - "learning_rate": 2.364280137929114e-05, - "loss": 54.0964, - "step": 141390 - }, - { - "epoch": 0.5712738922983068, - "grad_norm": 1073.5634765625, - "learning_rate": 2.3639315881624777e-05, - "loss": 32.8544, - "step": 141400 - }, - { - "epoch": 0.5713142935636745, - "grad_norm": 334.5204162597656, - "learning_rate": 2.3635830410485638e-05, - "loss": 59.4493, - "step": 141410 - }, - { - "epoch": 0.571354694829042, - "grad_norm": 698.5248413085938, - "learning_rate": 2.36323449659417e-05, - "loss": 76.9526, - "step": 141420 - }, - { - "epoch": 0.5713950960944096, - "grad_norm": 510.2958068847656, - "learning_rate": 2.36288595480609e-05, - "loss": 51.604, - "step": 141430 - }, - { - "epoch": 0.5714354973597773, - "grad_norm": 393.1766052246094, - "learning_rate": 2.3625374156911185e-05, - "loss": 30.1209, - "step": 141440 - }, - { - "epoch": 0.5714758986251449, - "grad_norm": 996.3530883789062, - "learning_rate": 2.3621888792560515e-05, - "loss": 94.3675, - "step": 141450 - }, - { - "epoch": 0.5715162998905126, - "grad_norm": 583.70458984375, - "learning_rate": 2.361840345507683e-05, - "loss": 57.4963, - "step": 141460 - }, - { - "epoch": 0.5715567011558802, - "grad_norm": 797.1538696289062, - "learning_rate": 2.361491814452807e-05, - "loss": 36.3796, - "step": 141470 - }, - { - "epoch": 0.5715971024212478, - "grad_norm": 973.2113647460938, - "learning_rate": 2.3611432860982204e-05, - "loss": 51.5345, - "step": 141480 - }, - { - "epoch": 0.5716375036866155, - "grad_norm": 1032.868896484375, - "learning_rate": 2.360794760450716e-05, - "loss": 46.4819, - "step": 141490 - }, - { - "epoch": 0.5716779049519831, - "grad_norm": 393.41485595703125, - "learning_rate": 2.3604462375170906e-05, - "loss": 34.917, - "step": 141500 - }, - { - "epoch": 0.5717183062173508, - "grad_norm": 296.49212646484375, - "learning_rate": 2.3600977173041374e-05, - "loss": 51.7042, - "step": 141510 - }, - { - "epoch": 0.5717587074827184, - "grad_norm": 1662.1243896484375, - "learning_rate": 2.3597491998186506e-05, - "loss": 45.6466, - "step": 141520 - }, - { - "epoch": 0.571799108748086, - "grad_norm": 413.155029296875, - "learning_rate": 2.3594006850674262e-05, - "loss": 59.2692, - "step": 141530 - }, - { - "epoch": 0.5718395100134536, - "grad_norm": 1063.03076171875, - "learning_rate": 2.359052173057258e-05, - "loss": 57.3971, - "step": 141540 - }, - { - "epoch": 0.5718799112788212, - "grad_norm": 440.7333679199219, - "learning_rate": 2.3587036637949388e-05, - "loss": 41.32, - "step": 141550 - }, - { - "epoch": 0.5719203125441888, - "grad_norm": 868.6510620117188, - "learning_rate": 2.3583551572872656e-05, - "loss": 56.1956, - "step": 141560 - }, - { - "epoch": 0.5719607138095565, - "grad_norm": 1081.4976806640625, - "learning_rate": 2.3580066535410307e-05, - "loss": 45.4585, - "step": 141570 - }, - { - "epoch": 0.5720011150749241, - "grad_norm": 320.47918701171875, - "learning_rate": 2.35765815256303e-05, - "loss": 53.1529, - "step": 141580 - }, - { - "epoch": 0.5720415163402918, - "grad_norm": 540.5079956054688, - "learning_rate": 2.3573096543600567e-05, - "loss": 33.4318, - "step": 141590 - }, - { - "epoch": 0.5720819176056594, - "grad_norm": 695.275146484375, - "learning_rate": 2.3569611589389047e-05, - "loss": 55.8058, - "step": 141600 - }, - { - "epoch": 0.572122318871027, - "grad_norm": 744.6594848632812, - "learning_rate": 2.3566126663063695e-05, - "loss": 69.8493, - "step": 141610 - }, - { - "epoch": 0.5721627201363947, - "grad_norm": 1078.035400390625, - "learning_rate": 2.3562641764692433e-05, - "loss": 42.8643, - "step": 141620 - }, - { - "epoch": 0.5722031214017623, - "grad_norm": 1568.65625, - "learning_rate": 2.3559156894343213e-05, - "loss": 76.2597, - "step": 141630 - }, - { - "epoch": 0.57224352266713, - "grad_norm": 221.61920166015625, - "learning_rate": 2.355567205208397e-05, - "loss": 50.2042, - "step": 141640 - }, - { - "epoch": 0.5722839239324976, - "grad_norm": 523.4913940429688, - "learning_rate": 2.355218723798264e-05, - "loss": 62.5304, - "step": 141650 - }, - { - "epoch": 0.5723243251978652, - "grad_norm": 475.74395751953125, - "learning_rate": 2.3548702452107175e-05, - "loss": 43.9872, - "step": 141660 - }, - { - "epoch": 0.5723647264632328, - "grad_norm": 1220.044921875, - "learning_rate": 2.3545217694525498e-05, - "loss": 67.9373, - "step": 141670 - }, - { - "epoch": 0.5724051277286004, - "grad_norm": 1851.209228515625, - "learning_rate": 2.3541732965305543e-05, - "loss": 57.7291, - "step": 141680 - }, - { - "epoch": 0.5724455289939681, - "grad_norm": 757.2409057617188, - "learning_rate": 2.3538248264515266e-05, - "loss": 49.6187, - "step": 141690 - }, - { - "epoch": 0.5724859302593357, - "grad_norm": 5554.1015625, - "learning_rate": 2.3534763592222586e-05, - "loss": 68.57, - "step": 141700 - }, - { - "epoch": 0.5725263315247033, - "grad_norm": 962.51611328125, - "learning_rate": 2.353127894849545e-05, - "loss": 64.5327, - "step": 141710 - }, - { - "epoch": 0.572566732790071, - "grad_norm": 1030.315673828125, - "learning_rate": 2.3527794333401786e-05, - "loss": 101.8608, - "step": 141720 - }, - { - "epoch": 0.5726071340554386, - "grad_norm": 1969.6497802734375, - "learning_rate": 2.3524309747009522e-05, - "loss": 75.3899, - "step": 141730 - }, - { - "epoch": 0.5726475353208063, - "grad_norm": 591.5418701171875, - "learning_rate": 2.3520825189386604e-05, - "loss": 48.2578, - "step": 141740 - }, - { - "epoch": 0.5726879365861739, - "grad_norm": 844.501953125, - "learning_rate": 2.3517340660600964e-05, - "loss": 56.6854, - "step": 141750 - }, - { - "epoch": 0.5727283378515415, - "grad_norm": 694.3187866210938, - "learning_rate": 2.3513856160720522e-05, - "loss": 81.1811, - "step": 141760 - }, - { - "epoch": 0.5727687391169092, - "grad_norm": 1127.83544921875, - "learning_rate": 2.3510371689813226e-05, - "loss": 62.8335, - "step": 141770 - }, - { - "epoch": 0.5728091403822768, - "grad_norm": 544.0736694335938, - "learning_rate": 2.3506887247947e-05, - "loss": 46.8202, - "step": 141780 - }, - { - "epoch": 0.5728495416476445, - "grad_norm": 1501.36083984375, - "learning_rate": 2.3503402835189778e-05, - "loss": 52.4827, - "step": 141790 - }, - { - "epoch": 0.572889942913012, - "grad_norm": 1339.47705078125, - "learning_rate": 2.349991845160949e-05, - "loss": 54.5597, - "step": 141800 - }, - { - "epoch": 0.5729303441783796, - "grad_norm": 1593.72705078125, - "learning_rate": 2.349643409727405e-05, - "loss": 53.1381, - "step": 141810 - }, - { - "epoch": 0.5729707454437473, - "grad_norm": 657.8176879882812, - "learning_rate": 2.3492949772251414e-05, - "loss": 51.3417, - "step": 141820 - }, - { - "epoch": 0.5730111467091149, - "grad_norm": 1007.0821533203125, - "learning_rate": 2.3489465476609502e-05, - "loss": 77.4722, - "step": 141830 - }, - { - "epoch": 0.5730515479744825, - "grad_norm": 613.6268920898438, - "learning_rate": 2.348598121041622e-05, - "loss": 64.9442, - "step": 141840 - }, - { - "epoch": 0.5730919492398502, - "grad_norm": 847.4960327148438, - "learning_rate": 2.348249697373953e-05, - "loss": 36.6914, - "step": 141850 - }, - { - "epoch": 0.5731323505052178, - "grad_norm": 692.1813354492188, - "learning_rate": 2.3479012766647335e-05, - "loss": 68.5336, - "step": 141860 - }, - { - "epoch": 0.5731727517705855, - "grad_norm": 813.51123046875, - "learning_rate": 2.3475528589207576e-05, - "loss": 48.4441, - "step": 141870 - }, - { - "epoch": 0.5732131530359531, - "grad_norm": 572.866943359375, - "learning_rate": 2.3472044441488174e-05, - "loss": 45.9235, - "step": 141880 - }, - { - "epoch": 0.5732535543013207, - "grad_norm": 273.5608215332031, - "learning_rate": 2.3468560323557036e-05, - "loss": 57.0392, - "step": 141890 - }, - { - "epoch": 0.5732939555666884, - "grad_norm": 889.541015625, - "learning_rate": 2.3465076235482116e-05, - "loss": 33.2406, - "step": 141900 - }, - { - "epoch": 0.573334356832056, - "grad_norm": 0.0, - "learning_rate": 2.3461592177331325e-05, - "loss": 46.7832, - "step": 141910 - }, - { - "epoch": 0.5733747580974237, - "grad_norm": 558.1715698242188, - "learning_rate": 2.345810814917258e-05, - "loss": 43.9247, - "step": 141920 - }, - { - "epoch": 0.5734151593627912, - "grad_norm": 800.1122436523438, - "learning_rate": 2.3454624151073815e-05, - "loss": 51.9048, - "step": 141930 - }, - { - "epoch": 0.5734555606281588, - "grad_norm": 1590.205078125, - "learning_rate": 2.3451140183102945e-05, - "loss": 58.2996, - "step": 141940 - }, - { - "epoch": 0.5734959618935265, - "grad_norm": 594.8118896484375, - "learning_rate": 2.34476562453279e-05, - "loss": 56.2581, - "step": 141950 - }, - { - "epoch": 0.5735363631588941, - "grad_norm": 734.5135498046875, - "learning_rate": 2.3444172337816592e-05, - "loss": 51.2526, - "step": 141960 - }, - { - "epoch": 0.5735767644242618, - "grad_norm": 316.3956298828125, - "learning_rate": 2.344068846063694e-05, - "loss": 46.3092, - "step": 141970 - }, - { - "epoch": 0.5736171656896294, - "grad_norm": 703.2752075195312, - "learning_rate": 2.343720461385688e-05, - "loss": 38.9623, - "step": 141980 - }, - { - "epoch": 0.573657566954997, - "grad_norm": 471.1736755371094, - "learning_rate": 2.3433720797544316e-05, - "loss": 47.8452, - "step": 141990 - }, - { - "epoch": 0.5736979682203647, - "grad_norm": 1363.9534912109375, - "learning_rate": 2.3430237011767167e-05, - "loss": 46.777, - "step": 142000 - }, - { - "epoch": 0.5737383694857323, - "grad_norm": 956.2443237304688, - "learning_rate": 2.3426753256593363e-05, - "loss": 52.6443, - "step": 142010 - }, - { - "epoch": 0.5737787707511, - "grad_norm": 627.6842041015625, - "learning_rate": 2.3423269532090802e-05, - "loss": 55.9855, - "step": 142020 - }, - { - "epoch": 0.5738191720164676, - "grad_norm": 1048.773681640625, - "learning_rate": 2.3419785838327425e-05, - "loss": 65.7966, - "step": 142030 - }, - { - "epoch": 0.5738595732818352, - "grad_norm": 985.0260620117188, - "learning_rate": 2.3416302175371138e-05, - "loss": 53.6217, - "step": 142040 - }, - { - "epoch": 0.5738999745472029, - "grad_norm": 530.5432739257812, - "learning_rate": 2.341281854328985e-05, - "loss": 77.8192, - "step": 142050 - }, - { - "epoch": 0.5739403758125704, - "grad_norm": 1114.1192626953125, - "learning_rate": 2.3409334942151484e-05, - "loss": 64.7953, - "step": 142060 - }, - { - "epoch": 0.573980777077938, - "grad_norm": 861.7225952148438, - "learning_rate": 2.3405851372023953e-05, - "loss": 51.9311, - "step": 142070 - }, - { - "epoch": 0.5740211783433057, - "grad_norm": 636.5674438476562, - "learning_rate": 2.3402367832975163e-05, - "loss": 49.7294, - "step": 142080 - }, - { - "epoch": 0.5740615796086733, - "grad_norm": 892.6287841796875, - "learning_rate": 2.339888432507304e-05, - "loss": 67.6866, - "step": 142090 - }, - { - "epoch": 0.574101980874041, - "grad_norm": 596.37646484375, - "learning_rate": 2.3395400848385486e-05, - "loss": 55.9081, - "step": 142100 - }, - { - "epoch": 0.5741423821394086, - "grad_norm": 514.5144653320312, - "learning_rate": 2.3391917402980426e-05, - "loss": 54.0444, - "step": 142110 - }, - { - "epoch": 0.5741827834047762, - "grad_norm": 2546.439208984375, - "learning_rate": 2.3388433988925763e-05, - "loss": 93.0408, - "step": 142120 - }, - { - "epoch": 0.5742231846701439, - "grad_norm": 704.0580444335938, - "learning_rate": 2.3384950606289406e-05, - "loss": 76.4964, - "step": 142130 - }, - { - "epoch": 0.5742635859355115, - "grad_norm": 818.4609985351562, - "learning_rate": 2.3381467255139276e-05, - "loss": 37.8892, - "step": 142140 - }, - { - "epoch": 0.5743039872008792, - "grad_norm": 854.7376708984375, - "learning_rate": 2.3377983935543264e-05, - "loss": 50.8855, - "step": 142150 - }, - { - "epoch": 0.5743443884662468, - "grad_norm": 517.0084838867188, - "learning_rate": 2.3374500647569297e-05, - "loss": 60.2467, - "step": 142160 - }, - { - "epoch": 0.5743847897316144, - "grad_norm": 550.582275390625, - "learning_rate": 2.3371017391285278e-05, - "loss": 59.1301, - "step": 142170 - }, - { - "epoch": 0.574425190996982, - "grad_norm": 652.0669555664062, - "learning_rate": 2.3367534166759102e-05, - "loss": 57.5477, - "step": 142180 - }, - { - "epoch": 0.5744655922623496, - "grad_norm": 714.7897338867188, - "learning_rate": 2.3364050974058703e-05, - "loss": 71.8189, - "step": 142190 - }, - { - "epoch": 0.5745059935277173, - "grad_norm": 288.60345458984375, - "learning_rate": 2.336056781325197e-05, - "loss": 40.1994, - "step": 142200 - }, - { - "epoch": 0.5745463947930849, - "grad_norm": 4422.25830078125, - "learning_rate": 2.3357084684406806e-05, - "loss": 63.4722, - "step": 142210 - }, - { - "epoch": 0.5745867960584525, - "grad_norm": 474.19500732421875, - "learning_rate": 2.3353601587591125e-05, - "loss": 53.761, - "step": 142220 - }, - { - "epoch": 0.5746271973238202, - "grad_norm": 0.0, - "learning_rate": 2.3350118522872823e-05, - "loss": 50.8316, - "step": 142230 - }, - { - "epoch": 0.5746675985891878, - "grad_norm": 610.2474975585938, - "learning_rate": 2.3346635490319814e-05, - "loss": 58.9436, - "step": 142240 - }, - { - "epoch": 0.5747079998545555, - "grad_norm": 1580.07421875, - "learning_rate": 2.3343152490000004e-05, - "loss": 88.0132, - "step": 142250 - }, - { - "epoch": 0.5747484011199231, - "grad_norm": 365.8472900390625, - "learning_rate": 2.3339669521981273e-05, - "loss": 51.9967, - "step": 142260 - }, - { - "epoch": 0.5747888023852907, - "grad_norm": 1166.15087890625, - "learning_rate": 2.3336186586331555e-05, - "loss": 51.4411, - "step": 142270 - }, - { - "epoch": 0.5748292036506584, - "grad_norm": 1141.8216552734375, - "learning_rate": 2.3332703683118732e-05, - "loss": 78.2708, - "step": 142280 - }, - { - "epoch": 0.574869604916026, - "grad_norm": 956.0130004882812, - "learning_rate": 2.3329220812410704e-05, - "loss": 40.9031, - "step": 142290 - }, - { - "epoch": 0.5749100061813937, - "grad_norm": 1446.7919921875, - "learning_rate": 2.332573797427538e-05, - "loss": 78.6235, - "step": 142300 - }, - { - "epoch": 0.5749504074467612, - "grad_norm": 834.1378784179688, - "learning_rate": 2.3322255168780655e-05, - "loss": 33.3753, - "step": 142310 - }, - { - "epoch": 0.5749908087121288, - "grad_norm": 970.3228759765625, - "learning_rate": 2.3318772395994433e-05, - "loss": 57.855, - "step": 142320 - }, - { - "epoch": 0.5750312099774965, - "grad_norm": 1489.4097900390625, - "learning_rate": 2.3315289655984605e-05, - "loss": 63.0613, - "step": 142330 - }, - { - "epoch": 0.5750716112428641, - "grad_norm": 610.0813598632812, - "learning_rate": 2.331180694881907e-05, - "loss": 73.1477, - "step": 142340 - }, - { - "epoch": 0.5751120125082317, - "grad_norm": 694.2882080078125, - "learning_rate": 2.3308324274565734e-05, - "loss": 38.6415, - "step": 142350 - }, - { - "epoch": 0.5751524137735994, - "grad_norm": 648.3965454101562, - "learning_rate": 2.3304841633292487e-05, - "loss": 41.1692, - "step": 142360 - }, - { - "epoch": 0.575192815038967, - "grad_norm": 966.6323852539062, - "learning_rate": 2.3301359025067216e-05, - "loss": 56.3727, - "step": 142370 - }, - { - "epoch": 0.5752332163043347, - "grad_norm": 769.1686401367188, - "learning_rate": 2.3297876449957834e-05, - "loss": 78.7668, - "step": 142380 - }, - { - "epoch": 0.5752736175697023, - "grad_norm": 397.4100341796875, - "learning_rate": 2.329439390803222e-05, - "loss": 61.6241, - "step": 142390 - }, - { - "epoch": 0.5753140188350699, - "grad_norm": 809.5210571289062, - "learning_rate": 2.3290911399358285e-05, - "loss": 49.5255, - "step": 142400 - }, - { - "epoch": 0.5753544201004376, - "grad_norm": 293.4340515136719, - "learning_rate": 2.3287428924003912e-05, - "loss": 43.1965, - "step": 142410 - }, - { - "epoch": 0.5753948213658052, - "grad_norm": 966.3424682617188, - "learning_rate": 2.3283946482036982e-05, - "loss": 53.9423, - "step": 142420 - }, - { - "epoch": 0.5754352226311729, - "grad_norm": 483.5935974121094, - "learning_rate": 2.3280464073525407e-05, - "loss": 47.5288, - "step": 142430 - }, - { - "epoch": 0.5754756238965404, - "grad_norm": 422.25970458984375, - "learning_rate": 2.327698169853707e-05, - "loss": 26.3258, - "step": 142440 - }, - { - "epoch": 0.575516025161908, - "grad_norm": 942.5641479492188, - "learning_rate": 2.3273499357139855e-05, - "loss": 57.1318, - "step": 142450 - }, - { - "epoch": 0.5755564264272757, - "grad_norm": 736.4271240234375, - "learning_rate": 2.3270017049401664e-05, - "loss": 56.0266, - "step": 142460 - }, - { - "epoch": 0.5755968276926433, - "grad_norm": 952.470947265625, - "learning_rate": 2.3266534775390383e-05, - "loss": 61.0546, - "step": 142470 - }, - { - "epoch": 0.575637228958011, - "grad_norm": 410.7485656738281, - "learning_rate": 2.32630525351739e-05, - "loss": 32.2695, - "step": 142480 - }, - { - "epoch": 0.5756776302233786, - "grad_norm": 528.5092163085938, - "learning_rate": 2.3259570328820106e-05, - "loss": 46.2177, - "step": 142490 - }, - { - "epoch": 0.5757180314887462, - "grad_norm": 528.0546264648438, - "learning_rate": 2.3256088156396868e-05, - "loss": 44.6508, - "step": 142500 - }, - { - "epoch": 0.5757584327541139, - "grad_norm": 658.5557861328125, - "learning_rate": 2.3252606017972103e-05, - "loss": 34.0906, - "step": 142510 - }, - { - "epoch": 0.5757988340194815, - "grad_norm": 507.3339538574219, - "learning_rate": 2.324912391361368e-05, - "loss": 49.9197, - "step": 142520 - }, - { - "epoch": 0.5758392352848491, - "grad_norm": 940.519775390625, - "learning_rate": 2.324564184338948e-05, - "loss": 64.702, - "step": 142530 - }, - { - "epoch": 0.5758796365502168, - "grad_norm": 806.0038452148438, - "learning_rate": 2.3242159807367408e-05, - "loss": 70.4329, - "step": 142540 - }, - { - "epoch": 0.5759200378155844, - "grad_norm": 641.6761474609375, - "learning_rate": 2.323867780561533e-05, - "loss": 43.513, - "step": 142550 - }, - { - "epoch": 0.5759604390809521, - "grad_norm": 577.0625610351562, - "learning_rate": 2.323519583820114e-05, - "loss": 72.0487, - "step": 142560 - }, - { - "epoch": 0.5760008403463196, - "grad_norm": 625.9312744140625, - "learning_rate": 2.323171390519271e-05, - "loss": 43.7065, - "step": 142570 - }, - { - "epoch": 0.5760412416116872, - "grad_norm": 567.3580322265625, - "learning_rate": 2.3228232006657923e-05, - "loss": 54.7438, - "step": 142580 - }, - { - "epoch": 0.5760816428770549, - "grad_norm": 770.6826782226562, - "learning_rate": 2.3224750142664677e-05, - "loss": 59.1607, - "step": 142590 - }, - { - "epoch": 0.5761220441424225, - "grad_norm": 758.7730712890625, - "learning_rate": 2.3221268313280838e-05, - "loss": 63.5247, - "step": 142600 - }, - { - "epoch": 0.5761624454077902, - "grad_norm": 665.5321044921875, - "learning_rate": 2.3217786518574273e-05, - "loss": 41.7696, - "step": 142610 - }, - { - "epoch": 0.5762028466731578, - "grad_norm": 671.5497436523438, - "learning_rate": 2.32143047586129e-05, - "loss": 58.2926, - "step": 142620 - }, - { - "epoch": 0.5762432479385254, - "grad_norm": 843.5665283203125, - "learning_rate": 2.321082303346456e-05, - "loss": 52.1115, - "step": 142630 - }, - { - "epoch": 0.5762836492038931, - "grad_norm": 1791.599853515625, - "learning_rate": 2.320734134319715e-05, - "loss": 52.7765, - "step": 142640 - }, - { - "epoch": 0.5763240504692607, - "grad_norm": 618.1298217773438, - "learning_rate": 2.3203859687878548e-05, - "loss": 50.9905, - "step": 142650 - }, - { - "epoch": 0.5763644517346284, - "grad_norm": 620.7821655273438, - "learning_rate": 2.320037806757662e-05, - "loss": 45.4659, - "step": 142660 - }, - { - "epoch": 0.576404852999996, - "grad_norm": 1306.3067626953125, - "learning_rate": 2.3196896482359253e-05, - "loss": 58.8106, - "step": 142670 - }, - { - "epoch": 0.5764452542653636, - "grad_norm": 582.6734619140625, - "learning_rate": 2.319341493229431e-05, - "loss": 48.8326, - "step": 142680 - }, - { - "epoch": 0.5764856555307313, - "grad_norm": 733.7221069335938, - "learning_rate": 2.318993341744968e-05, - "loss": 61.6943, - "step": 142690 - }, - { - "epoch": 0.5765260567960988, - "grad_norm": 670.154296875, - "learning_rate": 2.3186451937893235e-05, - "loss": 67.0611, - "step": 142700 - }, - { - "epoch": 0.5765664580614664, - "grad_norm": 633.4531860351562, - "learning_rate": 2.3182970493692827e-05, - "loss": 60.647, - "step": 142710 - }, - { - "epoch": 0.5766068593268341, - "grad_norm": 878.5447998046875, - "learning_rate": 2.3179489084916358e-05, - "loss": 43.6026, - "step": 142720 - }, - { - "epoch": 0.5766472605922017, - "grad_norm": 410.3040466308594, - "learning_rate": 2.3176007711631687e-05, - "loss": 25.5606, - "step": 142730 - }, - { - "epoch": 0.5766876618575694, - "grad_norm": 833.0294189453125, - "learning_rate": 2.317252637390668e-05, - "loss": 48.7416, - "step": 142740 - }, - { - "epoch": 0.576728063122937, - "grad_norm": 1507.3426513671875, - "learning_rate": 2.3169045071809215e-05, - "loss": 62.4276, - "step": 142750 - }, - { - "epoch": 0.5767684643883046, - "grad_norm": 1039.6998291015625, - "learning_rate": 2.316556380540715e-05, - "loss": 44.9483, - "step": 142760 - }, - { - "epoch": 0.5768088656536723, - "grad_norm": 594.3389892578125, - "learning_rate": 2.316208257476837e-05, - "loss": 56.9623, - "step": 142770 - }, - { - "epoch": 0.5768492669190399, - "grad_norm": 1160.0947265625, - "learning_rate": 2.315860137996074e-05, - "loss": 62.822, - "step": 142780 - }, - { - "epoch": 0.5768896681844076, - "grad_norm": 651.6566162109375, - "learning_rate": 2.3155120221052112e-05, - "loss": 54.3397, - "step": 142790 - }, - { - "epoch": 0.5769300694497752, - "grad_norm": 0.0, - "learning_rate": 2.3151639098110377e-05, - "loss": 64.4152, - "step": 142800 - }, - { - "epoch": 0.5769704707151428, - "grad_norm": 653.4903564453125, - "learning_rate": 2.3148158011203388e-05, - "loss": 77.8834, - "step": 142810 - }, - { - "epoch": 0.5770108719805104, - "grad_norm": 596.7733154296875, - "learning_rate": 2.314467696039901e-05, - "loss": 65.638, - "step": 142820 - }, - { - "epoch": 0.577051273245878, - "grad_norm": 552.674072265625, - "learning_rate": 2.314119594576511e-05, - "loss": 54.3161, - "step": 142830 - }, - { - "epoch": 0.5770916745112457, - "grad_norm": 296.8179931640625, - "learning_rate": 2.3137714967369545e-05, - "loss": 38.5794, - "step": 142840 - }, - { - "epoch": 0.5771320757766133, - "grad_norm": 487.05706787109375, - "learning_rate": 2.3134234025280196e-05, - "loss": 60.3549, - "step": 142850 - }, - { - "epoch": 0.5771724770419809, - "grad_norm": 747.7566528320312, - "learning_rate": 2.3130753119564915e-05, - "loss": 53.9551, - "step": 142860 - }, - { - "epoch": 0.5772128783073486, - "grad_norm": 469.7563171386719, - "learning_rate": 2.3127272250291553e-05, - "loss": 56.6801, - "step": 142870 - }, - { - "epoch": 0.5772532795727162, - "grad_norm": 433.9674377441406, - "learning_rate": 2.3123791417527994e-05, - "loss": 62.3665, - "step": 142880 - }, - { - "epoch": 0.5772936808380839, - "grad_norm": 355.9599304199219, - "learning_rate": 2.3120310621342086e-05, - "loss": 49.0493, - "step": 142890 - }, - { - "epoch": 0.5773340821034515, - "grad_norm": 1589.1834716796875, - "learning_rate": 2.3116829861801686e-05, - "loss": 76.2406, - "step": 142900 - }, - { - "epoch": 0.5773744833688191, - "grad_norm": 1414.6934814453125, - "learning_rate": 2.311334913897466e-05, - "loss": 66.9008, - "step": 142910 - }, - { - "epoch": 0.5774148846341868, - "grad_norm": 681.8200073242188, - "learning_rate": 2.3109868452928855e-05, - "loss": 50.0082, - "step": 142920 - }, - { - "epoch": 0.5774552858995544, - "grad_norm": 1097.9447021484375, - "learning_rate": 2.310638780373215e-05, - "loss": 59.7922, - "step": 142930 - }, - { - "epoch": 0.5774956871649221, - "grad_norm": 532.4230346679688, - "learning_rate": 2.3102907191452388e-05, - "loss": 41.8895, - "step": 142940 - }, - { - "epoch": 0.5775360884302896, - "grad_norm": 990.6885986328125, - "learning_rate": 2.309942661615742e-05, - "loss": 60.9387, - "step": 142950 - }, - { - "epoch": 0.5775764896956572, - "grad_norm": 980.983154296875, - "learning_rate": 2.3095946077915114e-05, - "loss": 61.0354, - "step": 142960 - }, - { - "epoch": 0.5776168909610249, - "grad_norm": 697.8203735351562, - "learning_rate": 2.309246557679332e-05, - "loss": 44.8736, - "step": 142970 - }, - { - "epoch": 0.5776572922263925, - "grad_norm": 645.3919067382812, - "learning_rate": 2.3088985112859884e-05, - "loss": 37.8088, - "step": 142980 - }, - { - "epoch": 0.5776976934917601, - "grad_norm": 1883.624755859375, - "learning_rate": 2.3085504686182673e-05, - "loss": 40.0972, - "step": 142990 - }, - { - "epoch": 0.5777380947571278, - "grad_norm": 841.05029296875, - "learning_rate": 2.3082024296829536e-05, - "loss": 60.6423, - "step": 143000 - }, - { - "epoch": 0.5777784960224954, - "grad_norm": 396.6905212402344, - "learning_rate": 2.3078543944868323e-05, - "loss": 45.2093, - "step": 143010 - }, - { - "epoch": 0.5778188972878631, - "grad_norm": 663.75, - "learning_rate": 2.3075063630366884e-05, - "loss": 70.7723, - "step": 143020 - }, - { - "epoch": 0.5778592985532307, - "grad_norm": 396.67694091796875, - "learning_rate": 2.307158335339306e-05, - "loss": 47.533, - "step": 143030 - }, - { - "epoch": 0.5778996998185983, - "grad_norm": 704.8739624023438, - "learning_rate": 2.3068103114014726e-05, - "loss": 41.4563, - "step": 143040 - }, - { - "epoch": 0.577940101083966, - "grad_norm": 1157.4110107421875, - "learning_rate": 2.3064622912299712e-05, - "loss": 49.6344, - "step": 143050 - }, - { - "epoch": 0.5779805023493336, - "grad_norm": 665.005615234375, - "learning_rate": 2.3061142748315865e-05, - "loss": 58.9296, - "step": 143060 - }, - { - "epoch": 0.5780209036147013, - "grad_norm": 542.858154296875, - "learning_rate": 2.3057662622131047e-05, - "loss": 49.8475, - "step": 143070 - }, - { - "epoch": 0.5780613048800688, - "grad_norm": 755.5459594726562, - "learning_rate": 2.3054182533813087e-05, - "loss": 47.4976, - "step": 143080 - }, - { - "epoch": 0.5781017061454364, - "grad_norm": 445.7283935546875, - "learning_rate": 2.3050702483429846e-05, - "loss": 48.5714, - "step": 143090 - }, - { - "epoch": 0.5781421074108041, - "grad_norm": 512.7163696289062, - "learning_rate": 2.304722247104917e-05, - "loss": 73.921, - "step": 143100 - }, - { - "epoch": 0.5781825086761717, - "grad_norm": 332.2102355957031, - "learning_rate": 2.3043742496738883e-05, - "loss": 48.0645, - "step": 143110 - }, - { - "epoch": 0.5782229099415394, - "grad_norm": 330.4541015625, - "learning_rate": 2.304026256056685e-05, - "loss": 43.9408, - "step": 143120 - }, - { - "epoch": 0.578263311206907, - "grad_norm": 639.76416015625, - "learning_rate": 2.3036782662600915e-05, - "loss": 55.066, - "step": 143130 - }, - { - "epoch": 0.5783037124722746, - "grad_norm": 491.55560302734375, - "learning_rate": 2.3033302802908894e-05, - "loss": 52.4497, - "step": 143140 - }, - { - "epoch": 0.5783441137376423, - "grad_norm": 1402.951416015625, - "learning_rate": 2.3029822981558667e-05, - "loss": 55.6616, - "step": 143150 - }, - { - "epoch": 0.5783845150030099, - "grad_norm": 418.091552734375, - "learning_rate": 2.3026343198618043e-05, - "loss": 36.3308, - "step": 143160 - }, - { - "epoch": 0.5784249162683776, - "grad_norm": 1122.232177734375, - "learning_rate": 2.302286345415488e-05, - "loss": 37.5329, - "step": 143170 - }, - { - "epoch": 0.5784653175337452, - "grad_norm": 1635.8270263671875, - "learning_rate": 2.3019383748237015e-05, - "loss": 118.2091, - "step": 143180 - }, - { - "epoch": 0.5785057187991128, - "grad_norm": 747.5432739257812, - "learning_rate": 2.3015904080932272e-05, - "loss": 49.2059, - "step": 143190 - }, - { - "epoch": 0.5785461200644805, - "grad_norm": 894.6597900390625, - "learning_rate": 2.301242445230851e-05, - "loss": 51.8367, - "step": 143200 - }, - { - "epoch": 0.578586521329848, - "grad_norm": 758.6695556640625, - "learning_rate": 2.3008944862433547e-05, - "loss": 64.8202, - "step": 143210 - }, - { - "epoch": 0.5786269225952156, - "grad_norm": 359.57843017578125, - "learning_rate": 2.3005465311375237e-05, - "loss": 30.1586, - "step": 143220 - }, - { - "epoch": 0.5786673238605833, - "grad_norm": 484.9412536621094, - "learning_rate": 2.300198579920141e-05, - "loss": 49.8715, - "step": 143230 - }, - { - "epoch": 0.5787077251259509, - "grad_norm": 1004.665283203125, - "learning_rate": 2.2998506325979894e-05, - "loss": 66.9634, - "step": 143240 - }, - { - "epoch": 0.5787481263913186, - "grad_norm": 980.5902099609375, - "learning_rate": 2.299502689177853e-05, - "loss": 69.9582, - "step": 143250 - }, - { - "epoch": 0.5787885276566862, - "grad_norm": 621.208984375, - "learning_rate": 2.2991547496665148e-05, - "loss": 52.7556, - "step": 143260 - }, - { - "epoch": 0.5788289289220538, - "grad_norm": 673.4203491210938, - "learning_rate": 2.298806814070758e-05, - "loss": 60.5647, - "step": 143270 - }, - { - "epoch": 0.5788693301874215, - "grad_norm": 748.7210693359375, - "learning_rate": 2.2984588823973662e-05, - "loss": 61.0294, - "step": 143280 - }, - { - "epoch": 0.5789097314527891, - "grad_norm": 510.8747863769531, - "learning_rate": 2.2981109546531215e-05, - "loss": 54.1331, - "step": 143290 - }, - { - "epoch": 0.5789501327181568, - "grad_norm": 573.7908935546875, - "learning_rate": 2.2977630308448085e-05, - "loss": 50.7809, - "step": 143300 - }, - { - "epoch": 0.5789905339835244, - "grad_norm": 1115.3363037109375, - "learning_rate": 2.2974151109792096e-05, - "loss": 77.0252, - "step": 143310 - }, - { - "epoch": 0.579030935248892, - "grad_norm": 732.9327392578125, - "learning_rate": 2.2970671950631064e-05, - "loss": 41.6058, - "step": 143320 - }, - { - "epoch": 0.5790713365142596, - "grad_norm": 749.0936889648438, - "learning_rate": 2.296719283103283e-05, - "loss": 46.5037, - "step": 143330 - }, - { - "epoch": 0.5791117377796272, - "grad_norm": 416.95855712890625, - "learning_rate": 2.2963713751065223e-05, - "loss": 68.9422, - "step": 143340 - }, - { - "epoch": 0.5791521390449949, - "grad_norm": 234.63787841796875, - "learning_rate": 2.2960234710796063e-05, - "loss": 44.6314, - "step": 143350 - }, - { - "epoch": 0.5791925403103625, - "grad_norm": 1270.1083984375, - "learning_rate": 2.2956755710293183e-05, - "loss": 74.6942, - "step": 143360 - }, - { - "epoch": 0.5792329415757301, - "grad_norm": 1411.5767822265625, - "learning_rate": 2.2953276749624386e-05, - "loss": 63.6014, - "step": 143370 - }, - { - "epoch": 0.5792733428410978, - "grad_norm": 371.9848327636719, - "learning_rate": 2.2949797828857525e-05, - "loss": 38.8278, - "step": 143380 - }, - { - "epoch": 0.5793137441064654, - "grad_norm": 1254.4080810546875, - "learning_rate": 2.2946318948060413e-05, - "loss": 59.7256, - "step": 143390 - }, - { - "epoch": 0.579354145371833, - "grad_norm": 1216.3907470703125, - "learning_rate": 2.294284010730086e-05, - "loss": 38.042, - "step": 143400 - }, - { - "epoch": 0.5793945466372007, - "grad_norm": 910.777099609375, - "learning_rate": 2.2939361306646706e-05, - "loss": 69.6151, - "step": 143410 - }, - { - "epoch": 0.5794349479025683, - "grad_norm": 640.028564453125, - "learning_rate": 2.2935882546165766e-05, - "loss": 58.462, - "step": 143420 - }, - { - "epoch": 0.579475349167936, - "grad_norm": 915.5157470703125, - "learning_rate": 2.293240382592585e-05, - "loss": 55.9273, - "step": 143430 - }, - { - "epoch": 0.5795157504333036, - "grad_norm": 819.3345336914062, - "learning_rate": 2.2928925145994794e-05, - "loss": 47.2641, - "step": 143440 - }, - { - "epoch": 0.5795561516986713, - "grad_norm": 288.2989196777344, - "learning_rate": 2.29254465064404e-05, - "loss": 61.8187, - "step": 143450 - }, - { - "epoch": 0.5795965529640388, - "grad_norm": 467.6305847167969, - "learning_rate": 2.2921967907330504e-05, - "loss": 59.036, - "step": 143460 - }, - { - "epoch": 0.5796369542294064, - "grad_norm": 1217.71142578125, - "learning_rate": 2.291848934873291e-05, - "loss": 53.756, - "step": 143470 - }, - { - "epoch": 0.5796773554947741, - "grad_norm": 964.2899780273438, - "learning_rate": 2.291501083071543e-05, - "loss": 42.6057, - "step": 143480 - }, - { - "epoch": 0.5797177567601417, - "grad_norm": 787.3900146484375, - "learning_rate": 2.29115323533459e-05, - "loss": 45.8454, - "step": 143490 - }, - { - "epoch": 0.5797581580255093, - "grad_norm": 519.1593627929688, - "learning_rate": 2.2908053916692117e-05, - "loss": 56.6687, - "step": 143500 - }, - { - "epoch": 0.579798559290877, - "grad_norm": 649.1287841796875, - "learning_rate": 2.2904575520821897e-05, - "loss": 38.6635, - "step": 143510 - }, - { - "epoch": 0.5798389605562446, - "grad_norm": 696.3875122070312, - "learning_rate": 2.2901097165803062e-05, - "loss": 93.9158, - "step": 143520 - }, - { - "epoch": 0.5798793618216123, - "grad_norm": 826.119384765625, - "learning_rate": 2.2897618851703407e-05, - "loss": 55.8066, - "step": 143530 - }, - { - "epoch": 0.5799197630869799, - "grad_norm": 742.7290649414062, - "learning_rate": 2.289414057859077e-05, - "loss": 31.2436, - "step": 143540 - }, - { - "epoch": 0.5799601643523475, - "grad_norm": 682.001220703125, - "learning_rate": 2.2890662346532936e-05, - "loss": 60.7501, - "step": 143550 - }, - { - "epoch": 0.5800005656177152, - "grad_norm": 1402.470703125, - "learning_rate": 2.2887184155597723e-05, - "loss": 54.5233, - "step": 143560 - }, - { - "epoch": 0.5800409668830828, - "grad_norm": 1101.815673828125, - "learning_rate": 2.288370600585295e-05, - "loss": 46.7544, - "step": 143570 - }, - { - "epoch": 0.5800813681484505, - "grad_norm": 836.9398803710938, - "learning_rate": 2.2880227897366422e-05, - "loss": 45.7828, - "step": 143580 - }, - { - "epoch": 0.580121769413818, - "grad_norm": 322.18975830078125, - "learning_rate": 2.2876749830205934e-05, - "loss": 29.6777, - "step": 143590 - }, - { - "epoch": 0.5801621706791856, - "grad_norm": 852.6254272460938, - "learning_rate": 2.28732718044393e-05, - "loss": 44.7059, - "step": 143600 - }, - { - "epoch": 0.5802025719445533, - "grad_norm": 1030.384033203125, - "learning_rate": 2.2869793820134332e-05, - "loss": 41.8577, - "step": 143610 - }, - { - "epoch": 0.5802429732099209, - "grad_norm": 798.349609375, - "learning_rate": 2.286631587735883e-05, - "loss": 53.5231, - "step": 143620 - }, - { - "epoch": 0.5802833744752885, - "grad_norm": 440.0967712402344, - "learning_rate": 2.28628379761806e-05, - "loss": 43.9023, - "step": 143630 - }, - { - "epoch": 0.5803237757406562, - "grad_norm": 1532.7510986328125, - "learning_rate": 2.2859360116667432e-05, - "loss": 55.8753, - "step": 143640 - }, - { - "epoch": 0.5803641770060238, - "grad_norm": 280.5191345214844, - "learning_rate": 2.2855882298887156e-05, - "loss": 76.918, - "step": 143650 - }, - { - "epoch": 0.5804045782713915, - "grad_norm": 721.1527709960938, - "learning_rate": 2.2852404522907556e-05, - "loss": 51.7469, - "step": 143660 - }, - { - "epoch": 0.5804449795367591, - "grad_norm": 0.0, - "learning_rate": 2.2848926788796425e-05, - "loss": 49.8166, - "step": 143670 - }, - { - "epoch": 0.5804853808021267, - "grad_norm": 638.0519409179688, - "learning_rate": 2.284544909662158e-05, - "loss": 34.2589, - "step": 143680 - }, - { - "epoch": 0.5805257820674944, - "grad_norm": 576.2401733398438, - "learning_rate": 2.2841971446450815e-05, - "loss": 54.998, - "step": 143690 - }, - { - "epoch": 0.580566183332862, - "grad_norm": 785.57568359375, - "learning_rate": 2.2838493838351933e-05, - "loss": 65.9362, - "step": 143700 - }, - { - "epoch": 0.5806065845982297, - "grad_norm": 753.3636474609375, - "learning_rate": 2.2835016272392722e-05, - "loss": 45.9941, - "step": 143710 - }, - { - "epoch": 0.5806469858635972, - "grad_norm": 822.4500122070312, - "learning_rate": 2.2831538748640974e-05, - "loss": 50.9894, - "step": 143720 - }, - { - "epoch": 0.5806873871289648, - "grad_norm": 608.1663818359375, - "learning_rate": 2.2828061267164508e-05, - "loss": 38.5246, - "step": 143730 - }, - { - "epoch": 0.5807277883943325, - "grad_norm": 460.6668701171875, - "learning_rate": 2.282458382803109e-05, - "loss": 36.4291, - "step": 143740 - }, - { - "epoch": 0.5807681896597001, - "grad_norm": 427.8645935058594, - "learning_rate": 2.2821106431308544e-05, - "loss": 34.1155, - "step": 143750 - }, - { - "epoch": 0.5808085909250678, - "grad_norm": 926.2526245117188, - "learning_rate": 2.281762907706465e-05, - "loss": 67.177, - "step": 143760 - }, - { - "epoch": 0.5808489921904354, - "grad_norm": 851.1618041992188, - "learning_rate": 2.28141517653672e-05, - "loss": 58.3684, - "step": 143770 - }, - { - "epoch": 0.580889393455803, - "grad_norm": 392.8139953613281, - "learning_rate": 2.2810674496283984e-05, - "loss": 61.7282, - "step": 143780 - }, - { - "epoch": 0.5809297947211707, - "grad_norm": 1116.66357421875, - "learning_rate": 2.28071972698828e-05, - "loss": 39.3322, - "step": 143790 - }, - { - "epoch": 0.5809701959865383, - "grad_norm": 493.0304870605469, - "learning_rate": 2.280372008623142e-05, - "loss": 44.2897, - "step": 143800 - }, - { - "epoch": 0.581010597251906, - "grad_norm": 582.5764770507812, - "learning_rate": 2.280024294539766e-05, - "loss": 44.4858, - "step": 143810 - }, - { - "epoch": 0.5810509985172736, - "grad_norm": 3156.2939453125, - "learning_rate": 2.279676584744929e-05, - "loss": 63.7611, - "step": 143820 - }, - { - "epoch": 0.5810913997826412, - "grad_norm": 694.3815307617188, - "learning_rate": 2.279328879245411e-05, - "loss": 50.649, - "step": 143830 - }, - { - "epoch": 0.5811318010480089, - "grad_norm": 732.0067749023438, - "learning_rate": 2.27898117804799e-05, - "loss": 33.2757, - "step": 143840 - }, - { - "epoch": 0.5811722023133764, - "grad_norm": 2266.329345703125, - "learning_rate": 2.2786334811594446e-05, - "loss": 49.3162, - "step": 143850 - }, - { - "epoch": 0.581212603578744, - "grad_norm": 1178.98193359375, - "learning_rate": 2.278285788586554e-05, - "loss": 66.4311, - "step": 143860 - }, - { - "epoch": 0.5812530048441117, - "grad_norm": 523.5665283203125, - "learning_rate": 2.2779381003360958e-05, - "loss": 56.1309, - "step": 143870 - }, - { - "epoch": 0.5812934061094793, - "grad_norm": 926.5538940429688, - "learning_rate": 2.2775904164148477e-05, - "loss": 44.5801, - "step": 143880 - }, - { - "epoch": 0.581333807374847, - "grad_norm": 311.15753173828125, - "learning_rate": 2.27724273682959e-05, - "loss": 35.8682, - "step": 143890 - }, - { - "epoch": 0.5813742086402146, - "grad_norm": 473.8144226074219, - "learning_rate": 2.2768950615870986e-05, - "loss": 40.8588, - "step": 143900 - }, - { - "epoch": 0.5814146099055822, - "grad_norm": 1385.28125, - "learning_rate": 2.276547390694154e-05, - "loss": 50.3639, - "step": 143910 - }, - { - "epoch": 0.5814550111709499, - "grad_norm": 824.149169921875, - "learning_rate": 2.2761997241575333e-05, - "loss": 49.5366, - "step": 143920 - }, - { - "epoch": 0.5814954124363175, - "grad_norm": 310.1435546875, - "learning_rate": 2.2758520619840134e-05, - "loss": 45.1334, - "step": 143930 - }, - { - "epoch": 0.5815358137016852, - "grad_norm": 384.5504455566406, - "learning_rate": 2.2755044041803738e-05, - "loss": 64.554, - "step": 143940 - }, - { - "epoch": 0.5815762149670528, - "grad_norm": 1101.164306640625, - "learning_rate": 2.2751567507533906e-05, - "loss": 55.4663, - "step": 143950 - }, - { - "epoch": 0.5816166162324204, - "grad_norm": 806.0096435546875, - "learning_rate": 2.2748091017098423e-05, - "loss": 58.164, - "step": 143960 - }, - { - "epoch": 0.581657017497788, - "grad_norm": 1249.18212890625, - "learning_rate": 2.274461457056507e-05, - "loss": 78.7904, - "step": 143970 - }, - { - "epoch": 0.5816974187631556, - "grad_norm": 484.7062072753906, - "learning_rate": 2.2741138168001608e-05, - "loss": 44.8782, - "step": 143980 - }, - { - "epoch": 0.5817378200285233, - "grad_norm": 744.9406127929688, - "learning_rate": 2.273766180947583e-05, - "loss": 62.8914, - "step": 143990 - }, - { - "epoch": 0.5817782212938909, - "grad_norm": 422.4091491699219, - "learning_rate": 2.2734185495055503e-05, - "loss": 50.1495, - "step": 144000 - }, - { - "epoch": 0.5818186225592585, - "grad_norm": 967.376708984375, - "learning_rate": 2.273070922480838e-05, - "loss": 48.5318, - "step": 144010 - }, - { - "epoch": 0.5818590238246262, - "grad_norm": 876.5336303710938, - "learning_rate": 2.2727232998802263e-05, - "loss": 63.444, - "step": 144020 - }, - { - "epoch": 0.5818994250899938, - "grad_norm": 1013.9330444335938, - "learning_rate": 2.272375681710491e-05, - "loss": 63.0844, - "step": 144030 - }, - { - "epoch": 0.5819398263553615, - "grad_norm": 759.7598876953125, - "learning_rate": 2.272028067978408e-05, - "loss": 56.8256, - "step": 144040 - }, - { - "epoch": 0.5819802276207291, - "grad_norm": 357.03948974609375, - "learning_rate": 2.271680458690756e-05, - "loss": 76.433, - "step": 144050 - }, - { - "epoch": 0.5820206288860967, - "grad_norm": 662.3284912109375, - "learning_rate": 2.27133285385431e-05, - "loss": 38.5279, - "step": 144060 - }, - { - "epoch": 0.5820610301514644, - "grad_norm": 1271.0579833984375, - "learning_rate": 2.270985253475849e-05, - "loss": 65.1111, - "step": 144070 - }, - { - "epoch": 0.582101431416832, - "grad_norm": 302.46990966796875, - "learning_rate": 2.270637657562148e-05, - "loss": 31.1196, - "step": 144080 - }, - { - "epoch": 0.5821418326821997, - "grad_norm": 683.177490234375, - "learning_rate": 2.2702900661199827e-05, - "loss": 48.2238, - "step": 144090 - }, - { - "epoch": 0.5821822339475672, - "grad_norm": 970.8692016601562, - "learning_rate": 2.2699424791561326e-05, - "loss": 65.3873, - "step": 144100 - }, - { - "epoch": 0.5822226352129348, - "grad_norm": 547.6918334960938, - "learning_rate": 2.2695948966773718e-05, - "loss": 62.7877, - "step": 144110 - }, - { - "epoch": 0.5822630364783025, - "grad_norm": 952.3994140625, - "learning_rate": 2.2692473186904765e-05, - "loss": 58.3059, - "step": 144120 - }, - { - "epoch": 0.5823034377436701, - "grad_norm": 368.6611633300781, - "learning_rate": 2.2688997452022244e-05, - "loss": 63.0227, - "step": 144130 - }, - { - "epoch": 0.5823438390090377, - "grad_norm": 1159.48876953125, - "learning_rate": 2.2685521762193892e-05, - "loss": 58.016, - "step": 144140 - }, - { - "epoch": 0.5823842402744054, - "grad_norm": 823.868408203125, - "learning_rate": 2.2682046117487498e-05, - "loss": 40.8645, - "step": 144150 - }, - { - "epoch": 0.582424641539773, - "grad_norm": 655.6223754882812, - "learning_rate": 2.267857051797081e-05, - "loss": 88.7146, - "step": 144160 - }, - { - "epoch": 0.5824650428051407, - "grad_norm": 1600.1575927734375, - "learning_rate": 2.2675094963711572e-05, - "loss": 49.5671, - "step": 144170 - }, - { - "epoch": 0.5825054440705083, - "grad_norm": 1083.625, - "learning_rate": 2.2671619454777566e-05, - "loss": 68.5955, - "step": 144180 - }, - { - "epoch": 0.582545845335876, - "grad_norm": 686.4727172851562, - "learning_rate": 2.266814399123654e-05, - "loss": 51.8318, - "step": 144190 - }, - { - "epoch": 0.5825862466012436, - "grad_norm": 745.5236206054688, - "learning_rate": 2.266466857315624e-05, - "loss": 50.2613, - "step": 144200 - }, - { - "epoch": 0.5826266478666112, - "grad_norm": 1290.810302734375, - "learning_rate": 2.2661193200604432e-05, - "loss": 53.2357, - "step": 144210 - }, - { - "epoch": 0.5826670491319789, - "grad_norm": 589.5169067382812, - "learning_rate": 2.265771787364886e-05, - "loss": 62.4375, - "step": 144220 - }, - { - "epoch": 0.5827074503973464, - "grad_norm": 1309.25927734375, - "learning_rate": 2.265424259235729e-05, - "loss": 48.7557, - "step": 144230 - }, - { - "epoch": 0.582747851662714, - "grad_norm": 578.2619018554688, - "learning_rate": 2.2650767356797474e-05, - "loss": 46.7097, - "step": 144240 - }, - { - "epoch": 0.5827882529280817, - "grad_norm": 348.2593994140625, - "learning_rate": 2.2647292167037144e-05, - "loss": 38.4774, - "step": 144250 - }, - { - "epoch": 0.5828286541934493, - "grad_norm": 1188.864990234375, - "learning_rate": 2.2643817023144072e-05, - "loss": 83.3154, - "step": 144260 - }, - { - "epoch": 0.582869055458817, - "grad_norm": 400.77227783203125, - "learning_rate": 2.2640341925185997e-05, - "loss": 34.9298, - "step": 144270 - }, - { - "epoch": 0.5829094567241846, - "grad_norm": 786.1428833007812, - "learning_rate": 2.2636866873230677e-05, - "loss": 68.8352, - "step": 144280 - }, - { - "epoch": 0.5829498579895522, - "grad_norm": 822.0831298828125, - "learning_rate": 2.263339186734585e-05, - "loss": 52.5729, - "step": 144290 - }, - { - "epoch": 0.5829902592549199, - "grad_norm": 477.41424560546875, - "learning_rate": 2.2629916907599268e-05, - "loss": 48.0709, - "step": 144300 - }, - { - "epoch": 0.5830306605202875, - "grad_norm": 962.1996459960938, - "learning_rate": 2.262644199405868e-05, - "loss": 64.8363, - "step": 144310 - }, - { - "epoch": 0.5830710617856552, - "grad_norm": 490.6531066894531, - "learning_rate": 2.2622967126791823e-05, - "loss": 51.404, - "step": 144320 - }, - { - "epoch": 0.5831114630510228, - "grad_norm": 430.4482727050781, - "learning_rate": 2.2619492305866437e-05, - "loss": 35.495, - "step": 144330 - }, - { - "epoch": 0.5831518643163904, - "grad_norm": 1517.2744140625, - "learning_rate": 2.2616017531350288e-05, - "loss": 64.5694, - "step": 144340 - }, - { - "epoch": 0.5831922655817581, - "grad_norm": 534.8115234375, - "learning_rate": 2.2612542803311094e-05, - "loss": 50.1855, - "step": 144350 - }, - { - "epoch": 0.5832326668471256, - "grad_norm": 859.8734741210938, - "learning_rate": 2.2609068121816612e-05, - "loss": 54.9618, - "step": 144360 - }, - { - "epoch": 0.5832730681124932, - "grad_norm": 407.89892578125, - "learning_rate": 2.2605593486934583e-05, - "loss": 45.6449, - "step": 144370 - }, - { - "epoch": 0.5833134693778609, - "grad_norm": 837.1588745117188, - "learning_rate": 2.2602118898732736e-05, - "loss": 53.8316, - "step": 144380 - }, - { - "epoch": 0.5833538706432285, - "grad_norm": 351.80755615234375, - "learning_rate": 2.259864435727882e-05, - "loss": 61.8373, - "step": 144390 - }, - { - "epoch": 0.5833942719085962, - "grad_norm": 1027.636962890625, - "learning_rate": 2.2595169862640568e-05, - "loss": 73.4547, - "step": 144400 - }, - { - "epoch": 0.5834346731739638, - "grad_norm": 592.4967651367188, - "learning_rate": 2.2591695414885708e-05, - "loss": 54.6856, - "step": 144410 - }, - { - "epoch": 0.5834750744393314, - "grad_norm": 711.134521484375, - "learning_rate": 2.2588221014081996e-05, - "loss": 52.1077, - "step": 144420 - }, - { - "epoch": 0.5835154757046991, - "grad_norm": 541.2207641601562, - "learning_rate": 2.2584746660297144e-05, - "loss": 30.0791, - "step": 144430 - }, - { - "epoch": 0.5835558769700667, - "grad_norm": 475.95074462890625, - "learning_rate": 2.2581272353598915e-05, - "loss": 59.0276, - "step": 144440 - }, - { - "epoch": 0.5835962782354344, - "grad_norm": 687.62548828125, - "learning_rate": 2.2577798094055023e-05, - "loss": 48.3016, - "step": 144450 - }, - { - "epoch": 0.583636679500802, - "grad_norm": 595.2200927734375, - "learning_rate": 2.25743238817332e-05, - "loss": 49.3811, - "step": 144460 - }, - { - "epoch": 0.5836770807661696, - "grad_norm": 812.7335205078125, - "learning_rate": 2.2570849716701185e-05, - "loss": 52.779, - "step": 144470 - }, - { - "epoch": 0.5837174820315373, - "grad_norm": 784.1361694335938, - "learning_rate": 2.256737559902671e-05, - "loss": 56.5076, - "step": 144480 - }, - { - "epoch": 0.5837578832969048, - "grad_norm": 862.160888671875, - "learning_rate": 2.2563901528777487e-05, - "loss": 36.3721, - "step": 144490 - }, - { - "epoch": 0.5837982845622725, - "grad_norm": 959.0160522460938, - "learning_rate": 2.2560427506021266e-05, - "loss": 62.3528, - "step": 144500 - }, - { - "epoch": 0.5838386858276401, - "grad_norm": 665.8561401367188, - "learning_rate": 2.2556953530825762e-05, - "loss": 49.8962, - "step": 144510 - }, - { - "epoch": 0.5838790870930077, - "grad_norm": 338.06787109375, - "learning_rate": 2.255347960325871e-05, - "loss": 69.7289, - "step": 144520 - }, - { - "epoch": 0.5839194883583754, - "grad_norm": 543.3985595703125, - "learning_rate": 2.2550005723387838e-05, - "loss": 37.3539, - "step": 144530 - }, - { - "epoch": 0.583959889623743, - "grad_norm": 1258.537353515625, - "learning_rate": 2.2546531891280857e-05, - "loss": 37.1479, - "step": 144540 - }, - { - "epoch": 0.5840002908891107, - "grad_norm": 1533.1956787109375, - "learning_rate": 2.254305810700551e-05, - "loss": 56.9214, - "step": 144550 - }, - { - "epoch": 0.5840406921544783, - "grad_norm": 497.5652770996094, - "learning_rate": 2.2539584370629508e-05, - "loss": 48.9781, - "step": 144560 - }, - { - "epoch": 0.5840810934198459, - "grad_norm": 1660.9959716796875, - "learning_rate": 2.2536110682220563e-05, - "loss": 51.9823, - "step": 144570 - }, - { - "epoch": 0.5841214946852136, - "grad_norm": 882.5232543945312, - "learning_rate": 2.2532637041846422e-05, - "loss": 47.9464, - "step": 144580 - }, - { - "epoch": 0.5841618959505812, - "grad_norm": 1142.7230224609375, - "learning_rate": 2.2529163449574778e-05, - "loss": 74.0414, - "step": 144590 - }, - { - "epoch": 0.5842022972159489, - "grad_norm": 899.859375, - "learning_rate": 2.2525689905473376e-05, - "loss": 41.7679, - "step": 144600 - }, - { - "epoch": 0.5842426984813164, - "grad_norm": 1208.34765625, - "learning_rate": 2.2522216409609924e-05, - "loss": 46.6088, - "step": 144610 - }, - { - "epoch": 0.584283099746684, - "grad_norm": 609.228515625, - "learning_rate": 2.2518742962052137e-05, - "loss": 49.8582, - "step": 144620 - }, - { - "epoch": 0.5843235010120517, - "grad_norm": 791.5541381835938, - "learning_rate": 2.2515269562867728e-05, - "loss": 46.628, - "step": 144630 - }, - { - "epoch": 0.5843639022774193, - "grad_norm": 550.5863037109375, - "learning_rate": 2.2511796212124425e-05, - "loss": 57.1984, - "step": 144640 - }, - { - "epoch": 0.5844043035427869, - "grad_norm": 1214.3863525390625, - "learning_rate": 2.250832290988993e-05, - "loss": 59.4914, - "step": 144650 - }, - { - "epoch": 0.5844447048081546, - "grad_norm": 2203.878173828125, - "learning_rate": 2.250484965623197e-05, - "loss": 51.4256, - "step": 144660 - }, - { - "epoch": 0.5844851060735222, - "grad_norm": 786.3572998046875, - "learning_rate": 2.250137645121824e-05, - "loss": 42.0186, - "step": 144670 - }, - { - "epoch": 0.5845255073388899, - "grad_norm": 846.0687255859375, - "learning_rate": 2.2497903294916474e-05, - "loss": 49.2691, - "step": 144680 - }, - { - "epoch": 0.5845659086042575, - "grad_norm": 1059.890380859375, - "learning_rate": 2.2494430187394365e-05, - "loss": 58.0176, - "step": 144690 - }, - { - "epoch": 0.5846063098696251, - "grad_norm": 653.216552734375, - "learning_rate": 2.2490957128719624e-05, - "loss": 50.118, - "step": 144700 - }, - { - "epoch": 0.5846467111349928, - "grad_norm": 1580.69873046875, - "learning_rate": 2.2487484118959978e-05, - "loss": 65.8694, - "step": 144710 - }, - { - "epoch": 0.5846871124003604, - "grad_norm": 964.9862670898438, - "learning_rate": 2.248401115818312e-05, - "loss": 44.7298, - "step": 144720 - }, - { - "epoch": 0.5847275136657281, - "grad_norm": 1182.8531494140625, - "learning_rate": 2.2480538246456752e-05, - "loss": 48.8191, - "step": 144730 - }, - { - "epoch": 0.5847679149310956, - "grad_norm": 617.6116943359375, - "learning_rate": 2.24770653838486e-05, - "loss": 52.9255, - "step": 144740 - }, - { - "epoch": 0.5848083161964632, - "grad_norm": 639.6339721679688, - "learning_rate": 2.247359257042634e-05, - "loss": 55.6195, - "step": 144750 - }, - { - "epoch": 0.5848487174618309, - "grad_norm": 574.1550903320312, - "learning_rate": 2.247011980625771e-05, - "loss": 50.8011, - "step": 144760 - }, - { - "epoch": 0.5848891187271985, - "grad_norm": 733.3055419921875, - "learning_rate": 2.246664709141039e-05, - "loss": 62.5022, - "step": 144770 - }, - { - "epoch": 0.5849295199925661, - "grad_norm": 1303.0869140625, - "learning_rate": 2.2463174425952084e-05, - "loss": 50.9016, - "step": 144780 - }, - { - "epoch": 0.5849699212579338, - "grad_norm": 424.1533203125, - "learning_rate": 2.2459701809950506e-05, - "loss": 31.3942, - "step": 144790 - }, - { - "epoch": 0.5850103225233014, - "grad_norm": 719.8305053710938, - "learning_rate": 2.2456229243473345e-05, - "loss": 59.4844, - "step": 144800 - }, - { - "epoch": 0.5850507237886691, - "grad_norm": 1231.0697021484375, - "learning_rate": 2.2452756726588307e-05, - "loss": 61.0486, - "step": 144810 - }, - { - "epoch": 0.5850911250540367, - "grad_norm": 0.0, - "learning_rate": 2.2449284259363093e-05, - "loss": 59.8069, - "step": 144820 - }, - { - "epoch": 0.5851315263194043, - "grad_norm": 1179.6441650390625, - "learning_rate": 2.2445811841865383e-05, - "loss": 48.1206, - "step": 144830 - }, - { - "epoch": 0.585171927584772, - "grad_norm": 588.29638671875, - "learning_rate": 2.2442339474162898e-05, - "loss": 79.9188, - "step": 144840 - }, - { - "epoch": 0.5852123288501396, - "grad_norm": 1237.6119384765625, - "learning_rate": 2.243886715632332e-05, - "loss": 48.1361, - "step": 144850 - }, - { - "epoch": 0.5852527301155073, - "grad_norm": 945.63232421875, - "learning_rate": 2.2435394888414334e-05, - "loss": 44.111, - "step": 144860 - }, - { - "epoch": 0.5852931313808748, - "grad_norm": 789.8206176757812, - "learning_rate": 2.243192267050366e-05, - "loss": 62.4617, - "step": 144870 - }, - { - "epoch": 0.5853335326462424, - "grad_norm": 2056.07470703125, - "learning_rate": 2.2428450502658967e-05, - "loss": 79.2203, - "step": 144880 - }, - { - "epoch": 0.5853739339116101, - "grad_norm": 576.0573120117188, - "learning_rate": 2.242497838494796e-05, - "loss": 45.161, - "step": 144890 - }, - { - "epoch": 0.5854143351769777, - "grad_norm": 596.4187622070312, - "learning_rate": 2.2421506317438325e-05, - "loss": 75.1115, - "step": 144900 - }, - { - "epoch": 0.5854547364423454, - "grad_norm": 652.2193603515625, - "learning_rate": 2.241803430019774e-05, - "loss": 51.4237, - "step": 144910 - }, - { - "epoch": 0.585495137707713, - "grad_norm": 946.1011352539062, - "learning_rate": 2.241456233329392e-05, - "loss": 47.0027, - "step": 144920 - }, - { - "epoch": 0.5855355389730806, - "grad_norm": 711.0994873046875, - "learning_rate": 2.2411090416794538e-05, - "loss": 62.8833, - "step": 144930 - }, - { - "epoch": 0.5855759402384483, - "grad_norm": 1949.346923828125, - "learning_rate": 2.2407618550767268e-05, - "loss": 68.7776, - "step": 144940 - }, - { - "epoch": 0.5856163415038159, - "grad_norm": 676.929931640625, - "learning_rate": 2.2404146735279822e-05, - "loss": 46.8256, - "step": 144950 - }, - { - "epoch": 0.5856567427691836, - "grad_norm": 271.9763488769531, - "learning_rate": 2.2400674970399863e-05, - "loss": 46.4138, - "step": 144960 - }, - { - "epoch": 0.5856971440345512, - "grad_norm": 278.78790283203125, - "learning_rate": 2.2397203256195087e-05, - "loss": 46.952, - "step": 144970 - }, - { - "epoch": 0.5857375452999188, - "grad_norm": 603.0307006835938, - "learning_rate": 2.239373159273318e-05, - "loss": 42.9002, - "step": 144980 - }, - { - "epoch": 0.5857779465652865, - "grad_norm": 573.30322265625, - "learning_rate": 2.2390259980081805e-05, - "loss": 56.5838, - "step": 144990 - }, - { - "epoch": 0.585818347830654, - "grad_norm": 972.4603271484375, - "learning_rate": 2.238678841830867e-05, - "loss": 52.5977, - "step": 145000 - }, - { - "epoch": 0.5858587490960216, - "grad_norm": 383.62628173828125, - "learning_rate": 2.2383316907481433e-05, - "loss": 37.6184, - "step": 145010 - }, - { - "epoch": 0.5858991503613893, - "grad_norm": 496.90045166015625, - "learning_rate": 2.237984544766777e-05, - "loss": 44.9758, - "step": 145020 - }, - { - "epoch": 0.5859395516267569, - "grad_norm": 528.111572265625, - "learning_rate": 2.2376374038935384e-05, - "loss": 51.424, - "step": 145030 - }, - { - "epoch": 0.5859799528921246, - "grad_norm": 857.6492919921875, - "learning_rate": 2.2372902681351923e-05, - "loss": 49.9991, - "step": 145040 - }, - { - "epoch": 0.5860203541574922, - "grad_norm": 642.615966796875, - "learning_rate": 2.236943137498509e-05, - "loss": 42.532, - "step": 145050 - }, - { - "epoch": 0.5860607554228598, - "grad_norm": 1281.65625, - "learning_rate": 2.2365960119902545e-05, - "loss": 71.6778, - "step": 145060 - }, - { - "epoch": 0.5861011566882275, - "grad_norm": 406.2398986816406, - "learning_rate": 2.236248891617196e-05, - "loss": 41.7716, - "step": 145070 - }, - { - "epoch": 0.5861415579535951, - "grad_norm": 514.5000610351562, - "learning_rate": 2.235901776386101e-05, - "loss": 93.0862, - "step": 145080 - }, - { - "epoch": 0.5861819592189628, - "grad_norm": 429.3301696777344, - "learning_rate": 2.2355546663037373e-05, - "loss": 72.3577, - "step": 145090 - }, - { - "epoch": 0.5862223604843304, - "grad_norm": 928.6071166992188, - "learning_rate": 2.2352075613768707e-05, - "loss": 49.8982, - "step": 145100 - }, - { - "epoch": 0.586262761749698, - "grad_norm": 854.5071411132812, - "learning_rate": 2.2348604616122698e-05, - "loss": 55.09, - "step": 145110 - }, - { - "epoch": 0.5863031630150657, - "grad_norm": 980.25390625, - "learning_rate": 2.2345133670166997e-05, - "loss": 63.9885, - "step": 145120 - }, - { - "epoch": 0.5863435642804332, - "grad_norm": 1633.0760498046875, - "learning_rate": 2.2341662775969295e-05, - "loss": 69.4659, - "step": 145130 - }, - { - "epoch": 0.5863839655458009, - "grad_norm": 855.7648315429688, - "learning_rate": 2.2338191933597242e-05, - "loss": 62.3209, - "step": 145140 - }, - { - "epoch": 0.5864243668111685, - "grad_norm": 747.5701293945312, - "learning_rate": 2.2334721143118504e-05, - "loss": 69.4531, - "step": 145150 - }, - { - "epoch": 0.5864647680765361, - "grad_norm": 923.6497802734375, - "learning_rate": 2.2331250404600755e-05, - "loss": 56.0059, - "step": 145160 - }, - { - "epoch": 0.5865051693419038, - "grad_norm": 832.5083618164062, - "learning_rate": 2.2327779718111652e-05, - "loss": 71.0052, - "step": 145170 - }, - { - "epoch": 0.5865455706072714, - "grad_norm": 700.8128051757812, - "learning_rate": 2.232430908371885e-05, - "loss": 56.0096, - "step": 145180 - }, - { - "epoch": 0.5865859718726391, - "grad_norm": 651.7989501953125, - "learning_rate": 2.232083850149003e-05, - "loss": 44.317, - "step": 145190 - }, - { - "epoch": 0.5866263731380067, - "grad_norm": 846.4608764648438, - "learning_rate": 2.2317367971492835e-05, - "loss": 68.4186, - "step": 145200 - }, - { - "epoch": 0.5866667744033743, - "grad_norm": 601.2640380859375, - "learning_rate": 2.2313897493794938e-05, - "loss": 64.6769, - "step": 145210 - }, - { - "epoch": 0.586707175668742, - "grad_norm": 393.1311950683594, - "learning_rate": 2.2310427068463996e-05, - "loss": 35.9002, - "step": 145220 - }, - { - "epoch": 0.5867475769341096, - "grad_norm": 591.4609375, - "learning_rate": 2.2306956695567655e-05, - "loss": 51.3608, - "step": 145230 - }, - { - "epoch": 0.5867879781994773, - "grad_norm": 760.8458862304688, - "learning_rate": 2.2303486375173585e-05, - "loss": 77.252, - "step": 145240 - }, - { - "epoch": 0.5868283794648448, - "grad_norm": 1265.822021484375, - "learning_rate": 2.230001610734943e-05, - "loss": 52.4753, - "step": 145250 - }, - { - "epoch": 0.5868687807302124, - "grad_norm": 751.3007202148438, - "learning_rate": 2.2296545892162858e-05, - "loss": 43.7793, - "step": 145260 - }, - { - "epoch": 0.5869091819955801, - "grad_norm": 706.1170043945312, - "learning_rate": 2.2293075729681513e-05, - "loss": 41.4295, - "step": 145270 - }, - { - "epoch": 0.5869495832609477, - "grad_norm": 778.751708984375, - "learning_rate": 2.2289605619973045e-05, - "loss": 36.7155, - "step": 145280 - }, - { - "epoch": 0.5869899845263153, - "grad_norm": 2680.686767578125, - "learning_rate": 2.2286135563105117e-05, - "loss": 60.0938, - "step": 145290 - }, - { - "epoch": 0.587030385791683, - "grad_norm": 557.2347412109375, - "learning_rate": 2.2282665559145378e-05, - "loss": 51.4661, - "step": 145300 - }, - { - "epoch": 0.5870707870570506, - "grad_norm": 905.8478393554688, - "learning_rate": 2.227919560816146e-05, - "loss": 65.2185, - "step": 145310 - }, - { - "epoch": 0.5871111883224183, - "grad_norm": 652.237548828125, - "learning_rate": 2.2275725710221035e-05, - "loss": 43.8062, - "step": 145320 - }, - { - "epoch": 0.5871515895877859, - "grad_norm": 666.0792846679688, - "learning_rate": 2.2272255865391733e-05, - "loss": 47.7678, - "step": 145330 - }, - { - "epoch": 0.5871919908531535, - "grad_norm": 1246.526611328125, - "learning_rate": 2.2268786073741216e-05, - "loss": 68.3254, - "step": 145340 - }, - { - "epoch": 0.5872323921185212, - "grad_norm": 414.19940185546875, - "learning_rate": 2.226531633533712e-05, - "loss": 52.0656, - "step": 145350 - }, - { - "epoch": 0.5872727933838888, - "grad_norm": 870.029541015625, - "learning_rate": 2.2261846650247075e-05, - "loss": 54.3534, - "step": 145360 - }, - { - "epoch": 0.5873131946492565, - "grad_norm": 1294.477294921875, - "learning_rate": 2.2258377018538753e-05, - "loss": 47.3047, - "step": 145370 - }, - { - "epoch": 0.587353595914624, - "grad_norm": 661.3151245117188, - "learning_rate": 2.2254907440279786e-05, - "loss": 48.7553, - "step": 145380 - }, - { - "epoch": 0.5873939971799916, - "grad_norm": 624.5767822265625, - "learning_rate": 2.2251437915537797e-05, - "loss": 41.0314, - "step": 145390 - }, - { - "epoch": 0.5874343984453593, - "grad_norm": 1039.283935546875, - "learning_rate": 2.224796844438045e-05, - "loss": 59.0014, - "step": 145400 - }, - { - "epoch": 0.5874747997107269, - "grad_norm": 1308.9102783203125, - "learning_rate": 2.2244499026875374e-05, - "loss": 70.056, - "step": 145410 - }, - { - "epoch": 0.5875152009760946, - "grad_norm": 752.0531005859375, - "learning_rate": 2.224102966309021e-05, - "loss": 37.8407, - "step": 145420 - }, - { - "epoch": 0.5875556022414622, - "grad_norm": 584.2730712890625, - "learning_rate": 2.2237560353092592e-05, - "loss": 53.7454, - "step": 145430 - }, - { - "epoch": 0.5875960035068298, - "grad_norm": 825.1495971679688, - "learning_rate": 2.223409109695015e-05, - "loss": 53.0734, - "step": 145440 - }, - { - "epoch": 0.5876364047721975, - "grad_norm": 853.8151245117188, - "learning_rate": 2.2230621894730536e-05, - "loss": 39.1982, - "step": 145450 - }, - { - "epoch": 0.5876768060375651, - "grad_norm": 0.0, - "learning_rate": 2.2227152746501373e-05, - "loss": 28.6321, - "step": 145460 - }, - { - "epoch": 0.5877172073029328, - "grad_norm": 499.9764404296875, - "learning_rate": 2.2223683652330283e-05, - "loss": 39.2175, - "step": 145470 - }, - { - "epoch": 0.5877576085683004, - "grad_norm": 1175.066650390625, - "learning_rate": 2.2220214612284924e-05, - "loss": 57.1001, - "step": 145480 - }, - { - "epoch": 0.587798009833668, - "grad_norm": 566.9341430664062, - "learning_rate": 2.2216745626432906e-05, - "loss": 70.2406, - "step": 145490 - }, - { - "epoch": 0.5878384110990357, - "grad_norm": 664.6567993164062, - "learning_rate": 2.2213276694841866e-05, - "loss": 46.9813, - "step": 145500 - }, - { - "epoch": 0.5878788123644032, - "grad_norm": 540.4669799804688, - "learning_rate": 2.2209807817579438e-05, - "loss": 72.8235, - "step": 145510 - }, - { - "epoch": 0.5879192136297708, - "grad_norm": 761.8043823242188, - "learning_rate": 2.2206338994713228e-05, - "loss": 45.2406, - "step": 145520 - }, - { - "epoch": 0.5879596148951385, - "grad_norm": 331.9342346191406, - "learning_rate": 2.2202870226310888e-05, - "loss": 35.2402, - "step": 145530 - }, - { - "epoch": 0.5880000161605061, - "grad_norm": 220.01025390625, - "learning_rate": 2.2199401512440034e-05, - "loss": 38.3728, - "step": 145540 - }, - { - "epoch": 0.5880404174258738, - "grad_norm": 498.82073974609375, - "learning_rate": 2.2195932853168278e-05, - "loss": 29.1557, - "step": 145550 - }, - { - "epoch": 0.5880808186912414, - "grad_norm": 2570.12255859375, - "learning_rate": 2.2192464248563265e-05, - "loss": 71.8326, - "step": 145560 - }, - { - "epoch": 0.588121219956609, - "grad_norm": 857.0679931640625, - "learning_rate": 2.2188995698692606e-05, - "loss": 53.2321, - "step": 145570 - }, - { - "epoch": 0.5881616212219767, - "grad_norm": 427.507568359375, - "learning_rate": 2.2185527203623922e-05, - "loss": 40.6259, - "step": 145580 - }, - { - "epoch": 0.5882020224873443, - "grad_norm": 223.01893615722656, - "learning_rate": 2.2182058763424833e-05, - "loss": 37.5061, - "step": 145590 - }, - { - "epoch": 0.588242423752712, - "grad_norm": 557.50390625, - "learning_rate": 2.217859037816296e-05, - "loss": 82.0438, - "step": 145600 - }, - { - "epoch": 0.5882828250180796, - "grad_norm": 779.0130615234375, - "learning_rate": 2.217512204790592e-05, - "loss": 44.8452, - "step": 145610 - }, - { - "epoch": 0.5883232262834472, - "grad_norm": 323.9913024902344, - "learning_rate": 2.2171653772721335e-05, - "loss": 51.8958, - "step": 145620 - }, - { - "epoch": 0.5883636275488149, - "grad_norm": 635.4793090820312, - "learning_rate": 2.2168185552676805e-05, - "loss": 64.8756, - "step": 145630 - }, - { - "epoch": 0.5884040288141824, - "grad_norm": 477.123779296875, - "learning_rate": 2.2164717387839966e-05, - "loss": 37.9061, - "step": 145640 - }, - { - "epoch": 0.58844443007955, - "grad_norm": 990.1459350585938, - "learning_rate": 2.2161249278278405e-05, - "loss": 57.033, - "step": 145650 - }, - { - "epoch": 0.5884848313449177, - "grad_norm": 519.134765625, - "learning_rate": 2.2157781224059772e-05, - "loss": 63.8006, - "step": 145660 - }, - { - "epoch": 0.5885252326102853, - "grad_norm": 540.0624389648438, - "learning_rate": 2.215431322525165e-05, - "loss": 66.9346, - "step": 145670 - }, - { - "epoch": 0.588565633875653, - "grad_norm": 429.2971496582031, - "learning_rate": 2.215084528192165e-05, - "loss": 41.9368, - "step": 145680 - }, - { - "epoch": 0.5886060351410206, - "grad_norm": 287.19329833984375, - "learning_rate": 2.2147377394137398e-05, - "loss": 53.8232, - "step": 145690 - }, - { - "epoch": 0.5886464364063883, - "grad_norm": 2038.7117919921875, - "learning_rate": 2.214390956196649e-05, - "loss": 53.2171, - "step": 145700 - }, - { - "epoch": 0.5886868376717559, - "grad_norm": 342.96429443359375, - "learning_rate": 2.2140441785476526e-05, - "loss": 70.9862, - "step": 145710 - }, - { - "epoch": 0.5887272389371235, - "grad_norm": 1174.8426513671875, - "learning_rate": 2.213697406473513e-05, - "loss": 59.1554, - "step": 145720 - }, - { - "epoch": 0.5887676402024912, - "grad_norm": 446.2856750488281, - "learning_rate": 2.213350639980989e-05, - "loss": 84.1959, - "step": 145730 - }, - { - "epoch": 0.5888080414678588, - "grad_norm": 910.0787353515625, - "learning_rate": 2.213003879076843e-05, - "loss": 59.0944, - "step": 145740 - }, - { - "epoch": 0.5888484427332265, - "grad_norm": 379.2336120605469, - "learning_rate": 2.212657123767834e-05, - "loss": 47.8588, - "step": 145750 - }, - { - "epoch": 0.5888888439985941, - "grad_norm": 545.8311767578125, - "learning_rate": 2.2123103740607215e-05, - "loss": 41.2895, - "step": 145760 - }, - { - "epoch": 0.5889292452639616, - "grad_norm": 451.2540283203125, - "learning_rate": 2.2119636299622675e-05, - "loss": 49.8382, - "step": 145770 - }, - { - "epoch": 0.5889696465293293, - "grad_norm": 2494.452880859375, - "learning_rate": 2.2116168914792292e-05, - "loss": 51.3358, - "step": 145780 - }, - { - "epoch": 0.5890100477946969, - "grad_norm": 1018.56103515625, - "learning_rate": 2.211270158618369e-05, - "loss": 80.1487, - "step": 145790 - }, - { - "epoch": 0.5890504490600645, - "grad_norm": 700.343994140625, - "learning_rate": 2.2109234313864465e-05, - "loss": 52.8399, - "step": 145800 - }, - { - "epoch": 0.5890908503254322, - "grad_norm": 401.8301696777344, - "learning_rate": 2.210576709790219e-05, - "loss": 38.5016, - "step": 145810 - }, - { - "epoch": 0.5891312515907998, - "grad_norm": 379.638427734375, - "learning_rate": 2.210229993836449e-05, - "loss": 67.7898, - "step": 145820 - }, - { - "epoch": 0.5891716528561675, - "grad_norm": 1105.470458984375, - "learning_rate": 2.209883283531894e-05, - "loss": 62.1052, - "step": 145830 - }, - { - "epoch": 0.5892120541215351, - "grad_norm": 409.69854736328125, - "learning_rate": 2.209536578883313e-05, - "loss": 52.7116, - "step": 145840 - }, - { - "epoch": 0.5892524553869027, - "grad_norm": 442.1845397949219, - "learning_rate": 2.209189879897467e-05, - "loss": 45.8233, - "step": 145850 - }, - { - "epoch": 0.5892928566522704, - "grad_norm": 284.5019836425781, - "learning_rate": 2.2088431865811127e-05, - "loss": 58.4883, - "step": 145860 - }, - { - "epoch": 0.589333257917638, - "grad_norm": 762.030029296875, - "learning_rate": 2.2084964989410113e-05, - "loss": 61.4194, - "step": 145870 - }, - { - "epoch": 0.5893736591830057, - "grad_norm": 770.3872680664062, - "learning_rate": 2.208149816983921e-05, - "loss": 69.5855, - "step": 145880 - }, - { - "epoch": 0.5894140604483732, - "grad_norm": 322.16644287109375, - "learning_rate": 2.2078031407165993e-05, - "loss": 64.2785, - "step": 145890 - }, - { - "epoch": 0.5894544617137408, - "grad_norm": 483.3517150878906, - "learning_rate": 2.2074564701458065e-05, - "loss": 39.8775, - "step": 145900 - }, - { - "epoch": 0.5894948629791085, - "grad_norm": 506.798828125, - "learning_rate": 2.2071098052783008e-05, - "loss": 52.2202, - "step": 145910 - }, - { - "epoch": 0.5895352642444761, - "grad_norm": 342.7799072265625, - "learning_rate": 2.2067631461208393e-05, - "loss": 38.5798, - "step": 145920 - }, - { - "epoch": 0.5895756655098437, - "grad_norm": 892.741943359375, - "learning_rate": 2.2064164926801817e-05, - "loss": 56.9719, - "step": 145930 - }, - { - "epoch": 0.5896160667752114, - "grad_norm": 1077.632080078125, - "learning_rate": 2.2060698449630853e-05, - "loss": 61.5984, - "step": 145940 - }, - { - "epoch": 0.589656468040579, - "grad_norm": 469.6213684082031, - "learning_rate": 2.205723202976309e-05, - "loss": 33.4894, - "step": 145950 - }, - { - "epoch": 0.5896968693059467, - "grad_norm": 283.9982604980469, - "learning_rate": 2.205376566726611e-05, - "loss": 30.2017, - "step": 145960 - }, - { - "epoch": 0.5897372705713143, - "grad_norm": 1458.912841796875, - "learning_rate": 2.2050299362207472e-05, - "loss": 77.3268, - "step": 145970 - }, - { - "epoch": 0.589777671836682, - "grad_norm": 503.4773254394531, - "learning_rate": 2.2046833114654773e-05, - "loss": 56.1382, - "step": 145980 - }, - { - "epoch": 0.5898180731020496, - "grad_norm": 2263.208740234375, - "learning_rate": 2.204336692467559e-05, - "loss": 53.7279, - "step": 145990 - }, - { - "epoch": 0.5898584743674172, - "grad_norm": 752.0081176757812, - "learning_rate": 2.2039900792337474e-05, - "loss": 52.1207, - "step": 146000 - }, - { - "epoch": 0.5898988756327849, - "grad_norm": 748.5038452148438, - "learning_rate": 2.203643471770803e-05, - "loss": 49.4564, - "step": 146010 - }, - { - "epoch": 0.5899392768981524, - "grad_norm": 529.9098510742188, - "learning_rate": 2.203296870085481e-05, - "loss": 60.0309, - "step": 146020 - }, - { - "epoch": 0.58997967816352, - "grad_norm": 878.1099853515625, - "learning_rate": 2.20295027418454e-05, - "loss": 63.0526, - "step": 146030 - }, - { - "epoch": 0.5900200794288877, - "grad_norm": 1283.2969970703125, - "learning_rate": 2.202603684074736e-05, - "loss": 59.6128, - "step": 146040 - }, - { - "epoch": 0.5900604806942553, - "grad_norm": 2465.196533203125, - "learning_rate": 2.2022570997628256e-05, - "loss": 76.0184, - "step": 146050 - }, - { - "epoch": 0.590100881959623, - "grad_norm": 223.83999633789062, - "learning_rate": 2.2019105212555675e-05, - "loss": 44.1915, - "step": 146060 - }, - { - "epoch": 0.5901412832249906, - "grad_norm": 1009.5184326171875, - "learning_rate": 2.2015639485597168e-05, - "loss": 47.8159, - "step": 146070 - }, - { - "epoch": 0.5901816844903582, - "grad_norm": 774.2605590820312, - "learning_rate": 2.2012173816820297e-05, - "loss": 42.978, - "step": 146080 - }, - { - "epoch": 0.5902220857557259, - "grad_norm": 537.9838256835938, - "learning_rate": 2.2008708206292645e-05, - "loss": 58.8493, - "step": 146090 - }, - { - "epoch": 0.5902624870210935, - "grad_norm": 209.31063842773438, - "learning_rate": 2.200524265408176e-05, - "loss": 48.89, - "step": 146100 - }, - { - "epoch": 0.5903028882864612, - "grad_norm": 923.2644653320312, - "learning_rate": 2.2001777160255222e-05, - "loss": 57.573, - "step": 146110 - }, - { - "epoch": 0.5903432895518288, - "grad_norm": 531.9500122070312, - "learning_rate": 2.199831172488058e-05, - "loss": 63.6238, - "step": 146120 - }, - { - "epoch": 0.5903836908171964, - "grad_norm": 1522.965576171875, - "learning_rate": 2.1994846348025385e-05, - "loss": 35.7507, - "step": 146130 - }, - { - "epoch": 0.5904240920825641, - "grad_norm": 1366.9095458984375, - "learning_rate": 2.1991381029757215e-05, - "loss": 90.262, - "step": 146140 - }, - { - "epoch": 0.5904644933479316, - "grad_norm": 804.5481567382812, - "learning_rate": 2.1987915770143624e-05, - "loss": 53.6699, - "step": 146150 - }, - { - "epoch": 0.5905048946132992, - "grad_norm": 509.3193054199219, - "learning_rate": 2.1984450569252154e-05, - "loss": 51.2727, - "step": 146160 - }, - { - "epoch": 0.5905452958786669, - "grad_norm": 789.99755859375, - "learning_rate": 2.1980985427150385e-05, - "loss": 44.7793, - "step": 146170 - }, - { - "epoch": 0.5905856971440345, - "grad_norm": 731.342041015625, - "learning_rate": 2.197752034390585e-05, - "loss": 62.3214, - "step": 146180 - }, - { - "epoch": 0.5906260984094022, - "grad_norm": 371.409423828125, - "learning_rate": 2.1974055319586124e-05, - "loss": 78.2004, - "step": 146190 - }, - { - "epoch": 0.5906664996747698, - "grad_norm": 3826.60009765625, - "learning_rate": 2.1970590354258745e-05, - "loss": 73.2504, - "step": 146200 - }, - { - "epoch": 0.5907069009401374, - "grad_norm": 564.4078979492188, - "learning_rate": 2.1967125447991254e-05, - "loss": 42.8259, - "step": 146210 - }, - { - "epoch": 0.5907473022055051, - "grad_norm": 309.908203125, - "learning_rate": 2.1963660600851225e-05, - "loss": 58.1054, - "step": 146220 - }, - { - "epoch": 0.5907877034708727, - "grad_norm": 501.643310546875, - "learning_rate": 2.1960195812906196e-05, - "loss": 41.4755, - "step": 146230 - }, - { - "epoch": 0.5908281047362404, - "grad_norm": 546.9186401367188, - "learning_rate": 2.1956731084223702e-05, - "loss": 48.2207, - "step": 146240 - }, - { - "epoch": 0.590868506001608, - "grad_norm": 934.6657104492188, - "learning_rate": 2.195326641487132e-05, - "loss": 44.1907, - "step": 146250 - }, - { - "epoch": 0.5909089072669756, - "grad_norm": 864.5132446289062, - "learning_rate": 2.1949801804916566e-05, - "loss": 76.9655, - "step": 146260 - }, - { - "epoch": 0.5909493085323433, - "grad_norm": 763.5232543945312, - "learning_rate": 2.1946337254426998e-05, - "loss": 34.84, - "step": 146270 - }, - { - "epoch": 0.5909897097977108, - "grad_norm": 1223.5108642578125, - "learning_rate": 2.194287276347016e-05, - "loss": 55.184, - "step": 146280 - }, - { - "epoch": 0.5910301110630785, - "grad_norm": 392.8327331542969, - "learning_rate": 2.193940833211359e-05, - "loss": 36.6568, - "step": 146290 - }, - { - "epoch": 0.5910705123284461, - "grad_norm": 873.4593505859375, - "learning_rate": 2.1935943960424833e-05, - "loss": 68.934, - "step": 146300 - }, - { - "epoch": 0.5911109135938137, - "grad_norm": 858.9896240234375, - "learning_rate": 2.1932479648471416e-05, - "loss": 52.654, - "step": 146310 - }, - { - "epoch": 0.5911513148591814, - "grad_norm": 2630.4091796875, - "learning_rate": 2.19290153963209e-05, - "loss": 80.9151, - "step": 146320 - }, - { - "epoch": 0.591191716124549, - "grad_norm": 438.961669921875, - "learning_rate": 2.1925551204040806e-05, - "loss": 76.9184, - "step": 146330 - }, - { - "epoch": 0.5912321173899167, - "grad_norm": 916.41357421875, - "learning_rate": 2.1922087071698665e-05, - "loss": 42.8016, - "step": 146340 - }, - { - "epoch": 0.5912725186552843, - "grad_norm": 1062.0850830078125, - "learning_rate": 2.1918622999362035e-05, - "loss": 52.5087, - "step": 146350 - }, - { - "epoch": 0.5913129199206519, - "grad_norm": 918.3807983398438, - "learning_rate": 2.1915158987098432e-05, - "loss": 52.7011, - "step": 146360 - }, - { - "epoch": 0.5913533211860196, - "grad_norm": 549.4228515625, - "learning_rate": 2.191169503497539e-05, - "loss": 57.5452, - "step": 146370 - }, - { - "epoch": 0.5913937224513872, - "grad_norm": 684.5689697265625, - "learning_rate": 2.190823114306045e-05, - "loss": 58.4194, - "step": 146380 - }, - { - "epoch": 0.5914341237167549, - "grad_norm": 633.138427734375, - "learning_rate": 2.190476731142112e-05, - "loss": 70.747, - "step": 146390 - }, - { - "epoch": 0.5914745249821224, - "grad_norm": 353.6347351074219, - "learning_rate": 2.1901303540124956e-05, - "loss": 47.8663, - "step": 146400 - }, - { - "epoch": 0.59151492624749, - "grad_norm": 1246.96826171875, - "learning_rate": 2.189783982923948e-05, - "loss": 49.267, - "step": 146410 - }, - { - "epoch": 0.5915553275128577, - "grad_norm": 963.0771484375, - "learning_rate": 2.18943761788322e-05, - "loss": 74.9258, - "step": 146420 - }, - { - "epoch": 0.5915957287782253, - "grad_norm": 543.4385986328125, - "learning_rate": 2.1890912588970662e-05, - "loss": 48.036, - "step": 146430 - }, - { - "epoch": 0.591636130043593, - "grad_norm": 1466.5035400390625, - "learning_rate": 2.188744905972239e-05, - "loss": 56.9263, - "step": 146440 - }, - { - "epoch": 0.5916765313089606, - "grad_norm": 893.92333984375, - "learning_rate": 2.1883985591154893e-05, - "loss": 65.8888, - "step": 146450 - }, - { - "epoch": 0.5917169325743282, - "grad_norm": 614.3671264648438, - "learning_rate": 2.188052218333571e-05, - "loss": 35.0252, - "step": 146460 - }, - { - "epoch": 0.5917573338396959, - "grad_norm": 581.8492431640625, - "learning_rate": 2.1877058836332335e-05, - "loss": 46.298, - "step": 146470 - }, - { - "epoch": 0.5917977351050635, - "grad_norm": 1633.851318359375, - "learning_rate": 2.187359555021232e-05, - "loss": 63.4169, - "step": 146480 - }, - { - "epoch": 0.5918381363704311, - "grad_norm": 484.6391296386719, - "learning_rate": 2.187013232504317e-05, - "loss": 114.912, - "step": 146490 - }, - { - "epoch": 0.5918785376357988, - "grad_norm": 1368.1236572265625, - "learning_rate": 2.186666916089239e-05, - "loss": 54.1732, - "step": 146500 - }, - { - "epoch": 0.5919189389011664, - "grad_norm": 131.18450927734375, - "learning_rate": 2.1863206057827523e-05, - "loss": 41.559, - "step": 146510 - }, - { - "epoch": 0.5919593401665341, - "grad_norm": 682.2334594726562, - "learning_rate": 2.1859743015916065e-05, - "loss": 79.2423, - "step": 146520 - }, - { - "epoch": 0.5919997414319016, - "grad_norm": 236.24301147460938, - "learning_rate": 2.1856280035225527e-05, - "loss": 37.946, - "step": 146530 - }, - { - "epoch": 0.5920401426972692, - "grad_norm": 541.71728515625, - "learning_rate": 2.1852817115823436e-05, - "loss": 45.4416, - "step": 146540 - }, - { - "epoch": 0.5920805439626369, - "grad_norm": 1449.23046875, - "learning_rate": 2.184935425777728e-05, - "loss": 76.4084, - "step": 146550 - }, - { - "epoch": 0.5921209452280045, - "grad_norm": 752.7134399414062, - "learning_rate": 2.1845891461154602e-05, - "loss": 50.192, - "step": 146560 - }, - { - "epoch": 0.5921613464933722, - "grad_norm": 1388.42626953125, - "learning_rate": 2.1842428726022892e-05, - "loss": 48.8798, - "step": 146570 - }, - { - "epoch": 0.5922017477587398, - "grad_norm": 1135.522216796875, - "learning_rate": 2.183896605244965e-05, - "loss": 66.7696, - "step": 146580 - }, - { - "epoch": 0.5922421490241074, - "grad_norm": 1091.169921875, - "learning_rate": 2.18355034405024e-05, - "loss": 52.7894, - "step": 146590 - }, - { - "epoch": 0.5922825502894751, - "grad_norm": 315.2186279296875, - "learning_rate": 2.183204089024864e-05, - "loss": 31.6009, - "step": 146600 - }, - { - "epoch": 0.5923229515548427, - "grad_norm": 1093.932861328125, - "learning_rate": 2.182857840175587e-05, - "loss": 56.5924, - "step": 146610 - }, - { - "epoch": 0.5923633528202104, - "grad_norm": 504.3761291503906, - "learning_rate": 2.1825115975091596e-05, - "loss": 51.7465, - "step": 146620 - }, - { - "epoch": 0.592403754085578, - "grad_norm": 1324.4403076171875, - "learning_rate": 2.1821653610323318e-05, - "loss": 62.9345, - "step": 146630 - }, - { - "epoch": 0.5924441553509456, - "grad_norm": 867.4586791992188, - "learning_rate": 2.181819130751855e-05, - "loss": 51.7099, - "step": 146640 - }, - { - "epoch": 0.5924845566163133, - "grad_norm": 476.7613525390625, - "learning_rate": 2.1814729066744776e-05, - "loss": 50.1964, - "step": 146650 - }, - { - "epoch": 0.5925249578816808, - "grad_norm": 654.5791015625, - "learning_rate": 2.181126688806949e-05, - "loss": 56.1941, - "step": 146660 - }, - { - "epoch": 0.5925653591470484, - "grad_norm": 834.60498046875, - "learning_rate": 2.180780477156021e-05, - "loss": 52.9529, - "step": 146670 - }, - { - "epoch": 0.5926057604124161, - "grad_norm": 4547.328125, - "learning_rate": 2.1804342717284415e-05, - "loss": 59.4421, - "step": 146680 - }, - { - "epoch": 0.5926461616777837, - "grad_norm": 492.46307373046875, - "learning_rate": 2.18008807253096e-05, - "loss": 50.6839, - "step": 146690 - }, - { - "epoch": 0.5926865629431514, - "grad_norm": 3489.217041015625, - "learning_rate": 2.1797418795703267e-05, - "loss": 61.9006, - "step": 146700 - }, - { - "epoch": 0.592726964208519, - "grad_norm": 392.3635559082031, - "learning_rate": 2.17939569285329e-05, - "loss": 35.6025, - "step": 146710 - }, - { - "epoch": 0.5927673654738866, - "grad_norm": 1170.8079833984375, - "learning_rate": 2.1790495123866e-05, - "loss": 68.8476, - "step": 146720 - }, - { - "epoch": 0.5928077667392543, - "grad_norm": 654.0852661132812, - "learning_rate": 2.1787033381770045e-05, - "loss": 49.868, - "step": 146730 - }, - { - "epoch": 0.5928481680046219, - "grad_norm": 387.08111572265625, - "learning_rate": 2.1783571702312523e-05, - "loss": 39.509, - "step": 146740 - }, - { - "epoch": 0.5928885692699896, - "grad_norm": 876.9544067382812, - "learning_rate": 2.1780110085560935e-05, - "loss": 54.9167, - "step": 146750 - }, - { - "epoch": 0.5929289705353572, - "grad_norm": 867.4058227539062, - "learning_rate": 2.177664853158276e-05, - "loss": 58.304, - "step": 146760 - }, - { - "epoch": 0.5929693718007248, - "grad_norm": 269.5369873046875, - "learning_rate": 2.1773187040445465e-05, - "loss": 44.2142, - "step": 146770 - }, - { - "epoch": 0.5930097730660925, - "grad_norm": 1281.8651123046875, - "learning_rate": 2.1769725612216567e-05, - "loss": 52.3775, - "step": 146780 - }, - { - "epoch": 0.59305017433146, - "grad_norm": 701.8643188476562, - "learning_rate": 2.1766264246963525e-05, - "loss": 54.0386, - "step": 146790 - }, - { - "epoch": 0.5930905755968277, - "grad_norm": 791.13720703125, - "learning_rate": 2.176280294475383e-05, - "loss": 49.9338, - "step": 146800 - }, - { - "epoch": 0.5931309768621953, - "grad_norm": 181.1816864013672, - "learning_rate": 2.1759341705654958e-05, - "loss": 45.6333, - "step": 146810 - }, - { - "epoch": 0.5931713781275629, - "grad_norm": 940.6576538085938, - "learning_rate": 2.175588052973438e-05, - "loss": 68.6767, - "step": 146820 - }, - { - "epoch": 0.5932117793929306, - "grad_norm": 583.0791625976562, - "learning_rate": 2.175241941705959e-05, - "loss": 59.7825, - "step": 146830 - }, - { - "epoch": 0.5932521806582982, - "grad_norm": 924.9185791015625, - "learning_rate": 2.1748958367698046e-05, - "loss": 42.2929, - "step": 146840 - }, - { - "epoch": 0.5932925819236659, - "grad_norm": 464.9294128417969, - "learning_rate": 2.174549738171724e-05, - "loss": 40.7214, - "step": 146850 - }, - { - "epoch": 0.5933329831890335, - "grad_norm": 495.89080810546875, - "learning_rate": 2.174203645918464e-05, - "loss": 90.9891, - "step": 146860 - }, - { - "epoch": 0.5933733844544011, - "grad_norm": 715.9097290039062, - "learning_rate": 2.1738575600167713e-05, - "loss": 46.5196, - "step": 146870 - }, - { - "epoch": 0.5934137857197688, - "grad_norm": 630.5188598632812, - "learning_rate": 2.1735114804733938e-05, - "loss": 34.2869, - "step": 146880 - }, - { - "epoch": 0.5934541869851364, - "grad_norm": 923.3815307617188, - "learning_rate": 2.173165407295078e-05, - "loss": 80.3715, - "step": 146890 - }, - { - "epoch": 0.593494588250504, - "grad_norm": 985.5263671875, - "learning_rate": 2.1728193404885697e-05, - "loss": 68.0232, - "step": 146900 - }, - { - "epoch": 0.5935349895158717, - "grad_norm": 1455.7779541015625, - "learning_rate": 2.172473280060618e-05, - "loss": 55.3433, - "step": 146910 - }, - { - "epoch": 0.5935753907812392, - "grad_norm": 679.5105590820312, - "learning_rate": 2.172127226017967e-05, - "loss": 48.74, - "step": 146920 - }, - { - "epoch": 0.5936157920466069, - "grad_norm": 484.81591796875, - "learning_rate": 2.1717811783673657e-05, - "loss": 29.9719, - "step": 146930 - }, - { - "epoch": 0.5936561933119745, - "grad_norm": 623.7422485351562, - "learning_rate": 2.1714351371155596e-05, - "loss": 70.4198, - "step": 146940 - }, - { - "epoch": 0.5936965945773421, - "grad_norm": 789.708740234375, - "learning_rate": 2.171089102269294e-05, - "loss": 34.6678, - "step": 146950 - }, - { - "epoch": 0.5937369958427098, - "grad_norm": 1196.701904296875, - "learning_rate": 2.170743073835316e-05, - "loss": 52.6208, - "step": 146960 - }, - { - "epoch": 0.5937773971080774, - "grad_norm": 540.9883422851562, - "learning_rate": 2.170397051820371e-05, - "loss": 57.1448, - "step": 146970 - }, - { - "epoch": 0.5938177983734451, - "grad_norm": 4241.60205078125, - "learning_rate": 2.1700510362312052e-05, - "loss": 70.6284, - "step": 146980 - }, - { - "epoch": 0.5938581996388127, - "grad_norm": 664.571044921875, - "learning_rate": 2.1697050270745648e-05, - "loss": 86.9309, - "step": 146990 - }, - { - "epoch": 0.5938986009041803, - "grad_norm": 1107.1243896484375, - "learning_rate": 2.1693590243571938e-05, - "loss": 43.3203, - "step": 147000 - }, - { - "epoch": 0.593939002169548, - "grad_norm": 1379.3629150390625, - "learning_rate": 2.1690130280858398e-05, - "loss": 63.4872, - "step": 147010 - }, - { - "epoch": 0.5939794034349156, - "grad_norm": 534.1649780273438, - "learning_rate": 2.1686670382672475e-05, - "loss": 56.6214, - "step": 147020 - }, - { - "epoch": 0.5940198047002833, - "grad_norm": 675.9949951171875, - "learning_rate": 2.1683210549081607e-05, - "loss": 53.4801, - "step": 147030 - }, - { - "epoch": 0.5940602059656508, - "grad_norm": 450.9206848144531, - "learning_rate": 2.1679750780153267e-05, - "loss": 45.3882, - "step": 147040 - }, - { - "epoch": 0.5941006072310184, - "grad_norm": 513.6524047851562, - "learning_rate": 2.1676291075954894e-05, - "loss": 56.7767, - "step": 147050 - }, - { - "epoch": 0.5941410084963861, - "grad_norm": 938.4927368164062, - "learning_rate": 2.1672831436553935e-05, - "loss": 38.4072, - "step": 147060 - }, - { - "epoch": 0.5941814097617537, - "grad_norm": 520.1245727539062, - "learning_rate": 2.166937186201784e-05, - "loss": 49.0356, - "step": 147070 - }, - { - "epoch": 0.5942218110271213, - "grad_norm": 508.855224609375, - "learning_rate": 2.166591235241405e-05, - "loss": 45.4661, - "step": 147080 - }, - { - "epoch": 0.594262212292489, - "grad_norm": 1004.2674560546875, - "learning_rate": 2.1662452907810024e-05, - "loss": 49.2036, - "step": 147090 - }, - { - "epoch": 0.5943026135578566, - "grad_norm": 769.294189453125, - "learning_rate": 2.1658993528273197e-05, - "loss": 65.9896, - "step": 147100 - }, - { - "epoch": 0.5943430148232243, - "grad_norm": 256.8412780761719, - "learning_rate": 2.1655534213871e-05, - "loss": 55.592, - "step": 147110 - }, - { - "epoch": 0.5943834160885919, - "grad_norm": 1590.532958984375, - "learning_rate": 2.16520749646709e-05, - "loss": 56.2808, - "step": 147120 - }, - { - "epoch": 0.5944238173539595, - "grad_norm": 846.234619140625, - "learning_rate": 2.1648615780740316e-05, - "loss": 50.1012, - "step": 147130 - }, - { - "epoch": 0.5944642186193272, - "grad_norm": 884.344482421875, - "learning_rate": 2.164515666214669e-05, - "loss": 40.8878, - "step": 147140 - }, - { - "epoch": 0.5945046198846948, - "grad_norm": 409.51025390625, - "learning_rate": 2.1641697608957466e-05, - "loss": 50.304, - "step": 147150 - }, - { - "epoch": 0.5945450211500625, - "grad_norm": 934.0576782226562, - "learning_rate": 2.163823862124007e-05, - "loss": 46.8148, - "step": 147160 - }, - { - "epoch": 0.59458542241543, - "grad_norm": 1109.9046630859375, - "learning_rate": 2.163477969906195e-05, - "loss": 47.4143, - "step": 147170 - }, - { - "epoch": 0.5946258236807976, - "grad_norm": 733.655517578125, - "learning_rate": 2.1631320842490532e-05, - "loss": 42.5417, - "step": 147180 - }, - { - "epoch": 0.5946662249461653, - "grad_norm": 121.8224868774414, - "learning_rate": 2.162786205159324e-05, - "loss": 52.3802, - "step": 147190 - }, - { - "epoch": 0.5947066262115329, - "grad_norm": 1324.3228759765625, - "learning_rate": 2.1624403326437523e-05, - "loss": 67.4435, - "step": 147200 - }, - { - "epoch": 0.5947470274769006, - "grad_norm": 911.5564575195312, - "learning_rate": 2.16209446670908e-05, - "loss": 43.5101, - "step": 147210 - }, - { - "epoch": 0.5947874287422682, - "grad_norm": 460.3110656738281, - "learning_rate": 2.1617486073620498e-05, - "loss": 59.1632, - "step": 147220 - }, - { - "epoch": 0.5948278300076358, - "grad_norm": 569.986572265625, - "learning_rate": 2.1614027546094048e-05, - "loss": 35.5932, - "step": 147230 - }, - { - "epoch": 0.5948682312730035, - "grad_norm": 1580.7315673828125, - "learning_rate": 2.1610569084578867e-05, - "loss": 39.1384, - "step": 147240 - }, - { - "epoch": 0.5949086325383711, - "grad_norm": 675.512939453125, - "learning_rate": 2.1607110689142393e-05, - "loss": 55.1546, - "step": 147250 - }, - { - "epoch": 0.5949490338037388, - "grad_norm": 978.5493774414062, - "learning_rate": 2.1603652359852044e-05, - "loss": 38.9832, - "step": 147260 - }, - { - "epoch": 0.5949894350691064, - "grad_norm": 1820.423583984375, - "learning_rate": 2.1600194096775233e-05, - "loss": 65.3882, - "step": 147270 - }, - { - "epoch": 0.595029836334474, - "grad_norm": 451.9998474121094, - "learning_rate": 2.1596735899979396e-05, - "loss": 40.6909, - "step": 147280 - }, - { - "epoch": 0.5950702375998417, - "grad_norm": 1065.4068603515625, - "learning_rate": 2.159327776953195e-05, - "loss": 66.7301, - "step": 147290 - }, - { - "epoch": 0.5951106388652092, - "grad_norm": 672.516845703125, - "learning_rate": 2.158981970550029e-05, - "loss": 50.5191, - "step": 147300 - }, - { - "epoch": 0.5951510401305768, - "grad_norm": 2511.90087890625, - "learning_rate": 2.1586361707951866e-05, - "loss": 59.1653, - "step": 147310 - }, - { - "epoch": 0.5951914413959445, - "grad_norm": 489.2138366699219, - "learning_rate": 2.158290377695407e-05, - "loss": 50.135, - "step": 147320 - }, - { - "epoch": 0.5952318426613121, - "grad_norm": 790.0176391601562, - "learning_rate": 2.1579445912574333e-05, - "loss": 47.7796, - "step": 147330 - }, - { - "epoch": 0.5952722439266798, - "grad_norm": 1207.177734375, - "learning_rate": 2.157598811488006e-05, - "loss": 53.8064, - "step": 147340 - }, - { - "epoch": 0.5953126451920474, - "grad_norm": 1556.337158203125, - "learning_rate": 2.1572530383938645e-05, - "loss": 74.1998, - "step": 147350 - }, - { - "epoch": 0.595353046457415, - "grad_norm": 289.1591796875, - "learning_rate": 2.1569072719817526e-05, - "loss": 47.7599, - "step": 147360 - }, - { - "epoch": 0.5953934477227827, - "grad_norm": 793.8142700195312, - "learning_rate": 2.1565615122584092e-05, - "loss": 50.358, - "step": 147370 - }, - { - "epoch": 0.5954338489881503, - "grad_norm": 424.2123718261719, - "learning_rate": 2.156215759230577e-05, - "loss": 50.9866, - "step": 147380 - }, - { - "epoch": 0.595474250253518, - "grad_norm": 680.01318359375, - "learning_rate": 2.155870012904996e-05, - "loss": 34.8699, - "step": 147390 - }, - { - "epoch": 0.5955146515188856, - "grad_norm": 1084.6480712890625, - "learning_rate": 2.155524273288405e-05, - "loss": 54.3668, - "step": 147400 - }, - { - "epoch": 0.5955550527842532, - "grad_norm": 1713.6904296875, - "learning_rate": 2.155178540387546e-05, - "loss": 55.3956, - "step": 147410 - }, - { - "epoch": 0.5955954540496209, - "grad_norm": 677.2518310546875, - "learning_rate": 2.1548328142091598e-05, - "loss": 48.0534, - "step": 147420 - }, - { - "epoch": 0.5956358553149884, - "grad_norm": 976.79443359375, - "learning_rate": 2.154487094759984e-05, - "loss": 50.823, - "step": 147430 - }, - { - "epoch": 0.5956762565803561, - "grad_norm": 306.4274597167969, - "learning_rate": 2.1541413820467615e-05, - "loss": 70.2195, - "step": 147440 - }, - { - "epoch": 0.5957166578457237, - "grad_norm": 870.5831298828125, - "learning_rate": 2.1537956760762295e-05, - "loss": 44.3455, - "step": 147450 - }, - { - "epoch": 0.5957570591110913, - "grad_norm": 2688.650146484375, - "learning_rate": 2.15344997685513e-05, - "loss": 55.4343, - "step": 147460 - }, - { - "epoch": 0.595797460376459, - "grad_norm": 1122.9935302734375, - "learning_rate": 2.1531042843902018e-05, - "loss": 54.8769, - "step": 147470 - }, - { - "epoch": 0.5958378616418266, - "grad_norm": 810.6016845703125, - "learning_rate": 2.1527585986881837e-05, - "loss": 33.0876, - "step": 147480 - }, - { - "epoch": 0.5958782629071943, - "grad_norm": 579.2329711914062, - "learning_rate": 2.152412919755816e-05, - "loss": 56.4312, - "step": 147490 - }, - { - "epoch": 0.5959186641725619, - "grad_norm": 578.1658325195312, - "learning_rate": 2.1520672475998373e-05, - "loss": 60.0798, - "step": 147500 - }, - { - "epoch": 0.5959590654379295, - "grad_norm": 828.1384887695312, - "learning_rate": 2.1517215822269857e-05, - "loss": 53.2631, - "step": 147510 - }, - { - "epoch": 0.5959994667032972, - "grad_norm": 278.0408935546875, - "learning_rate": 2.1513759236440023e-05, - "loss": 46.6886, - "step": 147520 - }, - { - "epoch": 0.5960398679686648, - "grad_norm": 1062.5751953125, - "learning_rate": 2.1510302718576232e-05, - "loss": 67.722, - "step": 147530 - }, - { - "epoch": 0.5960802692340325, - "grad_norm": 1039.3131103515625, - "learning_rate": 2.1506846268745903e-05, - "loss": 37.7568, - "step": 147540 - }, - { - "epoch": 0.5961206704994001, - "grad_norm": 186.1059112548828, - "learning_rate": 2.1503389887016404e-05, - "loss": 49.6921, - "step": 147550 - }, - { - "epoch": 0.5961610717647676, - "grad_norm": 664.5225219726562, - "learning_rate": 2.149993357345511e-05, - "loss": 50.4448, - "step": 147560 - }, - { - "epoch": 0.5962014730301353, - "grad_norm": 636.2140502929688, - "learning_rate": 2.149647732812942e-05, - "loss": 46.299, - "step": 147570 - }, - { - "epoch": 0.5962418742955029, - "grad_norm": 631.2529907226562, - "learning_rate": 2.1493021151106703e-05, - "loss": 51.5016, - "step": 147580 - }, - { - "epoch": 0.5962822755608705, - "grad_norm": 579.6639404296875, - "learning_rate": 2.1489565042454344e-05, - "loss": 56.0493, - "step": 147590 - }, - { - "epoch": 0.5963226768262382, - "grad_norm": 571.759033203125, - "learning_rate": 2.148610900223973e-05, - "loss": 59.5269, - "step": 147600 - }, - { - "epoch": 0.5963630780916058, - "grad_norm": 580.6973876953125, - "learning_rate": 2.1482653030530217e-05, - "loss": 55.8021, - "step": 147610 - }, - { - "epoch": 0.5964034793569735, - "grad_norm": 921.6025390625, - "learning_rate": 2.1479197127393204e-05, - "loss": 43.2684, - "step": 147620 - }, - { - "epoch": 0.5964438806223411, - "grad_norm": 646.3934326171875, - "learning_rate": 2.1475741292896055e-05, - "loss": 64.2098, - "step": 147630 - }, - { - "epoch": 0.5964842818877087, - "grad_norm": 453.28741455078125, - "learning_rate": 2.1472285527106137e-05, - "loss": 40.0422, - "step": 147640 - }, - { - "epoch": 0.5965246831530764, - "grad_norm": 952.0294799804688, - "learning_rate": 2.1468829830090838e-05, - "loss": 54.1033, - "step": 147650 - }, - { - "epoch": 0.596565084418444, - "grad_norm": 332.8349609375, - "learning_rate": 2.1465374201917518e-05, - "loss": 39.744, - "step": 147660 - }, - { - "epoch": 0.5966054856838117, - "grad_norm": 783.0602416992188, - "learning_rate": 2.146191864265354e-05, - "loss": 43.308, - "step": 147670 - }, - { - "epoch": 0.5966458869491792, - "grad_norm": 1387.0821533203125, - "learning_rate": 2.145846315236629e-05, - "loss": 70.0475, - "step": 147680 - }, - { - "epoch": 0.5966862882145468, - "grad_norm": 699.0653076171875, - "learning_rate": 2.1455007731123112e-05, - "loss": 33.2471, - "step": 147690 - }, - { - "epoch": 0.5967266894799145, - "grad_norm": 684.11572265625, - "learning_rate": 2.1451552378991392e-05, - "loss": 51.8979, - "step": 147700 - }, - { - "epoch": 0.5967670907452821, - "grad_norm": 799.9059448242188, - "learning_rate": 2.144809709603849e-05, - "loss": 90.1518, - "step": 147710 - }, - { - "epoch": 0.5968074920106498, - "grad_norm": 564.4153442382812, - "learning_rate": 2.1444641882331744e-05, - "loss": 33.3515, - "step": 147720 - }, - { - "epoch": 0.5968478932760174, - "grad_norm": 900.5286865234375, - "learning_rate": 2.1441186737938555e-05, - "loss": 50.0462, - "step": 147730 - }, - { - "epoch": 0.596888294541385, - "grad_norm": 479.7302551269531, - "learning_rate": 2.1437731662926258e-05, - "loss": 41.8366, - "step": 147740 - }, - { - "epoch": 0.5969286958067527, - "grad_norm": 1118.731201171875, - "learning_rate": 2.1434276657362213e-05, - "loss": 68.8751, - "step": 147750 - }, - { - "epoch": 0.5969690970721203, - "grad_norm": 297.614013671875, - "learning_rate": 2.1430821721313782e-05, - "loss": 66.7755, - "step": 147760 - }, - { - "epoch": 0.597009498337488, - "grad_norm": 3487.298828125, - "learning_rate": 2.142736685484831e-05, - "loss": 63.8574, - "step": 147770 - }, - { - "epoch": 0.5970498996028556, - "grad_norm": 596.0402221679688, - "learning_rate": 2.1423912058033174e-05, - "loss": 51.5885, - "step": 147780 - }, - { - "epoch": 0.5970903008682232, - "grad_norm": 1751.547119140625, - "learning_rate": 2.142045733093571e-05, - "loss": 62.8217, - "step": 147790 - }, - { - "epoch": 0.5971307021335909, - "grad_norm": 951.2716674804688, - "learning_rate": 2.1417002673623264e-05, - "loss": 44.525, - "step": 147800 - }, - { - "epoch": 0.5971711033989584, - "grad_norm": 464.1297912597656, - "learning_rate": 2.141354808616321e-05, - "loss": 52.8699, - "step": 147810 - }, - { - "epoch": 0.597211504664326, - "grad_norm": 581.2355346679688, - "learning_rate": 2.1410093568622878e-05, - "loss": 50.8966, - "step": 147820 - }, - { - "epoch": 0.5972519059296937, - "grad_norm": 861.3636474609375, - "learning_rate": 2.1406639121069617e-05, - "loss": 56.8085, - "step": 147830 - }, - { - "epoch": 0.5972923071950613, - "grad_norm": 1141.43505859375, - "learning_rate": 2.1403184743570778e-05, - "loss": 49.3954, - "step": 147840 - }, - { - "epoch": 0.597332708460429, - "grad_norm": 708.7974853515625, - "learning_rate": 2.1399730436193697e-05, - "loss": 37.2018, - "step": 147850 - }, - { - "epoch": 0.5973731097257966, - "grad_norm": 584.9114990234375, - "learning_rate": 2.1396276199005734e-05, - "loss": 39.1667, - "step": 147860 - }, - { - "epoch": 0.5974135109911642, - "grad_norm": 890.6394653320312, - "learning_rate": 2.1392822032074224e-05, - "loss": 41.9098, - "step": 147870 - }, - { - "epoch": 0.5974539122565319, - "grad_norm": 1730.406494140625, - "learning_rate": 2.138936793546649e-05, - "loss": 51.9415, - "step": 147880 - }, - { - "epoch": 0.5974943135218995, - "grad_norm": 970.1226806640625, - "learning_rate": 2.13859139092499e-05, - "loss": 54.9884, - "step": 147890 - }, - { - "epoch": 0.5975347147872672, - "grad_norm": 559.8754272460938, - "learning_rate": 2.1382459953491774e-05, - "loss": 48.8598, - "step": 147900 - }, - { - "epoch": 0.5975751160526348, - "grad_norm": 5372.072265625, - "learning_rate": 2.137900606825946e-05, - "loss": 54.8929, - "step": 147910 - }, - { - "epoch": 0.5976155173180024, - "grad_norm": 871.8217163085938, - "learning_rate": 2.137555225362028e-05, - "loss": 46.6193, - "step": 147920 - }, - { - "epoch": 0.5976559185833701, - "grad_norm": 585.6782836914062, - "learning_rate": 2.1372098509641574e-05, - "loss": 47.909, - "step": 147930 - }, - { - "epoch": 0.5976963198487376, - "grad_norm": 884.8446655273438, - "learning_rate": 2.1368644836390684e-05, - "loss": 41.8532, - "step": 147940 - }, - { - "epoch": 0.5977367211141053, - "grad_norm": 589.468017578125, - "learning_rate": 2.136519123393493e-05, - "loss": 57.2476, - "step": 147950 - }, - { - "epoch": 0.5977771223794729, - "grad_norm": 1855.3509521484375, - "learning_rate": 2.1361737702341634e-05, - "loss": 71.2147, - "step": 147960 - }, - { - "epoch": 0.5978175236448405, - "grad_norm": 304.5381774902344, - "learning_rate": 2.1358284241678146e-05, - "loss": 42.8469, - "step": 147970 - }, - { - "epoch": 0.5978579249102082, - "grad_norm": 451.1543273925781, - "learning_rate": 2.135483085201177e-05, - "loss": 64.4844, - "step": 147980 - }, - { - "epoch": 0.5978983261755758, - "grad_norm": 1317.0662841796875, - "learning_rate": 2.1351377533409855e-05, - "loss": 83.4466, - "step": 147990 - }, - { - "epoch": 0.5979387274409435, - "grad_norm": 754.2716064453125, - "learning_rate": 2.1347924285939714e-05, - "loss": 61.419, - "step": 148000 - }, - { - "epoch": 0.5979791287063111, - "grad_norm": 934.2620239257812, - "learning_rate": 2.1344471109668662e-05, - "loss": 56.7021, - "step": 148010 - }, - { - "epoch": 0.5980195299716787, - "grad_norm": 708.87451171875, - "learning_rate": 2.134101800466403e-05, - "loss": 59.7696, - "step": 148020 - }, - { - "epoch": 0.5980599312370464, - "grad_norm": 717.4935302734375, - "learning_rate": 2.1337564970993145e-05, - "loss": 41.5032, - "step": 148030 - }, - { - "epoch": 0.598100332502414, - "grad_norm": 664.8682861328125, - "learning_rate": 2.1334112008723297e-05, - "loss": 44.9038, - "step": 148040 - }, - { - "epoch": 0.5981407337677817, - "grad_norm": 607.526611328125, - "learning_rate": 2.1330659117921837e-05, - "loss": 72.0059, - "step": 148050 - }, - { - "epoch": 0.5981811350331493, - "grad_norm": 848.9324340820312, - "learning_rate": 2.1327206298656056e-05, - "loss": 56.5129, - "step": 148060 - }, - { - "epoch": 0.5982215362985168, - "grad_norm": 790.6737670898438, - "learning_rate": 2.1323753550993288e-05, - "loss": 40.4335, - "step": 148070 - }, - { - "epoch": 0.5982619375638845, - "grad_norm": 196.77239990234375, - "learning_rate": 2.132030087500084e-05, - "loss": 32.2374, - "step": 148080 - }, - { - "epoch": 0.5983023388292521, - "grad_norm": 645.4754638671875, - "learning_rate": 2.1316848270746015e-05, - "loss": 67.4574, - "step": 148090 - }, - { - "epoch": 0.5983427400946197, - "grad_norm": 1457.8206787109375, - "learning_rate": 2.1313395738296134e-05, - "loss": 56.8633, - "step": 148100 - }, - { - "epoch": 0.5983831413599874, - "grad_norm": 773.4266357421875, - "learning_rate": 2.1309943277718497e-05, - "loss": 50.3852, - "step": 148110 - }, - { - "epoch": 0.598423542625355, - "grad_norm": 988.7295532226562, - "learning_rate": 2.130649088908041e-05, - "loss": 49.5678, - "step": 148120 - }, - { - "epoch": 0.5984639438907227, - "grad_norm": 0.0, - "learning_rate": 2.1303038572449192e-05, - "loss": 40.0559, - "step": 148130 - }, - { - "epoch": 0.5985043451560903, - "grad_norm": 1050.348876953125, - "learning_rate": 2.1299586327892133e-05, - "loss": 58.7035, - "step": 148140 - }, - { - "epoch": 0.5985447464214579, - "grad_norm": 1048.6171875, - "learning_rate": 2.129613415547655e-05, - "loss": 62.4022, - "step": 148150 - }, - { - "epoch": 0.5985851476868256, - "grad_norm": 391.7165832519531, - "learning_rate": 2.1292682055269745e-05, - "loss": 64.7473, - "step": 148160 - }, - { - "epoch": 0.5986255489521932, - "grad_norm": 542.6089477539062, - "learning_rate": 2.1289230027339003e-05, - "loss": 72.982, - "step": 148170 - }, - { - "epoch": 0.5986659502175609, - "grad_norm": 1377.5733642578125, - "learning_rate": 2.1285778071751634e-05, - "loss": 50.1366, - "step": 148180 - }, - { - "epoch": 0.5987063514829285, - "grad_norm": 574.2361450195312, - "learning_rate": 2.128232618857494e-05, - "loss": 64.3552, - "step": 148190 - }, - { - "epoch": 0.598746752748296, - "grad_norm": 1206.60888671875, - "learning_rate": 2.1278874377876197e-05, - "loss": 71.0976, - "step": 148200 - }, - { - "epoch": 0.5987871540136637, - "grad_norm": 402.2405090332031, - "learning_rate": 2.1275422639722724e-05, - "loss": 42.0494, - "step": 148210 - }, - { - "epoch": 0.5988275552790313, - "grad_norm": 777.2664184570312, - "learning_rate": 2.1271970974181795e-05, - "loss": 48.6818, - "step": 148220 - }, - { - "epoch": 0.598867956544399, - "grad_norm": 840.8900146484375, - "learning_rate": 2.1268519381320724e-05, - "loss": 63.5646, - "step": 148230 - }, - { - "epoch": 0.5989083578097666, - "grad_norm": 532.3710327148438, - "learning_rate": 2.1265067861206784e-05, - "loss": 55.8238, - "step": 148240 - }, - { - "epoch": 0.5989487590751342, - "grad_norm": 466.35003662109375, - "learning_rate": 2.1261616413907265e-05, - "loss": 58.5717, - "step": 148250 - }, - { - "epoch": 0.5989891603405019, - "grad_norm": 329.7406311035156, - "learning_rate": 2.125816503948946e-05, - "loss": 54.9311, - "step": 148260 - }, - { - "epoch": 0.5990295616058695, - "grad_norm": 347.29327392578125, - "learning_rate": 2.1254713738020658e-05, - "loss": 55.2146, - "step": 148270 - }, - { - "epoch": 0.5990699628712371, - "grad_norm": 513.181640625, - "learning_rate": 2.1251262509568133e-05, - "loss": 45.6338, - "step": 148280 - }, - { - "epoch": 0.5991103641366048, - "grad_norm": 308.05279541015625, - "learning_rate": 2.1247811354199184e-05, - "loss": 51.3038, - "step": 148290 - }, - { - "epoch": 0.5991507654019724, - "grad_norm": 831.5238037109375, - "learning_rate": 2.1244360271981073e-05, - "loss": 60.9007, - "step": 148300 - }, - { - "epoch": 0.5991911666673401, - "grad_norm": 539.5789184570312, - "learning_rate": 2.12409092629811e-05, - "loss": 41.7726, - "step": 148310 - }, - { - "epoch": 0.5992315679327076, - "grad_norm": 257.03363037109375, - "learning_rate": 2.123745832726654e-05, - "loss": 55.673, - "step": 148320 - }, - { - "epoch": 0.5992719691980752, - "grad_norm": 380.1696472167969, - "learning_rate": 2.1234007464904654e-05, - "loss": 42.6603, - "step": 148330 - }, - { - "epoch": 0.5993123704634429, - "grad_norm": 809.4993286132812, - "learning_rate": 2.1230556675962744e-05, - "loss": 30.5962, - "step": 148340 - }, - { - "epoch": 0.5993527717288105, - "grad_norm": 761.6078491210938, - "learning_rate": 2.1227105960508063e-05, - "loss": 53.1363, - "step": 148350 - }, - { - "epoch": 0.5993931729941782, - "grad_norm": 471.2385559082031, - "learning_rate": 2.1223655318607904e-05, - "loss": 54.6683, - "step": 148360 - }, - { - "epoch": 0.5994335742595458, - "grad_norm": 379.0174865722656, - "learning_rate": 2.1220204750329525e-05, - "loss": 41.1171, - "step": 148370 - }, - { - "epoch": 0.5994739755249134, - "grad_norm": 635.4507446289062, - "learning_rate": 2.1216754255740193e-05, - "loss": 43.5764, - "step": 148380 - }, - { - "epoch": 0.5995143767902811, - "grad_norm": 243.54612731933594, - "learning_rate": 2.121330383490719e-05, - "loss": 54.9277, - "step": 148390 - }, - { - "epoch": 0.5995547780556487, - "grad_norm": 855.86181640625, - "learning_rate": 2.1209853487897784e-05, - "loss": 58.4136, - "step": 148400 - }, - { - "epoch": 0.5995951793210164, - "grad_norm": 666.0252685546875, - "learning_rate": 2.1206403214779223e-05, - "loss": 63.9464, - "step": 148410 - }, - { - "epoch": 0.599635580586384, - "grad_norm": 909.7586059570312, - "learning_rate": 2.1202953015618794e-05, - "loss": 57.6522, - "step": 148420 - }, - { - "epoch": 0.5996759818517516, - "grad_norm": 1767.159912109375, - "learning_rate": 2.1199502890483747e-05, - "loss": 58.3725, - "step": 148430 - }, - { - "epoch": 0.5997163831171193, - "grad_norm": 640.2388916015625, - "learning_rate": 2.119605283944135e-05, - "loss": 40.1175, - "step": 148440 - }, - { - "epoch": 0.5997567843824868, - "grad_norm": 512.8927001953125, - "learning_rate": 2.1192602862558864e-05, - "loss": 59.5971, - "step": 148450 - }, - { - "epoch": 0.5997971856478544, - "grad_norm": 613.088134765625, - "learning_rate": 2.1189152959903536e-05, - "loss": 59.3687, - "step": 148460 - }, - { - "epoch": 0.5998375869132221, - "grad_norm": 489.1797180175781, - "learning_rate": 2.118570313154264e-05, - "loss": 53.7565, - "step": 148470 - }, - { - "epoch": 0.5998779881785897, - "grad_norm": 593.52734375, - "learning_rate": 2.1182253377543425e-05, - "loss": 53.75, - "step": 148480 - }, - { - "epoch": 0.5999183894439574, - "grad_norm": 644.7229614257812, - "learning_rate": 2.117880369797314e-05, - "loss": 67.9577, - "step": 148490 - }, - { - "epoch": 0.599958790709325, - "grad_norm": 755.2145385742188, - "learning_rate": 2.117535409289905e-05, - "loss": 43.5806, - "step": 148500 - }, - { - "epoch": 0.5999991919746926, - "grad_norm": 499.0184326171875, - "learning_rate": 2.11719045623884e-05, - "loss": 47.2123, - "step": 148510 - }, - { - "epoch": 0.6000395932400603, - "grad_norm": 620.1091918945312, - "learning_rate": 2.1168455106508446e-05, - "loss": 105.0849, - "step": 148520 - }, - { - "epoch": 0.6000799945054279, - "grad_norm": 655.6963500976562, - "learning_rate": 2.1165005725326427e-05, - "loss": 65.4218, - "step": 148530 - }, - { - "epoch": 0.6001203957707956, - "grad_norm": 615.6967163085938, - "learning_rate": 2.116155641890959e-05, - "loss": 38.5551, - "step": 148540 - }, - { - "epoch": 0.6001607970361632, - "grad_norm": 774.8275756835938, - "learning_rate": 2.1158107187325198e-05, - "loss": 57.5716, - "step": 148550 - }, - { - "epoch": 0.6002011983015308, - "grad_norm": 1074.4395751953125, - "learning_rate": 2.1154658030640483e-05, - "loss": 75.7318, - "step": 148560 - }, - { - "epoch": 0.6002415995668985, - "grad_norm": 223.1709747314453, - "learning_rate": 2.1151208948922676e-05, - "loss": 35.348, - "step": 148570 - }, - { - "epoch": 0.600282000832266, - "grad_norm": 767.1890869140625, - "learning_rate": 2.1147759942239046e-05, - "loss": 54.7006, - "step": 148580 - }, - { - "epoch": 0.6003224020976337, - "grad_norm": 979.3482666015625, - "learning_rate": 2.1144311010656813e-05, - "loss": 57.0623, - "step": 148590 - }, - { - "epoch": 0.6003628033630013, - "grad_norm": 669.6612548828125, - "learning_rate": 2.114086215424322e-05, - "loss": 51.9669, - "step": 148600 - }, - { - "epoch": 0.6004032046283689, - "grad_norm": 1702.1258544921875, - "learning_rate": 2.1137413373065515e-05, - "loss": 69.3868, - "step": 148610 - }, - { - "epoch": 0.6004436058937366, - "grad_norm": 1155.5137939453125, - "learning_rate": 2.1133964667190918e-05, - "loss": 63.2453, - "step": 148620 - }, - { - "epoch": 0.6004840071591042, - "grad_norm": 1104.260986328125, - "learning_rate": 2.1130516036686675e-05, - "loss": 43.4092, - "step": 148630 - }, - { - "epoch": 0.6005244084244719, - "grad_norm": 491.9853820800781, - "learning_rate": 2.1127067481620013e-05, - "loss": 44.5126, - "step": 148640 - }, - { - "epoch": 0.6005648096898395, - "grad_norm": 737.5925903320312, - "learning_rate": 2.1123619002058155e-05, - "loss": 51.0688, - "step": 148650 - }, - { - "epoch": 0.6006052109552071, - "grad_norm": 823.9745483398438, - "learning_rate": 2.112017059806835e-05, - "loss": 35.5344, - "step": 148660 - }, - { - "epoch": 0.6006456122205748, - "grad_norm": 581.1257934570312, - "learning_rate": 2.111672226971781e-05, - "loss": 58.1983, - "step": 148670 - }, - { - "epoch": 0.6006860134859424, - "grad_norm": 1033.891845703125, - "learning_rate": 2.1113274017073774e-05, - "loss": 51.1567, - "step": 148680 - }, - { - "epoch": 0.6007264147513101, - "grad_norm": 786.7816772460938, - "learning_rate": 2.1109825840203464e-05, - "loss": 69.1057, - "step": 148690 - }, - { - "epoch": 0.6007668160166777, - "grad_norm": 376.516357421875, - "learning_rate": 2.1106377739174098e-05, - "loss": 79.144, - "step": 148700 - }, - { - "epoch": 0.6008072172820452, - "grad_norm": 815.7255859375, - "learning_rate": 2.1102929714052908e-05, - "loss": 45.9788, - "step": 148710 - }, - { - "epoch": 0.6008476185474129, - "grad_norm": 1409.89794921875, - "learning_rate": 2.1099481764907108e-05, - "loss": 83.6714, - "step": 148720 - }, - { - "epoch": 0.6008880198127805, - "grad_norm": 766.6243286132812, - "learning_rate": 2.109603389180391e-05, - "loss": 60.9154, - "step": 148730 - }, - { - "epoch": 0.6009284210781481, - "grad_norm": 440.70526123046875, - "learning_rate": 2.1092586094810553e-05, - "loss": 46.9509, - "step": 148740 - }, - { - "epoch": 0.6009688223435158, - "grad_norm": 544.2029418945312, - "learning_rate": 2.1089138373994223e-05, - "loss": 28.7837, - "step": 148750 - }, - { - "epoch": 0.6010092236088834, - "grad_norm": 557.417236328125, - "learning_rate": 2.108569072942217e-05, - "loss": 50.8612, - "step": 148760 - }, - { - "epoch": 0.6010496248742511, - "grad_norm": 1011.0460205078125, - "learning_rate": 2.1082243161161595e-05, - "loss": 71.0695, - "step": 148770 - }, - { - "epoch": 0.6010900261396187, - "grad_norm": 582.4788208007812, - "learning_rate": 2.10787956692797e-05, - "loss": 48.1233, - "step": 148780 - }, - { - "epoch": 0.6011304274049863, - "grad_norm": 3780.80419921875, - "learning_rate": 2.10753482538437e-05, - "loss": 88.0165, - "step": 148790 - }, - { - "epoch": 0.601170828670354, - "grad_norm": 610.1556396484375, - "learning_rate": 2.1071900914920816e-05, - "loss": 40.7018, - "step": 148800 - }, - { - "epoch": 0.6012112299357216, - "grad_norm": 762.4368896484375, - "learning_rate": 2.106845365257823e-05, - "loss": 54.8154, - "step": 148810 - }, - { - "epoch": 0.6012516312010893, - "grad_norm": 200.17686462402344, - "learning_rate": 2.1065006466883177e-05, - "loss": 43.5993, - "step": 148820 - }, - { - "epoch": 0.6012920324664568, - "grad_norm": 2111.885498046875, - "learning_rate": 2.1061559357902838e-05, - "loss": 42.0404, - "step": 148830 - }, - { - "epoch": 0.6013324337318244, - "grad_norm": 846.29833984375, - "learning_rate": 2.1058112325704436e-05, - "loss": 53.8223, - "step": 148840 - }, - { - "epoch": 0.6013728349971921, - "grad_norm": 426.0714416503906, - "learning_rate": 2.1054665370355166e-05, - "loss": 43.2251, - "step": 148850 - }, - { - "epoch": 0.6014132362625597, - "grad_norm": 958.9960327148438, - "learning_rate": 2.1051218491922216e-05, - "loss": 84.0954, - "step": 148860 - }, - { - "epoch": 0.6014536375279274, - "grad_norm": 308.5674743652344, - "learning_rate": 2.1047771690472804e-05, - "loss": 40.4277, - "step": 148870 - }, - { - "epoch": 0.601494038793295, - "grad_norm": 955.7950439453125, - "learning_rate": 2.1044324966074104e-05, - "loss": 66.7933, - "step": 148880 - }, - { - "epoch": 0.6015344400586626, - "grad_norm": 2592.542724609375, - "learning_rate": 2.104087831879334e-05, - "loss": 59.7257, - "step": 148890 - }, - { - "epoch": 0.6015748413240303, - "grad_norm": 468.540771484375, - "learning_rate": 2.1037431748697688e-05, - "loss": 57.7334, - "step": 148900 - }, - { - "epoch": 0.6016152425893979, - "grad_norm": 1156.9520263671875, - "learning_rate": 2.1033985255854336e-05, - "loss": 95.6471, - "step": 148910 - }, - { - "epoch": 0.6016556438547656, - "grad_norm": 992.9965209960938, - "learning_rate": 2.103053884033049e-05, - "loss": 54.1949, - "step": 148920 - }, - { - "epoch": 0.6016960451201332, - "grad_norm": 868.084228515625, - "learning_rate": 2.1027092502193334e-05, - "loss": 46.5351, - "step": 148930 - }, - { - "epoch": 0.6017364463855008, - "grad_norm": 575.07666015625, - "learning_rate": 2.1023646241510048e-05, - "loss": 39.6612, - "step": 148940 - }, - { - "epoch": 0.6017768476508685, - "grad_norm": 750.913330078125, - "learning_rate": 2.1020200058347833e-05, - "loss": 68.0529, - "step": 148950 - }, - { - "epoch": 0.601817248916236, - "grad_norm": 508.6002502441406, - "learning_rate": 2.1016753952773867e-05, - "loss": 33.4746, - "step": 148960 - }, - { - "epoch": 0.6018576501816036, - "grad_norm": 769.2796020507812, - "learning_rate": 2.1013307924855333e-05, - "loss": 50.7453, - "step": 148970 - }, - { - "epoch": 0.6018980514469713, - "grad_norm": 458.1070861816406, - "learning_rate": 2.1009861974659413e-05, - "loss": 51.415, - "step": 148980 - }, - { - "epoch": 0.6019384527123389, - "grad_norm": 310.19415283203125, - "learning_rate": 2.100641610225328e-05, - "loss": 74.4553, - "step": 148990 - }, - { - "epoch": 0.6019788539777066, - "grad_norm": 488.74908447265625, - "learning_rate": 2.1002970307704132e-05, - "loss": 50.668, - "step": 149000 - }, - { - "epoch": 0.6020192552430742, - "grad_norm": 1650.5919189453125, - "learning_rate": 2.0999524591079133e-05, - "loss": 64.8103, - "step": 149010 - }, - { - "epoch": 0.6020596565084418, - "grad_norm": 556.3172607421875, - "learning_rate": 2.0996078952445452e-05, - "loss": 75.6496, - "step": 149020 - }, - { - "epoch": 0.6021000577738095, - "grad_norm": 964.8404541015625, - "learning_rate": 2.0992633391870286e-05, - "loss": 53.5392, - "step": 149030 - }, - { - "epoch": 0.6021404590391771, - "grad_norm": 740.5578002929688, - "learning_rate": 2.0989187909420786e-05, - "loss": 55.9263, - "step": 149040 - }, - { - "epoch": 0.6021808603045448, - "grad_norm": 771.8296508789062, - "learning_rate": 2.0985742505164144e-05, - "loss": 65.7144, - "step": 149050 - }, - { - "epoch": 0.6022212615699124, - "grad_norm": 998.5512084960938, - "learning_rate": 2.0982297179167515e-05, - "loss": 44.6573, - "step": 149060 - }, - { - "epoch": 0.60226166283528, - "grad_norm": 330.9595947265625, - "learning_rate": 2.097885193149806e-05, - "loss": 37.0711, - "step": 149070 - }, - { - "epoch": 0.6023020641006477, - "grad_norm": 484.8772888183594, - "learning_rate": 2.0975406762222966e-05, - "loss": 63.3068, - "step": 149080 - }, - { - "epoch": 0.6023424653660152, - "grad_norm": 790.2227172851562, - "learning_rate": 2.0971961671409392e-05, - "loss": 34.2787, - "step": 149090 - }, - { - "epoch": 0.6023828666313829, - "grad_norm": 723.9091186523438, - "learning_rate": 2.0968516659124486e-05, - "loss": 26.5191, - "step": 149100 - }, - { - "epoch": 0.6024232678967505, - "grad_norm": 1749.529052734375, - "learning_rate": 2.0965071725435436e-05, - "loss": 41.8332, - "step": 149110 - }, - { - "epoch": 0.6024636691621181, - "grad_norm": 528.8098754882812, - "learning_rate": 2.0961626870409383e-05, - "loss": 39.1988, - "step": 149120 - }, - { - "epoch": 0.6025040704274858, - "grad_norm": 985.6099243164062, - "learning_rate": 2.0958182094113498e-05, - "loss": 51.4806, - "step": 149130 - }, - { - "epoch": 0.6025444716928534, - "grad_norm": 532.384033203125, - "learning_rate": 2.0954737396614937e-05, - "loss": 42.4193, - "step": 149140 - }, - { - "epoch": 0.602584872958221, - "grad_norm": 533.1931762695312, - "learning_rate": 2.095129277798084e-05, - "loss": 55.3392, - "step": 149150 - }, - { - "epoch": 0.6026252742235887, - "grad_norm": 636.3084106445312, - "learning_rate": 2.0947848238278385e-05, - "loss": 51.3045, - "step": 149160 - }, - { - "epoch": 0.6026656754889563, - "grad_norm": 726.8486938476562, - "learning_rate": 2.0944403777574718e-05, - "loss": 46.1654, - "step": 149170 - }, - { - "epoch": 0.602706076754324, - "grad_norm": 1090.1893310546875, - "learning_rate": 2.0940959395936975e-05, - "loss": 54.1789, - "step": 149180 - }, - { - "epoch": 0.6027464780196916, - "grad_norm": 727.7335205078125, - "learning_rate": 2.0937515093432327e-05, - "loss": 40.0967, - "step": 149190 - }, - { - "epoch": 0.6027868792850593, - "grad_norm": 450.4414367675781, - "learning_rate": 2.0934070870127912e-05, - "loss": 65.3961, - "step": 149200 - }, - { - "epoch": 0.6028272805504269, - "grad_norm": 1227.625732421875, - "learning_rate": 2.0930626726090883e-05, - "loss": 49.5206, - "step": 149210 - }, - { - "epoch": 0.6028676818157944, - "grad_norm": 586.02001953125, - "learning_rate": 2.0927182661388373e-05, - "loss": 51.2468, - "step": 149220 - }, - { - "epoch": 0.6029080830811621, - "grad_norm": 452.15679931640625, - "learning_rate": 2.0923738676087534e-05, - "loss": 60.5171, - "step": 149230 - }, - { - "epoch": 0.6029484843465297, - "grad_norm": 593.4280395507812, - "learning_rate": 2.0920294770255517e-05, - "loss": 54.6655, - "step": 149240 - }, - { - "epoch": 0.6029888856118973, - "grad_norm": 536.1670532226562, - "learning_rate": 2.0916850943959452e-05, - "loss": 54.9936, - "step": 149250 - }, - { - "epoch": 0.603029286877265, - "grad_norm": 498.478515625, - "learning_rate": 2.091340719726647e-05, - "loss": 44.6644, - "step": 149260 - }, - { - "epoch": 0.6030696881426326, - "grad_norm": 610.2139892578125, - "learning_rate": 2.0909963530243726e-05, - "loss": 47.0653, - "step": 149270 - }, - { - "epoch": 0.6031100894080003, - "grad_norm": 909.2549438476562, - "learning_rate": 2.0906519942958347e-05, - "loss": 37.5893, - "step": 149280 - }, - { - "epoch": 0.6031504906733679, - "grad_norm": 1333.1292724609375, - "learning_rate": 2.0903076435477467e-05, - "loss": 59.148, - "step": 149290 - }, - { - "epoch": 0.6031908919387355, - "grad_norm": 560.1817016601562, - "learning_rate": 2.0899633007868227e-05, - "loss": 72.2637, - "step": 149300 - }, - { - "epoch": 0.6032312932041032, - "grad_norm": 931.7703857421875, - "learning_rate": 2.089618966019775e-05, - "loss": 46.3886, - "step": 149310 - }, - { - "epoch": 0.6032716944694708, - "grad_norm": 628.0271606445312, - "learning_rate": 2.089274639253317e-05, - "loss": 56.8358, - "step": 149320 - }, - { - "epoch": 0.6033120957348385, - "grad_norm": 679.5213623046875, - "learning_rate": 2.0889303204941615e-05, - "loss": 50.709, - "step": 149330 - }, - { - "epoch": 0.6033524970002061, - "grad_norm": 473.5839538574219, - "learning_rate": 2.0885860097490202e-05, - "loss": 61.7202, - "step": 149340 - }, - { - "epoch": 0.6033928982655736, - "grad_norm": 786.1121215820312, - "learning_rate": 2.088241707024607e-05, - "loss": 36.438, - "step": 149350 - }, - { - "epoch": 0.6034332995309413, - "grad_norm": 739.2479858398438, - "learning_rate": 2.0878974123276328e-05, - "loss": 40.9667, - "step": 149360 - }, - { - "epoch": 0.6034737007963089, - "grad_norm": 310.1951599121094, - "learning_rate": 2.0875531256648122e-05, - "loss": 57.812, - "step": 149370 - }, - { - "epoch": 0.6035141020616765, - "grad_norm": 1145.41748046875, - "learning_rate": 2.0872088470428553e-05, - "loss": 59.6196, - "step": 149380 - }, - { - "epoch": 0.6035545033270442, - "grad_norm": 484.976806640625, - "learning_rate": 2.086864576468474e-05, - "loss": 55.1442, - "step": 149390 - }, - { - "epoch": 0.6035949045924118, - "grad_norm": 0.0, - "learning_rate": 2.0865203139483812e-05, - "loss": 58.5609, - "step": 149400 - }, - { - "epoch": 0.6036353058577795, - "grad_norm": 340.4298400878906, - "learning_rate": 2.0861760594892867e-05, - "loss": 71.7529, - "step": 149410 - }, - { - "epoch": 0.6036757071231471, - "grad_norm": 583.3339233398438, - "learning_rate": 2.0858318130979038e-05, - "loss": 46.8282, - "step": 149420 - }, - { - "epoch": 0.6037161083885147, - "grad_norm": 696.9962768554688, - "learning_rate": 2.0854875747809428e-05, - "loss": 36.0242, - "step": 149430 - }, - { - "epoch": 0.6037565096538824, - "grad_norm": 530.3546752929688, - "learning_rate": 2.085143344545114e-05, - "loss": 70.6566, - "step": 149440 - }, - { - "epoch": 0.60379691091925, - "grad_norm": 764.704345703125, - "learning_rate": 2.0847991223971306e-05, - "loss": 51.4275, - "step": 149450 - }, - { - "epoch": 0.6038373121846177, - "grad_norm": 489.4782409667969, - "learning_rate": 2.0844549083437022e-05, - "loss": 69.5268, - "step": 149460 - }, - { - "epoch": 0.6038777134499852, - "grad_norm": 779.9122924804688, - "learning_rate": 2.084110702391538e-05, - "loss": 46.3204, - "step": 149470 - }, - { - "epoch": 0.6039181147153528, - "grad_norm": 636.275146484375, - "learning_rate": 2.083766504547351e-05, - "loss": 46.5596, - "step": 149480 - }, - { - "epoch": 0.6039585159807205, - "grad_norm": 759.5249633789062, - "learning_rate": 2.0834223148178488e-05, - "loss": 46.3107, - "step": 149490 - }, - { - "epoch": 0.6039989172460881, - "grad_norm": 575.3982543945312, - "learning_rate": 2.0830781332097446e-05, - "loss": 81.1673, - "step": 149500 - }, - { - "epoch": 0.6040393185114558, - "grad_norm": 952.708740234375, - "learning_rate": 2.0827339597297466e-05, - "loss": 39.2819, - "step": 149510 - }, - { - "epoch": 0.6040797197768234, - "grad_norm": 628.7777099609375, - "learning_rate": 2.082389794384564e-05, - "loss": 51.4413, - "step": 149520 - }, - { - "epoch": 0.604120121042191, - "grad_norm": 675.4315795898438, - "learning_rate": 2.0820456371809078e-05, - "loss": 49.7467, - "step": 149530 - }, - { - "epoch": 0.6041605223075587, - "grad_norm": 915.1492919921875, - "learning_rate": 2.0817014881254876e-05, - "loss": 53.7017, - "step": 149540 - }, - { - "epoch": 0.6042009235729263, - "grad_norm": 1439.67431640625, - "learning_rate": 2.0813573472250114e-05, - "loss": 49.7932, - "step": 149550 - }, - { - "epoch": 0.604241324838294, - "grad_norm": 819.9117431640625, - "learning_rate": 2.08101321448619e-05, - "loss": 54.5777, - "step": 149560 - }, - { - "epoch": 0.6042817261036616, - "grad_norm": 747.6610107421875, - "learning_rate": 2.0806690899157306e-05, - "loss": 79.415, - "step": 149570 - }, - { - "epoch": 0.6043221273690292, - "grad_norm": 2668.103759765625, - "learning_rate": 2.080324973520344e-05, - "loss": 90.0351, - "step": 149580 - }, - { - "epoch": 0.6043625286343969, - "grad_norm": 1174.483154296875, - "learning_rate": 2.079980865306739e-05, - "loss": 69.4175, - "step": 149590 - }, - { - "epoch": 0.6044029298997644, - "grad_norm": 467.97698974609375, - "learning_rate": 2.0796367652816213e-05, - "loss": 58.8468, - "step": 149600 - }, - { - "epoch": 0.604443331165132, - "grad_norm": 751.82177734375, - "learning_rate": 2.0792926734517024e-05, - "loss": 38.2991, - "step": 149610 - }, - { - "epoch": 0.6044837324304997, - "grad_norm": 512.2536010742188, - "learning_rate": 2.0789485898236896e-05, - "loss": 43.8234, - "step": 149620 - }, - { - "epoch": 0.6045241336958673, - "grad_norm": 1900.0654296875, - "learning_rate": 2.07860451440429e-05, - "loss": 62.5554, - "step": 149630 - }, - { - "epoch": 0.604564534961235, - "grad_norm": 802.1771850585938, - "learning_rate": 2.0782604472002128e-05, - "loss": 68.0368, - "step": 149640 - }, - { - "epoch": 0.6046049362266026, - "grad_norm": 1752.5911865234375, - "learning_rate": 2.0779163882181655e-05, - "loss": 66.3808, - "step": 149650 - }, - { - "epoch": 0.6046453374919702, - "grad_norm": 680.7577514648438, - "learning_rate": 2.0775723374648552e-05, - "loss": 48.9644, - "step": 149660 - }, - { - "epoch": 0.6046857387573379, - "grad_norm": 433.6819152832031, - "learning_rate": 2.0772282949469905e-05, - "loss": 41.0124, - "step": 149670 - }, - { - "epoch": 0.6047261400227055, - "grad_norm": 327.3218078613281, - "learning_rate": 2.076884260671276e-05, - "loss": 36.1269, - "step": 149680 - }, - { - "epoch": 0.6047665412880732, - "grad_norm": 310.0195617675781, - "learning_rate": 2.0765402346444226e-05, - "loss": 53.7047, - "step": 149690 - }, - { - "epoch": 0.6048069425534408, - "grad_norm": 1084.9659423828125, - "learning_rate": 2.076196216873135e-05, - "loss": 68.674, - "step": 149700 - }, - { - "epoch": 0.6048473438188084, - "grad_norm": 1128.5260009765625, - "learning_rate": 2.075852207364119e-05, - "loss": 73.8726, - "step": 149710 - }, - { - "epoch": 0.6048877450841761, - "grad_norm": 1129.6939697265625, - "learning_rate": 2.075508206124084e-05, - "loss": 45.7701, - "step": 149720 - }, - { - "epoch": 0.6049281463495436, - "grad_norm": 755.3470458984375, - "learning_rate": 2.0751642131597344e-05, - "loss": 40.1757, - "step": 149730 - }, - { - "epoch": 0.6049685476149113, - "grad_norm": 577.21435546875, - "learning_rate": 2.0748202284777777e-05, - "loss": 79.7054, - "step": 149740 - }, - { - "epoch": 0.6050089488802789, - "grad_norm": 755.05126953125, - "learning_rate": 2.0744762520849193e-05, - "loss": 68.8899, - "step": 149750 - }, - { - "epoch": 0.6050493501456465, - "grad_norm": 641.5477905273438, - "learning_rate": 2.0741322839878647e-05, - "loss": 64.6525, - "step": 149760 - }, - { - "epoch": 0.6050897514110142, - "grad_norm": 826.8701782226562, - "learning_rate": 2.0737883241933213e-05, - "loss": 63.9232, - "step": 149770 - }, - { - "epoch": 0.6051301526763818, - "grad_norm": 84.71437072753906, - "learning_rate": 2.0734443727079943e-05, - "loss": 50.706, - "step": 149780 - }, - { - "epoch": 0.6051705539417495, - "grad_norm": 494.0994567871094, - "learning_rate": 2.0731004295385874e-05, - "loss": 55.4391, - "step": 149790 - }, - { - "epoch": 0.6052109552071171, - "grad_norm": 783.1380004882812, - "learning_rate": 2.0727564946918087e-05, - "loss": 36.2091, - "step": 149800 - }, - { - "epoch": 0.6052513564724847, - "grad_norm": 1930.76416015625, - "learning_rate": 2.0724125681743618e-05, - "loss": 49.5212, - "step": 149810 - }, - { - "epoch": 0.6052917577378524, - "grad_norm": 949.4205322265625, - "learning_rate": 2.072068649992952e-05, - "loss": 50.3558, - "step": 149820 - }, - { - "epoch": 0.60533215900322, - "grad_norm": 826.8944091796875, - "learning_rate": 2.0717247401542844e-05, - "loss": 53.6828, - "step": 149830 - }, - { - "epoch": 0.6053725602685877, - "grad_norm": 732.9508666992188, - "learning_rate": 2.0713808386650625e-05, - "loss": 45.9292, - "step": 149840 - }, - { - "epoch": 0.6054129615339553, - "grad_norm": 828.0859985351562, - "learning_rate": 2.0710369455319928e-05, - "loss": 56.6847, - "step": 149850 - }, - { - "epoch": 0.6054533627993228, - "grad_norm": 1403.733642578125, - "learning_rate": 2.070693060761779e-05, - "loss": 57.0237, - "step": 149860 - }, - { - "epoch": 0.6054937640646905, - "grad_norm": 722.1435546875, - "learning_rate": 2.0703491843611234e-05, - "loss": 62.3158, - "step": 149870 - }, - { - "epoch": 0.6055341653300581, - "grad_norm": 853.1331787109375, - "learning_rate": 2.070005316336733e-05, - "loss": 61.9291, - "step": 149880 - }, - { - "epoch": 0.6055745665954257, - "grad_norm": 985.730712890625, - "learning_rate": 2.06966145669531e-05, - "loss": 62.4463, - "step": 149890 - }, - { - "epoch": 0.6056149678607934, - "grad_norm": 261.92486572265625, - "learning_rate": 2.0693176054435587e-05, - "loss": 58.0517, - "step": 149900 - }, - { - "epoch": 0.605655369126161, - "grad_norm": 1041.611083984375, - "learning_rate": 2.068973762588182e-05, - "loss": 61.4181, - "step": 149910 - }, - { - "epoch": 0.6056957703915287, - "grad_norm": 319.9795227050781, - "learning_rate": 2.0686299281358835e-05, - "loss": 57.8208, - "step": 149920 - }, - { - "epoch": 0.6057361716568963, - "grad_norm": 975.4109497070312, - "learning_rate": 2.0682861020933675e-05, - "loss": 69.4663, - "step": 149930 - }, - { - "epoch": 0.605776572922264, - "grad_norm": 1784.4403076171875, - "learning_rate": 2.0679422844673348e-05, - "loss": 50.2619, - "step": 149940 - }, - { - "epoch": 0.6058169741876316, - "grad_norm": 855.6481323242188, - "learning_rate": 2.067598475264491e-05, - "loss": 33.051, - "step": 149950 - }, - { - "epoch": 0.6058573754529992, - "grad_norm": 934.8955078125, - "learning_rate": 2.067254674491538e-05, - "loss": 69.8998, - "step": 149960 - }, - { - "epoch": 0.6058977767183669, - "grad_norm": 446.6639709472656, - "learning_rate": 2.0669108821551757e-05, - "loss": 57.6521, - "step": 149970 - }, - { - "epoch": 0.6059381779837345, - "grad_norm": 1895.581298828125, - "learning_rate": 2.0665670982621105e-05, - "loss": 64.6645, - "step": 149980 - }, - { - "epoch": 0.605978579249102, - "grad_norm": 645.7675170898438, - "learning_rate": 2.066223322819043e-05, - "loss": 60.931, - "step": 149990 - }, - { - "epoch": 0.6060189805144697, - "grad_norm": 384.6867980957031, - "learning_rate": 2.0658795558326743e-05, - "loss": 61.6462, - "step": 150000 - }, - { - "epoch": 0.6060593817798373, - "grad_norm": 0.0, - "learning_rate": 2.065535797309708e-05, - "loss": 58.9445, - "step": 150010 - }, - { - "epoch": 0.606099783045205, - "grad_norm": 629.1178588867188, - "learning_rate": 2.0651920472568435e-05, - "loss": 32.2629, - "step": 150020 - }, - { - "epoch": 0.6061401843105726, - "grad_norm": 704.248291015625, - "learning_rate": 2.064848305680785e-05, - "loss": 60.3656, - "step": 150030 - }, - { - "epoch": 0.6061805855759402, - "grad_norm": 662.4960327148438, - "learning_rate": 2.0645045725882332e-05, - "loss": 74.1694, - "step": 150040 - }, - { - "epoch": 0.6062209868413079, - "grad_norm": 374.74383544921875, - "learning_rate": 2.0641608479858877e-05, - "loss": 42.1155, - "step": 150050 - }, - { - "epoch": 0.6062613881066755, - "grad_norm": 195.5003204345703, - "learning_rate": 2.0638171318804518e-05, - "loss": 60.8555, - "step": 150060 - }, - { - "epoch": 0.6063017893720432, - "grad_norm": 248.31491088867188, - "learning_rate": 2.0634734242786257e-05, - "loss": 46.3665, - "step": 150070 - }, - { - "epoch": 0.6063421906374108, - "grad_norm": 615.802734375, - "learning_rate": 2.0631297251871093e-05, - "loss": 42.9527, - "step": 150080 - }, - { - "epoch": 0.6063825919027784, - "grad_norm": 783.1875610351562, - "learning_rate": 2.0627860346126045e-05, - "loss": 42.422, - "step": 150090 - }, - { - "epoch": 0.6064229931681461, - "grad_norm": 728.5169067382812, - "learning_rate": 2.0624423525618098e-05, - "loss": 56.8203, - "step": 150100 - }, - { - "epoch": 0.6064633944335136, - "grad_norm": 745.2196655273438, - "learning_rate": 2.0620986790414276e-05, - "loss": 50.3501, - "step": 150110 - }, - { - "epoch": 0.6065037956988812, - "grad_norm": 662.663330078125, - "learning_rate": 2.0617550140581578e-05, - "loss": 54.6142, - "step": 150120 - }, - { - "epoch": 0.6065441969642489, - "grad_norm": 1055.7906494140625, - "learning_rate": 2.0614113576186978e-05, - "loss": 44.9255, - "step": 150130 - }, - { - "epoch": 0.6065845982296165, - "grad_norm": 2168.784912109375, - "learning_rate": 2.0610677097297505e-05, - "loss": 57.7951, - "step": 150140 - }, - { - "epoch": 0.6066249994949842, - "grad_norm": 1844.6190185546875, - "learning_rate": 2.0607240703980142e-05, - "loss": 57.9656, - "step": 150150 - }, - { - "epoch": 0.6066654007603518, - "grad_norm": 577.7366333007812, - "learning_rate": 2.0603804396301876e-05, - "loss": 68.4628, - "step": 150160 - }, - { - "epoch": 0.6067058020257194, - "grad_norm": 529.1781005859375, - "learning_rate": 2.0600368174329714e-05, - "loss": 44.7865, - "step": 150170 - }, - { - "epoch": 0.6067462032910871, - "grad_norm": 867.0560913085938, - "learning_rate": 2.0596932038130628e-05, - "loss": 70.4242, - "step": 150180 - }, - { - "epoch": 0.6067866045564547, - "grad_norm": 436.40948486328125, - "learning_rate": 2.0593495987771634e-05, - "loss": 41.8435, - "step": 150190 - }, - { - "epoch": 0.6068270058218224, - "grad_norm": 644.1611328125, - "learning_rate": 2.0590060023319696e-05, - "loss": 34.4137, - "step": 150200 - }, - { - "epoch": 0.60686740708719, - "grad_norm": 1544.953857421875, - "learning_rate": 2.0586624144841803e-05, - "loss": 57.2875, - "step": 150210 - }, - { - "epoch": 0.6069078083525576, - "grad_norm": 762.7637939453125, - "learning_rate": 2.0583188352404954e-05, - "loss": 77.1049, - "step": 150220 - }, - { - "epoch": 0.6069482096179253, - "grad_norm": 1265.564453125, - "learning_rate": 2.0579752646076123e-05, - "loss": 47.9961, - "step": 150230 - }, - { - "epoch": 0.6069886108832928, - "grad_norm": 1833.8243408203125, - "learning_rate": 2.0576317025922283e-05, - "loss": 65.6349, - "step": 150240 - }, - { - "epoch": 0.6070290121486605, - "grad_norm": 1576.8372802734375, - "learning_rate": 2.057288149201042e-05, - "loss": 49.3936, - "step": 150250 - }, - { - "epoch": 0.6070694134140281, - "grad_norm": 637.5480346679688, - "learning_rate": 2.056944604440751e-05, - "loss": 61.4114, - "step": 150260 - }, - { - "epoch": 0.6071098146793957, - "grad_norm": 895.0432739257812, - "learning_rate": 2.056601068318054e-05, - "loss": 39.7264, - "step": 150270 - }, - { - "epoch": 0.6071502159447634, - "grad_norm": 600.3475952148438, - "learning_rate": 2.056257540839647e-05, - "loss": 47.7371, - "step": 150280 - }, - { - "epoch": 0.607190617210131, - "grad_norm": 985.7539672851562, - "learning_rate": 2.055914022012227e-05, - "loss": 68.4636, - "step": 150290 - }, - { - "epoch": 0.6072310184754987, - "grad_norm": 838.5362548828125, - "learning_rate": 2.0555705118424927e-05, - "loss": 39.3113, - "step": 150300 - }, - { - "epoch": 0.6072714197408663, - "grad_norm": 802.4273071289062, - "learning_rate": 2.05522701033714e-05, - "loss": 54.2198, - "step": 150310 - }, - { - "epoch": 0.6073118210062339, - "grad_norm": 535.5287475585938, - "learning_rate": 2.0548835175028647e-05, - "loss": 43.866, - "step": 150320 - }, - { - "epoch": 0.6073522222716016, - "grad_norm": 556.2651977539062, - "learning_rate": 2.0545400333463656e-05, - "loss": 28.3161, - "step": 150330 - }, - { - "epoch": 0.6073926235369692, - "grad_norm": 589.1482543945312, - "learning_rate": 2.054196557874337e-05, - "loss": 63.3741, - "step": 150340 - }, - { - "epoch": 0.6074330248023369, - "grad_norm": 1289.090576171875, - "learning_rate": 2.0538530910934768e-05, - "loss": 53.0286, - "step": 150350 - }, - { - "epoch": 0.6074734260677045, - "grad_norm": 502.2218322753906, - "learning_rate": 2.0535096330104804e-05, - "loss": 45.3578, - "step": 150360 - }, - { - "epoch": 0.607513827333072, - "grad_norm": 598.0867309570312, - "learning_rate": 2.0531661836320422e-05, - "loss": 39.5945, - "step": 150370 - }, - { - "epoch": 0.6075542285984397, - "grad_norm": 724.6481323242188, - "learning_rate": 2.0528227429648604e-05, - "loss": 39.6854, - "step": 150380 - }, - { - "epoch": 0.6075946298638073, - "grad_norm": 962.4512939453125, - "learning_rate": 2.0524793110156297e-05, - "loss": 86.767, - "step": 150390 - }, - { - "epoch": 0.6076350311291749, - "grad_norm": 635.9224853515625, - "learning_rate": 2.0521358877910444e-05, - "loss": 44.4046, - "step": 150400 - }, - { - "epoch": 0.6076754323945426, - "grad_norm": 891.631103515625, - "learning_rate": 2.051792473297801e-05, - "loss": 49.1075, - "step": 150410 - }, - { - "epoch": 0.6077158336599102, - "grad_norm": 374.13427734375, - "learning_rate": 2.051449067542594e-05, - "loss": 31.0687, - "step": 150420 - }, - { - "epoch": 0.6077562349252779, - "grad_norm": 596.3746948242188, - "learning_rate": 2.0511056705321185e-05, - "loss": 47.9743, - "step": 150430 - }, - { - "epoch": 0.6077966361906455, - "grad_norm": 262.538818359375, - "learning_rate": 2.0507622822730695e-05, - "loss": 42.9091, - "step": 150440 - }, - { - "epoch": 0.6078370374560131, - "grad_norm": 1468.2989501953125, - "learning_rate": 2.0504189027721395e-05, - "loss": 57.1524, - "step": 150450 - }, - { - "epoch": 0.6078774387213808, - "grad_norm": 646.4707641601562, - "learning_rate": 2.050075532036026e-05, - "loss": 67.7372, - "step": 150460 - }, - { - "epoch": 0.6079178399867484, - "grad_norm": 5379.966796875, - "learning_rate": 2.0497321700714204e-05, - "loss": 67.6201, - "step": 150470 - }, - { - "epoch": 0.6079582412521161, - "grad_norm": 717.5620727539062, - "learning_rate": 2.0493888168850188e-05, - "loss": 44.0507, - "step": 150480 - }, - { - "epoch": 0.6079986425174837, - "grad_norm": 1433.787109375, - "learning_rate": 2.0490454724835147e-05, - "loss": 84.3167, - "step": 150490 - }, - { - "epoch": 0.6080390437828512, - "grad_norm": 862.51611328125, - "learning_rate": 2.0487021368736003e-05, - "loss": 47.1453, - "step": 150500 - }, - { - "epoch": 0.6080794450482189, - "grad_norm": 1338.2392578125, - "learning_rate": 2.0483588100619707e-05, - "loss": 59.3567, - "step": 150510 - }, - { - "epoch": 0.6081198463135865, - "grad_norm": 1019.3951416015625, - "learning_rate": 2.0480154920553186e-05, - "loss": 41.7931, - "step": 150520 - }, - { - "epoch": 0.6081602475789541, - "grad_norm": 778.1670532226562, - "learning_rate": 2.047672182860336e-05, - "loss": 56.9556, - "step": 150530 - }, - { - "epoch": 0.6082006488443218, - "grad_norm": 594.0452880859375, - "learning_rate": 2.0473288824837187e-05, - "loss": 45.3777, - "step": 150540 - }, - { - "epoch": 0.6082410501096894, - "grad_norm": 1647.530029296875, - "learning_rate": 2.0469855909321564e-05, - "loss": 58.5577, - "step": 150550 - }, - { - "epoch": 0.6082814513750571, - "grad_norm": 654.25390625, - "learning_rate": 2.0466423082123443e-05, - "loss": 39.5328, - "step": 150560 - }, - { - "epoch": 0.6083218526404247, - "grad_norm": 563.961181640625, - "learning_rate": 2.046299034330974e-05, - "loss": 56.6749, - "step": 150570 - }, - { - "epoch": 0.6083622539057923, - "grad_norm": 900.1375732421875, - "learning_rate": 2.0459557692947367e-05, - "loss": 40.388, - "step": 150580 - }, - { - "epoch": 0.60840265517116, - "grad_norm": 814.3180541992188, - "learning_rate": 2.0456125131103255e-05, - "loss": 46.5721, - "step": 150590 - }, - { - "epoch": 0.6084430564365276, - "grad_norm": 1955.17919921875, - "learning_rate": 2.0452692657844333e-05, - "loss": 53.1568, - "step": 150600 - }, - { - "epoch": 0.6084834577018953, - "grad_norm": 2265.74951171875, - "learning_rate": 2.04492602732375e-05, - "loss": 68.3158, - "step": 150610 - }, - { - "epoch": 0.6085238589672629, - "grad_norm": 405.62066650390625, - "learning_rate": 2.0445827977349685e-05, - "loss": 41.4512, - "step": 150620 - }, - { - "epoch": 0.6085642602326304, - "grad_norm": 762.5441284179688, - "learning_rate": 2.044239577024779e-05, - "loss": 44.7131, - "step": 150630 - }, - { - "epoch": 0.6086046614979981, - "grad_norm": 1103.0228271484375, - "learning_rate": 2.0438963651998747e-05, - "loss": 59.9257, - "step": 150640 - }, - { - "epoch": 0.6086450627633657, - "grad_norm": 773.6843872070312, - "learning_rate": 2.0435531622669453e-05, - "loss": 47.6074, - "step": 150650 - }, - { - "epoch": 0.6086854640287334, - "grad_norm": 500.89013671875, - "learning_rate": 2.0432099682326812e-05, - "loss": 32.2808, - "step": 150660 - }, - { - "epoch": 0.608725865294101, - "grad_norm": 907.202392578125, - "learning_rate": 2.042866783103775e-05, - "loss": 49.861, - "step": 150670 - }, - { - "epoch": 0.6087662665594686, - "grad_norm": 459.9265441894531, - "learning_rate": 2.042523606886916e-05, - "loss": 34.1603, - "step": 150680 - }, - { - "epoch": 0.6088066678248363, - "grad_norm": 915.3919067382812, - "learning_rate": 2.0421804395887942e-05, - "loss": 58.615, - "step": 150690 - }, - { - "epoch": 0.6088470690902039, - "grad_norm": 642.7056274414062, - "learning_rate": 2.0418372812161012e-05, - "loss": 65.4354, - "step": 150700 - }, - { - "epoch": 0.6088874703555716, - "grad_norm": 433.1236572265625, - "learning_rate": 2.041494131775525e-05, - "loss": 40.9949, - "step": 150710 - }, - { - "epoch": 0.6089278716209392, - "grad_norm": 1081.02880859375, - "learning_rate": 2.041150991273758e-05, - "loss": 43.7319, - "step": 150720 - }, - { - "epoch": 0.6089682728863068, - "grad_norm": 701.385498046875, - "learning_rate": 2.0408078597174886e-05, - "loss": 32.3251, - "step": 150730 - }, - { - "epoch": 0.6090086741516745, - "grad_norm": 731.4369506835938, - "learning_rate": 2.0404647371134055e-05, - "loss": 45.6466, - "step": 150740 - }, - { - "epoch": 0.609049075417042, - "grad_norm": 523.586181640625, - "learning_rate": 2.0401216234681995e-05, - "loss": 27.7691, - "step": 150750 - }, - { - "epoch": 0.6090894766824096, - "grad_norm": 551.3516845703125, - "learning_rate": 2.0397785187885598e-05, - "loss": 48.8377, - "step": 150760 - }, - { - "epoch": 0.6091298779477773, - "grad_norm": 513.434814453125, - "learning_rate": 2.039435423081174e-05, - "loss": 51.5516, - "step": 150770 - }, - { - "epoch": 0.6091702792131449, - "grad_norm": 1080.2301025390625, - "learning_rate": 2.039092336352732e-05, - "loss": 75.6299, - "step": 150780 - }, - { - "epoch": 0.6092106804785126, - "grad_norm": 568.66796875, - "learning_rate": 2.038749258609922e-05, - "loss": 44.2861, - "step": 150790 - }, - { - "epoch": 0.6092510817438802, - "grad_norm": 1709.7052001953125, - "learning_rate": 2.038406189859433e-05, - "loss": 62.1356, - "step": 150800 - }, - { - "epoch": 0.6092914830092478, - "grad_norm": 1051.6448974609375, - "learning_rate": 2.0380631301079532e-05, - "loss": 52.0556, - "step": 150810 - }, - { - "epoch": 0.6093318842746155, - "grad_norm": 643.7091674804688, - "learning_rate": 2.037720079362169e-05, - "loss": 34.8519, - "step": 150820 - }, - { - "epoch": 0.6093722855399831, - "grad_norm": 995.33642578125, - "learning_rate": 2.0373770376287715e-05, - "loss": 45.4539, - "step": 150830 - }, - { - "epoch": 0.6094126868053508, - "grad_norm": 871.22705078125, - "learning_rate": 2.037034004914447e-05, - "loss": 86.5264, - "step": 150840 - }, - { - "epoch": 0.6094530880707184, - "grad_norm": 291.0818786621094, - "learning_rate": 2.0366909812258817e-05, - "loss": 45.437, - "step": 150850 - }, - { - "epoch": 0.609493489336086, - "grad_norm": 370.9341125488281, - "learning_rate": 2.0363479665697652e-05, - "loss": 41.5995, - "step": 150860 - }, - { - "epoch": 0.6095338906014537, - "grad_norm": 993.7173461914062, - "learning_rate": 2.0360049609527825e-05, - "loss": 33.4351, - "step": 150870 - }, - { - "epoch": 0.6095742918668212, - "grad_norm": 269.01416015625, - "learning_rate": 2.0356619643816234e-05, - "loss": 35.0631, - "step": 150880 - }, - { - "epoch": 0.6096146931321889, - "grad_norm": 5912.23828125, - "learning_rate": 2.035318976862973e-05, - "loss": 54.3141, - "step": 150890 - }, - { - "epoch": 0.6096550943975565, - "grad_norm": 536.7326049804688, - "learning_rate": 2.034975998403517e-05, - "loss": 62.3261, - "step": 150900 - }, - { - "epoch": 0.6096954956629241, - "grad_norm": 735.4278564453125, - "learning_rate": 2.034633029009945e-05, - "loss": 50.3085, - "step": 150910 - }, - { - "epoch": 0.6097358969282918, - "grad_norm": 1243.7513427734375, - "learning_rate": 2.034290068688941e-05, - "loss": 58.6372, - "step": 150920 - }, - { - "epoch": 0.6097762981936594, - "grad_norm": 1334.8585205078125, - "learning_rate": 2.033947117447192e-05, - "loss": 61.1152, - "step": 150930 - }, - { - "epoch": 0.6098166994590271, - "grad_norm": 1525.87939453125, - "learning_rate": 2.0336041752913843e-05, - "loss": 51.0284, - "step": 150940 - }, - { - "epoch": 0.6098571007243947, - "grad_norm": 557.0948486328125, - "learning_rate": 2.0332612422282027e-05, - "loss": 64.3433, - "step": 150950 - }, - { - "epoch": 0.6098975019897623, - "grad_norm": 390.4165344238281, - "learning_rate": 2.032918318264334e-05, - "loss": 60.4239, - "step": 150960 - }, - { - "epoch": 0.60993790325513, - "grad_norm": 876.21044921875, - "learning_rate": 2.032575403406463e-05, - "loss": 41.9331, - "step": 150970 - }, - { - "epoch": 0.6099783045204976, - "grad_norm": 296.02685546875, - "learning_rate": 2.0322324976612745e-05, - "loss": 48.232, - "step": 150980 - }, - { - "epoch": 0.6100187057858653, - "grad_norm": 633.3362426757812, - "learning_rate": 2.0318896010354553e-05, - "loss": 58.8014, - "step": 150990 - }, - { - "epoch": 0.6100591070512329, - "grad_norm": 716.9625244140625, - "learning_rate": 2.031546713535688e-05, - "loss": 63.4572, - "step": 151000 - }, - { - "epoch": 0.6100995083166004, - "grad_norm": 639.0398559570312, - "learning_rate": 2.0312038351686598e-05, - "loss": 51.1365, - "step": 151010 - }, - { - "epoch": 0.6101399095819681, - "grad_norm": 1237.0989990234375, - "learning_rate": 2.0308609659410544e-05, - "loss": 52.9566, - "step": 151020 - }, - { - "epoch": 0.6101803108473357, - "grad_norm": 675.1341552734375, - "learning_rate": 2.0305181058595552e-05, - "loss": 57.4781, - "step": 151030 - }, - { - "epoch": 0.6102207121127033, - "grad_norm": 1143.0931396484375, - "learning_rate": 2.030175254930848e-05, - "loss": 49.0002, - "step": 151040 - }, - { - "epoch": 0.610261113378071, - "grad_norm": 784.3997802734375, - "learning_rate": 2.0298324131616158e-05, - "loss": 55.3003, - "step": 151050 - }, - { - "epoch": 0.6103015146434386, - "grad_norm": 360.4586181640625, - "learning_rate": 2.029489580558542e-05, - "loss": 37.6115, - "step": 151060 - }, - { - "epoch": 0.6103419159088063, - "grad_norm": 2819.0810546875, - "learning_rate": 2.029146757128312e-05, - "loss": 59.4453, - "step": 151070 - }, - { - "epoch": 0.6103823171741739, - "grad_norm": 975.2780151367188, - "learning_rate": 2.0288039428776073e-05, - "loss": 62.0133, - "step": 151080 - }, - { - "epoch": 0.6104227184395415, - "grad_norm": 1327.5264892578125, - "learning_rate": 2.0284611378131136e-05, - "loss": 67.0731, - "step": 151090 - }, - { - "epoch": 0.6104631197049092, - "grad_norm": 405.42041015625, - "learning_rate": 2.0281183419415125e-05, - "loss": 48.7321, - "step": 151100 - }, - { - "epoch": 0.6105035209702768, - "grad_norm": 507.772705078125, - "learning_rate": 2.0277755552694867e-05, - "loss": 53.5197, - "step": 151110 - }, - { - "epoch": 0.6105439222356445, - "grad_norm": 566.7341918945312, - "learning_rate": 2.02743277780372e-05, - "loss": 45.4196, - "step": 151120 - }, - { - "epoch": 0.6105843235010121, - "grad_norm": 907.8681030273438, - "learning_rate": 2.027090009550895e-05, - "loss": 43.463, - "step": 151130 - }, - { - "epoch": 0.6106247247663796, - "grad_norm": 753.9053955078125, - "learning_rate": 2.0267472505176927e-05, - "loss": 51.493, - "step": 151140 - }, - { - "epoch": 0.6106651260317473, - "grad_norm": 456.0301818847656, - "learning_rate": 2.0264045007107973e-05, - "loss": 58.2713, - "step": 151150 - }, - { - "epoch": 0.6107055272971149, - "grad_norm": 894.2009887695312, - "learning_rate": 2.0260617601368886e-05, - "loss": 64.6016, - "step": 151160 - }, - { - "epoch": 0.6107459285624826, - "grad_norm": 613.248046875, - "learning_rate": 2.0257190288026516e-05, - "loss": 46.2595, - "step": 151170 - }, - { - "epoch": 0.6107863298278502, - "grad_norm": 976.0096435546875, - "learning_rate": 2.0253763067147657e-05, - "loss": 60.338, - "step": 151180 - }, - { - "epoch": 0.6108267310932178, - "grad_norm": 844.7362060546875, - "learning_rate": 2.025033593879913e-05, - "loss": 41.8549, - "step": 151190 - }, - { - "epoch": 0.6108671323585855, - "grad_norm": 1008.4219970703125, - "learning_rate": 2.024690890304775e-05, - "loss": 67.3454, - "step": 151200 - }, - { - "epoch": 0.6109075336239531, - "grad_norm": 652.0379028320312, - "learning_rate": 2.0243481959960327e-05, - "loss": 60.3732, - "step": 151210 - }, - { - "epoch": 0.6109479348893208, - "grad_norm": 252.4768829345703, - "learning_rate": 2.0240055109603668e-05, - "loss": 43.6406, - "step": 151220 - }, - { - "epoch": 0.6109883361546884, - "grad_norm": 456.96221923828125, - "learning_rate": 2.023662835204459e-05, - "loss": 54.4126, - "step": 151230 - }, - { - "epoch": 0.611028737420056, - "grad_norm": 1217.955322265625, - "learning_rate": 2.0233201687349887e-05, - "loss": 74.467, - "step": 151240 - }, - { - "epoch": 0.6110691386854237, - "grad_norm": 1050.6669921875, - "learning_rate": 2.022977511558638e-05, - "loss": 53.1463, - "step": 151250 - }, - { - "epoch": 0.6111095399507913, - "grad_norm": 345.2624206542969, - "learning_rate": 2.0226348636820865e-05, - "loss": 43.0883, - "step": 151260 - }, - { - "epoch": 0.6111499412161588, - "grad_norm": 811.0917358398438, - "learning_rate": 2.022292225112013e-05, - "loss": 46.1009, - "step": 151270 - }, - { - "epoch": 0.6111903424815265, - "grad_norm": 419.13983154296875, - "learning_rate": 2.0219495958550992e-05, - "loss": 42.6573, - "step": 151280 - }, - { - "epoch": 0.6112307437468941, - "grad_norm": 572.9585571289062, - "learning_rate": 2.0216069759180248e-05, - "loss": 64.8317, - "step": 151290 - }, - { - "epoch": 0.6112711450122618, - "grad_norm": 1485.81640625, - "learning_rate": 2.021264365307468e-05, - "loss": 63.413, - "step": 151300 - }, - { - "epoch": 0.6113115462776294, - "grad_norm": 856.283447265625, - "learning_rate": 2.0209217640301088e-05, - "loss": 40.8531, - "step": 151310 - }, - { - "epoch": 0.611351947542997, - "grad_norm": 429.21636962890625, - "learning_rate": 2.020579172092626e-05, - "loss": 75.7096, - "step": 151320 - }, - { - "epoch": 0.6113923488083647, - "grad_norm": 770.0364990234375, - "learning_rate": 2.0202365895017e-05, - "loss": 90.2186, - "step": 151330 - }, - { - "epoch": 0.6114327500737323, - "grad_norm": 775.0713500976562, - "learning_rate": 2.0198940162640084e-05, - "loss": 55.38, - "step": 151340 - }, - { - "epoch": 0.6114731513391, - "grad_norm": 341.93707275390625, - "learning_rate": 2.0195514523862293e-05, - "loss": 43.4222, - "step": 151350 - }, - { - "epoch": 0.6115135526044676, - "grad_norm": 1337.871826171875, - "learning_rate": 2.0192088978750433e-05, - "loss": 59.2981, - "step": 151360 - }, - { - "epoch": 0.6115539538698352, - "grad_norm": 847.0081787109375, - "learning_rate": 2.0188663527371272e-05, - "loss": 53.3189, - "step": 151370 - }, - { - "epoch": 0.6115943551352029, - "grad_norm": 903.5023193359375, - "learning_rate": 2.0185238169791585e-05, - "loss": 46.3016, - "step": 151380 - }, - { - "epoch": 0.6116347564005704, - "grad_norm": 691.9161376953125, - "learning_rate": 2.0181812906078164e-05, - "loss": 55.7604, - "step": 151390 - }, - { - "epoch": 0.611675157665938, - "grad_norm": 793.9723510742188, - "learning_rate": 2.0178387736297773e-05, - "loss": 62.8434, - "step": 151400 - }, - { - "epoch": 0.6117155589313057, - "grad_norm": 764.1298217773438, - "learning_rate": 2.0174962660517206e-05, - "loss": 71.301, - "step": 151410 - }, - { - "epoch": 0.6117559601966733, - "grad_norm": 990.5913696289062, - "learning_rate": 2.0171537678803225e-05, - "loss": 42.5648, - "step": 151420 - }, - { - "epoch": 0.611796361462041, - "grad_norm": 670.5265502929688, - "learning_rate": 2.016811279122259e-05, - "loss": 48.3127, - "step": 151430 - }, - { - "epoch": 0.6118367627274086, - "grad_norm": 597.8585815429688, - "learning_rate": 2.0164687997842096e-05, - "loss": 43.7411, - "step": 151440 - }, - { - "epoch": 0.6118771639927763, - "grad_norm": 533.0531616210938, - "learning_rate": 2.0161263298728495e-05, - "loss": 50.8494, - "step": 151450 - }, - { - "epoch": 0.6119175652581439, - "grad_norm": 334.20184326171875, - "learning_rate": 2.015783869394856e-05, - "loss": 52.9698, - "step": 151460 - }, - { - "epoch": 0.6119579665235115, - "grad_norm": 557.2747802734375, - "learning_rate": 2.0154414183569055e-05, - "loss": 31.3677, - "step": 151470 - }, - { - "epoch": 0.6119983677888792, - "grad_norm": 513.8084106445312, - "learning_rate": 2.0150989767656728e-05, - "loss": 67.8341, - "step": 151480 - }, - { - "epoch": 0.6120387690542468, - "grad_norm": 775.8402099609375, - "learning_rate": 2.0147565446278364e-05, - "loss": 54.9718, - "step": 151490 - }, - { - "epoch": 0.6120791703196145, - "grad_norm": 209.1566925048828, - "learning_rate": 2.0144141219500705e-05, - "loss": 34.7493, - "step": 151500 - }, - { - "epoch": 0.6121195715849821, - "grad_norm": 761.5418090820312, - "learning_rate": 2.014071708739051e-05, - "loss": 52.2124, - "step": 151510 - }, - { - "epoch": 0.6121599728503496, - "grad_norm": 764.025634765625, - "learning_rate": 2.013729305001454e-05, - "loss": 57.659, - "step": 151520 - }, - { - "epoch": 0.6122003741157173, - "grad_norm": 160.14700317382812, - "learning_rate": 2.0133869107439545e-05, - "loss": 68.8839, - "step": 151530 - }, - { - "epoch": 0.6122407753810849, - "grad_norm": 546.741943359375, - "learning_rate": 2.0130445259732285e-05, - "loss": 48.702, - "step": 151540 - }, - { - "epoch": 0.6122811766464525, - "grad_norm": 556.5968627929688, - "learning_rate": 2.0127021506959488e-05, - "loss": 34.9057, - "step": 151550 - }, - { - "epoch": 0.6123215779118202, - "grad_norm": 1158.7283935546875, - "learning_rate": 2.012359784918792e-05, - "loss": 31.9852, - "step": 151560 - }, - { - "epoch": 0.6123619791771878, - "grad_norm": 207.57945251464844, - "learning_rate": 2.012017428648433e-05, - "loss": 44.4616, - "step": 151570 - }, - { - "epoch": 0.6124023804425555, - "grad_norm": 1311.223388671875, - "learning_rate": 2.011675081891545e-05, - "loss": 46.3915, - "step": 151580 - }, - { - "epoch": 0.6124427817079231, - "grad_norm": 539.1629028320312, - "learning_rate": 2.011332744654802e-05, - "loss": 61.3504, - "step": 151590 - }, - { - "epoch": 0.6124831829732907, - "grad_norm": 905.910400390625, - "learning_rate": 2.01099041694488e-05, - "loss": 50.0863, - "step": 151600 - }, - { - "epoch": 0.6125235842386584, - "grad_norm": 319.6258850097656, - "learning_rate": 2.01064809876845e-05, - "loss": 50.1784, - "step": 151610 - }, - { - "epoch": 0.612563985504026, - "grad_norm": 879.3612670898438, - "learning_rate": 2.0103057901321884e-05, - "loss": 63.5632, - "step": 151620 - }, - { - "epoch": 0.6126043867693937, - "grad_norm": 445.58953857421875, - "learning_rate": 2.0099634910427678e-05, - "loss": 31.5658, - "step": 151630 - }, - { - "epoch": 0.6126447880347613, - "grad_norm": 766.837890625, - "learning_rate": 2.0096212015068606e-05, - "loss": 48.1527, - "step": 151640 - }, - { - "epoch": 0.6126851893001288, - "grad_norm": 1137.3616943359375, - "learning_rate": 2.009278921531141e-05, - "loss": 78.9928, - "step": 151650 - }, - { - "epoch": 0.6127255905654965, - "grad_norm": 296.29620361328125, - "learning_rate": 2.0089366511222813e-05, - "loss": 56.172, - "step": 151660 - }, - { - "epoch": 0.6127659918308641, - "grad_norm": 926.5879516601562, - "learning_rate": 2.0085943902869537e-05, - "loss": 48.9301, - "step": 151670 - }, - { - "epoch": 0.6128063930962317, - "grad_norm": 633.228271484375, - "learning_rate": 2.0082521390318322e-05, - "loss": 41.1792, - "step": 151680 - }, - { - "epoch": 0.6128467943615994, - "grad_norm": 701.0936279296875, - "learning_rate": 2.0079098973635872e-05, - "loss": 33.9974, - "step": 151690 - }, - { - "epoch": 0.612887195626967, - "grad_norm": 275.7187805175781, - "learning_rate": 2.0075676652888936e-05, - "loss": 48.2116, - "step": 151700 - }, - { - "epoch": 0.6129275968923347, - "grad_norm": 1368.0904541015625, - "learning_rate": 2.0072254428144216e-05, - "loss": 64.7694, - "step": 151710 - }, - { - "epoch": 0.6129679981577023, - "grad_norm": 623.418701171875, - "learning_rate": 2.0068832299468428e-05, - "loss": 42.1006, - "step": 151720 - }, - { - "epoch": 0.61300839942307, - "grad_norm": 870.45556640625, - "learning_rate": 2.00654102669283e-05, - "loss": 47.7842, - "step": 151730 - }, - { - "epoch": 0.6130488006884376, - "grad_norm": 588.7037353515625, - "learning_rate": 2.0061988330590538e-05, - "loss": 83.3482, - "step": 151740 - }, - { - "epoch": 0.6130892019538052, - "grad_norm": 657.9315795898438, - "learning_rate": 2.0058566490521847e-05, - "loss": 43.4999, - "step": 151750 - }, - { - "epoch": 0.6131296032191729, - "grad_norm": 708.498779296875, - "learning_rate": 2.0055144746788957e-05, - "loss": 68.977, - "step": 151760 - }, - { - "epoch": 0.6131700044845405, - "grad_norm": 1133.77490234375, - "learning_rate": 2.0051723099458554e-05, - "loss": 48.2999, - "step": 151770 - }, - { - "epoch": 0.613210405749908, - "grad_norm": 302.3292236328125, - "learning_rate": 2.0048301548597363e-05, - "loss": 40.6427, - "step": 151780 - }, - { - "epoch": 0.6132508070152757, - "grad_norm": 404.0072937011719, - "learning_rate": 2.004488009427209e-05, - "loss": 32.1956, - "step": 151790 - }, - { - "epoch": 0.6132912082806433, - "grad_norm": 1267.0899658203125, - "learning_rate": 2.004145873654942e-05, - "loss": 67.8385, - "step": 151800 - }, - { - "epoch": 0.613331609546011, - "grad_norm": 446.0968322753906, - "learning_rate": 2.0038037475496075e-05, - "loss": 56.1292, - "step": 151810 - }, - { - "epoch": 0.6133720108113786, - "grad_norm": 1070.377197265625, - "learning_rate": 2.003461631117874e-05, - "loss": 60.586, - "step": 151820 - }, - { - "epoch": 0.6134124120767462, - "grad_norm": 430.245361328125, - "learning_rate": 2.003119524366411e-05, - "loss": 82.9556, - "step": 151830 - }, - { - "epoch": 0.6134528133421139, - "grad_norm": 566.5420532226562, - "learning_rate": 2.0027774273018892e-05, - "loss": 35.1446, - "step": 151840 - }, - { - "epoch": 0.6134932146074815, - "grad_norm": 570.1262817382812, - "learning_rate": 2.0024353399309765e-05, - "loss": 46.2369, - "step": 151850 - }, - { - "epoch": 0.6135336158728492, - "grad_norm": 409.9158935546875, - "learning_rate": 2.0020932622603444e-05, - "loss": 62.6959, - "step": 151860 - }, - { - "epoch": 0.6135740171382168, - "grad_norm": 869.2930908203125, - "learning_rate": 2.00175119429666e-05, - "loss": 42.3853, - "step": 151870 - }, - { - "epoch": 0.6136144184035844, - "grad_norm": 733.9028930664062, - "learning_rate": 2.0014091360465927e-05, - "loss": 90.8488, - "step": 151880 - }, - { - "epoch": 0.6136548196689521, - "grad_norm": 806.4086303710938, - "learning_rate": 2.00106708751681e-05, - "loss": 73.2679, - "step": 151890 - }, - { - "epoch": 0.6136952209343196, - "grad_norm": 1075.563232421875, - "learning_rate": 2.000725048713983e-05, - "loss": 41.3077, - "step": 151900 - }, - { - "epoch": 0.6137356221996872, - "grad_norm": 1005.3833618164062, - "learning_rate": 2.000383019644777e-05, - "loss": 66.0867, - "step": 151910 - }, - { - "epoch": 0.6137760234650549, - "grad_norm": 1066.146484375, - "learning_rate": 2.000041000315862e-05, - "loss": 43.0172, - "step": 151920 - }, - { - "epoch": 0.6138164247304225, - "grad_norm": 379.4106140136719, - "learning_rate": 1.999698990733904e-05, - "loss": 25.9617, - "step": 151930 - }, - { - "epoch": 0.6138568259957902, - "grad_norm": 494.0150146484375, - "learning_rate": 1.9993569909055725e-05, - "loss": 57.3421, - "step": 151940 - }, - { - "epoch": 0.6138972272611578, - "grad_norm": 467.149169921875, - "learning_rate": 1.9990150008375347e-05, - "loss": 41.3854, - "step": 151950 - }, - { - "epoch": 0.6139376285265254, - "grad_norm": 1409.2052001953125, - "learning_rate": 1.998673020536456e-05, - "loss": 58.1735, - "step": 151960 - }, - { - "epoch": 0.6139780297918931, - "grad_norm": 736.8629760742188, - "learning_rate": 1.998331050009006e-05, - "loss": 52.9153, - "step": 151970 - }, - { - "epoch": 0.6140184310572607, - "grad_norm": 791.1087036132812, - "learning_rate": 1.99798908926185e-05, - "loss": 64.018, - "step": 151980 - }, - { - "epoch": 0.6140588323226284, - "grad_norm": 990.827392578125, - "learning_rate": 1.9976471383016557e-05, - "loss": 56.9068, - "step": 151990 - }, - { - "epoch": 0.614099233587996, - "grad_norm": 720.25390625, - "learning_rate": 1.9973051971350888e-05, - "loss": 59.2837, - "step": 152000 - }, - { - "epoch": 0.6141396348533636, - "grad_norm": 877.4662475585938, - "learning_rate": 1.9969632657688155e-05, - "loss": 48.7033, - "step": 152010 - }, - { - "epoch": 0.6141800361187313, - "grad_norm": 656.0659790039062, - "learning_rate": 1.9966213442095028e-05, - "loss": 57.7444, - "step": 152020 - }, - { - "epoch": 0.6142204373840988, - "grad_norm": 1309.908447265625, - "learning_rate": 1.9962794324638162e-05, - "loss": 72.4926, - "step": 152030 - }, - { - "epoch": 0.6142608386494665, - "grad_norm": 421.1377868652344, - "learning_rate": 1.9959375305384203e-05, - "loss": 39.82, - "step": 152040 - }, - { - "epoch": 0.6143012399148341, - "grad_norm": 892.0406494140625, - "learning_rate": 1.9955956384399828e-05, - "loss": 65.1622, - "step": 152050 - }, - { - "epoch": 0.6143416411802017, - "grad_norm": 984.6168823242188, - "learning_rate": 1.995253756175168e-05, - "loss": 65.7813, - "step": 152060 - }, - { - "epoch": 0.6143820424455694, - "grad_norm": 821.1974487304688, - "learning_rate": 1.994911883750641e-05, - "loss": 57.6058, - "step": 152070 - }, - { - "epoch": 0.614422443710937, - "grad_norm": 419.92474365234375, - "learning_rate": 1.994570021173067e-05, - "loss": 69.8375, - "step": 152080 - }, - { - "epoch": 0.6144628449763047, - "grad_norm": 415.35400390625, - "learning_rate": 1.9942281684491098e-05, - "loss": 44.2016, - "step": 152090 - }, - { - "epoch": 0.6145032462416723, - "grad_norm": 475.3173522949219, - "learning_rate": 1.9938863255854357e-05, - "loss": 79.3213, - "step": 152100 - }, - { - "epoch": 0.6145436475070399, - "grad_norm": 1137.9266357421875, - "learning_rate": 1.9935444925887082e-05, - "loss": 56.144, - "step": 152110 - }, - { - "epoch": 0.6145840487724076, - "grad_norm": 545.95556640625, - "learning_rate": 1.9932026694655907e-05, - "loss": 65.3321, - "step": 152120 - }, - { - "epoch": 0.6146244500377752, - "grad_norm": 1686.6630859375, - "learning_rate": 1.992860856222749e-05, - "loss": 64.3524, - "step": 152130 - }, - { - "epoch": 0.6146648513031429, - "grad_norm": 951.417236328125, - "learning_rate": 1.992519052866845e-05, - "loss": 60.3568, - "step": 152140 - }, - { - "epoch": 0.6147052525685105, - "grad_norm": 941.8363647460938, - "learning_rate": 1.992177259404545e-05, - "loss": 62.5279, - "step": 152150 - }, - { - "epoch": 0.614745653833878, - "grad_norm": 1050.4307861328125, - "learning_rate": 1.99183547584251e-05, - "loss": 47.3056, - "step": 152160 - }, - { - "epoch": 0.6147860550992457, - "grad_norm": 1039.5369873046875, - "learning_rate": 1.9914937021874032e-05, - "loss": 48.0764, - "step": 152170 - }, - { - "epoch": 0.6148264563646133, - "grad_norm": 406.83685302734375, - "learning_rate": 1.9911519384458893e-05, - "loss": 52.5042, - "step": 152180 - }, - { - "epoch": 0.614866857629981, - "grad_norm": 560.0790405273438, - "learning_rate": 1.9908101846246304e-05, - "loss": 44.851, - "step": 152190 - }, - { - "epoch": 0.6149072588953486, - "grad_norm": 795.4417724609375, - "learning_rate": 1.9904684407302883e-05, - "loss": 86.3452, - "step": 152200 - }, - { - "epoch": 0.6149476601607162, - "grad_norm": 578.105712890625, - "learning_rate": 1.990126706769527e-05, - "loss": 30.9948, - "step": 152210 - }, - { - "epoch": 0.6149880614260839, - "grad_norm": 638.46044921875, - "learning_rate": 1.989784982749008e-05, - "loss": 39.7895, - "step": 152220 - }, - { - "epoch": 0.6150284626914515, - "grad_norm": 708.2172241210938, - "learning_rate": 1.989443268675393e-05, - "loss": 99.4368, - "step": 152230 - }, - { - "epoch": 0.6150688639568191, - "grad_norm": 1003.4779663085938, - "learning_rate": 1.989101564555345e-05, - "loss": 52.786, - "step": 152240 - }, - { - "epoch": 0.6151092652221868, - "grad_norm": 658.6339721679688, - "learning_rate": 1.9887598703955242e-05, - "loss": 53.8698, - "step": 152250 - }, - { - "epoch": 0.6151496664875544, - "grad_norm": 1459.9710693359375, - "learning_rate": 1.988418186202594e-05, - "loss": 56.2961, - "step": 152260 - }, - { - "epoch": 0.6151900677529221, - "grad_norm": 706.1653442382812, - "learning_rate": 1.988076511983214e-05, - "loss": 51.5391, - "step": 152270 - }, - { - "epoch": 0.6152304690182897, - "grad_norm": 863.3594360351562, - "learning_rate": 1.9877348477440456e-05, - "loss": 63.8783, - "step": 152280 - }, - { - "epoch": 0.6152708702836572, - "grad_norm": 1135.580322265625, - "learning_rate": 1.9873931934917506e-05, - "loss": 66.3226, - "step": 152290 - }, - { - "epoch": 0.6153112715490249, - "grad_norm": 656.6669311523438, - "learning_rate": 1.9870515492329884e-05, - "loss": 46.0662, - "step": 152300 - }, - { - "epoch": 0.6153516728143925, - "grad_norm": 1270.214599609375, - "learning_rate": 1.9867099149744213e-05, - "loss": 53.8983, - "step": 152310 - }, - { - "epoch": 0.6153920740797602, - "grad_norm": 1297.236328125, - "learning_rate": 1.9863682907227088e-05, - "loss": 53.0378, - "step": 152320 - }, - { - "epoch": 0.6154324753451278, - "grad_norm": 834.4827880859375, - "learning_rate": 1.98602667648451e-05, - "loss": 59.2501, - "step": 152330 - }, - { - "epoch": 0.6154728766104954, - "grad_norm": 380.9190368652344, - "learning_rate": 1.9856850722664864e-05, - "loss": 48.5186, - "step": 152340 - }, - { - "epoch": 0.6155132778758631, - "grad_norm": 661.6729125976562, - "learning_rate": 1.9853434780752973e-05, - "loss": 43.4546, - "step": 152350 - }, - { - "epoch": 0.6155536791412307, - "grad_norm": 0.0, - "learning_rate": 1.9850018939176014e-05, - "loss": 43.309, - "step": 152360 - }, - { - "epoch": 0.6155940804065984, - "grad_norm": 1031.6661376953125, - "learning_rate": 1.984660319800059e-05, - "loss": 58.6191, - "step": 152370 - }, - { - "epoch": 0.615634481671966, - "grad_norm": 544.71337890625, - "learning_rate": 1.9843187557293284e-05, - "loss": 59.7559, - "step": 152380 - }, - { - "epoch": 0.6156748829373336, - "grad_norm": 1013.8038330078125, - "learning_rate": 1.98397720171207e-05, - "loss": 56.4807, - "step": 152390 - }, - { - "epoch": 0.6157152842027013, - "grad_norm": 952.9639892578125, - "learning_rate": 1.983635657754942e-05, - "loss": 59.5986, - "step": 152400 - }, - { - "epoch": 0.6157556854680689, - "grad_norm": 650.10107421875, - "learning_rate": 1.983294123864602e-05, - "loss": 60.1285, - "step": 152410 - }, - { - "epoch": 0.6157960867334364, - "grad_norm": 1023.7354125976562, - "learning_rate": 1.9829526000477095e-05, - "loss": 63.0215, - "step": 152420 - }, - { - "epoch": 0.6158364879988041, - "grad_norm": 1077.8424072265625, - "learning_rate": 1.982611086310922e-05, - "loss": 50.8245, - "step": 152430 - }, - { - "epoch": 0.6158768892641717, - "grad_norm": 702.2991333007812, - "learning_rate": 1.9822695826608972e-05, - "loss": 102.1233, - "step": 152440 - }, - { - "epoch": 0.6159172905295394, - "grad_norm": 432.0490417480469, - "learning_rate": 1.981928089104294e-05, - "loss": 31.7835, - "step": 152450 - }, - { - "epoch": 0.615957691794907, - "grad_norm": 942.7676391601562, - "learning_rate": 1.981586605647769e-05, - "loss": 51.312, - "step": 152460 - }, - { - "epoch": 0.6159980930602746, - "grad_norm": 485.92242431640625, - "learning_rate": 1.9812451322979805e-05, - "loss": 54.0049, - "step": 152470 - }, - { - "epoch": 0.6160384943256423, - "grad_norm": 2057.892578125, - "learning_rate": 1.9809036690615853e-05, - "loss": 57.5523, - "step": 152480 - }, - { - "epoch": 0.6160788955910099, - "grad_norm": 1027.524658203125, - "learning_rate": 1.98056221594524e-05, - "loss": 64.4764, - "step": 152490 - }, - { - "epoch": 0.6161192968563776, - "grad_norm": 434.5711975097656, - "learning_rate": 1.980220772955602e-05, - "loss": 43.7486, - "step": 152500 - }, - { - "epoch": 0.6161596981217452, - "grad_norm": 1201.443603515625, - "learning_rate": 1.979879340099327e-05, - "loss": 40.2573, - "step": 152510 - }, - { - "epoch": 0.6162000993871128, - "grad_norm": 1387.3876953125, - "learning_rate": 1.979537917383073e-05, - "loss": 44.6969, - "step": 152520 - }, - { - "epoch": 0.6162405006524805, - "grad_norm": 564.0380249023438, - "learning_rate": 1.979196504813495e-05, - "loss": 36.653, - "step": 152530 - }, - { - "epoch": 0.616280901917848, - "grad_norm": 824.3677978515625, - "learning_rate": 1.9788551023972484e-05, - "loss": 43.8623, - "step": 152540 - }, - { - "epoch": 0.6163213031832157, - "grad_norm": 531.8292236328125, - "learning_rate": 1.9785137101409908e-05, - "loss": 57.3038, - "step": 152550 - }, - { - "epoch": 0.6163617044485833, - "grad_norm": 276.3924865722656, - "learning_rate": 1.9781723280513768e-05, - "loss": 42.5384, - "step": 152560 - }, - { - "epoch": 0.6164021057139509, - "grad_norm": 521.0140380859375, - "learning_rate": 1.9778309561350616e-05, - "loss": 54.9165, - "step": 152570 - }, - { - "epoch": 0.6164425069793186, - "grad_norm": 4706.6787109375, - "learning_rate": 1.9774895943987007e-05, - "loss": 70.1737, - "step": 152580 - }, - { - "epoch": 0.6164829082446862, - "grad_norm": 1025.7991943359375, - "learning_rate": 1.977148242848949e-05, - "loss": 51.8512, - "step": 152590 - }, - { - "epoch": 0.6165233095100539, - "grad_norm": 1082.083984375, - "learning_rate": 1.9768069014924622e-05, - "loss": 45.9668, - "step": 152600 - }, - { - "epoch": 0.6165637107754215, - "grad_norm": 820.9380493164062, - "learning_rate": 1.9764655703358945e-05, - "loss": 57.365, - "step": 152610 - }, - { - "epoch": 0.6166041120407891, - "grad_norm": 500.4762878417969, - "learning_rate": 1.9761242493858987e-05, - "loss": 60.4964, - "step": 152620 - }, - { - "epoch": 0.6166445133061568, - "grad_norm": 767.7699584960938, - "learning_rate": 1.975782938649131e-05, - "loss": 44.4775, - "step": 152630 - }, - { - "epoch": 0.6166849145715244, - "grad_norm": 730.7559204101562, - "learning_rate": 1.9754416381322455e-05, - "loss": 61.5032, - "step": 152640 - }, - { - "epoch": 0.616725315836892, - "grad_norm": 621.5751342773438, - "learning_rate": 1.975100347841894e-05, - "loss": 45.6827, - "step": 152650 - }, - { - "epoch": 0.6167657171022597, - "grad_norm": 1287.0096435546875, - "learning_rate": 1.974759067784732e-05, - "loss": 53.478, - "step": 152660 - }, - { - "epoch": 0.6168061183676272, - "grad_norm": 415.4209899902344, - "learning_rate": 1.974417797967413e-05, - "loss": 28.9644, - "step": 152670 - }, - { - "epoch": 0.6168465196329949, - "grad_norm": 277.8809509277344, - "learning_rate": 1.9740765383965893e-05, - "loss": 55.9292, - "step": 152680 - }, - { - "epoch": 0.6168869208983625, - "grad_norm": 597.7545776367188, - "learning_rate": 1.9737352890789142e-05, - "loss": 35.1805, - "step": 152690 - }, - { - "epoch": 0.6169273221637301, - "grad_norm": 395.4619140625, - "learning_rate": 1.9733940500210398e-05, - "loss": 64.5738, - "step": 152700 - }, - { - "epoch": 0.6169677234290978, - "grad_norm": 606.8431396484375, - "learning_rate": 1.9730528212296208e-05, - "loss": 47.9119, - "step": 152710 - }, - { - "epoch": 0.6170081246944654, - "grad_norm": 919.1580810546875, - "learning_rate": 1.9727116027113077e-05, - "loss": 42.3338, - "step": 152720 - }, - { - "epoch": 0.6170485259598331, - "grad_norm": 1222.4852294921875, - "learning_rate": 1.972370394472753e-05, - "loss": 89.0341, - "step": 152730 - }, - { - "epoch": 0.6170889272252007, - "grad_norm": 975.9841918945312, - "learning_rate": 1.9720291965206095e-05, - "loss": 40.6854, - "step": 152740 - }, - { - "epoch": 0.6171293284905683, - "grad_norm": 815.9156494140625, - "learning_rate": 1.9716880088615285e-05, - "loss": 67.8414, - "step": 152750 - }, - { - "epoch": 0.617169729755936, - "grad_norm": 587.384033203125, - "learning_rate": 1.9713468315021622e-05, - "loss": 60.1556, - "step": 152760 - }, - { - "epoch": 0.6172101310213036, - "grad_norm": 369.40081787109375, - "learning_rate": 1.9710056644491614e-05, - "loss": 46.0275, - "step": 152770 - }, - { - "epoch": 0.6172505322866713, - "grad_norm": 611.0215454101562, - "learning_rate": 1.9706645077091767e-05, - "loss": 50.6546, - "step": 152780 - }, - { - "epoch": 0.6172909335520389, - "grad_norm": 170.70001220703125, - "learning_rate": 1.970323361288861e-05, - "loss": 23.4807, - "step": 152790 - }, - { - "epoch": 0.6173313348174064, - "grad_norm": 473.4237976074219, - "learning_rate": 1.969982225194864e-05, - "loss": 63.5025, - "step": 152800 - }, - { - "epoch": 0.6173717360827741, - "grad_norm": 773.618408203125, - "learning_rate": 1.9696410994338354e-05, - "loss": 52.0266, - "step": 152810 - }, - { - "epoch": 0.6174121373481417, - "grad_norm": 649.8694458007812, - "learning_rate": 1.9692999840124275e-05, - "loss": 54.1027, - "step": 152820 - }, - { - "epoch": 0.6174525386135093, - "grad_norm": 1274.1107177734375, - "learning_rate": 1.9689588789372896e-05, - "loss": 67.0911, - "step": 152830 - }, - { - "epoch": 0.617492939878877, - "grad_norm": 541.9605102539062, - "learning_rate": 1.9686177842150715e-05, - "loss": 68.1363, - "step": 152840 - }, - { - "epoch": 0.6175333411442446, - "grad_norm": 1026.8818359375, - "learning_rate": 1.968276699852424e-05, - "loss": 53.6045, - "step": 152850 - }, - { - "epoch": 0.6175737424096123, - "grad_norm": 864.9629516601562, - "learning_rate": 1.9679356258559944e-05, - "loss": 79.2347, - "step": 152860 - }, - { - "epoch": 0.6176141436749799, - "grad_norm": 593.311767578125, - "learning_rate": 1.967594562232435e-05, - "loss": 42.9782, - "step": 152870 - }, - { - "epoch": 0.6176545449403475, - "grad_norm": 664.3198852539062, - "learning_rate": 1.967253508988394e-05, - "loss": 52.8853, - "step": 152880 - }, - { - "epoch": 0.6176949462057152, - "grad_norm": 1579.415283203125, - "learning_rate": 1.9669124661305185e-05, - "loss": 75.1785, - "step": 152890 - }, - { - "epoch": 0.6177353474710828, - "grad_norm": 1345.0396728515625, - "learning_rate": 1.9665714336654602e-05, - "loss": 46.0577, - "step": 152900 - }, - { - "epoch": 0.6177757487364505, - "grad_norm": 811.4996948242188, - "learning_rate": 1.966230411599866e-05, - "loss": 55.0498, - "step": 152910 - }, - { - "epoch": 0.6178161500018181, - "grad_norm": 1309.7557373046875, - "learning_rate": 1.9658893999403847e-05, - "loss": 51.9295, - "step": 152920 - }, - { - "epoch": 0.6178565512671856, - "grad_norm": 936.4150390625, - "learning_rate": 1.9655483986936653e-05, - "loss": 45.001, - "step": 152930 - }, - { - "epoch": 0.6178969525325533, - "grad_norm": 293.4042663574219, - "learning_rate": 1.965207407866354e-05, - "loss": 54.548, - "step": 152940 - }, - { - "epoch": 0.6179373537979209, - "grad_norm": 679.14306640625, - "learning_rate": 1.9648664274651e-05, - "loss": 48.1192, - "step": 152950 - }, - { - "epoch": 0.6179777550632886, - "grad_norm": 677.8344116210938, - "learning_rate": 1.964525457496551e-05, - "loss": 55.2474, - "step": 152960 - }, - { - "epoch": 0.6180181563286562, - "grad_norm": 584.7177734375, - "learning_rate": 1.964184497967353e-05, - "loss": 52.2991, - "step": 152970 - }, - { - "epoch": 0.6180585575940238, - "grad_norm": 619.6227416992188, - "learning_rate": 1.9638435488841546e-05, - "loss": 38.7748, - "step": 152980 - }, - { - "epoch": 0.6180989588593915, - "grad_norm": 824.2867431640625, - "learning_rate": 1.9635026102536014e-05, - "loss": 41.0035, - "step": 152990 - }, - { - "epoch": 0.6181393601247591, - "grad_norm": 825.877685546875, - "learning_rate": 1.963161682082342e-05, - "loss": 77.0988, - "step": 153000 - }, - { - "epoch": 0.6181797613901268, - "grad_norm": 733.9141235351562, - "learning_rate": 1.9628207643770223e-05, - "loss": 42.6838, - "step": 153010 - }, - { - "epoch": 0.6182201626554944, - "grad_norm": 1160.487060546875, - "learning_rate": 1.9624798571442873e-05, - "loss": 46.4142, - "step": 153020 - }, - { - "epoch": 0.618260563920862, - "grad_norm": 331.2978210449219, - "learning_rate": 1.9621389603907852e-05, - "loss": 29.5836, - "step": 153030 - }, - { - "epoch": 0.6183009651862297, - "grad_norm": 704.036376953125, - "learning_rate": 1.96179807412316e-05, - "loss": 51.0001, - "step": 153040 - }, - { - "epoch": 0.6183413664515973, - "grad_norm": 751.7705078125, - "learning_rate": 1.961457198348059e-05, - "loss": 53.3635, - "step": 153050 - }, - { - "epoch": 0.6183817677169648, - "grad_norm": 2129.126708984375, - "learning_rate": 1.9611163330721275e-05, - "loss": 66.4268, - "step": 153060 - }, - { - "epoch": 0.6184221689823325, - "grad_norm": 1071.379150390625, - "learning_rate": 1.9607754783020092e-05, - "loss": 75.9926, - "step": 153070 - }, - { - "epoch": 0.6184625702477001, - "grad_norm": 600.6515502929688, - "learning_rate": 1.9604346340443518e-05, - "loss": 49.0746, - "step": 153080 - }, - { - "epoch": 0.6185029715130678, - "grad_norm": 635.5545043945312, - "learning_rate": 1.9600938003057994e-05, - "loss": 50.7416, - "step": 153090 - }, - { - "epoch": 0.6185433727784354, - "grad_norm": 392.02178955078125, - "learning_rate": 1.959752977092995e-05, - "loss": 98.5631, - "step": 153100 - }, - { - "epoch": 0.618583774043803, - "grad_norm": 811.9187622070312, - "learning_rate": 1.9594121644125852e-05, - "loss": 55.1491, - "step": 153110 - }, - { - "epoch": 0.6186241753091707, - "grad_norm": 912.7703247070312, - "learning_rate": 1.9590713622712132e-05, - "loss": 69.0469, - "step": 153120 - }, - { - "epoch": 0.6186645765745383, - "grad_norm": 588.3534545898438, - "learning_rate": 1.9587305706755236e-05, - "loss": 54.1767, - "step": 153130 - }, - { - "epoch": 0.618704977839906, - "grad_norm": 878.7616577148438, - "learning_rate": 1.9583897896321607e-05, - "loss": 68.2562, - "step": 153140 - }, - { - "epoch": 0.6187453791052736, - "grad_norm": 334.1783142089844, - "learning_rate": 1.958049019147767e-05, - "loss": 40.0663, - "step": 153150 - }, - { - "epoch": 0.6187857803706412, - "grad_norm": 619.4424438476562, - "learning_rate": 1.957708259228987e-05, - "loss": 35.9437, - "step": 153160 - }, - { - "epoch": 0.6188261816360089, - "grad_norm": 1056.420166015625, - "learning_rate": 1.957367509882464e-05, - "loss": 54.9157, - "step": 153170 - }, - { - "epoch": 0.6188665829013764, - "grad_norm": 1181.443115234375, - "learning_rate": 1.9570267711148403e-05, - "loss": 50.0268, - "step": 153180 - }, - { - "epoch": 0.6189069841667441, - "grad_norm": 431.3962097167969, - "learning_rate": 1.9566860429327595e-05, - "loss": 47.6178, - "step": 153190 - }, - { - "epoch": 0.6189473854321117, - "grad_norm": 490.9452819824219, - "learning_rate": 1.956345325342863e-05, - "loss": 48.6404, - "step": 153200 - }, - { - "epoch": 0.6189877866974793, - "grad_norm": 572.4602661132812, - "learning_rate": 1.9560046183517953e-05, - "loss": 50.4382, - "step": 153210 - }, - { - "epoch": 0.619028187962847, - "grad_norm": 801.1991577148438, - "learning_rate": 1.955663921966198e-05, - "loss": 41.4329, - "step": 153220 - }, - { - "epoch": 0.6190685892282146, - "grad_norm": 290.9499206542969, - "learning_rate": 1.9553232361927114e-05, - "loss": 56.9374, - "step": 153230 - }, - { - "epoch": 0.6191089904935823, - "grad_norm": 657.2138671875, - "learning_rate": 1.95498256103798e-05, - "loss": 58.0168, - "step": 153240 - }, - { - "epoch": 0.6191493917589499, - "grad_norm": 1027.6114501953125, - "learning_rate": 1.9546418965086442e-05, - "loss": 54.518, - "step": 153250 - }, - { - "epoch": 0.6191897930243175, - "grad_norm": 748.5098266601562, - "learning_rate": 1.954301242611344e-05, - "loss": 43.2595, - "step": 153260 - }, - { - "epoch": 0.6192301942896852, - "grad_norm": 648.7311401367188, - "learning_rate": 1.9539605993527237e-05, - "loss": 59.3503, - "step": 153270 - }, - { - "epoch": 0.6192705955550528, - "grad_norm": 344.7987365722656, - "learning_rate": 1.9536199667394215e-05, - "loss": 61.7763, - "step": 153280 - }, - { - "epoch": 0.6193109968204205, - "grad_norm": 451.1872863769531, - "learning_rate": 1.95327934477808e-05, - "loss": 84.8409, - "step": 153290 - }, - { - "epoch": 0.6193513980857881, - "grad_norm": 830.3189697265625, - "learning_rate": 1.9529387334753395e-05, - "loss": 43.1897, - "step": 153300 - }, - { - "epoch": 0.6193917993511556, - "grad_norm": 514.0184326171875, - "learning_rate": 1.9525981328378384e-05, - "loss": 53.5215, - "step": 153310 - }, - { - "epoch": 0.6194322006165233, - "grad_norm": 357.33038330078125, - "learning_rate": 1.95225754287222e-05, - "loss": 53.0679, - "step": 153320 - }, - { - "epoch": 0.6194726018818909, - "grad_norm": 453.4263610839844, - "learning_rate": 1.9519169635851224e-05, - "loss": 52.7831, - "step": 153330 - }, - { - "epoch": 0.6195130031472585, - "grad_norm": 1120.7613525390625, - "learning_rate": 1.951576394983185e-05, - "loss": 87.4061, - "step": 153340 - }, - { - "epoch": 0.6195534044126262, - "grad_norm": 1069.5909423828125, - "learning_rate": 1.9512358370730493e-05, - "loss": 63.5944, - "step": 153350 - }, - { - "epoch": 0.6195938056779938, - "grad_norm": 665.5206909179688, - "learning_rate": 1.9508952898613528e-05, - "loss": 50.0496, - "step": 153360 - }, - { - "epoch": 0.6196342069433615, - "grad_norm": 509.3944091796875, - "learning_rate": 1.9505547533547358e-05, - "loss": 50.3182, - "step": 153370 - }, - { - "epoch": 0.6196746082087291, - "grad_norm": 457.98583984375, - "learning_rate": 1.950214227559837e-05, - "loss": 39.0197, - "step": 153380 - }, - { - "epoch": 0.6197150094740967, - "grad_norm": 1662.1475830078125, - "learning_rate": 1.9498737124832936e-05, - "loss": 41.9308, - "step": 153390 - }, - { - "epoch": 0.6197554107394644, - "grad_norm": 1056.866455078125, - "learning_rate": 1.9495332081317464e-05, - "loss": 43.6751, - "step": 153400 - }, - { - "epoch": 0.619795812004832, - "grad_norm": 1036.8895263671875, - "learning_rate": 1.949192714511833e-05, - "loss": 56.6728, - "step": 153410 - }, - { - "epoch": 0.6198362132701997, - "grad_norm": 950.1953735351562, - "learning_rate": 1.9488522316301898e-05, - "loss": 84.3255, - "step": 153420 - }, - { - "epoch": 0.6198766145355673, - "grad_norm": 740.6848754882812, - "learning_rate": 1.9485117594934574e-05, - "loss": 51.3797, - "step": 153430 - }, - { - "epoch": 0.6199170158009348, - "grad_norm": 798.8438720703125, - "learning_rate": 1.9481712981082714e-05, - "loss": 72.0278, - "step": 153440 - }, - { - "epoch": 0.6199574170663025, - "grad_norm": 735.481689453125, - "learning_rate": 1.947830847481271e-05, - "loss": 39.8441, - "step": 153450 - }, - { - "epoch": 0.6199978183316701, - "grad_norm": 574.896240234375, - "learning_rate": 1.947490407619092e-05, - "loss": 59.1198, - "step": 153460 - }, - { - "epoch": 0.6200382195970378, - "grad_norm": 802.9642944335938, - "learning_rate": 1.9471499785283712e-05, - "loss": 58.8451, - "step": 153470 - }, - { - "epoch": 0.6200786208624054, - "grad_norm": 601.0955810546875, - "learning_rate": 1.946809560215747e-05, - "loss": 53.9513, - "step": 153480 - }, - { - "epoch": 0.620119022127773, - "grad_norm": 844.2098999023438, - "learning_rate": 1.9464691526878555e-05, - "loss": 56.864, - "step": 153490 - }, - { - "epoch": 0.6201594233931407, - "grad_norm": 836.8554077148438, - "learning_rate": 1.946128755951332e-05, - "loss": 48.7098, - "step": 153500 - }, - { - "epoch": 0.6201998246585083, - "grad_norm": 1133.341552734375, - "learning_rate": 1.945788370012814e-05, - "loss": 60.1469, - "step": 153510 - }, - { - "epoch": 0.620240225923876, - "grad_norm": 597.8056640625, - "learning_rate": 1.945447994878937e-05, - "loss": 52.2197, - "step": 153520 - }, - { - "epoch": 0.6202806271892436, - "grad_norm": 370.60791015625, - "learning_rate": 1.945107630556337e-05, - "loss": 30.6839, - "step": 153530 - }, - { - "epoch": 0.6203210284546112, - "grad_norm": 2185.733642578125, - "learning_rate": 1.9447672770516494e-05, - "loss": 55.9894, - "step": 153540 - }, - { - "epoch": 0.6203614297199789, - "grad_norm": 950.8214721679688, - "learning_rate": 1.9444269343715092e-05, - "loss": 50.5676, - "step": 153550 - }, - { - "epoch": 0.6204018309853465, - "grad_norm": 982.4635009765625, - "learning_rate": 1.9440866025225525e-05, - "loss": 50.9509, - "step": 153560 - }, - { - "epoch": 0.620442232250714, - "grad_norm": 1476.201171875, - "learning_rate": 1.9437462815114128e-05, - "loss": 50.0211, - "step": 153570 - }, - { - "epoch": 0.6204826335160817, - "grad_norm": 811.0206298828125, - "learning_rate": 1.9434059713447265e-05, - "loss": 67.0507, - "step": 153580 - }, - { - "epoch": 0.6205230347814493, - "grad_norm": 753.006591796875, - "learning_rate": 1.9430656720291275e-05, - "loss": 53.868, - "step": 153590 - }, - { - "epoch": 0.620563436046817, - "grad_norm": 937.0206909179688, - "learning_rate": 1.942725383571249e-05, - "loss": 55.5334, - "step": 153600 - }, - { - "epoch": 0.6206038373121846, - "grad_norm": 889.5717163085938, - "learning_rate": 1.942385105977727e-05, - "loss": 70.7559, - "step": 153610 - }, - { - "epoch": 0.6206442385775522, - "grad_norm": 412.93267822265625, - "learning_rate": 1.9420448392551943e-05, - "loss": 36.8532, - "step": 153620 - }, - { - "epoch": 0.6206846398429199, - "grad_norm": 654.7472534179688, - "learning_rate": 1.9417045834102844e-05, - "loss": 48.5801, - "step": 153630 - }, - { - "epoch": 0.6207250411082875, - "grad_norm": 586.552490234375, - "learning_rate": 1.9413643384496316e-05, - "loss": 54.077, - "step": 153640 - }, - { - "epoch": 0.6207654423736552, - "grad_norm": 811.3733520507812, - "learning_rate": 1.9410241043798676e-05, - "loss": 63.7313, - "step": 153650 - }, - { - "epoch": 0.6208058436390228, - "grad_norm": 444.1869812011719, - "learning_rate": 1.9406838812076273e-05, - "loss": 46.0719, - "step": 153660 - }, - { - "epoch": 0.6208462449043904, - "grad_norm": 811.6187744140625, - "learning_rate": 1.9403436689395426e-05, - "loss": 62.3275, - "step": 153670 - }, - { - "epoch": 0.6208866461697581, - "grad_norm": 504.3353271484375, - "learning_rate": 1.9400034675822452e-05, - "loss": 42.1343, - "step": 153680 - }, - { - "epoch": 0.6209270474351257, - "grad_norm": 799.2766723632812, - "learning_rate": 1.93966327714237e-05, - "loss": 52.7461, - "step": 153690 - }, - { - "epoch": 0.6209674487004933, - "grad_norm": 461.0577392578125, - "learning_rate": 1.9393230976265473e-05, - "loss": 59.0141, - "step": 153700 - }, - { - "epoch": 0.6210078499658609, - "grad_norm": 828.9132690429688, - "learning_rate": 1.938982929041409e-05, - "loss": 46.2773, - "step": 153710 - }, - { - "epoch": 0.6210482512312285, - "grad_norm": 354.5053405761719, - "learning_rate": 1.938642771393588e-05, - "loss": 53.5887, - "step": 153720 - }, - { - "epoch": 0.6210886524965962, - "grad_norm": 144.50497436523438, - "learning_rate": 1.9383026246897143e-05, - "loss": 37.4983, - "step": 153730 - }, - { - "epoch": 0.6211290537619638, - "grad_norm": 783.1920776367188, - "learning_rate": 1.937962488936421e-05, - "loss": 38.912, - "step": 153740 - }, - { - "epoch": 0.6211694550273315, - "grad_norm": 666.5381469726562, - "learning_rate": 1.937622364140338e-05, - "loss": 69.1318, - "step": 153750 - }, - { - "epoch": 0.6212098562926991, - "grad_norm": 760.7601318359375, - "learning_rate": 1.9372822503080957e-05, - "loss": 40.2389, - "step": 153760 - }, - { - "epoch": 0.6212502575580667, - "grad_norm": 508.2515563964844, - "learning_rate": 1.9369421474463268e-05, - "loss": 42.0188, - "step": 153770 - }, - { - "epoch": 0.6212906588234344, - "grad_norm": 610.3115844726562, - "learning_rate": 1.9366020555616603e-05, - "loss": 43.7602, - "step": 153780 - }, - { - "epoch": 0.621331060088802, - "grad_norm": 336.53662109375, - "learning_rate": 1.936261974660727e-05, - "loss": 44.282, - "step": 153790 - }, - { - "epoch": 0.6213714613541697, - "grad_norm": 952.2464599609375, - "learning_rate": 1.9359219047501565e-05, - "loss": 81.2008, - "step": 153800 - }, - { - "epoch": 0.6214118626195373, - "grad_norm": 365.46612548828125, - "learning_rate": 1.9355818458365782e-05, - "loss": 37.9822, - "step": 153810 - }, - { - "epoch": 0.6214522638849048, - "grad_norm": 697.7381591796875, - "learning_rate": 1.9352417979266233e-05, - "loss": 66.52, - "step": 153820 - }, - { - "epoch": 0.6214926651502725, - "grad_norm": 800.6105346679688, - "learning_rate": 1.93490176102692e-05, - "loss": 62.5696, - "step": 153830 - }, - { - "epoch": 0.6215330664156401, - "grad_norm": 527.7947387695312, - "learning_rate": 1.9345617351440973e-05, - "loss": 79.746, - "step": 153840 - }, - { - "epoch": 0.6215734676810077, - "grad_norm": 1173.56689453125, - "learning_rate": 1.9342217202847856e-05, - "loss": 59.6406, - "step": 153850 - }, - { - "epoch": 0.6216138689463754, - "grad_norm": 1889.8114013671875, - "learning_rate": 1.9338817164556128e-05, - "loss": 65.9831, - "step": 153860 - }, - { - "epoch": 0.621654270211743, - "grad_norm": 1583.56591796875, - "learning_rate": 1.9335417236632065e-05, - "loss": 48.6368, - "step": 153870 - }, - { - "epoch": 0.6216946714771107, - "grad_norm": 379.38201904296875, - "learning_rate": 1.9332017419141962e-05, - "loss": 33.5321, - "step": 153880 - }, - { - "epoch": 0.6217350727424783, - "grad_norm": 617.2946166992188, - "learning_rate": 1.9328617712152098e-05, - "loss": 60.8888, - "step": 153890 - }, - { - "epoch": 0.6217754740078459, - "grad_norm": 685.6942138671875, - "learning_rate": 1.9325218115728755e-05, - "loss": 48.9418, - "step": 153900 - }, - { - "epoch": 0.6218158752732136, - "grad_norm": 514.7144165039062, - "learning_rate": 1.9321818629938208e-05, - "loss": 67.4029, - "step": 153910 - }, - { - "epoch": 0.6218562765385812, - "grad_norm": 1621.0958251953125, - "learning_rate": 1.9318419254846718e-05, - "loss": 42.5269, - "step": 153920 - }, - { - "epoch": 0.6218966778039489, - "grad_norm": 954.4661254882812, - "learning_rate": 1.9315019990520582e-05, - "loss": 60.1667, - "step": 153930 - }, - { - "epoch": 0.6219370790693165, - "grad_norm": 1003.4674682617188, - "learning_rate": 1.9311620837026057e-05, - "loss": 68.499, - "step": 153940 - }, - { - "epoch": 0.621977480334684, - "grad_norm": 713.1990966796875, - "learning_rate": 1.9308221794429403e-05, - "loss": 50.8365, - "step": 153950 - }, - { - "epoch": 0.6220178816000517, - "grad_norm": 381.65618896484375, - "learning_rate": 1.9304822862796903e-05, - "loss": 34.9108, - "step": 153960 - }, - { - "epoch": 0.6220582828654193, - "grad_norm": 1819.873046875, - "learning_rate": 1.930142404219481e-05, - "loss": 46.8878, - "step": 153970 - }, - { - "epoch": 0.622098684130787, - "grad_norm": 444.01611328125, - "learning_rate": 1.9298025332689397e-05, - "loss": 55.4984, - "step": 153980 - }, - { - "epoch": 0.6221390853961546, - "grad_norm": 379.1543273925781, - "learning_rate": 1.9294626734346914e-05, - "loss": 73.1394, - "step": 153990 - }, - { - "epoch": 0.6221794866615222, - "grad_norm": 1029.5592041015625, - "learning_rate": 1.9291228247233605e-05, - "loss": 53.775, - "step": 154000 - }, - { - "epoch": 0.6222198879268899, - "grad_norm": 991.5259399414062, - "learning_rate": 1.9287829871415757e-05, - "loss": 55.4859, - "step": 154010 - }, - { - "epoch": 0.6222602891922575, - "grad_norm": 375.24810791015625, - "learning_rate": 1.9284431606959594e-05, - "loss": 47.712, - "step": 154020 - }, - { - "epoch": 0.6223006904576251, - "grad_norm": 706.0023193359375, - "learning_rate": 1.9281033453931388e-05, - "loss": 42.1541, - "step": 154030 - }, - { - "epoch": 0.6223410917229928, - "grad_norm": 842.9607543945312, - "learning_rate": 1.9277635412397383e-05, - "loss": 51.2036, - "step": 154040 - }, - { - "epoch": 0.6223814929883604, - "grad_norm": 0.0, - "learning_rate": 1.9274237482423814e-05, - "loss": 51.3722, - "step": 154050 - }, - { - "epoch": 0.6224218942537281, - "grad_norm": 1058.037841796875, - "learning_rate": 1.9270839664076936e-05, - "loss": 50.9369, - "step": 154060 - }, - { - "epoch": 0.6224622955190957, - "grad_norm": 908.2258911132812, - "learning_rate": 1.9267441957422994e-05, - "loss": 48.8932, - "step": 154070 - }, - { - "epoch": 0.6225026967844632, - "grad_norm": 783.5228271484375, - "learning_rate": 1.926404436252821e-05, - "loss": 48.0859, - "step": 154080 - }, - { - "epoch": 0.6225430980498309, - "grad_norm": 719.8128662109375, - "learning_rate": 1.926064687945884e-05, - "loss": 58.9963, - "step": 154090 - }, - { - "epoch": 0.6225834993151985, - "grad_norm": 573.966552734375, - "learning_rate": 1.9257249508281107e-05, - "loss": 50.9664, - "step": 154100 - }, - { - "epoch": 0.6226239005805662, - "grad_norm": 477.0748291015625, - "learning_rate": 1.925385224906126e-05, - "loss": 38.2241, - "step": 154110 - }, - { - "epoch": 0.6226643018459338, - "grad_norm": 405.0860290527344, - "learning_rate": 1.9250455101865526e-05, - "loss": 56.007, - "step": 154120 - }, - { - "epoch": 0.6227047031113014, - "grad_norm": 1051.1751708984375, - "learning_rate": 1.924705806676012e-05, - "loss": 67.0096, - "step": 154130 - }, - { - "epoch": 0.6227451043766691, - "grad_norm": 960.0936889648438, - "learning_rate": 1.9243661143811287e-05, - "loss": 39.5979, - "step": 154140 - }, - { - "epoch": 0.6227855056420367, - "grad_norm": 476.98992919921875, - "learning_rate": 1.9240264333085245e-05, - "loss": 69.4723, - "step": 154150 - }, - { - "epoch": 0.6228259069074044, - "grad_norm": 675.7411499023438, - "learning_rate": 1.92368676346482e-05, - "loss": 46.5527, - "step": 154160 - }, - { - "epoch": 0.622866308172772, - "grad_norm": 305.06793212890625, - "learning_rate": 1.92334710485664e-05, - "loss": 42.114, - "step": 154170 - }, - { - "epoch": 0.6229067094381396, - "grad_norm": 505.45458984375, - "learning_rate": 1.9230074574906042e-05, - "loss": 66.8375, - "step": 154180 - }, - { - "epoch": 0.6229471107035073, - "grad_norm": 799.241455078125, - "learning_rate": 1.9226678213733358e-05, - "loss": 59.1406, - "step": 154190 - }, - { - "epoch": 0.6229875119688749, - "grad_norm": 676.989013671875, - "learning_rate": 1.922328196511456e-05, - "loss": 33.6825, - "step": 154200 - }, - { - "epoch": 0.6230279132342424, - "grad_norm": 1138.4705810546875, - "learning_rate": 1.9219885829115843e-05, - "loss": 48.9602, - "step": 154210 - }, - { - "epoch": 0.6230683144996101, - "grad_norm": 401.67071533203125, - "learning_rate": 1.921648980580343e-05, - "loss": 54.2608, - "step": 154220 - }, - { - "epoch": 0.6231087157649777, - "grad_norm": 2265.824951171875, - "learning_rate": 1.921309389524353e-05, - "loss": 78.4544, - "step": 154230 - }, - { - "epoch": 0.6231491170303454, - "grad_norm": 764.9121704101562, - "learning_rate": 1.920969809750234e-05, - "loss": 58.4583, - "step": 154240 - }, - { - "epoch": 0.623189518295713, - "grad_norm": 914.537109375, - "learning_rate": 1.920630241264607e-05, - "loss": 41.7083, - "step": 154250 - }, - { - "epoch": 0.6232299195610806, - "grad_norm": 293.2212219238281, - "learning_rate": 1.9202906840740907e-05, - "loss": 43.5702, - "step": 154260 - }, - { - "epoch": 0.6232703208264483, - "grad_norm": 665.3999633789062, - "learning_rate": 1.9199511381853076e-05, - "loss": 47.1354, - "step": 154270 - }, - { - "epoch": 0.6233107220918159, - "grad_norm": 415.6860656738281, - "learning_rate": 1.919611603604875e-05, - "loss": 40.6379, - "step": 154280 - }, - { - "epoch": 0.6233511233571836, - "grad_norm": 528.6532592773438, - "learning_rate": 1.919272080339412e-05, - "loss": 42.3219, - "step": 154290 - }, - { - "epoch": 0.6233915246225512, - "grad_norm": 700.0554809570312, - "learning_rate": 1.91893256839554e-05, - "loss": 58.3909, - "step": 154300 - }, - { - "epoch": 0.6234319258879188, - "grad_norm": 941.929931640625, - "learning_rate": 1.918593067779877e-05, - "loss": 63.8411, - "step": 154310 - }, - { - "epoch": 0.6234723271532865, - "grad_norm": 319.9075622558594, - "learning_rate": 1.9182535784990403e-05, - "loss": 66.3877, - "step": 154320 - }, - { - "epoch": 0.6235127284186541, - "grad_norm": 357.0832824707031, - "learning_rate": 1.9179141005596505e-05, - "loss": 60.1818, - "step": 154330 - }, - { - "epoch": 0.6235531296840217, - "grad_norm": 571.7152709960938, - "learning_rate": 1.9175746339683244e-05, - "loss": 67.6065, - "step": 154340 - }, - { - "epoch": 0.6235935309493893, - "grad_norm": 501.433349609375, - "learning_rate": 1.917235178731681e-05, - "loss": 48.194, - "step": 154350 - }, - { - "epoch": 0.6236339322147569, - "grad_norm": 1153.9691162109375, - "learning_rate": 1.916895734856338e-05, - "loss": 49.62, - "step": 154360 - }, - { - "epoch": 0.6236743334801246, - "grad_norm": 777.2158813476562, - "learning_rate": 1.916556302348912e-05, - "loss": 62.7281, - "step": 154370 - }, - { - "epoch": 0.6237147347454922, - "grad_norm": 1199.586181640625, - "learning_rate": 1.9162168812160218e-05, - "loss": 50.8841, - "step": 154380 - }, - { - "epoch": 0.6237551360108599, - "grad_norm": 1093.0955810546875, - "learning_rate": 1.9158774714642845e-05, - "loss": 50.4877, - "step": 154390 - }, - { - "epoch": 0.6237955372762275, - "grad_norm": 514.9522705078125, - "learning_rate": 1.915538073100316e-05, - "loss": 35.3574, - "step": 154400 - }, - { - "epoch": 0.6238359385415951, - "grad_norm": 630.1919555664062, - "learning_rate": 1.9151986861307344e-05, - "loss": 46.5631, - "step": 154410 - }, - { - "epoch": 0.6238763398069628, - "grad_norm": 663.0952758789062, - "learning_rate": 1.914859310562154e-05, - "loss": 48.6632, - "step": 154420 - }, - { - "epoch": 0.6239167410723304, - "grad_norm": 3295.518798828125, - "learning_rate": 1.914519946401194e-05, - "loss": 92.438, - "step": 154430 - }, - { - "epoch": 0.6239571423376981, - "grad_norm": 3211.037353515625, - "learning_rate": 1.914180593654469e-05, - "loss": 51.1125, - "step": 154440 - }, - { - "epoch": 0.6239975436030657, - "grad_norm": 391.7280578613281, - "learning_rate": 1.9138412523285936e-05, - "loss": 52.6217, - "step": 154450 - }, - { - "epoch": 0.6240379448684332, - "grad_norm": 1007.160400390625, - "learning_rate": 1.9135019224301864e-05, - "loss": 62.9541, - "step": 154460 - }, - { - "epoch": 0.6240783461338009, - "grad_norm": 1041.9798583984375, - "learning_rate": 1.9131626039658607e-05, - "loss": 51.7518, - "step": 154470 - }, - { - "epoch": 0.6241187473991685, - "grad_norm": 690.2793579101562, - "learning_rate": 1.9128232969422315e-05, - "loss": 60.4521, - "step": 154480 - }, - { - "epoch": 0.6241591486645361, - "grad_norm": 874.3128662109375, - "learning_rate": 1.9124840013659155e-05, - "loss": 50.6818, - "step": 154490 - }, - { - "epoch": 0.6241995499299038, - "grad_norm": 504.86700439453125, - "learning_rate": 1.912144717243525e-05, - "loss": 39.3337, - "step": 154500 - }, - { - "epoch": 0.6242399511952714, - "grad_norm": 1537.7491455078125, - "learning_rate": 1.9118054445816767e-05, - "loss": 56.018, - "step": 154510 - }, - { - "epoch": 0.6242803524606391, - "grad_norm": 689.298095703125, - "learning_rate": 1.9114661833869847e-05, - "loss": 41.9827, - "step": 154520 - }, - { - "epoch": 0.6243207537260067, - "grad_norm": 1061.407470703125, - "learning_rate": 1.911126933666061e-05, - "loss": 50.2677, - "step": 154530 - }, - { - "epoch": 0.6243611549913743, - "grad_norm": 347.8809509277344, - "learning_rate": 1.9107876954255217e-05, - "loss": 44.7597, - "step": 154540 - }, - { - "epoch": 0.624401556256742, - "grad_norm": 1970.9144287109375, - "learning_rate": 1.9104484686719795e-05, - "loss": 58.5685, - "step": 154550 - }, - { - "epoch": 0.6244419575221096, - "grad_norm": 253.61517333984375, - "learning_rate": 1.9101092534120478e-05, - "loss": 29.4708, - "step": 154560 - }, - { - "epoch": 0.6244823587874773, - "grad_norm": 402.3465270996094, - "learning_rate": 1.9097700496523404e-05, - "loss": 50.5293, - "step": 154570 - }, - { - "epoch": 0.6245227600528449, - "grad_norm": 1666.6761474609375, - "learning_rate": 1.9094308573994692e-05, - "loss": 66.7547, - "step": 154580 - }, - { - "epoch": 0.6245631613182124, - "grad_norm": 272.4801940917969, - "learning_rate": 1.909091676660048e-05, - "loss": 28.7191, - "step": 154590 - }, - { - "epoch": 0.6246035625835801, - "grad_norm": 1157.2830810546875, - "learning_rate": 1.908752507440689e-05, - "loss": 75.1753, - "step": 154600 - }, - { - "epoch": 0.6246439638489477, - "grad_norm": 1568.1524658203125, - "learning_rate": 1.908413349748003e-05, - "loss": 51.9157, - "step": 154610 - }, - { - "epoch": 0.6246843651143154, - "grad_norm": 797.2478637695312, - "learning_rate": 1.9080742035886045e-05, - "loss": 46.6599, - "step": 154620 - }, - { - "epoch": 0.624724766379683, - "grad_norm": 253.48960876464844, - "learning_rate": 1.9077350689691032e-05, - "loss": 49.9191, - "step": 154630 - }, - { - "epoch": 0.6247651676450506, - "grad_norm": 780.4749755859375, - "learning_rate": 1.9073959458961125e-05, - "loss": 66.0298, - "step": 154640 - }, - { - "epoch": 0.6248055689104183, - "grad_norm": 413.2430419921875, - "learning_rate": 1.907056834376243e-05, - "loss": 50.5995, - "step": 154650 - }, - { - "epoch": 0.6248459701757859, - "grad_norm": 1501.6806640625, - "learning_rate": 1.906717734416105e-05, - "loss": 50.1577, - "step": 154660 - }, - { - "epoch": 0.6248863714411536, - "grad_norm": 499.1429443359375, - "learning_rate": 1.906378646022311e-05, - "loss": 52.356, - "step": 154670 - }, - { - "epoch": 0.6249267727065212, - "grad_norm": 959.724609375, - "learning_rate": 1.9060395692014708e-05, - "loss": 59.3793, - "step": 154680 - }, - { - "epoch": 0.6249671739718888, - "grad_norm": 436.2023620605469, - "learning_rate": 1.905700503960194e-05, - "loss": 51.1567, - "step": 154690 - }, - { - "epoch": 0.6250075752372565, - "grad_norm": 669.9522094726562, - "learning_rate": 1.9053614503050928e-05, - "loss": 55.7573, - "step": 154700 - }, - { - "epoch": 0.6250479765026241, - "grad_norm": 1216.4349365234375, - "learning_rate": 1.9050224082427753e-05, - "loss": 62.0031, - "step": 154710 - }, - { - "epoch": 0.6250883777679916, - "grad_norm": 698.4359741210938, - "learning_rate": 1.9046833777798533e-05, - "loss": 63.305, - "step": 154720 - }, - { - "epoch": 0.6251287790333593, - "grad_norm": 750.6848754882812, - "learning_rate": 1.9043443589229355e-05, - "loss": 44.9028, - "step": 154730 - }, - { - "epoch": 0.6251691802987269, - "grad_norm": 688.37109375, - "learning_rate": 1.9040053516786306e-05, - "loss": 57.3157, - "step": 154740 - }, - { - "epoch": 0.6252095815640946, - "grad_norm": 384.5275573730469, - "learning_rate": 1.9036663560535483e-05, - "loss": 63.4863, - "step": 154750 - }, - { - "epoch": 0.6252499828294622, - "grad_norm": 1952.0465087890625, - "learning_rate": 1.9033273720542975e-05, - "loss": 69.0948, - "step": 154760 - }, - { - "epoch": 0.6252903840948298, - "grad_norm": 1709.10888671875, - "learning_rate": 1.902988399687486e-05, - "loss": 119.0979, - "step": 154770 - }, - { - "epoch": 0.6253307853601975, - "grad_norm": 499.8750915527344, - "learning_rate": 1.9026494389597238e-05, - "loss": 36.8402, - "step": 154780 - }, - { - "epoch": 0.6253711866255651, - "grad_norm": 612.3027954101562, - "learning_rate": 1.9023104898776176e-05, - "loss": 34.4344, - "step": 154790 - }, - { - "epoch": 0.6254115878909328, - "grad_norm": 390.22845458984375, - "learning_rate": 1.9019715524477767e-05, - "loss": 71.2229, - "step": 154800 - }, - { - "epoch": 0.6254519891563004, - "grad_norm": 1915.9998779296875, - "learning_rate": 1.9016326266768088e-05, - "loss": 57.8614, - "step": 154810 - }, - { - "epoch": 0.625492390421668, - "grad_norm": 317.40777587890625, - "learning_rate": 1.90129371257132e-05, - "loss": 52.2577, - "step": 154820 - }, - { - "epoch": 0.6255327916870357, - "grad_norm": 685.293212890625, - "learning_rate": 1.9009548101379194e-05, - "loss": 56.8706, - "step": 154830 - }, - { - "epoch": 0.6255731929524033, - "grad_norm": 803.1461791992188, - "learning_rate": 1.9006159193832125e-05, - "loss": 67.1199, - "step": 154840 - }, - { - "epoch": 0.6256135942177709, - "grad_norm": 978.5604248046875, - "learning_rate": 1.9002770403138065e-05, - "loss": 55.7089, - "step": 154850 - }, - { - "epoch": 0.6256539954831385, - "grad_norm": 819.3570556640625, - "learning_rate": 1.899938172936309e-05, - "loss": 69.4563, - "step": 154860 - }, - { - "epoch": 0.6256943967485061, - "grad_norm": 326.7989196777344, - "learning_rate": 1.8995993172573253e-05, - "loss": 62.7371, - "step": 154870 - }, - { - "epoch": 0.6257347980138738, - "grad_norm": 1138.951416015625, - "learning_rate": 1.8992604732834623e-05, - "loss": 71.6288, - "step": 154880 - }, - { - "epoch": 0.6257751992792414, - "grad_norm": 1044.2083740234375, - "learning_rate": 1.898921641021326e-05, - "loss": 30.9312, - "step": 154890 - }, - { - "epoch": 0.625815600544609, - "grad_norm": 663.309326171875, - "learning_rate": 1.8985828204775206e-05, - "loss": 43.888, - "step": 154900 - }, - { - "epoch": 0.6258560018099767, - "grad_norm": 392.0646057128906, - "learning_rate": 1.898244011658654e-05, - "loss": 57.6133, - "step": 154910 - }, - { - "epoch": 0.6258964030753443, - "grad_norm": 2559.40771484375, - "learning_rate": 1.89790521457133e-05, - "loss": 56.647, - "step": 154920 - }, - { - "epoch": 0.625936804340712, - "grad_norm": 602.9083862304688, - "learning_rate": 1.8975664292221532e-05, - "loss": 49.7372, - "step": 154930 - }, - { - "epoch": 0.6259772056060796, - "grad_norm": 1236.2510986328125, - "learning_rate": 1.89722765561773e-05, - "loss": 50.0771, - "step": 154940 - }, - { - "epoch": 0.6260176068714473, - "grad_norm": 626.7944946289062, - "learning_rate": 1.8968888937646622e-05, - "loss": 47.5724, - "step": 154950 - }, - { - "epoch": 0.6260580081368149, - "grad_norm": 599.9056396484375, - "learning_rate": 1.8965501436695577e-05, - "loss": 62.9184, - "step": 154960 - }, - { - "epoch": 0.6260984094021824, - "grad_norm": 548.3302001953125, - "learning_rate": 1.8962114053390185e-05, - "loss": 26.0454, - "step": 154970 - }, - { - "epoch": 0.6261388106675501, - "grad_norm": 329.4832458496094, - "learning_rate": 1.8958726787796477e-05, - "loss": 29.8244, - "step": 154980 - }, - { - "epoch": 0.6261792119329177, - "grad_norm": 371.7790832519531, - "learning_rate": 1.8955339639980512e-05, - "loss": 46.4422, - "step": 154990 - }, - { - "epoch": 0.6262196131982853, - "grad_norm": 636.3258056640625, - "learning_rate": 1.895195261000831e-05, - "loss": 69.783, - "step": 155000 - }, - { - "epoch": 0.626260014463653, - "grad_norm": 468.350341796875, - "learning_rate": 1.8948565697945907e-05, - "loss": 76.4184, - "step": 155010 - }, - { - "epoch": 0.6263004157290206, - "grad_norm": 1748.3941650390625, - "learning_rate": 1.894517890385933e-05, - "loss": 72.7289, - "step": 155020 - }, - { - "epoch": 0.6263408169943883, - "grad_norm": 265.7440490722656, - "learning_rate": 1.8941792227814597e-05, - "loss": 94.1757, - "step": 155030 - }, - { - "epoch": 0.6263812182597559, - "grad_norm": 1297.6448974609375, - "learning_rate": 1.893840566987776e-05, - "loss": 45.0199, - "step": 155040 - }, - { - "epoch": 0.6264216195251235, - "grad_norm": 1067.8428955078125, - "learning_rate": 1.893501923011482e-05, - "loss": 73.8872, - "step": 155050 - }, - { - "epoch": 0.6264620207904912, - "grad_norm": 720.9359741210938, - "learning_rate": 1.8931632908591796e-05, - "loss": 40.9904, - "step": 155060 - }, - { - "epoch": 0.6265024220558588, - "grad_norm": 430.0225830078125, - "learning_rate": 1.892824670537472e-05, - "loss": 51.2785, - "step": 155070 - }, - { - "epoch": 0.6265428233212265, - "grad_norm": 952.5421752929688, - "learning_rate": 1.8924860620529594e-05, - "loss": 57.799, - "step": 155080 - }, - { - "epoch": 0.6265832245865941, - "grad_norm": 811.53857421875, - "learning_rate": 1.8921474654122444e-05, - "loss": 56.84, - "step": 155090 - }, - { - "epoch": 0.6266236258519616, - "grad_norm": 1707.3125, - "learning_rate": 1.891808880621928e-05, - "loss": 52.2367, - "step": 155100 - }, - { - "epoch": 0.6266640271173293, - "grad_norm": 683.6460571289062, - "learning_rate": 1.891470307688609e-05, - "loss": 75.291, - "step": 155110 - }, - { - "epoch": 0.6267044283826969, - "grad_norm": 809.841796875, - "learning_rate": 1.891131746618891e-05, - "loss": 71.0322, - "step": 155120 - }, - { - "epoch": 0.6267448296480645, - "grad_norm": 601.0204467773438, - "learning_rate": 1.8907931974193728e-05, - "loss": 55.0767, - "step": 155130 - }, - { - "epoch": 0.6267852309134322, - "grad_norm": 1440.623779296875, - "learning_rate": 1.890454660096654e-05, - "loss": 75.6987, - "step": 155140 - }, - { - "epoch": 0.6268256321787998, - "grad_norm": 258.63116455078125, - "learning_rate": 1.890116134657336e-05, - "loss": 61.5404, - "step": 155150 - }, - { - "epoch": 0.6268660334441675, - "grad_norm": 507.3258056640625, - "learning_rate": 1.8897776211080182e-05, - "loss": 55.1314, - "step": 155160 - }, - { - "epoch": 0.6269064347095351, - "grad_norm": 755.1796264648438, - "learning_rate": 1.8894391194552997e-05, - "loss": 53.2773, - "step": 155170 - }, - { - "epoch": 0.6269468359749027, - "grad_norm": 618.8252563476562, - "learning_rate": 1.8891006297057798e-05, - "loss": 107.6506, - "step": 155180 - }, - { - "epoch": 0.6269872372402704, - "grad_norm": 376.06964111328125, - "learning_rate": 1.8887621518660577e-05, - "loss": 51.3056, - "step": 155190 - }, - { - "epoch": 0.627027638505638, - "grad_norm": 472.5889892578125, - "learning_rate": 1.888423685942732e-05, - "loss": 56.5211, - "step": 155200 - }, - { - "epoch": 0.6270680397710057, - "grad_norm": 652.8603515625, - "learning_rate": 1.8880852319424018e-05, - "loss": 37.9688, - "step": 155210 - }, - { - "epoch": 0.6271084410363733, - "grad_norm": 3448.654052734375, - "learning_rate": 1.887746789871664e-05, - "loss": 59.2658, - "step": 155220 - }, - { - "epoch": 0.6271488423017408, - "grad_norm": 392.4132995605469, - "learning_rate": 1.887408359737119e-05, - "loss": 35.5017, - "step": 155230 - }, - { - "epoch": 0.6271892435671085, - "grad_norm": 1030.261474609375, - "learning_rate": 1.8870699415453627e-05, - "loss": 54.3306, - "step": 155240 - }, - { - "epoch": 0.6272296448324761, - "grad_norm": 599.7904663085938, - "learning_rate": 1.8867315353029935e-05, - "loss": 58.7418, - "step": 155250 - }, - { - "epoch": 0.6272700460978438, - "grad_norm": 595.4382934570312, - "learning_rate": 1.886393141016609e-05, - "loss": 64.527, - "step": 155260 - }, - { - "epoch": 0.6273104473632114, - "grad_norm": 762.1326293945312, - "learning_rate": 1.886054758692806e-05, - "loss": 62.5916, - "step": 155270 - }, - { - "epoch": 0.627350848628579, - "grad_norm": 1090.7520751953125, - "learning_rate": 1.885716388338182e-05, - "loss": 55.2791, - "step": 155280 - }, - { - "epoch": 0.6273912498939467, - "grad_norm": 1541.6251220703125, - "learning_rate": 1.8853780299593332e-05, - "loss": 64.5295, - "step": 155290 - }, - { - "epoch": 0.6274316511593143, - "grad_norm": 1311.8800048828125, - "learning_rate": 1.885039683562855e-05, - "loss": 78.9983, - "step": 155300 - }, - { - "epoch": 0.627472052424682, - "grad_norm": 399.0144958496094, - "learning_rate": 1.884701349155346e-05, - "loss": 46.4417, - "step": 155310 - }, - { - "epoch": 0.6275124536900496, - "grad_norm": 1750.837890625, - "learning_rate": 1.8843630267434e-05, - "loss": 63.7231, - "step": 155320 - }, - { - "epoch": 0.6275528549554172, - "grad_norm": 1599.4073486328125, - "learning_rate": 1.8840247163336143e-05, - "loss": 41.7063, - "step": 155330 - }, - { - "epoch": 0.6275932562207849, - "grad_norm": 479.2684326171875, - "learning_rate": 1.883686417932584e-05, - "loss": 49.5772, - "step": 155340 - }, - { - "epoch": 0.6276336574861525, - "grad_norm": 518.2067260742188, - "learning_rate": 1.8833481315469042e-05, - "loss": 41.1463, - "step": 155350 - }, - { - "epoch": 0.62767405875152, - "grad_norm": 1002.8010864257812, - "learning_rate": 1.8830098571831705e-05, - "loss": 45.5635, - "step": 155360 - }, - { - "epoch": 0.6277144600168877, - "grad_norm": 305.9928894042969, - "learning_rate": 1.882671594847977e-05, - "loss": 80.5473, - "step": 155370 - }, - { - "epoch": 0.6277548612822553, - "grad_norm": 324.7518615722656, - "learning_rate": 1.8823333445479174e-05, - "loss": 44.882, - "step": 155380 - }, - { - "epoch": 0.627795262547623, - "grad_norm": 1423.6795654296875, - "learning_rate": 1.8819951062895885e-05, - "loss": 62.9079, - "step": 155390 - }, - { - "epoch": 0.6278356638129906, - "grad_norm": 1083.814453125, - "learning_rate": 1.8816568800795822e-05, - "loss": 74.6381, - "step": 155400 - }, - { - "epoch": 0.6278760650783582, - "grad_norm": 621.3231811523438, - "learning_rate": 1.8813186659244943e-05, - "loss": 70.4143, - "step": 155410 - }, - { - "epoch": 0.6279164663437259, - "grad_norm": 490.48724365234375, - "learning_rate": 1.8809804638309177e-05, - "loss": 52.3341, - "step": 155420 - }, - { - "epoch": 0.6279568676090935, - "grad_norm": 816.6148681640625, - "learning_rate": 1.880642273805445e-05, - "loss": 36.432, - "step": 155430 - }, - { - "epoch": 0.6279972688744612, - "grad_norm": 910.425048828125, - "learning_rate": 1.8803040958546707e-05, - "loss": 46.4939, - "step": 155440 - }, - { - "epoch": 0.6280376701398288, - "grad_norm": 787.06640625, - "learning_rate": 1.879965929985187e-05, - "loss": 48.2395, - "step": 155450 - }, - { - "epoch": 0.6280780714051964, - "grad_norm": 634.6830444335938, - "learning_rate": 1.8796277762035856e-05, - "loss": 28.2061, - "step": 155460 - }, - { - "epoch": 0.6281184726705641, - "grad_norm": 300.8420715332031, - "learning_rate": 1.879289634516461e-05, - "loss": 57.9607, - "step": 155470 - }, - { - "epoch": 0.6281588739359317, - "grad_norm": 638.6072387695312, - "learning_rate": 1.8789515049304038e-05, - "loss": 48.7444, - "step": 155480 - }, - { - "epoch": 0.6281992752012993, - "grad_norm": 429.5278625488281, - "learning_rate": 1.8786133874520078e-05, - "loss": 46.0567, - "step": 155490 - }, - { - "epoch": 0.6282396764666669, - "grad_norm": 405.1448059082031, - "learning_rate": 1.8782752820878634e-05, - "loss": 48.6294, - "step": 155500 - }, - { - "epoch": 0.6282800777320345, - "grad_norm": 22.98813247680664, - "learning_rate": 1.8779371888445624e-05, - "loss": 41.636, - "step": 155510 - }, - { - "epoch": 0.6283204789974022, - "grad_norm": 445.4391174316406, - "learning_rate": 1.8775991077286965e-05, - "loss": 49.7633, - "step": 155520 - }, - { - "epoch": 0.6283608802627698, - "grad_norm": 905.4905395507812, - "learning_rate": 1.8772610387468555e-05, - "loss": 84.4601, - "step": 155530 - }, - { - "epoch": 0.6284012815281375, - "grad_norm": 1306.422119140625, - "learning_rate": 1.8769229819056315e-05, - "loss": 66.4575, - "step": 155540 - }, - { - "epoch": 0.6284416827935051, - "grad_norm": 857.094970703125, - "learning_rate": 1.8765849372116153e-05, - "loss": 54.3967, - "step": 155550 - }, - { - "epoch": 0.6284820840588727, - "grad_norm": 795.1137084960938, - "learning_rate": 1.8762469046713956e-05, - "loss": 75.3259, - "step": 155560 - }, - { - "epoch": 0.6285224853242404, - "grad_norm": 763.9700317382812, - "learning_rate": 1.8759088842915644e-05, - "loss": 48.5607, - "step": 155570 - }, - { - "epoch": 0.628562886589608, - "grad_norm": 655.9850463867188, - "learning_rate": 1.8755708760787113e-05, - "loss": 52.3753, - "step": 155580 - }, - { - "epoch": 0.6286032878549757, - "grad_norm": 505.8095703125, - "learning_rate": 1.8752328800394242e-05, - "loss": 63.009, - "step": 155590 - }, - { - "epoch": 0.6286436891203433, - "grad_norm": 500.4537353515625, - "learning_rate": 1.8748948961802948e-05, - "loss": 42.3465, - "step": 155600 - }, - { - "epoch": 0.6286840903857108, - "grad_norm": 431.3822021484375, - "learning_rate": 1.8745569245079104e-05, - "loss": 49.0103, - "step": 155610 - }, - { - "epoch": 0.6287244916510785, - "grad_norm": 1207.086669921875, - "learning_rate": 1.8742189650288615e-05, - "loss": 47.9568, - "step": 155620 - }, - { - "epoch": 0.6287648929164461, - "grad_norm": 440.156982421875, - "learning_rate": 1.8738810177497365e-05, - "loss": 38.8417, - "step": 155630 - }, - { - "epoch": 0.6288052941818137, - "grad_norm": 274.6180725097656, - "learning_rate": 1.873543082677122e-05, - "loss": 31.751, - "step": 155640 - }, - { - "epoch": 0.6288456954471814, - "grad_norm": 376.49639892578125, - "learning_rate": 1.8732051598176086e-05, - "loss": 63.3421, - "step": 155650 - }, - { - "epoch": 0.628886096712549, - "grad_norm": 2292.678955078125, - "learning_rate": 1.872867249177783e-05, - "loss": 93.0058, - "step": 155660 - }, - { - "epoch": 0.6289264979779167, - "grad_norm": 803.1299438476562, - "learning_rate": 1.872529350764233e-05, - "loss": 37.7863, - "step": 155670 - }, - { - "epoch": 0.6289668992432843, - "grad_norm": 1643.458984375, - "learning_rate": 1.872191464583547e-05, - "loss": 78.7152, - "step": 155680 - }, - { - "epoch": 0.629007300508652, - "grad_norm": 673.4326171875, - "learning_rate": 1.8718535906423106e-05, - "loss": 43.4824, - "step": 155690 - }, - { - "epoch": 0.6290477017740196, - "grad_norm": 939.1830444335938, - "learning_rate": 1.871515728947113e-05, - "loss": 47.4452, - "step": 155700 - }, - { - "epoch": 0.6290881030393872, - "grad_norm": 1257.9764404296875, - "learning_rate": 1.8711778795045398e-05, - "loss": 52.2282, - "step": 155710 - }, - { - "epoch": 0.6291285043047549, - "grad_norm": 939.0426025390625, - "learning_rate": 1.8708400423211764e-05, - "loss": 37.8956, - "step": 155720 - }, - { - "epoch": 0.6291689055701225, - "grad_norm": 613.3606567382812, - "learning_rate": 1.8705022174036114e-05, - "loss": 40.0705, - "step": 155730 - }, - { - "epoch": 0.62920930683549, - "grad_norm": 487.5195007324219, - "learning_rate": 1.8701644047584293e-05, - "loss": 30.0292, - "step": 155740 - }, - { - "epoch": 0.6292497081008577, - "grad_norm": 642.8336181640625, - "learning_rate": 1.869826604392216e-05, - "loss": 49.1987, - "step": 155750 - }, - { - "epoch": 0.6292901093662253, - "grad_norm": 501.4791564941406, - "learning_rate": 1.869488816311558e-05, - "loss": 42.2183, - "step": 155760 - }, - { - "epoch": 0.629330510631593, - "grad_norm": 763.4742431640625, - "learning_rate": 1.86915104052304e-05, - "loss": 67.459, - "step": 155770 - }, - { - "epoch": 0.6293709118969606, - "grad_norm": 464.489013671875, - "learning_rate": 1.8688132770332476e-05, - "loss": 60.1081, - "step": 155780 - }, - { - "epoch": 0.6294113131623282, - "grad_norm": 241.61941528320312, - "learning_rate": 1.8684755258487653e-05, - "loss": 76.872, - "step": 155790 - }, - { - "epoch": 0.6294517144276959, - "grad_norm": 514.704345703125, - "learning_rate": 1.868137786976177e-05, - "loss": 67.6247, - "step": 155800 - }, - { - "epoch": 0.6294921156930635, - "grad_norm": 685.1314086914062, - "learning_rate": 1.8678000604220683e-05, - "loss": 48.1277, - "step": 155810 - }, - { - "epoch": 0.6295325169584312, - "grad_norm": 363.3772277832031, - "learning_rate": 1.8674623461930233e-05, - "loss": 64.7407, - "step": 155820 - }, - { - "epoch": 0.6295729182237988, - "grad_norm": 1206.5020751953125, - "learning_rate": 1.8671246442956243e-05, - "loss": 47.2309, - "step": 155830 - }, - { - "epoch": 0.6296133194891664, - "grad_norm": 1087.311279296875, - "learning_rate": 1.8667869547364576e-05, - "loss": 55.6458, - "step": 155840 - }, - { - "epoch": 0.6296537207545341, - "grad_norm": 1133.510498046875, - "learning_rate": 1.8664492775221042e-05, - "loss": 56.3427, - "step": 155850 - }, - { - "epoch": 0.6296941220199017, - "grad_norm": 513.0890502929688, - "learning_rate": 1.866111612659149e-05, - "loss": 52.5945, - "step": 155860 - }, - { - "epoch": 0.6297345232852692, - "grad_norm": 1111.2569580078125, - "learning_rate": 1.865773960154174e-05, - "loss": 44.945, - "step": 155870 - }, - { - "epoch": 0.6297749245506369, - "grad_norm": 566.517333984375, - "learning_rate": 1.865436320013762e-05, - "loss": 41.3349, - "step": 155880 - }, - { - "epoch": 0.6298153258160045, - "grad_norm": 1226.1885986328125, - "learning_rate": 1.865098692244496e-05, - "loss": 61.058, - "step": 155890 - }, - { - "epoch": 0.6298557270813722, - "grad_norm": 642.2447509765625, - "learning_rate": 1.864761076852958e-05, - "loss": 55.2996, - "step": 155900 - }, - { - "epoch": 0.6298961283467398, - "grad_norm": 963.623291015625, - "learning_rate": 1.864423473845729e-05, - "loss": 75.762, - "step": 155910 - }, - { - "epoch": 0.6299365296121074, - "grad_norm": 656.4022216796875, - "learning_rate": 1.864085883229392e-05, - "loss": 30.6479, - "step": 155920 - }, - { - "epoch": 0.6299769308774751, - "grad_norm": 972.303466796875, - "learning_rate": 1.8637483050105274e-05, - "loss": 55.5703, - "step": 155930 - }, - { - "epoch": 0.6300173321428427, - "grad_norm": 0.0, - "learning_rate": 1.8634107391957186e-05, - "loss": 34.8945, - "step": 155940 - }, - { - "epoch": 0.6300577334082104, - "grad_norm": 655.4844360351562, - "learning_rate": 1.863073185791545e-05, - "loss": 32.5324, - "step": 155950 - }, - { - "epoch": 0.630098134673578, - "grad_norm": 3676.433349609375, - "learning_rate": 1.8627356448045867e-05, - "loss": 62.8763, - "step": 155960 - }, - { - "epoch": 0.6301385359389456, - "grad_norm": 1670.3465576171875, - "learning_rate": 1.8623981162414263e-05, - "loss": 54.1714, - "step": 155970 - }, - { - "epoch": 0.6301789372043133, - "grad_norm": 976.9774169921875, - "learning_rate": 1.862060600108642e-05, - "loss": 49.8334, - "step": 155980 - }, - { - "epoch": 0.6302193384696809, - "grad_norm": 180.9348602294922, - "learning_rate": 1.861723096412814e-05, - "loss": 39.5339, - "step": 155990 - }, - { - "epoch": 0.6302597397350485, - "grad_norm": 754.650634765625, - "learning_rate": 1.8613856051605243e-05, - "loss": 24.7535, - "step": 156000 - }, - { - "epoch": 0.6303001410004161, - "grad_norm": 121.3849105834961, - "learning_rate": 1.8610481263583496e-05, - "loss": 27.1816, - "step": 156010 - }, - { - "epoch": 0.6303405422657837, - "grad_norm": 372.6917724609375, - "learning_rate": 1.8607106600128715e-05, - "loss": 54.0215, - "step": 156020 - }, - { - "epoch": 0.6303809435311514, - "grad_norm": 767.4069213867188, - "learning_rate": 1.8603732061306683e-05, - "loss": 37.4301, - "step": 156030 - }, - { - "epoch": 0.630421344796519, - "grad_norm": 181.80638122558594, - "learning_rate": 1.8600357647183185e-05, - "loss": 99.4061, - "step": 156040 - }, - { - "epoch": 0.6304617460618867, - "grad_norm": 380.4937438964844, - "learning_rate": 1.8596983357824012e-05, - "loss": 39.366, - "step": 156050 - }, - { - "epoch": 0.6305021473272543, - "grad_norm": 316.6199951171875, - "learning_rate": 1.8593609193294947e-05, - "loss": 47.472, - "step": 156060 - }, - { - "epoch": 0.6305425485926219, - "grad_norm": 0.0, - "learning_rate": 1.8590235153661757e-05, - "loss": 47.0838, - "step": 156070 - }, - { - "epoch": 0.6305829498579896, - "grad_norm": 1608.3348388671875, - "learning_rate": 1.8586861238990244e-05, - "loss": 49.0129, - "step": 156080 - }, - { - "epoch": 0.6306233511233572, - "grad_norm": 1044.3524169921875, - "learning_rate": 1.858348744934616e-05, - "loss": 52.0046, - "step": 156090 - }, - { - "epoch": 0.6306637523887249, - "grad_norm": 1383.7996826171875, - "learning_rate": 1.8580113784795305e-05, - "loss": 70.3039, - "step": 156100 - }, - { - "epoch": 0.6307041536540925, - "grad_norm": 833.0409545898438, - "learning_rate": 1.857674024540344e-05, - "loss": 50.6667, - "step": 156110 - }, - { - "epoch": 0.6307445549194601, - "grad_norm": 1461.901611328125, - "learning_rate": 1.8573366831236323e-05, - "loss": 54.4854, - "step": 156120 - }, - { - "epoch": 0.6307849561848277, - "grad_norm": 676.2404174804688, - "learning_rate": 1.856999354235973e-05, - "loss": 72.485, - "step": 156130 - }, - { - "epoch": 0.6308253574501953, - "grad_norm": 532.0584716796875, - "learning_rate": 1.8566620378839417e-05, - "loss": 42.6068, - "step": 156140 - }, - { - "epoch": 0.6308657587155629, - "grad_norm": 513.3949584960938, - "learning_rate": 1.856324734074116e-05, - "loss": 93.1572, - "step": 156150 - }, - { - "epoch": 0.6309061599809306, - "grad_norm": 1914.272216796875, - "learning_rate": 1.8559874428130706e-05, - "loss": 65.1624, - "step": 156160 - }, - { - "epoch": 0.6309465612462982, - "grad_norm": 542.9099731445312, - "learning_rate": 1.8556501641073813e-05, - "loss": 49.7561, - "step": 156170 - }, - { - "epoch": 0.6309869625116659, - "grad_norm": 781.0393676757812, - "learning_rate": 1.8553128979636243e-05, - "loss": 47.1173, - "step": 156180 - }, - { - "epoch": 0.6310273637770335, - "grad_norm": 813.7147827148438, - "learning_rate": 1.8549756443883746e-05, - "loss": 39.8702, - "step": 156190 - }, - { - "epoch": 0.6310677650424011, - "grad_norm": 1207.70849609375, - "learning_rate": 1.8546384033882062e-05, - "loss": 84.3893, - "step": 156200 - }, - { - "epoch": 0.6311081663077688, - "grad_norm": 583.6326904296875, - "learning_rate": 1.8543011749696944e-05, - "loss": 72.3307, - "step": 156210 - }, - { - "epoch": 0.6311485675731364, - "grad_norm": 815.402587890625, - "learning_rate": 1.8539639591394133e-05, - "loss": 54.2483, - "step": 156220 - }, - { - "epoch": 0.6311889688385041, - "grad_norm": 412.4958190917969, - "learning_rate": 1.8536267559039384e-05, - "loss": 55.6868, - "step": 156230 - }, - { - "epoch": 0.6312293701038717, - "grad_norm": 780.7228393554688, - "learning_rate": 1.8532895652698422e-05, - "loss": 52.2046, - "step": 156240 - }, - { - "epoch": 0.6312697713692392, - "grad_norm": 401.5675048828125, - "learning_rate": 1.852952387243698e-05, - "loss": 46.8964, - "step": 156250 - }, - { - "epoch": 0.6313101726346069, - "grad_norm": 464.23828125, - "learning_rate": 1.8526152218320815e-05, - "loss": 44.0663, - "step": 156260 - }, - { - "epoch": 0.6313505738999745, - "grad_norm": 363.0769958496094, - "learning_rate": 1.852278069041564e-05, - "loss": 69.285, - "step": 156270 - }, - { - "epoch": 0.6313909751653421, - "grad_norm": 281.2511901855469, - "learning_rate": 1.851940928878718e-05, - "loss": 51.6398, - "step": 156280 - }, - { - "epoch": 0.6314313764307098, - "grad_norm": 965.007080078125, - "learning_rate": 1.8516038013501188e-05, - "loss": 43.5258, - "step": 156290 - }, - { - "epoch": 0.6314717776960774, - "grad_norm": 1698.4444580078125, - "learning_rate": 1.8512666864623365e-05, - "loss": 76.7266, - "step": 156300 - }, - { - "epoch": 0.6315121789614451, - "grad_norm": 494.7231140136719, - "learning_rate": 1.8509295842219448e-05, - "loss": 43.2274, - "step": 156310 - }, - { - "epoch": 0.6315525802268127, - "grad_norm": 723.9692993164062, - "learning_rate": 1.8505924946355147e-05, - "loss": 49.6911, - "step": 156320 - }, - { - "epoch": 0.6315929814921803, - "grad_norm": 632.7535400390625, - "learning_rate": 1.8502554177096177e-05, - "loss": 104.3508, - "step": 156330 - }, - { - "epoch": 0.631633382757548, - "grad_norm": 908.9913940429688, - "learning_rate": 1.8499183534508263e-05, - "loss": 58.2353, - "step": 156340 - }, - { - "epoch": 0.6316737840229156, - "grad_norm": 802.5706787109375, - "learning_rate": 1.8495813018657116e-05, - "loss": 40.5866, - "step": 156350 - }, - { - "epoch": 0.6317141852882833, - "grad_norm": 604.8616333007812, - "learning_rate": 1.8492442629608434e-05, - "loss": 33.7931, - "step": 156360 - }, - { - "epoch": 0.6317545865536509, - "grad_norm": 625.544189453125, - "learning_rate": 1.848907236742794e-05, - "loss": 43.9119, - "step": 156370 - }, - { - "epoch": 0.6317949878190184, - "grad_norm": 3475.124755859375, - "learning_rate": 1.848570223218133e-05, - "loss": 60.458, - "step": 156380 - }, - { - "epoch": 0.6318353890843861, - "grad_norm": 398.1371765136719, - "learning_rate": 1.8482332223934314e-05, - "loss": 62.6251, - "step": 156390 - }, - { - "epoch": 0.6318757903497537, - "grad_norm": 893.6590576171875, - "learning_rate": 1.8478962342752583e-05, - "loss": 50.1743, - "step": 156400 - }, - { - "epoch": 0.6319161916151214, - "grad_norm": 1160.157470703125, - "learning_rate": 1.847559258870183e-05, - "loss": 41.7392, - "step": 156410 - }, - { - "epoch": 0.631956592880489, - "grad_norm": 1044.7193603515625, - "learning_rate": 1.847222296184777e-05, - "loss": 69.7084, - "step": 156420 - }, - { - "epoch": 0.6319969941458566, - "grad_norm": 606.6983032226562, - "learning_rate": 1.8468853462256085e-05, - "loss": 34.135, - "step": 156430 - }, - { - "epoch": 0.6320373954112243, - "grad_norm": 560.6671752929688, - "learning_rate": 1.846548408999245e-05, - "loss": 48.3755, - "step": 156440 - }, - { - "epoch": 0.6320777966765919, - "grad_norm": 376.26239013671875, - "learning_rate": 1.846211484512258e-05, - "loss": 38.9917, - "step": 156450 - }, - { - "epoch": 0.6321181979419596, - "grad_norm": 395.11138916015625, - "learning_rate": 1.8458745727712144e-05, - "loss": 41.6774, - "step": 156460 - }, - { - "epoch": 0.6321585992073272, - "grad_norm": 710.415283203125, - "learning_rate": 1.845537673782683e-05, - "loss": 53.0831, - "step": 156470 - }, - { - "epoch": 0.6321990004726948, - "grad_norm": 686.17919921875, - "learning_rate": 1.8452007875532317e-05, - "loss": 99.8278, - "step": 156480 - }, - { - "epoch": 0.6322394017380625, - "grad_norm": 486.0894775390625, - "learning_rate": 1.844863914089427e-05, - "loss": 42.6447, - "step": 156490 - }, - { - "epoch": 0.6322798030034301, - "grad_norm": 505.011962890625, - "learning_rate": 1.8445270533978388e-05, - "loss": 28.8061, - "step": 156500 - }, - { - "epoch": 0.6323202042687976, - "grad_norm": 680.4307861328125, - "learning_rate": 1.844190205485033e-05, - "loss": 57.4097, - "step": 156510 - }, - { - "epoch": 0.6323606055341653, - "grad_norm": 515.7891845703125, - "learning_rate": 1.8438533703575754e-05, - "loss": 55.8497, - "step": 156520 - }, - { - "epoch": 0.6324010067995329, - "grad_norm": 468.45953369140625, - "learning_rate": 1.8435165480220356e-05, - "loss": 63.9403, - "step": 156530 - }, - { - "epoch": 0.6324414080649006, - "grad_norm": 472.6759948730469, - "learning_rate": 1.8431797384849783e-05, - "loss": 24.1151, - "step": 156540 - }, - { - "epoch": 0.6324818093302682, - "grad_norm": 1147.822509765625, - "learning_rate": 1.84284294175297e-05, - "loss": 57.033, - "step": 156550 - }, - { - "epoch": 0.6325222105956358, - "grad_norm": 2694.32666015625, - "learning_rate": 1.8425061578325772e-05, - "loss": 46.6325, - "step": 156560 - }, - { - "epoch": 0.6325626118610035, - "grad_norm": 987.6583251953125, - "learning_rate": 1.8421693867303653e-05, - "loss": 47.813, - "step": 156570 - }, - { - "epoch": 0.6326030131263711, - "grad_norm": 856.338134765625, - "learning_rate": 1.8418326284528996e-05, - "loss": 48.7433, - "step": 156580 - }, - { - "epoch": 0.6326434143917388, - "grad_norm": 519.7534790039062, - "learning_rate": 1.8414958830067464e-05, - "loss": 42.5235, - "step": 156590 - }, - { - "epoch": 0.6326838156571064, - "grad_norm": 1199.5218505859375, - "learning_rate": 1.841159150398469e-05, - "loss": 70.6071, - "step": 156600 - }, - { - "epoch": 0.632724216922474, - "grad_norm": 636.864990234375, - "learning_rate": 1.8408224306346335e-05, - "loss": 42.0584, - "step": 156610 - }, - { - "epoch": 0.6327646181878417, - "grad_norm": 765.359130859375, - "learning_rate": 1.8404857237218038e-05, - "loss": 65.0647, - "step": 156620 - }, - { - "epoch": 0.6328050194532093, - "grad_norm": 180.48377990722656, - "learning_rate": 1.8401490296665445e-05, - "loss": 50.7906, - "step": 156630 - }, - { - "epoch": 0.6328454207185769, - "grad_norm": 529.7195434570312, - "learning_rate": 1.8398123484754203e-05, - "loss": 43.8623, - "step": 156640 - }, - { - "epoch": 0.6328858219839445, - "grad_norm": 915.6985473632812, - "learning_rate": 1.839475680154994e-05, - "loss": 71.13, - "step": 156650 - }, - { - "epoch": 0.6329262232493121, - "grad_norm": 469.94805908203125, - "learning_rate": 1.8391390247118295e-05, - "loss": 45.6889, - "step": 156660 - }, - { - "epoch": 0.6329666245146798, - "grad_norm": 915.044189453125, - "learning_rate": 1.838802382152489e-05, - "loss": 50.4943, - "step": 156670 - }, - { - "epoch": 0.6330070257800474, - "grad_norm": 647.909423828125, - "learning_rate": 1.8384657524835376e-05, - "loss": 51.613, - "step": 156680 - }, - { - "epoch": 0.6330474270454151, - "grad_norm": 954.224365234375, - "learning_rate": 1.8381291357115367e-05, - "loss": 69.1072, - "step": 156690 - }, - { - "epoch": 0.6330878283107827, - "grad_norm": 1955.664306640625, - "learning_rate": 1.8377925318430477e-05, - "loss": 89.2116, - "step": 156700 - }, - { - "epoch": 0.6331282295761503, - "grad_norm": 177.62167358398438, - "learning_rate": 1.8374559408846357e-05, - "loss": 65.1419, - "step": 156710 - }, - { - "epoch": 0.633168630841518, - "grad_norm": 12293.146484375, - "learning_rate": 1.8371193628428613e-05, - "loss": 78.1634, - "step": 156720 - }, - { - "epoch": 0.6332090321068856, - "grad_norm": 689.4757690429688, - "learning_rate": 1.8367827977242858e-05, - "loss": 52.0371, - "step": 156730 - }, - { - "epoch": 0.6332494333722533, - "grad_norm": 527.7891845703125, - "learning_rate": 1.8364462455354714e-05, - "loss": 65.2768, - "step": 156740 - }, - { - "epoch": 0.6332898346376209, - "grad_norm": 976.8132934570312, - "learning_rate": 1.8361097062829778e-05, - "loss": 66.5185, - "step": 156750 - }, - { - "epoch": 0.6333302359029885, - "grad_norm": 612.609130859375, - "learning_rate": 1.8357731799733686e-05, - "loss": 37.7978, - "step": 156760 - }, - { - "epoch": 0.6333706371683561, - "grad_norm": 514.4799194335938, - "learning_rate": 1.8354366666132035e-05, - "loss": 45.4972, - "step": 156770 - }, - { - "epoch": 0.6334110384337237, - "grad_norm": 1488.8907470703125, - "learning_rate": 1.8351001662090412e-05, - "loss": 64.7894, - "step": 156780 - }, - { - "epoch": 0.6334514396990913, - "grad_norm": 492.0083923339844, - "learning_rate": 1.8347636787674442e-05, - "loss": 48.8563, - "step": 156790 - }, - { - "epoch": 0.633491840964459, - "grad_norm": 897.5473022460938, - "learning_rate": 1.8344272042949724e-05, - "loss": 43.1116, - "step": 156800 - }, - { - "epoch": 0.6335322422298266, - "grad_norm": 419.75213623046875, - "learning_rate": 1.8340907427981843e-05, - "loss": 54.2634, - "step": 156810 - }, - { - "epoch": 0.6335726434951943, - "grad_norm": 604.8643188476562, - "learning_rate": 1.8337542942836406e-05, - "loss": 53.386, - "step": 156820 - }, - { - "epoch": 0.6336130447605619, - "grad_norm": 620.7161254882812, - "learning_rate": 1.8334178587578988e-05, - "loss": 79.5961, - "step": 156830 - }, - { - "epoch": 0.6336534460259295, - "grad_norm": 888.98388671875, - "learning_rate": 1.8330814362275198e-05, - "loss": 29.9113, - "step": 156840 - }, - { - "epoch": 0.6336938472912972, - "grad_norm": 833.1226196289062, - "learning_rate": 1.8327450266990616e-05, - "loss": 72.1005, - "step": 156850 - }, - { - "epoch": 0.6337342485566648, - "grad_norm": 370.32244873046875, - "learning_rate": 1.832408630179082e-05, - "loss": 36.1181, - "step": 156860 - }, - { - "epoch": 0.6337746498220325, - "grad_norm": 430.3398132324219, - "learning_rate": 1.8320722466741404e-05, - "loss": 38.9571, - "step": 156870 - }, - { - "epoch": 0.6338150510874001, - "grad_norm": 531.71435546875, - "learning_rate": 1.8317358761907942e-05, - "loss": 30.0332, - "step": 156880 - }, - { - "epoch": 0.6338554523527676, - "grad_norm": 398.49224853515625, - "learning_rate": 1.8313995187356004e-05, - "loss": 49.0851, - "step": 156890 - }, - { - "epoch": 0.6338958536181353, - "grad_norm": 516.3076782226562, - "learning_rate": 1.8310631743151185e-05, - "loss": 41.1616, - "step": 156900 - }, - { - "epoch": 0.6339362548835029, - "grad_norm": 267.231201171875, - "learning_rate": 1.830726842935904e-05, - "loss": 62.2498, - "step": 156910 - }, - { - "epoch": 0.6339766561488706, - "grad_norm": 692.8582763671875, - "learning_rate": 1.8303905246045138e-05, - "loss": 38.564, - "step": 156920 - }, - { - "epoch": 0.6340170574142382, - "grad_norm": 1075.482666015625, - "learning_rate": 1.8300542193275057e-05, - "loss": 41.1674, - "step": 156930 - }, - { - "epoch": 0.6340574586796058, - "grad_norm": 692.3712768554688, - "learning_rate": 1.8297179271114346e-05, - "loss": 73.7916, - "step": 156940 - }, - { - "epoch": 0.6340978599449735, - "grad_norm": 613.9649047851562, - "learning_rate": 1.8293816479628583e-05, - "loss": 47.4327, - "step": 156950 - }, - { - "epoch": 0.6341382612103411, - "grad_norm": 472.95611572265625, - "learning_rate": 1.829045381888332e-05, - "loss": 51.9652, - "step": 156960 - }, - { - "epoch": 0.6341786624757088, - "grad_norm": 743.0994262695312, - "learning_rate": 1.828709128894411e-05, - "loss": 49.6867, - "step": 156970 - }, - { - "epoch": 0.6342190637410764, - "grad_norm": 777.2012939453125, - "learning_rate": 1.8283728889876513e-05, - "loss": 43.0576, - "step": 156980 - }, - { - "epoch": 0.634259465006444, - "grad_norm": 518.1323852539062, - "learning_rate": 1.828036662174608e-05, - "loss": 38.773, - "step": 156990 - }, - { - "epoch": 0.6342998662718117, - "grad_norm": 864.1792602539062, - "learning_rate": 1.827700448461836e-05, - "loss": 58.8282, - "step": 157000 - }, - { - "epoch": 0.6343402675371793, - "grad_norm": 938.5631713867188, - "learning_rate": 1.8273642478558903e-05, - "loss": 84.6849, - "step": 157010 - }, - { - "epoch": 0.6343806688025468, - "grad_norm": 0.0, - "learning_rate": 1.8270280603633234e-05, - "loss": 38.4407, - "step": 157020 - }, - { - "epoch": 0.6344210700679145, - "grad_norm": 757.0884399414062, - "learning_rate": 1.8266918859906922e-05, - "loss": 48.6319, - "step": 157030 - }, - { - "epoch": 0.6344614713332821, - "grad_norm": 1165.9722900390625, - "learning_rate": 1.8263557247445495e-05, - "loss": 76.0864, - "step": 157040 - }, - { - "epoch": 0.6345018725986498, - "grad_norm": 454.1170959472656, - "learning_rate": 1.8260195766314476e-05, - "loss": 44.9781, - "step": 157050 - }, - { - "epoch": 0.6345422738640174, - "grad_norm": 1561.450927734375, - "learning_rate": 1.825683441657942e-05, - "loss": 50.8106, - "step": 157060 - }, - { - "epoch": 0.634582675129385, - "grad_norm": 898.5762939453125, - "learning_rate": 1.8253473198305846e-05, - "loss": 27.4869, - "step": 157070 - }, - { - "epoch": 0.6346230763947527, - "grad_norm": 562.4152221679688, - "learning_rate": 1.8250112111559287e-05, - "loss": 62.3948, - "step": 157080 - }, - { - "epoch": 0.6346634776601203, - "grad_norm": 672.2083740234375, - "learning_rate": 1.824675115640527e-05, - "loss": 61.7726, - "step": 157090 - }, - { - "epoch": 0.634703878925488, - "grad_norm": 926.6039428710938, - "learning_rate": 1.8243390332909303e-05, - "loss": 79.0976, - "step": 157100 - }, - { - "epoch": 0.6347442801908556, - "grad_norm": 400.0118713378906, - "learning_rate": 1.8240029641136935e-05, - "loss": 34.7595, - "step": 157110 - }, - { - "epoch": 0.6347846814562232, - "grad_norm": 566.873291015625, - "learning_rate": 1.8236669081153657e-05, - "loss": 49.1967, - "step": 157120 - }, - { - "epoch": 0.6348250827215909, - "grad_norm": 396.2868347167969, - "learning_rate": 1.8233308653025006e-05, - "loss": 68.7327, - "step": 157130 - }, - { - "epoch": 0.6348654839869585, - "grad_norm": 463.4564514160156, - "learning_rate": 1.8229948356816486e-05, - "loss": 48.3621, - "step": 157140 - }, - { - "epoch": 0.634905885252326, - "grad_norm": 510.2298278808594, - "learning_rate": 1.8226588192593605e-05, - "loss": 41.7354, - "step": 157150 - }, - { - "epoch": 0.6349462865176937, - "grad_norm": 630.56689453125, - "learning_rate": 1.822322816042188e-05, - "loss": 51.7539, - "step": 157160 - }, - { - "epoch": 0.6349866877830613, - "grad_norm": 533.6231689453125, - "learning_rate": 1.8219868260366802e-05, - "loss": 35.9532, - "step": 157170 - }, - { - "epoch": 0.635027089048429, - "grad_norm": 819.6351318359375, - "learning_rate": 1.8216508492493886e-05, - "loss": 70.248, - "step": 157180 - }, - { - "epoch": 0.6350674903137966, - "grad_norm": 964.2184448242188, - "learning_rate": 1.8213148856868634e-05, - "loss": 55.642, - "step": 157190 - }, - { - "epoch": 0.6351078915791643, - "grad_norm": 669.3191528320312, - "learning_rate": 1.820978935355653e-05, - "loss": 46.5878, - "step": 157200 - }, - { - "epoch": 0.6351482928445319, - "grad_norm": 1667.268310546875, - "learning_rate": 1.8206429982623086e-05, - "loss": 59.5129, - "step": 157210 - }, - { - "epoch": 0.6351886941098995, - "grad_norm": 640.9869995117188, - "learning_rate": 1.820307074413379e-05, - "loss": 47.4462, - "step": 157220 - }, - { - "epoch": 0.6352290953752672, - "grad_norm": 817.10986328125, - "learning_rate": 1.8199711638154116e-05, - "loss": 74.9785, - "step": 157230 - }, - { - "epoch": 0.6352694966406348, - "grad_norm": 666.6509399414062, - "learning_rate": 1.8196352664749576e-05, - "loss": 54.3086, - "step": 157240 - }, - { - "epoch": 0.6353098979060025, - "grad_norm": 1946.0594482421875, - "learning_rate": 1.8192993823985643e-05, - "loss": 49.1748, - "step": 157250 - }, - { - "epoch": 0.6353502991713701, - "grad_norm": 852.0538330078125, - "learning_rate": 1.81896351159278e-05, - "loss": 61.9614, - "step": 157260 - }, - { - "epoch": 0.6353907004367377, - "grad_norm": 352.1046142578125, - "learning_rate": 1.8186276540641527e-05, - "loss": 31.2032, - "step": 157270 - }, - { - "epoch": 0.6354311017021053, - "grad_norm": 2632.97216796875, - "learning_rate": 1.818291809819229e-05, - "loss": 62.7898, - "step": 157280 - }, - { - "epoch": 0.6354715029674729, - "grad_norm": 795.597412109375, - "learning_rate": 1.8179559788645583e-05, - "loss": 55.3189, - "step": 157290 - }, - { - "epoch": 0.6355119042328405, - "grad_norm": 661.72314453125, - "learning_rate": 1.817620161206687e-05, - "loss": 72.9509, - "step": 157300 - }, - { - "epoch": 0.6355523054982082, - "grad_norm": 1691.134521484375, - "learning_rate": 1.8172843568521613e-05, - "loss": 67.871, - "step": 157310 - }, - { - "epoch": 0.6355927067635758, - "grad_norm": 601.0391235351562, - "learning_rate": 1.8169485658075298e-05, - "loss": 58.32, - "step": 157320 - }, - { - "epoch": 0.6356331080289435, - "grad_norm": 564.531005859375, - "learning_rate": 1.8166127880793372e-05, - "loss": 69.3806, - "step": 157330 - }, - { - "epoch": 0.6356735092943111, - "grad_norm": 731.717529296875, - "learning_rate": 1.8162770236741294e-05, - "loss": 51.7775, - "step": 157340 - }, - { - "epoch": 0.6357139105596787, - "grad_norm": 379.14654541015625, - "learning_rate": 1.8159412725984543e-05, - "loss": 36.2211, - "step": 157350 - }, - { - "epoch": 0.6357543118250464, - "grad_norm": 1579.4599609375, - "learning_rate": 1.8156055348588546e-05, - "loss": 76.9737, - "step": 157360 - }, - { - "epoch": 0.635794713090414, - "grad_norm": 266.6926574707031, - "learning_rate": 1.8152698104618785e-05, - "loss": 53.4006, - "step": 157370 - }, - { - "epoch": 0.6358351143557817, - "grad_norm": 561.2648315429688, - "learning_rate": 1.8149340994140702e-05, - "loss": 62.8926, - "step": 157380 - }, - { - "epoch": 0.6358755156211493, - "grad_norm": 284.5917053222656, - "learning_rate": 1.814598401721973e-05, - "loss": 51.397, - "step": 157390 - }, - { - "epoch": 0.6359159168865169, - "grad_norm": 1907.217529296875, - "learning_rate": 1.8142627173921338e-05, - "loss": 55.0741, - "step": 157400 - }, - { - "epoch": 0.6359563181518845, - "grad_norm": 357.5486145019531, - "learning_rate": 1.813927046431096e-05, - "loss": 71.2046, - "step": 157410 - }, - { - "epoch": 0.6359967194172521, - "grad_norm": 526.5702514648438, - "learning_rate": 1.8135913888454033e-05, - "loss": 24.1797, - "step": 157420 - }, - { - "epoch": 0.6360371206826197, - "grad_norm": 672.9292602539062, - "learning_rate": 1.8132557446416005e-05, - "loss": 44.7071, - "step": 157430 - }, - { - "epoch": 0.6360775219479874, - "grad_norm": 344.2654113769531, - "learning_rate": 1.812920113826229e-05, - "loss": 56.6532, - "step": 157440 - }, - { - "epoch": 0.636117923213355, - "grad_norm": 781.5308227539062, - "learning_rate": 1.8125844964058354e-05, - "loss": 50.7511, - "step": 157450 - }, - { - "epoch": 0.6361583244787227, - "grad_norm": 2054.18408203125, - "learning_rate": 1.8122488923869605e-05, - "loss": 55.7488, - "step": 157460 - }, - { - "epoch": 0.6361987257440903, - "grad_norm": 585.6814575195312, - "learning_rate": 1.8119133017761465e-05, - "loss": 48.2461, - "step": 157470 - }, - { - "epoch": 0.636239127009458, - "grad_norm": 720.3282470703125, - "learning_rate": 1.811577724579938e-05, - "loss": 42.073, - "step": 157480 - }, - { - "epoch": 0.6362795282748256, - "grad_norm": 479.0511169433594, - "learning_rate": 1.8112421608048765e-05, - "loss": 53.2484, - "step": 157490 - }, - { - "epoch": 0.6363199295401932, - "grad_norm": 179.38345336914062, - "learning_rate": 1.8109066104575023e-05, - "loss": 70.5386, - "step": 157500 - }, - { - "epoch": 0.6363603308055609, - "grad_norm": 1411.388916015625, - "learning_rate": 1.8105710735443593e-05, - "loss": 67.7427, - "step": 157510 - }, - { - "epoch": 0.6364007320709285, - "grad_norm": 452.4316101074219, - "learning_rate": 1.8102355500719876e-05, - "loss": 43.8855, - "step": 157520 - }, - { - "epoch": 0.636441133336296, - "grad_norm": 1131.415283203125, - "learning_rate": 1.8099000400469297e-05, - "loss": 49.2478, - "step": 157530 - }, - { - "epoch": 0.6364815346016637, - "grad_norm": 1270.067138671875, - "learning_rate": 1.809564543475726e-05, - "loss": 48.759, - "step": 157540 - }, - { - "epoch": 0.6365219358670313, - "grad_norm": 512.9644775390625, - "learning_rate": 1.809229060364916e-05, - "loss": 33.8024, - "step": 157550 - }, - { - "epoch": 0.636562337132399, - "grad_norm": 1274.902587890625, - "learning_rate": 1.8088935907210418e-05, - "loss": 51.3391, - "step": 157560 - }, - { - "epoch": 0.6366027383977666, - "grad_norm": 881.6553955078125, - "learning_rate": 1.808558134550643e-05, - "loss": 64.7102, - "step": 157570 - }, - { - "epoch": 0.6366431396631342, - "grad_norm": 1123.6348876953125, - "learning_rate": 1.8082226918602585e-05, - "loss": 40.6092, - "step": 157580 - }, - { - "epoch": 0.6366835409285019, - "grad_norm": 441.2737731933594, - "learning_rate": 1.8078872626564295e-05, - "loss": 46.9466, - "step": 157590 - }, - { - "epoch": 0.6367239421938695, - "grad_norm": 999.4472045898438, - "learning_rate": 1.807551846945694e-05, - "loss": 77.4907, - "step": 157600 - }, - { - "epoch": 0.6367643434592372, - "grad_norm": 464.4021301269531, - "learning_rate": 1.8072164447345927e-05, - "loss": 60.3902, - "step": 157610 - }, - { - "epoch": 0.6368047447246048, - "grad_norm": 580.01953125, - "learning_rate": 1.8068810560296634e-05, - "loss": 66.6603, - "step": 157620 - }, - { - "epoch": 0.6368451459899724, - "grad_norm": 305.635498046875, - "learning_rate": 1.8065456808374433e-05, - "loss": 48.442, - "step": 157630 - }, - { - "epoch": 0.6368855472553401, - "grad_norm": 829.7825927734375, - "learning_rate": 1.8062103191644737e-05, - "loss": 54.7456, - "step": 157640 - }, - { - "epoch": 0.6369259485207077, - "grad_norm": 1587.976806640625, - "learning_rate": 1.80587497101729e-05, - "loss": 59.605, - "step": 157650 - }, - { - "epoch": 0.6369663497860752, - "grad_norm": 162.47715759277344, - "learning_rate": 1.8055396364024317e-05, - "loss": 39.8688, - "step": 157660 - }, - { - "epoch": 0.6370067510514429, - "grad_norm": 501.1140441894531, - "learning_rate": 1.8052043153264364e-05, - "loss": 39.7927, - "step": 157670 - }, - { - "epoch": 0.6370471523168105, - "grad_norm": 505.4931945800781, - "learning_rate": 1.8048690077958397e-05, - "loss": 42.7583, - "step": 157680 - }, - { - "epoch": 0.6370875535821782, - "grad_norm": 776.6007690429688, - "learning_rate": 1.80453371381718e-05, - "loss": 45.7655, - "step": 157690 - }, - { - "epoch": 0.6371279548475458, - "grad_norm": 1815.0123291015625, - "learning_rate": 1.804198433396994e-05, - "loss": 39.881, - "step": 157700 - }, - { - "epoch": 0.6371683561129134, - "grad_norm": 483.6055908203125, - "learning_rate": 1.8038631665418165e-05, - "loss": 58.2693, - "step": 157710 - }, - { - "epoch": 0.6372087573782811, - "grad_norm": 923.490234375, - "learning_rate": 1.803527913258186e-05, - "loss": 72.5108, - "step": 157720 - }, - { - "epoch": 0.6372491586436487, - "grad_norm": 487.7062072753906, - "learning_rate": 1.8031926735526363e-05, - "loss": 81.6203, - "step": 157730 - }, - { - "epoch": 0.6372895599090164, - "grad_norm": 775.66845703125, - "learning_rate": 1.8028574474317052e-05, - "loss": 41.2521, - "step": 157740 - }, - { - "epoch": 0.637329961174384, - "grad_norm": 435.292236328125, - "learning_rate": 1.802522234901927e-05, - "loss": 46.5016, - "step": 157750 - }, - { - "epoch": 0.6373703624397516, - "grad_norm": 422.0650634765625, - "learning_rate": 1.8021870359698368e-05, - "loss": 60.5132, - "step": 157760 - }, - { - "epoch": 0.6374107637051193, - "grad_norm": 808.7423095703125, - "learning_rate": 1.8018518506419698e-05, - "loss": 55.5142, - "step": 157770 - }, - { - "epoch": 0.6374511649704869, - "grad_norm": 885.0738525390625, - "learning_rate": 1.8015166789248604e-05, - "loss": 50.6987, - "step": 157780 - }, - { - "epoch": 0.6374915662358545, - "grad_norm": 515.9467163085938, - "learning_rate": 1.801181520825042e-05, - "loss": 43.7261, - "step": 157790 - }, - { - "epoch": 0.6375319675012221, - "grad_norm": 521.8585815429688, - "learning_rate": 1.800846376349051e-05, - "loss": 49.786, - "step": 157800 - }, - { - "epoch": 0.6375723687665897, - "grad_norm": 511.0011291503906, - "learning_rate": 1.800511245503418e-05, - "loss": 40.6436, - "step": 157810 - }, - { - "epoch": 0.6376127700319574, - "grad_norm": 320.0465087890625, - "learning_rate": 1.80017612829468e-05, - "loss": 50.3526, - "step": 157820 - }, - { - "epoch": 0.637653171297325, - "grad_norm": 638.8887939453125, - "learning_rate": 1.7998410247293686e-05, - "loss": 40.4759, - "step": 157830 - }, - { - "epoch": 0.6376935725626927, - "grad_norm": 1001.0759887695312, - "learning_rate": 1.7995059348140165e-05, - "loss": 52.3484, - "step": 157840 - }, - { - "epoch": 0.6377339738280603, - "grad_norm": 513.6463623046875, - "learning_rate": 1.7991708585551563e-05, - "loss": 42.4503, - "step": 157850 - }, - { - "epoch": 0.6377743750934279, - "grad_norm": 489.6709899902344, - "learning_rate": 1.7988357959593222e-05, - "loss": 34.6381, - "step": 157860 - }, - { - "epoch": 0.6378147763587956, - "grad_norm": 661.804443359375, - "learning_rate": 1.7985007470330444e-05, - "loss": 79.6513, - "step": 157870 - }, - { - "epoch": 0.6378551776241632, - "grad_norm": 427.7678527832031, - "learning_rate": 1.798165711782856e-05, - "loss": 56.3009, - "step": 157880 - }, - { - "epoch": 0.6378955788895309, - "grad_norm": 858.1063232421875, - "learning_rate": 1.7978306902152878e-05, - "loss": 39.8254, - "step": 157890 - }, - { - "epoch": 0.6379359801548985, - "grad_norm": 666.5377197265625, - "learning_rate": 1.7974956823368727e-05, - "loss": 49.063, - "step": 157900 - }, - { - "epoch": 0.6379763814202661, - "grad_norm": 1005.5430297851562, - "learning_rate": 1.7971606881541407e-05, - "loss": 61.1597, - "step": 157910 - }, - { - "epoch": 0.6380167826856337, - "grad_norm": 711.9946899414062, - "learning_rate": 1.796825707673622e-05, - "loss": 66.6129, - "step": 157920 - }, - { - "epoch": 0.6380571839510013, - "grad_norm": 674.28759765625, - "learning_rate": 1.7964907409018496e-05, - "loss": 48.3361, - "step": 157930 - }, - { - "epoch": 0.638097585216369, - "grad_norm": 895.3465576171875, - "learning_rate": 1.7961557878453516e-05, - "loss": 53.9553, - "step": 157940 - }, - { - "epoch": 0.6381379864817366, - "grad_norm": 206.62802124023438, - "learning_rate": 1.7958208485106586e-05, - "loss": 66.486, - "step": 157950 - }, - { - "epoch": 0.6381783877471042, - "grad_norm": 1281.931640625, - "learning_rate": 1.7954859229043016e-05, - "loss": 51.8759, - "step": 157960 - }, - { - "epoch": 0.6382187890124719, - "grad_norm": 420.1695861816406, - "learning_rate": 1.7951510110328077e-05, - "loss": 31.3493, - "step": 157970 - }, - { - "epoch": 0.6382591902778395, - "grad_norm": 1604.4742431640625, - "learning_rate": 1.7948161129027094e-05, - "loss": 79.5504, - "step": 157980 - }, - { - "epoch": 0.6382995915432071, - "grad_norm": 798.0696411132812, - "learning_rate": 1.7944812285205335e-05, - "loss": 45.0031, - "step": 157990 - }, - { - "epoch": 0.6383399928085748, - "grad_norm": 812.3040771484375, - "learning_rate": 1.7941463578928086e-05, - "loss": 47.2854, - "step": 158000 - }, - { - "epoch": 0.6383803940739424, - "grad_norm": 858.7913208007812, - "learning_rate": 1.7938115010260647e-05, - "loss": 60.8041, - "step": 158010 - }, - { - "epoch": 0.6384207953393101, - "grad_norm": 363.1507263183594, - "learning_rate": 1.7934766579268292e-05, - "loss": 67.6859, - "step": 158020 - }, - { - "epoch": 0.6384611966046777, - "grad_norm": 1177.5140380859375, - "learning_rate": 1.7931418286016292e-05, - "loss": 71.1444, - "step": 158030 - }, - { - "epoch": 0.6385015978700452, - "grad_norm": 682.765869140625, - "learning_rate": 1.7928070130569942e-05, - "loss": 41.8368, - "step": 158040 - }, - { - "epoch": 0.6385419991354129, - "grad_norm": 1204.63427734375, - "learning_rate": 1.7924722112994495e-05, - "loss": 44.7631, - "step": 158050 - }, - { - "epoch": 0.6385824004007805, - "grad_norm": 0.0, - "learning_rate": 1.792137423335524e-05, - "loss": 50.4615, - "step": 158060 - }, - { - "epoch": 0.6386228016661482, - "grad_norm": 1152.09521484375, - "learning_rate": 1.7918026491717437e-05, - "loss": 72.6507, - "step": 158070 - }, - { - "epoch": 0.6386632029315158, - "grad_norm": 551.220458984375, - "learning_rate": 1.7914678888146347e-05, - "loss": 49.2288, - "step": 158080 - }, - { - "epoch": 0.6387036041968834, - "grad_norm": 1303.2149658203125, - "learning_rate": 1.791133142270725e-05, - "loss": 45.762, - "step": 158090 - }, - { - "epoch": 0.6387440054622511, - "grad_norm": 623.5195922851562, - "learning_rate": 1.7907984095465395e-05, - "loss": 53.7673, - "step": 158100 - }, - { - "epoch": 0.6387844067276187, - "grad_norm": 799.129150390625, - "learning_rate": 1.7904636906486037e-05, - "loss": 38.7584, - "step": 158110 - }, - { - "epoch": 0.6388248079929864, - "grad_norm": 536.8320922851562, - "learning_rate": 1.790128985583444e-05, - "loss": 63.6297, - "step": 158120 - }, - { - "epoch": 0.638865209258354, - "grad_norm": 885.3190307617188, - "learning_rate": 1.7897942943575843e-05, - "loss": 65.513, - "step": 158130 - }, - { - "epoch": 0.6389056105237216, - "grad_norm": 729.3671875, - "learning_rate": 1.7894596169775512e-05, - "loss": 41.7076, - "step": 158140 - }, - { - "epoch": 0.6389460117890893, - "grad_norm": 540.5570678710938, - "learning_rate": 1.789124953449869e-05, - "loss": 50.7414, - "step": 158150 - }, - { - "epoch": 0.6389864130544569, - "grad_norm": 569.721435546875, - "learning_rate": 1.788790303781061e-05, - "loss": 78.4995, - "step": 158160 - }, - { - "epoch": 0.6390268143198244, - "grad_norm": 1166.6480712890625, - "learning_rate": 1.7884556679776525e-05, - "loss": 59.4203, - "step": 158170 - }, - { - "epoch": 0.6390672155851921, - "grad_norm": 601.839599609375, - "learning_rate": 1.788121046046167e-05, - "loss": 48.1814, - "step": 158180 - }, - { - "epoch": 0.6391076168505597, - "grad_norm": 716.4414672851562, - "learning_rate": 1.7877864379931287e-05, - "loss": 67.2857, - "step": 158190 - }, - { - "epoch": 0.6391480181159274, - "grad_norm": 395.68963623046875, - "learning_rate": 1.7874518438250597e-05, - "loss": 46.7157, - "step": 158200 - }, - { - "epoch": 0.639188419381295, - "grad_norm": 1754.9854736328125, - "learning_rate": 1.7871172635484842e-05, - "loss": 84.133, - "step": 158210 - }, - { - "epoch": 0.6392288206466626, - "grad_norm": 439.41949462890625, - "learning_rate": 1.7867826971699253e-05, - "loss": 43.6031, - "step": 158220 - }, - { - "epoch": 0.6392692219120303, - "grad_norm": 280.5320739746094, - "learning_rate": 1.7864481446959045e-05, - "loss": 60.2605, - "step": 158230 - }, - { - "epoch": 0.6393096231773979, - "grad_norm": 386.1781005859375, - "learning_rate": 1.7861136061329437e-05, - "loss": 32.9864, - "step": 158240 - }, - { - "epoch": 0.6393500244427656, - "grad_norm": 915.8598022460938, - "learning_rate": 1.7857790814875663e-05, - "loss": 38.008, - "step": 158250 - }, - { - "epoch": 0.6393904257081332, - "grad_norm": 678.6073608398438, - "learning_rate": 1.7854445707662928e-05, - "loss": 43.3288, - "step": 158260 - }, - { - "epoch": 0.6394308269735008, - "grad_norm": 853.765380859375, - "learning_rate": 1.785110073975646e-05, - "loss": 41.3135, - "step": 158270 - }, - { - "epoch": 0.6394712282388685, - "grad_norm": 773.3751220703125, - "learning_rate": 1.7847755911221466e-05, - "loss": 41.4973, - "step": 158280 - }, - { - "epoch": 0.6395116295042361, - "grad_norm": 665.0879516601562, - "learning_rate": 1.7844411222123147e-05, - "loss": 45.4086, - "step": 158290 - }, - { - "epoch": 0.6395520307696037, - "grad_norm": 616.668701171875, - "learning_rate": 1.7841066672526723e-05, - "loss": 29.242, - "step": 158300 - }, - { - "epoch": 0.6395924320349713, - "grad_norm": 2878.92724609375, - "learning_rate": 1.7837722262497385e-05, - "loss": 74.1233, - "step": 158310 - }, - { - "epoch": 0.6396328333003389, - "grad_norm": 495.0466613769531, - "learning_rate": 1.7834377992100333e-05, - "loss": 61.9444, - "step": 158320 - }, - { - "epoch": 0.6396732345657066, - "grad_norm": 598.6763305664062, - "learning_rate": 1.7831033861400777e-05, - "loss": 62.46, - "step": 158330 - }, - { - "epoch": 0.6397136358310742, - "grad_norm": 579.919921875, - "learning_rate": 1.78276898704639e-05, - "loss": 54.3735, - "step": 158340 - }, - { - "epoch": 0.6397540370964419, - "grad_norm": 641.3147583007812, - "learning_rate": 1.782434601935491e-05, - "loss": 62.4215, - "step": 158350 - }, - { - "epoch": 0.6397944383618095, - "grad_norm": 511.64569091796875, - "learning_rate": 1.782100230813899e-05, - "loss": 36.6608, - "step": 158360 - }, - { - "epoch": 0.6398348396271771, - "grad_norm": 1257.087158203125, - "learning_rate": 1.7817658736881322e-05, - "loss": 54.9247, - "step": 158370 - }, - { - "epoch": 0.6398752408925448, - "grad_norm": 735.0720825195312, - "learning_rate": 1.7814315305647093e-05, - "loss": 38.0215, - "step": 158380 - }, - { - "epoch": 0.6399156421579124, - "grad_norm": 1210.0875244140625, - "learning_rate": 1.781097201450149e-05, - "loss": 50.9138, - "step": 158390 - }, - { - "epoch": 0.63995604342328, - "grad_norm": 765.9678955078125, - "learning_rate": 1.7807628863509685e-05, - "loss": 38.4157, - "step": 158400 - }, - { - "epoch": 0.6399964446886477, - "grad_norm": 719.0951538085938, - "learning_rate": 1.7804285852736864e-05, - "loss": 73.3833, - "step": 158410 - }, - { - "epoch": 0.6400368459540153, - "grad_norm": 925.130615234375, - "learning_rate": 1.780094298224818e-05, - "loss": 60.2505, - "step": 158420 - }, - { - "epoch": 0.6400772472193829, - "grad_norm": 665.5335693359375, - "learning_rate": 1.7797600252108834e-05, - "loss": 45.8719, - "step": 158430 - }, - { - "epoch": 0.6401176484847505, - "grad_norm": 772.93603515625, - "learning_rate": 1.779425766238398e-05, - "loss": 70.8536, - "step": 158440 - }, - { - "epoch": 0.6401580497501181, - "grad_norm": 519.6685180664062, - "learning_rate": 1.7790915213138776e-05, - "loss": 72.4601, - "step": 158450 - }, - { - "epoch": 0.6401984510154858, - "grad_norm": 516.3569946289062, - "learning_rate": 1.7787572904438392e-05, - "loss": 38.114, - "step": 158460 - }, - { - "epoch": 0.6402388522808534, - "grad_norm": 633.9620971679688, - "learning_rate": 1.778423073634799e-05, - "loss": 72.208, - "step": 158470 - }, - { - "epoch": 0.6402792535462211, - "grad_norm": 568.24609375, - "learning_rate": 1.7780888708932718e-05, - "loss": 53.3362, - "step": 158480 - }, - { - "epoch": 0.6403196548115887, - "grad_norm": 2042.0194091796875, - "learning_rate": 1.7777546822257748e-05, - "loss": 59.6458, - "step": 158490 - }, - { - "epoch": 0.6403600560769563, - "grad_norm": 682.5473022460938, - "learning_rate": 1.7774205076388206e-05, - "loss": 31.9435, - "step": 158500 - }, - { - "epoch": 0.640400457342324, - "grad_norm": 708.43310546875, - "learning_rate": 1.777086347138927e-05, - "loss": 42.089, - "step": 158510 - }, - { - "epoch": 0.6404408586076916, - "grad_norm": 1302.8370361328125, - "learning_rate": 1.7767522007326072e-05, - "loss": 71.7163, - "step": 158520 - }, - { - "epoch": 0.6404812598730593, - "grad_norm": 904.3488159179688, - "learning_rate": 1.7764180684263752e-05, - "loss": 72.1357, - "step": 158530 - }, - { - "epoch": 0.6405216611384269, - "grad_norm": 854.4265747070312, - "learning_rate": 1.7760839502267452e-05, - "loss": 62.3567, - "step": 158540 - }, - { - "epoch": 0.6405620624037945, - "grad_norm": 639.02490234375, - "learning_rate": 1.7757498461402318e-05, - "loss": 49.8052, - "step": 158550 - }, - { - "epoch": 0.6406024636691621, - "grad_norm": 585.0482177734375, - "learning_rate": 1.7754157561733476e-05, - "loss": 42.8153, - "step": 158560 - }, - { - "epoch": 0.6406428649345297, - "grad_norm": 810.7124633789062, - "learning_rate": 1.775081680332607e-05, - "loss": 28.1556, - "step": 158570 - }, - { - "epoch": 0.6406832661998973, - "grad_norm": 1076.5986328125, - "learning_rate": 1.774747618624521e-05, - "loss": 77.0871, - "step": 158580 - }, - { - "epoch": 0.640723667465265, - "grad_norm": 524.3070068359375, - "learning_rate": 1.7744135710556045e-05, - "loss": 39.2759, - "step": 158590 - }, - { - "epoch": 0.6407640687306326, - "grad_norm": 260.4174499511719, - "learning_rate": 1.7740795376323692e-05, - "loss": 43.6856, - "step": 158600 - }, - { - "epoch": 0.6408044699960003, - "grad_norm": 441.21075439453125, - "learning_rate": 1.7737455183613258e-05, - "loss": 40.3064, - "step": 158610 - }, - { - "epoch": 0.6408448712613679, - "grad_norm": 1924.186767578125, - "learning_rate": 1.7734115132489886e-05, - "loss": 51.7431, - "step": 158620 - }, - { - "epoch": 0.6408852725267355, - "grad_norm": 352.7893371582031, - "learning_rate": 1.773077522301868e-05, - "loss": 55.666, - "step": 158630 - }, - { - "epoch": 0.6409256737921032, - "grad_norm": 381.9687805175781, - "learning_rate": 1.7727435455264747e-05, - "loss": 53.1297, - "step": 158640 - }, - { - "epoch": 0.6409660750574708, - "grad_norm": 1260.5701904296875, - "learning_rate": 1.772409582929321e-05, - "loss": 59.0081, - "step": 158650 - }, - { - "epoch": 0.6410064763228385, - "grad_norm": 442.5443420410156, - "learning_rate": 1.772075634516916e-05, - "loss": 28.4182, - "step": 158660 - }, - { - "epoch": 0.6410468775882061, - "grad_norm": 776.69873046875, - "learning_rate": 1.7717417002957725e-05, - "loss": 93.5575, - "step": 158670 - }, - { - "epoch": 0.6410872788535736, - "grad_norm": 983.2738037109375, - "learning_rate": 1.7714077802723994e-05, - "loss": 59.6079, - "step": 158680 - }, - { - "epoch": 0.6411276801189413, - "grad_norm": 338.20379638671875, - "learning_rate": 1.7710738744533057e-05, - "loss": 55.5007, - "step": 158690 - }, - { - "epoch": 0.6411680813843089, - "grad_norm": 872.261962890625, - "learning_rate": 1.7707399828450027e-05, - "loss": 57.6118, - "step": 158700 - }, - { - "epoch": 0.6412084826496766, - "grad_norm": 441.12628173828125, - "learning_rate": 1.7704061054539993e-05, - "loss": 40.5436, - "step": 158710 - }, - { - "epoch": 0.6412488839150442, - "grad_norm": 655.5393676757812, - "learning_rate": 1.7700722422868048e-05, - "loss": 57.556, - "step": 158720 - }, - { - "epoch": 0.6412892851804118, - "grad_norm": 864.171875, - "learning_rate": 1.7697383933499274e-05, - "loss": 35.3705, - "step": 158730 - }, - { - "epoch": 0.6413296864457795, - "grad_norm": 502.4867248535156, - "learning_rate": 1.7694045586498752e-05, - "loss": 38.337, - "step": 158740 - }, - { - "epoch": 0.6413700877111471, - "grad_norm": 961.3887329101562, - "learning_rate": 1.7690707381931583e-05, - "loss": 61.8364, - "step": 158750 - }, - { - "epoch": 0.6414104889765148, - "grad_norm": 2974.65087890625, - "learning_rate": 1.768736931986284e-05, - "loss": 53.8332, - "step": 158760 - }, - { - "epoch": 0.6414508902418824, - "grad_norm": 1488.987060546875, - "learning_rate": 1.768403140035758e-05, - "loss": 49.7502, - "step": 158770 - }, - { - "epoch": 0.64149129150725, - "grad_norm": 713.788818359375, - "learning_rate": 1.768069362348091e-05, - "loss": 48.6836, - "step": 158780 - }, - { - "epoch": 0.6415316927726177, - "grad_norm": 403.7445373535156, - "learning_rate": 1.767735598929788e-05, - "loss": 70.3594, - "step": 158790 - }, - { - "epoch": 0.6415720940379853, - "grad_norm": 0.0, - "learning_rate": 1.767401849787357e-05, - "loss": 34.1432, - "step": 158800 - }, - { - "epoch": 0.6416124953033528, - "grad_norm": 4756.49951171875, - "learning_rate": 1.7670681149273038e-05, - "loss": 64.6012, - "step": 158810 - }, - { - "epoch": 0.6416528965687205, - "grad_norm": 906.1963500976562, - "learning_rate": 1.7667343943561344e-05, - "loss": 63.7832, - "step": 158820 - }, - { - "epoch": 0.6416932978340881, - "grad_norm": 335.9945373535156, - "learning_rate": 1.7664006880803563e-05, - "loss": 51.2343, - "step": 158830 - }, - { - "epoch": 0.6417336990994558, - "grad_norm": 1291.095947265625, - "learning_rate": 1.7660669961064748e-05, - "loss": 59.827, - "step": 158840 - }, - { - "epoch": 0.6417741003648234, - "grad_norm": 578.92236328125, - "learning_rate": 1.7657333184409936e-05, - "loss": 48.6957, - "step": 158850 - }, - { - "epoch": 0.641814501630191, - "grad_norm": 1438.8035888671875, - "learning_rate": 1.7653996550904205e-05, - "loss": 59.5564, - "step": 158860 - }, - { - "epoch": 0.6418549028955587, - "grad_norm": 251.0753936767578, - "learning_rate": 1.765066006061259e-05, - "loss": 51.8155, - "step": 158870 - }, - { - "epoch": 0.6418953041609263, - "grad_norm": 525.5086059570312, - "learning_rate": 1.7647323713600138e-05, - "loss": 51.6161, - "step": 158880 - }, - { - "epoch": 0.641935705426294, - "grad_norm": 950.3296508789062, - "learning_rate": 1.7643987509931903e-05, - "loss": 49.2463, - "step": 158890 - }, - { - "epoch": 0.6419761066916616, - "grad_norm": 400.7654724121094, - "learning_rate": 1.7640651449672913e-05, - "loss": 48.2175, - "step": 158900 - }, - { - "epoch": 0.6420165079570292, - "grad_norm": 494.3016662597656, - "learning_rate": 1.7637315532888216e-05, - "loss": 60.7076, - "step": 158910 - }, - { - "epoch": 0.6420569092223969, - "grad_norm": 913.7971801757812, - "learning_rate": 1.7633979759642844e-05, - "loss": 38.4652, - "step": 158920 - }, - { - "epoch": 0.6420973104877645, - "grad_norm": 495.9393310546875, - "learning_rate": 1.7630644130001818e-05, - "loss": 47.9746, - "step": 158930 - }, - { - "epoch": 0.6421377117531321, - "grad_norm": 903.8270263671875, - "learning_rate": 1.7627308644030187e-05, - "loss": 76.0501, - "step": 158940 - }, - { - "epoch": 0.6421781130184997, - "grad_norm": 1427.69970703125, - "learning_rate": 1.7623973301792966e-05, - "loss": 37.5955, - "step": 158950 - }, - { - "epoch": 0.6422185142838673, - "grad_norm": 700.1619262695312, - "learning_rate": 1.7620638103355186e-05, - "loss": 50.5823, - "step": 158960 - }, - { - "epoch": 0.642258915549235, - "grad_norm": 452.8930969238281, - "learning_rate": 1.761730304878187e-05, - "loss": 42.1029, - "step": 158970 - }, - { - "epoch": 0.6422993168146026, - "grad_norm": 248.73570251464844, - "learning_rate": 1.7613968138138026e-05, - "loss": 32.7044, - "step": 158980 - }, - { - "epoch": 0.6423397180799703, - "grad_norm": 779.9566650390625, - "learning_rate": 1.7610633371488682e-05, - "loss": 61.1448, - "step": 158990 - }, - { - "epoch": 0.6423801193453379, - "grad_norm": 1629.21337890625, - "learning_rate": 1.7607298748898842e-05, - "loss": 63.5136, - "step": 159000 - }, - { - "epoch": 0.6424205206107055, - "grad_norm": 824.9151000976562, - "learning_rate": 1.760396427043351e-05, - "loss": 42.806, - "step": 159010 - }, - { - "epoch": 0.6424609218760732, - "grad_norm": 1080.2861328125, - "learning_rate": 1.7600629936157708e-05, - "loss": 45.2424, - "step": 159020 - }, - { - "epoch": 0.6425013231414408, - "grad_norm": 955.01708984375, - "learning_rate": 1.759729574613643e-05, - "loss": 57.9693, - "step": 159030 - }, - { - "epoch": 0.6425417244068085, - "grad_norm": 337.5832824707031, - "learning_rate": 1.759396170043469e-05, - "loss": 60.3471, - "step": 159040 - }, - { - "epoch": 0.6425821256721761, - "grad_norm": 250.19577026367188, - "learning_rate": 1.759062779911748e-05, - "loss": 50.2848, - "step": 159050 - }, - { - "epoch": 0.6426225269375437, - "grad_norm": 1389.3502197265625, - "learning_rate": 1.7587294042249797e-05, - "loss": 50.535, - "step": 159060 - }, - { - "epoch": 0.6426629282029113, - "grad_norm": 741.0910034179688, - "learning_rate": 1.758396042989663e-05, - "loss": 46.095, - "step": 159070 - }, - { - "epoch": 0.6427033294682789, - "grad_norm": 2326.651123046875, - "learning_rate": 1.7580626962122977e-05, - "loss": 69.9279, - "step": 159080 - }, - { - "epoch": 0.6427437307336465, - "grad_norm": 544.2945556640625, - "learning_rate": 1.7577293638993808e-05, - "loss": 43.031, - "step": 159090 - }, - { - "epoch": 0.6427841319990142, - "grad_norm": 1186.790771484375, - "learning_rate": 1.7573960460574133e-05, - "loss": 47.7816, - "step": 159100 - }, - { - "epoch": 0.6428245332643818, - "grad_norm": 1846.28369140625, - "learning_rate": 1.757062742692891e-05, - "loss": 67.478, - "step": 159110 - }, - { - "epoch": 0.6428649345297495, - "grad_norm": 800.3991088867188, - "learning_rate": 1.7567294538123142e-05, - "loss": 64.417, - "step": 159120 - }, - { - "epoch": 0.6429053357951171, - "grad_norm": 534.356201171875, - "learning_rate": 1.7563961794221795e-05, - "loss": 32.4049, - "step": 159130 - }, - { - "epoch": 0.6429457370604847, - "grad_norm": 1035.6060791015625, - "learning_rate": 1.756062919528983e-05, - "loss": 65.2042, - "step": 159140 - }, - { - "epoch": 0.6429861383258524, - "grad_norm": 1449.9736328125, - "learning_rate": 1.755729674139224e-05, - "loss": 57.8393, - "step": 159150 - }, - { - "epoch": 0.64302653959122, - "grad_norm": 1185.867431640625, - "learning_rate": 1.7553964432593976e-05, - "loss": 56.2053, - "step": 159160 - }, - { - "epoch": 0.6430669408565877, - "grad_norm": 261.984375, - "learning_rate": 1.7550632268960003e-05, - "loss": 43.3817, - "step": 159170 - }, - { - "epoch": 0.6431073421219553, - "grad_norm": 1169.52197265625, - "learning_rate": 1.7547300250555304e-05, - "loss": 70.408, - "step": 159180 - }, - { - "epoch": 0.643147743387323, - "grad_norm": 528.8572387695312, - "learning_rate": 1.7543968377444806e-05, - "loss": 57.1005, - "step": 159190 - }, - { - "epoch": 0.6431881446526905, - "grad_norm": 1181.6226806640625, - "learning_rate": 1.7540636649693496e-05, - "loss": 48.1173, - "step": 159200 - }, - { - "epoch": 0.6432285459180581, - "grad_norm": 453.0842590332031, - "learning_rate": 1.7537305067366315e-05, - "loss": 49.9454, - "step": 159210 - }, - { - "epoch": 0.6432689471834258, - "grad_norm": 920.9298095703125, - "learning_rate": 1.75339736305282e-05, - "loss": 46.288, - "step": 159220 - }, - { - "epoch": 0.6433093484487934, - "grad_norm": 843.010009765625, - "learning_rate": 1.753064233924413e-05, - "loss": 38.9058, - "step": 159230 - }, - { - "epoch": 0.643349749714161, - "grad_norm": 239.32803344726562, - "learning_rate": 1.752731119357902e-05, - "loss": 68.3454, - "step": 159240 - }, - { - "epoch": 0.6433901509795287, - "grad_norm": 506.0323791503906, - "learning_rate": 1.7523980193597836e-05, - "loss": 40.895, - "step": 159250 - }, - { - "epoch": 0.6434305522448963, - "grad_norm": 1487.357177734375, - "learning_rate": 1.752064933936551e-05, - "loss": 71.0815, - "step": 159260 - }, - { - "epoch": 0.643470953510264, - "grad_norm": 1509.1661376953125, - "learning_rate": 1.7517318630946957e-05, - "loss": 53.0446, - "step": 159270 - }, - { - "epoch": 0.6435113547756316, - "grad_norm": 1130.5875244140625, - "learning_rate": 1.7513988068407146e-05, - "loss": 35.6718, - "step": 159280 - }, - { - "epoch": 0.6435517560409992, - "grad_norm": 634.6209106445312, - "learning_rate": 1.7510657651810986e-05, - "loss": 37.0097, - "step": 159290 - }, - { - "epoch": 0.6435921573063669, - "grad_norm": 268.3912048339844, - "learning_rate": 1.7507327381223405e-05, - "loss": 33.5271, - "step": 159300 - }, - { - "epoch": 0.6436325585717345, - "grad_norm": 443.1457214355469, - "learning_rate": 1.7503997256709342e-05, - "loss": 51.4052, - "step": 159310 - }, - { - "epoch": 0.643672959837102, - "grad_norm": 687.546142578125, - "learning_rate": 1.750066727833371e-05, - "loss": 44.2651, - "step": 159320 - }, - { - "epoch": 0.6437133611024697, - "grad_norm": 600.9700317382812, - "learning_rate": 1.7497337446161432e-05, - "loss": 68.676, - "step": 159330 - }, - { - "epoch": 0.6437537623678373, - "grad_norm": 565.4005737304688, - "learning_rate": 1.749400776025743e-05, - "loss": 70.9337, - "step": 159340 - }, - { - "epoch": 0.643794163633205, - "grad_norm": 660.9468383789062, - "learning_rate": 1.749067822068659e-05, - "loss": 87.7747, - "step": 159350 - }, - { - "epoch": 0.6438345648985726, - "grad_norm": 744.2423095703125, - "learning_rate": 1.748734882751386e-05, - "loss": 51.0859, - "step": 159360 - }, - { - "epoch": 0.6438749661639402, - "grad_norm": 1477.296630859375, - "learning_rate": 1.7484019580804135e-05, - "loss": 85.0133, - "step": 159370 - }, - { - "epoch": 0.6439153674293079, - "grad_norm": 585.2022705078125, - "learning_rate": 1.7480690480622303e-05, - "loss": 33.7616, - "step": 159380 - }, - { - "epoch": 0.6439557686946755, - "grad_norm": 181.0767364501953, - "learning_rate": 1.7477361527033295e-05, - "loss": 51.101, - "step": 159390 - }, - { - "epoch": 0.6439961699600432, - "grad_norm": 815.1417236328125, - "learning_rate": 1.747403272010199e-05, - "loss": 53.4556, - "step": 159400 - }, - { - "epoch": 0.6440365712254108, - "grad_norm": 615.5349731445312, - "learning_rate": 1.7470704059893296e-05, - "loss": 48.4075, - "step": 159410 - }, - { - "epoch": 0.6440769724907784, - "grad_norm": 322.2975769042969, - "learning_rate": 1.7467375546472105e-05, - "loss": 56.5311, - "step": 159420 - }, - { - "epoch": 0.6441173737561461, - "grad_norm": 621.2068481445312, - "learning_rate": 1.7464047179903296e-05, - "loss": 64.1465, - "step": 159430 - }, - { - "epoch": 0.6441577750215137, - "grad_norm": 922.0742797851562, - "learning_rate": 1.7460718960251772e-05, - "loss": 39.1898, - "step": 159440 - }, - { - "epoch": 0.6441981762868813, - "grad_norm": 427.25860595703125, - "learning_rate": 1.745739088758242e-05, - "loss": 27.403, - "step": 159450 - }, - { - "epoch": 0.6442385775522489, - "grad_norm": 785.6755981445312, - "learning_rate": 1.74540629619601e-05, - "loss": 59.5198, - "step": 159460 - }, - { - "epoch": 0.6442789788176165, - "grad_norm": 1196.318115234375, - "learning_rate": 1.7450735183449723e-05, - "loss": 53.6108, - "step": 159470 - }, - { - "epoch": 0.6443193800829842, - "grad_norm": 1071.03173828125, - "learning_rate": 1.744740755211614e-05, - "loss": 46.8555, - "step": 159480 - }, - { - "epoch": 0.6443597813483518, - "grad_norm": 506.322998046875, - "learning_rate": 1.7444080068024243e-05, - "loss": 52.3927, - "step": 159490 - }, - { - "epoch": 0.6444001826137195, - "grad_norm": 385.1031799316406, - "learning_rate": 1.744075273123889e-05, - "loss": 48.9555, - "step": 159500 - }, - { - "epoch": 0.6444405838790871, - "grad_norm": 782.7656860351562, - "learning_rate": 1.7437425541824952e-05, - "loss": 57.9691, - "step": 159510 - }, - { - "epoch": 0.6444809851444547, - "grad_norm": 398.4875793457031, - "learning_rate": 1.7434098499847306e-05, - "loss": 47.1612, - "step": 159520 - }, - { - "epoch": 0.6445213864098224, - "grad_norm": 736.70947265625, - "learning_rate": 1.74307716053708e-05, - "loss": 58.8329, - "step": 159530 - }, - { - "epoch": 0.64456178767519, - "grad_norm": 814.1843872070312, - "learning_rate": 1.742744485846029e-05, - "loss": 48.0108, - "step": 159540 - }, - { - "epoch": 0.6446021889405577, - "grad_norm": 578.465576171875, - "learning_rate": 1.7424118259180654e-05, - "loss": 50.4716, - "step": 159550 - }, - { - "epoch": 0.6446425902059253, - "grad_norm": 409.29443359375, - "learning_rate": 1.742079180759672e-05, - "loss": 31.7956, - "step": 159560 - }, - { - "epoch": 0.6446829914712929, - "grad_norm": 604.2058715820312, - "learning_rate": 1.7417465503773365e-05, - "loss": 69.64, - "step": 159570 - }, - { - "epoch": 0.6447233927366605, - "grad_norm": 312.2088928222656, - "learning_rate": 1.741413934777542e-05, - "loss": 49.1468, - "step": 159580 - }, - { - "epoch": 0.6447637940020281, - "grad_norm": 241.517822265625, - "learning_rate": 1.7410813339667732e-05, - "loss": 28.2745, - "step": 159590 - }, - { - "epoch": 0.6448041952673957, - "grad_norm": 911.9425048828125, - "learning_rate": 1.7407487479515147e-05, - "loss": 68.7837, - "step": 159600 - }, - { - "epoch": 0.6448445965327634, - "grad_norm": 568.782958984375, - "learning_rate": 1.7404161767382504e-05, - "loss": 49.4384, - "step": 159610 - }, - { - "epoch": 0.644884997798131, - "grad_norm": 350.11181640625, - "learning_rate": 1.7400836203334626e-05, - "loss": 41.8597, - "step": 159620 - }, - { - "epoch": 0.6449253990634987, - "grad_norm": 634.2789916992188, - "learning_rate": 1.739751078743637e-05, - "loss": 62.7964, - "step": 159630 - }, - { - "epoch": 0.6449658003288663, - "grad_norm": 609.7909545898438, - "learning_rate": 1.7394185519752545e-05, - "loss": 43.3135, - "step": 159640 - }, - { - "epoch": 0.6450062015942339, - "grad_norm": 918.4065551757812, - "learning_rate": 1.7390860400348002e-05, - "loss": 47.5571, - "step": 159650 - }, - { - "epoch": 0.6450466028596016, - "grad_norm": 536.011474609375, - "learning_rate": 1.7387535429287548e-05, - "loss": 43.0701, - "step": 159660 - }, - { - "epoch": 0.6450870041249692, - "grad_norm": 1669.569091796875, - "learning_rate": 1.7384210606636007e-05, - "loss": 52.9317, - "step": 159670 - }, - { - "epoch": 0.6451274053903369, - "grad_norm": 346.0846862792969, - "learning_rate": 1.7380885932458206e-05, - "loss": 51.7841, - "step": 159680 - }, - { - "epoch": 0.6451678066557045, - "grad_norm": 665.9485473632812, - "learning_rate": 1.7377561406818958e-05, - "loss": 40.3949, - "step": 159690 - }, - { - "epoch": 0.6452082079210721, - "grad_norm": 804.7376098632812, - "learning_rate": 1.7374237029783062e-05, - "loss": 56.1418, - "step": 159700 - }, - { - "epoch": 0.6452486091864397, - "grad_norm": 856.9612426757812, - "learning_rate": 1.7370912801415357e-05, - "loss": 64.3835, - "step": 159710 - }, - { - "epoch": 0.6452890104518073, - "grad_norm": 518.4335327148438, - "learning_rate": 1.736758872178062e-05, - "loss": 58.4394, - "step": 159720 - }, - { - "epoch": 0.645329411717175, - "grad_norm": 935.9529418945312, - "learning_rate": 1.736426479094368e-05, - "loss": 31.8426, - "step": 159730 - }, - { - "epoch": 0.6453698129825426, - "grad_norm": 951.4691162109375, - "learning_rate": 1.7360941008969327e-05, - "loss": 60.8805, - "step": 159740 - }, - { - "epoch": 0.6454102142479102, - "grad_norm": 809.8295288085938, - "learning_rate": 1.735761737592236e-05, - "loss": 45.1077, - "step": 159750 - }, - { - "epoch": 0.6454506155132779, - "grad_norm": 653.5640869140625, - "learning_rate": 1.7354293891867582e-05, - "loss": 61.4744, - "step": 159760 - }, - { - "epoch": 0.6454910167786455, - "grad_norm": 555.7434692382812, - "learning_rate": 1.7350970556869768e-05, - "loss": 44.4319, - "step": 159770 - }, - { - "epoch": 0.6455314180440131, - "grad_norm": 519.6600952148438, - "learning_rate": 1.7347647370993738e-05, - "loss": 62.9967, - "step": 159780 - }, - { - "epoch": 0.6455718193093808, - "grad_norm": 571.7482299804688, - "learning_rate": 1.7344324334304255e-05, - "loss": 31.3623, - "step": 159790 - }, - { - "epoch": 0.6456122205747484, - "grad_norm": 1434.1971435546875, - "learning_rate": 1.73410014468661e-05, - "loss": 68.6211, - "step": 159800 - }, - { - "epoch": 0.6456526218401161, - "grad_norm": 634.74951171875, - "learning_rate": 1.7337678708744083e-05, - "loss": 53.1098, - "step": 159810 - }, - { - "epoch": 0.6456930231054837, - "grad_norm": 893.255126953125, - "learning_rate": 1.7334356120002957e-05, - "loss": 66.1552, - "step": 159820 - }, - { - "epoch": 0.6457334243708513, - "grad_norm": 776.960205078125, - "learning_rate": 1.7331033680707502e-05, - "loss": 43.6166, - "step": 159830 - }, - { - "epoch": 0.6457738256362189, - "grad_norm": 1306.64306640625, - "learning_rate": 1.7327711390922494e-05, - "loss": 56.0734, - "step": 159840 - }, - { - "epoch": 0.6458142269015865, - "grad_norm": 947.216552734375, - "learning_rate": 1.7324389250712702e-05, - "loss": 42.4781, - "step": 159850 - }, - { - "epoch": 0.6458546281669542, - "grad_norm": 638.41796875, - "learning_rate": 1.73210672601429e-05, - "loss": 55.3032, - "step": 159860 - }, - { - "epoch": 0.6458950294323218, - "grad_norm": 336.3428649902344, - "learning_rate": 1.731774541927784e-05, - "loss": 43.4402, - "step": 159870 - }, - { - "epoch": 0.6459354306976894, - "grad_norm": 432.4233703613281, - "learning_rate": 1.7314423728182283e-05, - "loss": 35.9008, - "step": 159880 - }, - { - "epoch": 0.6459758319630571, - "grad_norm": 796.058837890625, - "learning_rate": 1.7311102186921e-05, - "loss": 51.9767, - "step": 159890 - }, - { - "epoch": 0.6460162332284247, - "grad_norm": 522.548828125, - "learning_rate": 1.730778079555874e-05, - "loss": 44.1105, - "step": 159900 - }, - { - "epoch": 0.6460566344937924, - "grad_norm": 1422.60205078125, - "learning_rate": 1.7304459554160245e-05, - "loss": 74.7213, - "step": 159910 - }, - { - "epoch": 0.64609703575916, - "grad_norm": 675.3711547851562, - "learning_rate": 1.7301138462790278e-05, - "loss": 104.2761, - "step": 159920 - }, - { - "epoch": 0.6461374370245276, - "grad_norm": 591.744140625, - "learning_rate": 1.729781752151358e-05, - "loss": 38.1823, - "step": 159930 - }, - { - "epoch": 0.6461778382898953, - "grad_norm": 238.55694580078125, - "learning_rate": 1.7294496730394895e-05, - "loss": 35.8654, - "step": 159940 - }, - { - "epoch": 0.6462182395552629, - "grad_norm": 416.1772766113281, - "learning_rate": 1.7291176089498967e-05, - "loss": 51.6108, - "step": 159950 - }, - { - "epoch": 0.6462586408206304, - "grad_norm": 1862.9417724609375, - "learning_rate": 1.728785559889052e-05, - "loss": 52.4495, - "step": 159960 - }, - { - "epoch": 0.6462990420859981, - "grad_norm": 415.4091796875, - "learning_rate": 1.7284535258634307e-05, - "loss": 53.8081, - "step": 159970 - }, - { - "epoch": 0.6463394433513657, - "grad_norm": 613.8626098632812, - "learning_rate": 1.7281215068795055e-05, - "loss": 55.2499, - "step": 159980 - }, - { - "epoch": 0.6463798446167334, - "grad_norm": 326.62451171875, - "learning_rate": 1.7277895029437474e-05, - "loss": 54.2632, - "step": 159990 - }, - { - "epoch": 0.646420245882101, - "grad_norm": 860.3509521484375, - "learning_rate": 1.7274575140626318e-05, - "loss": 44.3942, - "step": 160000 - }, - { - "epoch": 0.6464606471474686, - "grad_norm": 1033.29833984375, - "learning_rate": 1.727125540242629e-05, - "loss": 52.0802, - "step": 160010 - }, - { - "epoch": 0.6465010484128363, - "grad_norm": 697.0635986328125, - "learning_rate": 1.726793581490212e-05, - "loss": 58.4031, - "step": 160020 - }, - { - "epoch": 0.6465414496782039, - "grad_norm": 649.52734375, - "learning_rate": 1.7264616378118528e-05, - "loss": 52.7141, - "step": 160030 - }, - { - "epoch": 0.6465818509435716, - "grad_norm": 911.1539306640625, - "learning_rate": 1.7261297092140212e-05, - "loss": 47.7209, - "step": 160040 - }, - { - "epoch": 0.6466222522089392, - "grad_norm": 635.70556640625, - "learning_rate": 1.72579779570319e-05, - "loss": 43.9278, - "step": 160050 - }, - { - "epoch": 0.6466626534743068, - "grad_norm": 839.5745239257812, - "learning_rate": 1.7254658972858293e-05, - "loss": 38.8251, - "step": 160060 - }, - { - "epoch": 0.6467030547396745, - "grad_norm": 529.5610961914062, - "learning_rate": 1.7251340139684086e-05, - "loss": 37.6862, - "step": 160070 - }, - { - "epoch": 0.6467434560050421, - "grad_norm": 743.9073486328125, - "learning_rate": 1.7248021457574004e-05, - "loss": 48.4526, - "step": 160080 - }, - { - "epoch": 0.6467838572704097, - "grad_norm": 0.0, - "learning_rate": 1.7244702926592733e-05, - "loss": 47.8844, - "step": 160090 - }, - { - "epoch": 0.6468242585357773, - "grad_norm": 200.43478393554688, - "learning_rate": 1.7241384546804972e-05, - "loss": 46.4571, - "step": 160100 - }, - { - "epoch": 0.6468646598011449, - "grad_norm": 921.7332763671875, - "learning_rate": 1.7238066318275415e-05, - "loss": 60.5268, - "step": 160110 - }, - { - "epoch": 0.6469050610665126, - "grad_norm": 918.865478515625, - "learning_rate": 1.7234748241068742e-05, - "loss": 63.9479, - "step": 160120 - }, - { - "epoch": 0.6469454623318802, - "grad_norm": 654.3958740234375, - "learning_rate": 1.723143031524966e-05, - "loss": 71.1433, - "step": 160130 - }, - { - "epoch": 0.6469858635972479, - "grad_norm": 1054.6842041015625, - "learning_rate": 1.722811254088284e-05, - "loss": 45.9198, - "step": 160140 - }, - { - "epoch": 0.6470262648626155, - "grad_norm": 494.39654541015625, - "learning_rate": 1.722479491803296e-05, - "loss": 38.1067, - "step": 160150 - }, - { - "epoch": 0.6470666661279831, - "grad_norm": 522.5104370117188, - "learning_rate": 1.7221477446764717e-05, - "loss": 47.4557, - "step": 160160 - }, - { - "epoch": 0.6471070673933508, - "grad_norm": 751.9249267578125, - "learning_rate": 1.7218160127142767e-05, - "loss": 54.4638, - "step": 160170 - }, - { - "epoch": 0.6471474686587184, - "grad_norm": 520.9002075195312, - "learning_rate": 1.7214842959231794e-05, - "loss": 58.695, - "step": 160180 - }, - { - "epoch": 0.6471878699240861, - "grad_norm": 1485.286865234375, - "learning_rate": 1.721152594309647e-05, - "loss": 80.7229, - "step": 160190 - }, - { - "epoch": 0.6472282711894537, - "grad_norm": 872.4847412109375, - "learning_rate": 1.7208209078801454e-05, - "loss": 44.0047, - "step": 160200 - }, - { - "epoch": 0.6472686724548213, - "grad_norm": 574.897705078125, - "learning_rate": 1.7204892366411416e-05, - "loss": 30.545, - "step": 160210 - }, - { - "epoch": 0.6473090737201889, - "grad_norm": 4002.87939453125, - "learning_rate": 1.720157580599101e-05, - "loss": 69.5485, - "step": 160220 - }, - { - "epoch": 0.6473494749855565, - "grad_norm": 231.5006866455078, - "learning_rate": 1.7198259397604904e-05, - "loss": 42.8945, - "step": 160230 - }, - { - "epoch": 0.6473898762509241, - "grad_norm": 545.1617431640625, - "learning_rate": 1.719494314131775e-05, - "loss": 46.0572, - "step": 160240 - }, - { - "epoch": 0.6474302775162918, - "grad_norm": 659.3037719726562, - "learning_rate": 1.7191627037194186e-05, - "loss": 51.2325, - "step": 160250 - }, - { - "epoch": 0.6474706787816594, - "grad_norm": 855.5885620117188, - "learning_rate": 1.718831108529888e-05, - "loss": 54.8725, - "step": 160260 - }, - { - "epoch": 0.6475110800470271, - "grad_norm": 484.8570861816406, - "learning_rate": 1.7184995285696477e-05, - "loss": 54.7183, - "step": 160270 - }, - { - "epoch": 0.6475514813123947, - "grad_norm": 827.1829223632812, - "learning_rate": 1.7181679638451603e-05, - "loss": 49.625, - "step": 160280 - }, - { - "epoch": 0.6475918825777623, - "grad_norm": 199.2564239501953, - "learning_rate": 1.7178364143628923e-05, - "loss": 29.7762, - "step": 160290 - }, - { - "epoch": 0.64763228384313, - "grad_norm": 830.081298828125, - "learning_rate": 1.717504880129304e-05, - "loss": 66.7952, - "step": 160300 - }, - { - "epoch": 0.6476726851084976, - "grad_norm": 859.8870849609375, - "learning_rate": 1.7171733611508627e-05, - "loss": 51.3871, - "step": 160310 - }, - { - "epoch": 0.6477130863738653, - "grad_norm": 718.7318725585938, - "learning_rate": 1.7168418574340298e-05, - "loss": 62.161, - "step": 160320 - }, - { - "epoch": 0.6477534876392329, - "grad_norm": 733.2501831054688, - "learning_rate": 1.716510368985267e-05, - "loss": 77.6914, - "step": 160330 - }, - { - "epoch": 0.6477938889046005, - "grad_norm": 958.80126953125, - "learning_rate": 1.7161788958110388e-05, - "loss": 69.0079, - "step": 160340 - }, - { - "epoch": 0.6478342901699681, - "grad_norm": 597.50439453125, - "learning_rate": 1.7158474379178064e-05, - "loss": 65.9391, - "step": 160350 - }, - { - "epoch": 0.6478746914353357, - "grad_norm": 956.994873046875, - "learning_rate": 1.7155159953120313e-05, - "loss": 66.6005, - "step": 160360 - }, - { - "epoch": 0.6479150927007034, - "grad_norm": 524.7179565429688, - "learning_rate": 1.7151845680001767e-05, - "loss": 39.2416, - "step": 160370 - }, - { - "epoch": 0.647955493966071, - "grad_norm": 451.7310485839844, - "learning_rate": 1.7148531559887018e-05, - "loss": 55.5202, - "step": 160380 - }, - { - "epoch": 0.6479958952314386, - "grad_norm": 627.1416015625, - "learning_rate": 1.71452175928407e-05, - "loss": 69.2645, - "step": 160390 - }, - { - "epoch": 0.6480362964968063, - "grad_norm": 482.4586486816406, - "learning_rate": 1.7141903778927406e-05, - "loss": 58.0431, - "step": 160400 - }, - { - "epoch": 0.6480766977621739, - "grad_norm": 681.709716796875, - "learning_rate": 1.7138590118211733e-05, - "loss": 52.02, - "step": 160410 - }, - { - "epoch": 0.6481170990275416, - "grad_norm": 361.3819885253906, - "learning_rate": 1.7135276610758307e-05, - "loss": 53.8131, - "step": 160420 - }, - { - "epoch": 0.6481575002929092, - "grad_norm": 607.1292724609375, - "learning_rate": 1.713196325663171e-05, - "loss": 51.5823, - "step": 160430 - }, - { - "epoch": 0.6481979015582768, - "grad_norm": 1490.06298828125, - "learning_rate": 1.7128650055896535e-05, - "loss": 68.2615, - "step": 160440 - }, - { - "epoch": 0.6482383028236445, - "grad_norm": 596.255615234375, - "learning_rate": 1.7125337008617386e-05, - "loss": 49.4564, - "step": 160450 - }, - { - "epoch": 0.6482787040890121, - "grad_norm": 834.3792114257812, - "learning_rate": 1.7122024114858835e-05, - "loss": 49.1376, - "step": 160460 - }, - { - "epoch": 0.6483191053543798, - "grad_norm": 1242.96337890625, - "learning_rate": 1.711871137468549e-05, - "loss": 66.0371, - "step": 160470 - }, - { - "epoch": 0.6483595066197473, - "grad_norm": 1074.2083740234375, - "learning_rate": 1.7115398788161925e-05, - "loss": 64.567, - "step": 160480 - }, - { - "epoch": 0.6483999078851149, - "grad_norm": 1067.1346435546875, - "learning_rate": 1.7112086355352707e-05, - "loss": 53.1166, - "step": 160490 - }, - { - "epoch": 0.6484403091504826, - "grad_norm": 442.2884826660156, - "learning_rate": 1.7108774076322443e-05, - "loss": 32.6686, - "step": 160500 - }, - { - "epoch": 0.6484807104158502, - "grad_norm": 450.2314758300781, - "learning_rate": 1.7105461951135686e-05, - "loss": 50.7306, - "step": 160510 - }, - { - "epoch": 0.6485211116812178, - "grad_norm": 1504.6929931640625, - "learning_rate": 1.7102149979857e-05, - "loss": 47.6178, - "step": 160520 - }, - { - "epoch": 0.6485615129465855, - "grad_norm": 605.5977783203125, - "learning_rate": 1.7098838162550983e-05, - "loss": 41.464, - "step": 160530 - }, - { - "epoch": 0.6486019142119531, - "grad_norm": 867.1982421875, - "learning_rate": 1.7095526499282172e-05, - "loss": 46.7762, - "step": 160540 - }, - { - "epoch": 0.6486423154773208, - "grad_norm": 447.15863037109375, - "learning_rate": 1.7092214990115152e-05, - "loss": 32.9571, - "step": 160550 - }, - { - "epoch": 0.6486827167426884, - "grad_norm": 679.4548950195312, - "learning_rate": 1.708890363511447e-05, - "loss": 72.9706, - "step": 160560 - }, - { - "epoch": 0.648723118008056, - "grad_norm": 1380.580810546875, - "learning_rate": 1.708559243434467e-05, - "loss": 62.2892, - "step": 160570 - }, - { - "epoch": 0.6487635192734237, - "grad_norm": 668.0836181640625, - "learning_rate": 1.7082281387870338e-05, - "loss": 44.8698, - "step": 160580 - }, - { - "epoch": 0.6488039205387913, - "grad_norm": 675.6842651367188, - "learning_rate": 1.7078970495756002e-05, - "loss": 41.055, - "step": 160590 - }, - { - "epoch": 0.6488443218041589, - "grad_norm": 703.9763793945312, - "learning_rate": 1.7075659758066208e-05, - "loss": 63.1799, - "step": 160600 - }, - { - "epoch": 0.6488847230695265, - "grad_norm": 458.03302001953125, - "learning_rate": 1.7072349174865513e-05, - "loss": 38.6174, - "step": 160610 - }, - { - "epoch": 0.6489251243348941, - "grad_norm": 291.4510803222656, - "learning_rate": 1.7069038746218445e-05, - "loss": 55.229, - "step": 160620 - }, - { - "epoch": 0.6489655256002618, - "grad_norm": 2080.05322265625, - "learning_rate": 1.7065728472189563e-05, - "loss": 68.5965, - "step": 160630 - }, - { - "epoch": 0.6490059268656294, - "grad_norm": 536.862548828125, - "learning_rate": 1.7062418352843382e-05, - "loss": 67.5582, - "step": 160640 - }, - { - "epoch": 0.649046328130997, - "grad_norm": 462.50970458984375, - "learning_rate": 1.7059108388244432e-05, - "loss": 38.4309, - "step": 160650 - }, - { - "epoch": 0.6490867293963647, - "grad_norm": 838.4609985351562, - "learning_rate": 1.7055798578457266e-05, - "loss": 57.7827, - "step": 160660 - }, - { - "epoch": 0.6491271306617323, - "grad_norm": 761.4349365234375, - "learning_rate": 1.7052488923546396e-05, - "loss": 42.0861, - "step": 160670 - }, - { - "epoch": 0.6491675319271, - "grad_norm": 357.4921569824219, - "learning_rate": 1.7049179423576334e-05, - "loss": 42.084, - "step": 160680 - }, - { - "epoch": 0.6492079331924676, - "grad_norm": 565.8134765625, - "learning_rate": 1.7045870078611627e-05, - "loss": 61.3284, - "step": 160690 - }, - { - "epoch": 0.6492483344578353, - "grad_norm": 607.5779418945312, - "learning_rate": 1.7042560888716766e-05, - "loss": 43.1379, - "step": 160700 - }, - { - "epoch": 0.6492887357232029, - "grad_norm": 716.2503662109375, - "learning_rate": 1.7039251853956283e-05, - "loss": 50.0222, - "step": 160710 - }, - { - "epoch": 0.6493291369885705, - "grad_norm": 446.25128173828125, - "learning_rate": 1.703594297439469e-05, - "loss": 26.8644, - "step": 160720 - }, - { - "epoch": 0.6493695382539381, - "grad_norm": 1103.192626953125, - "learning_rate": 1.7032634250096473e-05, - "loss": 58.0758, - "step": 160730 - }, - { - "epoch": 0.6494099395193057, - "grad_norm": 270.75579833984375, - "learning_rate": 1.7029325681126167e-05, - "loss": 37.3797, - "step": 160740 - }, - { - "epoch": 0.6494503407846733, - "grad_norm": 871.2796020507812, - "learning_rate": 1.702601726754825e-05, - "loss": 58.7828, - "step": 160750 - }, - { - "epoch": 0.649490742050041, - "grad_norm": 584.755126953125, - "learning_rate": 1.702270900942724e-05, - "loss": 68.4647, - "step": 160760 - }, - { - "epoch": 0.6495311433154086, - "grad_norm": 1826.71044921875, - "learning_rate": 1.7019400906827624e-05, - "loss": 58.1163, - "step": 160770 - }, - { - "epoch": 0.6495715445807763, - "grad_norm": 397.8055419921875, - "learning_rate": 1.7016092959813893e-05, - "loss": 75.9175, - "step": 160780 - }, - { - "epoch": 0.6496119458461439, - "grad_norm": 535.3204956054688, - "learning_rate": 1.7012785168450543e-05, - "loss": 89.4188, - "step": 160790 - }, - { - "epoch": 0.6496523471115115, - "grad_norm": 877.5353393554688, - "learning_rate": 1.7009477532802054e-05, - "loss": 78.9825, - "step": 160800 - }, - { - "epoch": 0.6496927483768792, - "grad_norm": 1077.1090087890625, - "learning_rate": 1.7006170052932916e-05, - "loss": 43.3018, - "step": 160810 - }, - { - "epoch": 0.6497331496422468, - "grad_norm": 596.0875854492188, - "learning_rate": 1.700286272890761e-05, - "loss": 41.5323, - "step": 160820 - }, - { - "epoch": 0.6497735509076145, - "grad_norm": 3221.248291015625, - "learning_rate": 1.6999555560790607e-05, - "loss": 52.7061, - "step": 160830 - }, - { - "epoch": 0.6498139521729821, - "grad_norm": 436.67425537109375, - "learning_rate": 1.6996248548646394e-05, - "loss": 59.5454, - "step": 160840 - }, - { - "epoch": 0.6498543534383497, - "grad_norm": 418.7576599121094, - "learning_rate": 1.6992941692539437e-05, - "loss": 37.4969, - "step": 160850 - }, - { - "epoch": 0.6498947547037173, - "grad_norm": 775.6710815429688, - "learning_rate": 1.6989634992534194e-05, - "loss": 33.1508, - "step": 160860 - }, - { - "epoch": 0.6499351559690849, - "grad_norm": 513.3265380859375, - "learning_rate": 1.6986328448695148e-05, - "loss": 62.6621, - "step": 160870 - }, - { - "epoch": 0.6499755572344526, - "grad_norm": 1174.877685546875, - "learning_rate": 1.6983022061086763e-05, - "loss": 75.1632, - "step": 160880 - }, - { - "epoch": 0.6500159584998202, - "grad_norm": 117.83036041259766, - "learning_rate": 1.6979715829773476e-05, - "loss": 28.0209, - "step": 160890 - }, - { - "epoch": 0.6500563597651878, - "grad_norm": 667.8468627929688, - "learning_rate": 1.6976409754819767e-05, - "loss": 41.9234, - "step": 160900 - }, - { - "epoch": 0.6500967610305555, - "grad_norm": 1031.58544921875, - "learning_rate": 1.6973103836290072e-05, - "loss": 70.4715, - "step": 160910 - }, - { - "epoch": 0.6501371622959231, - "grad_norm": 372.794677734375, - "learning_rate": 1.6969798074248858e-05, - "loss": 45.1825, - "step": 160920 - }, - { - "epoch": 0.6501775635612908, - "grad_norm": 776.9822387695312, - "learning_rate": 1.6966492468760565e-05, - "loss": 63.0023, - "step": 160930 - }, - { - "epoch": 0.6502179648266584, - "grad_norm": 1337.965576171875, - "learning_rate": 1.6963187019889626e-05, - "loss": 55.9985, - "step": 160940 - }, - { - "epoch": 0.650258366092026, - "grad_norm": 828.7103881835938, - "learning_rate": 1.6959881727700508e-05, - "loss": 55.6604, - "step": 160950 - }, - { - "epoch": 0.6502987673573937, - "grad_norm": 626.9825439453125, - "learning_rate": 1.6956576592257635e-05, - "loss": 46.8431, - "step": 160960 - }, - { - "epoch": 0.6503391686227613, - "grad_norm": 660.419189453125, - "learning_rate": 1.6953271613625432e-05, - "loss": 68.8267, - "step": 160970 - }, - { - "epoch": 0.650379569888129, - "grad_norm": 528.8330078125, - "learning_rate": 1.694996679186835e-05, - "loss": 77.1253, - "step": 160980 - }, - { - "epoch": 0.6504199711534965, - "grad_norm": 540.1735229492188, - "learning_rate": 1.69466621270508e-05, - "loss": 44.3839, - "step": 160990 - }, - { - "epoch": 0.6504603724188641, - "grad_norm": 568.5739135742188, - "learning_rate": 1.6943357619237226e-05, - "loss": 46.5589, - "step": 161000 - }, - { - "epoch": 0.6505007736842318, - "grad_norm": 597.6793212890625, - "learning_rate": 1.6940053268492045e-05, - "loss": 32.6383, - "step": 161010 - }, - { - "epoch": 0.6505411749495994, - "grad_norm": 1134.3443603515625, - "learning_rate": 1.693674907487966e-05, - "loss": 44.2843, - "step": 161020 - }, - { - "epoch": 0.650581576214967, - "grad_norm": 681.8280029296875, - "learning_rate": 1.6933445038464518e-05, - "loss": 51.1013, - "step": 161030 - }, - { - "epoch": 0.6506219774803347, - "grad_norm": 1109.9951171875, - "learning_rate": 1.693014115931102e-05, - "loss": 73.4727, - "step": 161040 - }, - { - "epoch": 0.6506623787457023, - "grad_norm": 895.8487548828125, - "learning_rate": 1.6926837437483566e-05, - "loss": 84.4225, - "step": 161050 - }, - { - "epoch": 0.65070278001107, - "grad_norm": 3003.786865234375, - "learning_rate": 1.6923533873046578e-05, - "loss": 60.5545, - "step": 161060 - }, - { - "epoch": 0.6507431812764376, - "grad_norm": 1717.6845703125, - "learning_rate": 1.6920230466064446e-05, - "loss": 49.7207, - "step": 161070 - }, - { - "epoch": 0.6507835825418052, - "grad_norm": 1025.4805908203125, - "learning_rate": 1.6916927216601593e-05, - "loss": 57.6173, - "step": 161080 - }, - { - "epoch": 0.6508239838071729, - "grad_norm": 583.477783203125, - "learning_rate": 1.69136241247224e-05, - "loss": 56.1462, - "step": 161090 - }, - { - "epoch": 0.6508643850725405, - "grad_norm": 2021.0592041015625, - "learning_rate": 1.6910321190491263e-05, - "loss": 59.2203, - "step": 161100 - }, - { - "epoch": 0.650904786337908, - "grad_norm": 571.0921630859375, - "learning_rate": 1.6907018413972586e-05, - "loss": 31.7513, - "step": 161110 - }, - { - "epoch": 0.6509451876032757, - "grad_norm": 1296.876220703125, - "learning_rate": 1.690371579523075e-05, - "loss": 49.9373, - "step": 161120 - }, - { - "epoch": 0.6509855888686433, - "grad_norm": 481.8075866699219, - "learning_rate": 1.6900413334330142e-05, - "loss": 46.3444, - "step": 161130 - }, - { - "epoch": 0.651025990134011, - "grad_norm": 1343.282958984375, - "learning_rate": 1.6897111031335145e-05, - "loss": 52.6248, - "step": 161140 - }, - { - "epoch": 0.6510663913993786, - "grad_norm": 260.4144592285156, - "learning_rate": 1.689380888631014e-05, - "loss": 47.9411, - "step": 161150 - }, - { - "epoch": 0.6511067926647462, - "grad_norm": 760.5383911132812, - "learning_rate": 1.689050689931951e-05, - "loss": 46.6919, - "step": 161160 - }, - { - "epoch": 0.6511471939301139, - "grad_norm": 795.2639770507812, - "learning_rate": 1.688720507042762e-05, - "loss": 47.9917, - "step": 161170 - }, - { - "epoch": 0.6511875951954815, - "grad_norm": 892.8135375976562, - "learning_rate": 1.6883903399698833e-05, - "loss": 35.0797, - "step": 161180 - }, - { - "epoch": 0.6512279964608492, - "grad_norm": 742.6301879882812, - "learning_rate": 1.688060188719754e-05, - "loss": 46.3297, - "step": 161190 - }, - { - "epoch": 0.6512683977262168, - "grad_norm": 897.2402954101562, - "learning_rate": 1.6877300532988094e-05, - "loss": 47.0836, - "step": 161200 - }, - { - "epoch": 0.6513087989915844, - "grad_norm": 856.124267578125, - "learning_rate": 1.6873999337134846e-05, - "loss": 69.0124, - "step": 161210 - }, - { - "epoch": 0.6513492002569521, - "grad_norm": 929.366943359375, - "learning_rate": 1.6870698299702177e-05, - "loss": 55.2155, - "step": 161220 - }, - { - "epoch": 0.6513896015223197, - "grad_norm": 724.1373291015625, - "learning_rate": 1.686739742075442e-05, - "loss": 50.1667, - "step": 161230 - }, - { - "epoch": 0.6514300027876873, - "grad_norm": 252.8017578125, - "learning_rate": 1.686409670035594e-05, - "loss": 32.0923, - "step": 161240 - }, - { - "epoch": 0.6514704040530549, - "grad_norm": 1198.72900390625, - "learning_rate": 1.686079613857109e-05, - "loss": 43.797, - "step": 161250 - }, - { - "epoch": 0.6515108053184225, - "grad_norm": 369.4762268066406, - "learning_rate": 1.6857495735464195e-05, - "loss": 58.9313, - "step": 161260 - }, - { - "epoch": 0.6515512065837902, - "grad_norm": 251.4295654296875, - "learning_rate": 1.6854195491099628e-05, - "loss": 42.9671, - "step": 161270 - }, - { - "epoch": 0.6515916078491578, - "grad_norm": 441.81585693359375, - "learning_rate": 1.68508954055417e-05, - "loss": 62.5547, - "step": 161280 - }, - { - "epoch": 0.6516320091145255, - "grad_norm": 395.6822509765625, - "learning_rate": 1.6847595478854773e-05, - "loss": 41.6582, - "step": 161290 - }, - { - "epoch": 0.6516724103798931, - "grad_norm": 749.1453247070312, - "learning_rate": 1.6844295711103167e-05, - "loss": 52.6976, - "step": 161300 - }, - { - "epoch": 0.6517128116452607, - "grad_norm": 481.84503173828125, - "learning_rate": 1.6840996102351212e-05, - "loss": 49.7407, - "step": 161310 - }, - { - "epoch": 0.6517532129106284, - "grad_norm": 830.3899536132812, - "learning_rate": 1.6837696652663242e-05, - "loss": 47.5324, - "step": 161320 - }, - { - "epoch": 0.651793614175996, - "grad_norm": 1153.26123046875, - "learning_rate": 1.6834397362103577e-05, - "loss": 60.7175, - "step": 161330 - }, - { - "epoch": 0.6518340154413637, - "grad_norm": 422.6778259277344, - "learning_rate": 1.6831098230736532e-05, - "loss": 32.2861, - "step": 161340 - }, - { - "epoch": 0.6518744167067313, - "grad_norm": 689.973388671875, - "learning_rate": 1.6827799258626442e-05, - "loss": 47.7611, - "step": 161350 - }, - { - "epoch": 0.6519148179720989, - "grad_norm": 537.6976318359375, - "learning_rate": 1.6824500445837606e-05, - "loss": 54.7357, - "step": 161360 - }, - { - "epoch": 0.6519552192374665, - "grad_norm": 1012.6497802734375, - "learning_rate": 1.6821201792434345e-05, - "loss": 80.4569, - "step": 161370 - }, - { - "epoch": 0.6519956205028341, - "grad_norm": 550.6050415039062, - "learning_rate": 1.681790329848097e-05, - "loss": 48.5143, - "step": 161380 - }, - { - "epoch": 0.6520360217682017, - "grad_norm": 618.5077514648438, - "learning_rate": 1.681460496404178e-05, - "loss": 41.0049, - "step": 161390 - }, - { - "epoch": 0.6520764230335694, - "grad_norm": 497.9303894042969, - "learning_rate": 1.681130678918108e-05, - "loss": 41.7235, - "step": 161400 - }, - { - "epoch": 0.652116824298937, - "grad_norm": 1291.5350341796875, - "learning_rate": 1.6808008773963173e-05, - "loss": 70.4364, - "step": 161410 - }, - { - "epoch": 0.6521572255643047, - "grad_norm": 955.0973510742188, - "learning_rate": 1.6804710918452342e-05, - "loss": 50.173, - "step": 161420 - }, - { - "epoch": 0.6521976268296723, - "grad_norm": 647.0445556640625, - "learning_rate": 1.6801413222712904e-05, - "loss": 71.8404, - "step": 161430 - }, - { - "epoch": 0.65223802809504, - "grad_norm": 282.2113952636719, - "learning_rate": 1.6798115686809125e-05, - "loss": 52.0266, - "step": 161440 - }, - { - "epoch": 0.6522784293604076, - "grad_norm": 189.90536499023438, - "learning_rate": 1.679481831080531e-05, - "loss": 68.1988, - "step": 161450 - }, - { - "epoch": 0.6523188306257752, - "grad_norm": 474.63739013671875, - "learning_rate": 1.679152109476574e-05, - "loss": 65.4844, - "step": 161460 - }, - { - "epoch": 0.6523592318911429, - "grad_norm": 912.4492797851562, - "learning_rate": 1.6788224038754687e-05, - "loss": 70.473, - "step": 161470 - }, - { - "epoch": 0.6523996331565105, - "grad_norm": 785.5763549804688, - "learning_rate": 1.6784927142836436e-05, - "loss": 76.3289, - "step": 161480 - }, - { - "epoch": 0.6524400344218781, - "grad_norm": 614.8267211914062, - "learning_rate": 1.6781630407075257e-05, - "loss": 49.6596, - "step": 161490 - }, - { - "epoch": 0.6524804356872457, - "grad_norm": 658.1514282226562, - "learning_rate": 1.677833383153542e-05, - "loss": 59.6776, - "step": 161500 - }, - { - "epoch": 0.6525208369526133, - "grad_norm": 416.6296081542969, - "learning_rate": 1.677503741628121e-05, - "loss": 48.725, - "step": 161510 - }, - { - "epoch": 0.652561238217981, - "grad_norm": 769.060791015625, - "learning_rate": 1.6771741161376862e-05, - "loss": 43.6589, - "step": 161520 - }, - { - "epoch": 0.6526016394833486, - "grad_norm": 1873.2025146484375, - "learning_rate": 1.676844506688667e-05, - "loss": 76.2816, - "step": 161530 - }, - { - "epoch": 0.6526420407487162, - "grad_norm": 318.47247314453125, - "learning_rate": 1.6765149132874882e-05, - "loss": 39.9869, - "step": 161540 - }, - { - "epoch": 0.6526824420140839, - "grad_norm": 203.8809814453125, - "learning_rate": 1.6761853359405737e-05, - "loss": 39.4577, - "step": 161550 - }, - { - "epoch": 0.6527228432794515, - "grad_norm": 635.0053100585938, - "learning_rate": 1.6758557746543518e-05, - "loss": 61.5434, - "step": 161560 - }, - { - "epoch": 0.6527632445448192, - "grad_norm": 848.6380004882812, - "learning_rate": 1.675526229435245e-05, - "loss": 60.4842, - "step": 161570 - }, - { - "epoch": 0.6528036458101868, - "grad_norm": 253.58030700683594, - "learning_rate": 1.675196700289679e-05, - "loss": 40.0244, - "step": 161580 - }, - { - "epoch": 0.6528440470755544, - "grad_norm": 3136.26025390625, - "learning_rate": 1.6748671872240785e-05, - "loss": 46.3562, - "step": 161590 - }, - { - "epoch": 0.6528844483409221, - "grad_norm": 560.5701904296875, - "learning_rate": 1.6745376902448656e-05, - "loss": 104.8585, - "step": 161600 - }, - { - "epoch": 0.6529248496062897, - "grad_norm": 1130.5245361328125, - "learning_rate": 1.6742082093584667e-05, - "loss": 52.8417, - "step": 161610 - }, - { - "epoch": 0.6529652508716574, - "grad_norm": 197.07540893554688, - "learning_rate": 1.6738787445713037e-05, - "loss": 54.9652, - "step": 161620 - }, - { - "epoch": 0.6530056521370249, - "grad_norm": 579.043701171875, - "learning_rate": 1.673549295889799e-05, - "loss": 37.0909, - "step": 161630 - }, - { - "epoch": 0.6530460534023925, - "grad_norm": 486.1392517089844, - "learning_rate": 1.6732198633203773e-05, - "loss": 55.2478, - "step": 161640 - }, - { - "epoch": 0.6530864546677602, - "grad_norm": 735.452392578125, - "learning_rate": 1.67289044686946e-05, - "loss": 43.7261, - "step": 161650 - }, - { - "epoch": 0.6531268559331278, - "grad_norm": 1341.6444091796875, - "learning_rate": 1.672561046543469e-05, - "loss": 53.2419, - "step": 161660 - }, - { - "epoch": 0.6531672571984954, - "grad_norm": 713.711181640625, - "learning_rate": 1.6722316623488262e-05, - "loss": 35.3926, - "step": 161670 - }, - { - "epoch": 0.6532076584638631, - "grad_norm": 1008.7979125976562, - "learning_rate": 1.6719022942919527e-05, - "loss": 34.2371, - "step": 161680 - }, - { - "epoch": 0.6532480597292307, - "grad_norm": 1110.8104248046875, - "learning_rate": 1.6715729423792714e-05, - "loss": 44.102, - "step": 161690 - }, - { - "epoch": 0.6532884609945984, - "grad_norm": 527.839599609375, - "learning_rate": 1.671243606617202e-05, - "loss": 39.0247, - "step": 161700 - }, - { - "epoch": 0.653328862259966, - "grad_norm": 533.70849609375, - "learning_rate": 1.6709142870121643e-05, - "loss": 53.8311, - "step": 161710 - }, - { - "epoch": 0.6533692635253336, - "grad_norm": 165.14747619628906, - "learning_rate": 1.6705849835705802e-05, - "loss": 41.9429, - "step": 161720 - }, - { - "epoch": 0.6534096647907013, - "grad_norm": 762.746337890625, - "learning_rate": 1.6702556962988693e-05, - "loss": 66.698, - "step": 161730 - }, - { - "epoch": 0.6534500660560689, - "grad_norm": 1566.11328125, - "learning_rate": 1.6699264252034497e-05, - "loss": 66.9996, - "step": 161740 - }, - { - "epoch": 0.6534904673214365, - "grad_norm": 1001.0677490234375, - "learning_rate": 1.6695971702907426e-05, - "loss": 88.9084, - "step": 161750 - }, - { - "epoch": 0.6535308685868041, - "grad_norm": 656.9507446289062, - "learning_rate": 1.669267931567165e-05, - "loss": 42.3577, - "step": 161760 - }, - { - "epoch": 0.6535712698521717, - "grad_norm": 1003.6537475585938, - "learning_rate": 1.668938709039138e-05, - "loss": 62.9774, - "step": 161770 - }, - { - "epoch": 0.6536116711175394, - "grad_norm": 879.7156372070312, - "learning_rate": 1.6686095027130783e-05, - "loss": 50.1465, - "step": 161780 - }, - { - "epoch": 0.653652072382907, - "grad_norm": 709.9041137695312, - "learning_rate": 1.6682803125954037e-05, - "loss": 45.2575, - "step": 161790 - }, - { - "epoch": 0.6536924736482747, - "grad_norm": 557.7595825195312, - "learning_rate": 1.6679511386925337e-05, - "loss": 69.0358, - "step": 161800 - }, - { - "epoch": 0.6537328749136423, - "grad_norm": 855.1320190429688, - "learning_rate": 1.667621981010884e-05, - "loss": 45.8865, - "step": 161810 - }, - { - "epoch": 0.6537732761790099, - "grad_norm": 1192.5213623046875, - "learning_rate": 1.6672928395568727e-05, - "loss": 58.5781, - "step": 161820 - }, - { - "epoch": 0.6538136774443776, - "grad_norm": 753.4164428710938, - "learning_rate": 1.666963714336916e-05, - "loss": 49.3136, - "step": 161830 - }, - { - "epoch": 0.6538540787097452, - "grad_norm": 836.4158935546875, - "learning_rate": 1.66663460535743e-05, - "loss": 38.0138, - "step": 161840 - }, - { - "epoch": 0.6538944799751129, - "grad_norm": 480.0657043457031, - "learning_rate": 1.6663055126248326e-05, - "loss": 43.5118, - "step": 161850 - }, - { - "epoch": 0.6539348812404805, - "grad_norm": 971.6677856445312, - "learning_rate": 1.6659764361455383e-05, - "loss": 46.9971, - "step": 161860 - }, - { - "epoch": 0.6539752825058481, - "grad_norm": 603.728271484375, - "learning_rate": 1.6656473759259614e-05, - "loss": 64.1542, - "step": 161870 - }, - { - "epoch": 0.6540156837712157, - "grad_norm": 2384.861328125, - "learning_rate": 1.6653183319725195e-05, - "loss": 45.0268, - "step": 161880 - }, - { - "epoch": 0.6540560850365833, - "grad_norm": 783.9608764648438, - "learning_rate": 1.6649893042916258e-05, - "loss": 52.3407, - "step": 161890 - }, - { - "epoch": 0.6540964863019509, - "grad_norm": 922.7523193359375, - "learning_rate": 1.6646602928896963e-05, - "loss": 43.5483, - "step": 161900 - }, - { - "epoch": 0.6541368875673186, - "grad_norm": 1093.2479248046875, - "learning_rate": 1.6643312977731445e-05, - "loss": 57.16, - "step": 161910 - }, - { - "epoch": 0.6541772888326862, - "grad_norm": 265.1941833496094, - "learning_rate": 1.6640023189483835e-05, - "loss": 43.1179, - "step": 161920 - }, - { - "epoch": 0.6542176900980539, - "grad_norm": 732.4922485351562, - "learning_rate": 1.663673356421829e-05, - "loss": 38.3712, - "step": 161930 - }, - { - "epoch": 0.6542580913634215, - "grad_norm": 2143.16162109375, - "learning_rate": 1.6633444101998917e-05, - "loss": 56.0343, - "step": 161940 - }, - { - "epoch": 0.6542984926287891, - "grad_norm": 746.3717041015625, - "learning_rate": 1.663015480288986e-05, - "loss": 48.6199, - "step": 161950 - }, - { - "epoch": 0.6543388938941568, - "grad_norm": 1015.5712280273438, - "learning_rate": 1.662686566695525e-05, - "loss": 47.5555, - "step": 161960 - }, - { - "epoch": 0.6543792951595244, - "grad_norm": 997.4241333007812, - "learning_rate": 1.6623576694259194e-05, - "loss": 64.299, - "step": 161970 - }, - { - "epoch": 0.6544196964248921, - "grad_norm": 938.6635131835938, - "learning_rate": 1.662028788486583e-05, - "loss": 50.552, - "step": 161980 - }, - { - "epoch": 0.6544600976902597, - "grad_norm": 602.8508911132812, - "learning_rate": 1.6616999238839277e-05, - "loss": 67.2282, - "step": 161990 - }, - { - "epoch": 0.6545004989556273, - "grad_norm": 561.122314453125, - "learning_rate": 1.6613710756243626e-05, - "loss": 51.4449, - "step": 162000 - }, - { - "epoch": 0.6545409002209949, - "grad_norm": 705.6483154296875, - "learning_rate": 1.6610422437143007e-05, - "loss": 63.1683, - "step": 162010 - }, - { - "epoch": 0.6545813014863625, - "grad_norm": 537.274658203125, - "learning_rate": 1.6607134281601526e-05, - "loss": 49.2148, - "step": 162020 - }, - { - "epoch": 0.6546217027517302, - "grad_norm": 213.13555908203125, - "learning_rate": 1.6603846289683273e-05, - "loss": 95.6635, - "step": 162030 - }, - { - "epoch": 0.6546621040170978, - "grad_norm": 749.0859985351562, - "learning_rate": 1.660055846145237e-05, - "loss": 51.4249, - "step": 162040 - }, - { - "epoch": 0.6547025052824654, - "grad_norm": 551.2670288085938, - "learning_rate": 1.659727079697289e-05, - "loss": 31.3299, - "step": 162050 - }, - { - "epoch": 0.6547429065478331, - "grad_norm": 787.1797485351562, - "learning_rate": 1.659398329630895e-05, - "loss": 44.1481, - "step": 162060 - }, - { - "epoch": 0.6547833078132007, - "grad_norm": 611.82958984375, - "learning_rate": 1.659069595952464e-05, - "loss": 50.7975, - "step": 162070 - }, - { - "epoch": 0.6548237090785684, - "grad_norm": 473.4167785644531, - "learning_rate": 1.6587408786684033e-05, - "loss": 47.7533, - "step": 162080 - }, - { - "epoch": 0.654864110343936, - "grad_norm": 938.8263549804688, - "learning_rate": 1.6584121777851226e-05, - "loss": 37.6045, - "step": 162090 - }, - { - "epoch": 0.6549045116093036, - "grad_norm": 722.9107666015625, - "learning_rate": 1.65808349330903e-05, - "loss": 39.03, - "step": 162100 - }, - { - "epoch": 0.6549449128746713, - "grad_norm": 877.0104370117188, - "learning_rate": 1.6577548252465325e-05, - "loss": 24.0046, - "step": 162110 - }, - { - "epoch": 0.6549853141400389, - "grad_norm": 733.263671875, - "learning_rate": 1.6574261736040387e-05, - "loss": 44.6276, - "step": 162120 - }, - { - "epoch": 0.6550257154054066, - "grad_norm": 327.0345764160156, - "learning_rate": 1.6570975383879546e-05, - "loss": 39.1917, - "step": 162130 - }, - { - "epoch": 0.6550661166707741, - "grad_norm": 468.39044189453125, - "learning_rate": 1.6567689196046894e-05, - "loss": 40.167, - "step": 162140 - }, - { - "epoch": 0.6551065179361417, - "grad_norm": 842.412109375, - "learning_rate": 1.6564403172606475e-05, - "loss": 57.5191, - "step": 162150 - }, - { - "epoch": 0.6551469192015094, - "grad_norm": 594.8579711914062, - "learning_rate": 1.656111731362236e-05, - "loss": 68.3253, - "step": 162160 - }, - { - "epoch": 0.655187320466877, - "grad_norm": 553.1251220703125, - "learning_rate": 1.6557831619158605e-05, - "loss": 41.9115, - "step": 162170 - }, - { - "epoch": 0.6552277217322446, - "grad_norm": 2678.964599609375, - "learning_rate": 1.6554546089279273e-05, - "loss": 68.2033, - "step": 162180 - }, - { - "epoch": 0.6552681229976123, - "grad_norm": 812.5203857421875, - "learning_rate": 1.6551260724048408e-05, - "loss": 39.7395, - "step": 162190 - }, - { - "epoch": 0.6553085242629799, - "grad_norm": 776.0007934570312, - "learning_rate": 1.6547975523530075e-05, - "loss": 46.6357, - "step": 162200 - }, - { - "epoch": 0.6553489255283476, - "grad_norm": 824.1729736328125, - "learning_rate": 1.6544690487788294e-05, - "loss": 62.6703, - "step": 162210 - }, - { - "epoch": 0.6553893267937152, - "grad_norm": 855.05029296875, - "learning_rate": 1.6541405616887137e-05, - "loss": 67.054, - "step": 162220 - }, - { - "epoch": 0.6554297280590828, - "grad_norm": 578.1856079101562, - "learning_rate": 1.6538120910890634e-05, - "loss": 57.6205, - "step": 162230 - }, - { - "epoch": 0.6554701293244505, - "grad_norm": 680.5081787109375, - "learning_rate": 1.6534836369862806e-05, - "loss": 83.3065, - "step": 162240 - }, - { - "epoch": 0.6555105305898181, - "grad_norm": 532.9240112304688, - "learning_rate": 1.6531551993867717e-05, - "loss": 28.7695, - "step": 162250 - }, - { - "epoch": 0.6555509318551858, - "grad_norm": 596.7019653320312, - "learning_rate": 1.652826778296938e-05, - "loss": 44.5479, - "step": 162260 - }, - { - "epoch": 0.6555913331205533, - "grad_norm": 502.9052429199219, - "learning_rate": 1.6524983737231818e-05, - "loss": 48.7583, - "step": 162270 - }, - { - "epoch": 0.6556317343859209, - "grad_norm": 494.0071716308594, - "learning_rate": 1.6521699856719062e-05, - "loss": 50.4662, - "step": 162280 - }, - { - "epoch": 0.6556721356512886, - "grad_norm": 1018.645751953125, - "learning_rate": 1.651841614149513e-05, - "loss": 76.721, - "step": 162290 - }, - { - "epoch": 0.6557125369166562, - "grad_norm": 755.8010864257812, - "learning_rate": 1.6515132591624048e-05, - "loss": 48.0349, - "step": 162300 - }, - { - "epoch": 0.6557529381820238, - "grad_norm": 1917.8994140625, - "learning_rate": 1.6511849207169826e-05, - "loss": 59.3606, - "step": 162310 - }, - { - "epoch": 0.6557933394473915, - "grad_norm": 778.1441650390625, - "learning_rate": 1.650856598819646e-05, - "loss": 40.8655, - "step": 162320 - }, - { - "epoch": 0.6558337407127591, - "grad_norm": 385.09332275390625, - "learning_rate": 1.6505282934767986e-05, - "loss": 69.3711, - "step": 162330 - }, - { - "epoch": 0.6558741419781268, - "grad_norm": 2902.443603515625, - "learning_rate": 1.650200004694839e-05, - "loss": 74.7698, - "step": 162340 - }, - { - "epoch": 0.6559145432434944, - "grad_norm": 908.2015991210938, - "learning_rate": 1.6498717324801683e-05, - "loss": 43.1564, - "step": 162350 - }, - { - "epoch": 0.655954944508862, - "grad_norm": 1079.4625244140625, - "learning_rate": 1.6495434768391855e-05, - "loss": 84.2936, - "step": 162360 - }, - { - "epoch": 0.6559953457742297, - "grad_norm": 959.7824096679688, - "learning_rate": 1.6492152377782898e-05, - "loss": 50.2236, - "step": 162370 - }, - { - "epoch": 0.6560357470395973, - "grad_norm": 530.7819213867188, - "learning_rate": 1.6488870153038815e-05, - "loss": 34.8689, - "step": 162380 - }, - { - "epoch": 0.6560761483049649, - "grad_norm": 622.3926391601562, - "learning_rate": 1.6485588094223597e-05, - "loss": 36.584, - "step": 162390 - }, - { - "epoch": 0.6561165495703325, - "grad_norm": 1052.944580078125, - "learning_rate": 1.648230620140121e-05, - "loss": 51.4176, - "step": 162400 - }, - { - "epoch": 0.6561569508357001, - "grad_norm": 1051.1805419921875, - "learning_rate": 1.6479024474635656e-05, - "loss": 52.1934, - "step": 162410 - }, - { - "epoch": 0.6561973521010678, - "grad_norm": 729.2684326171875, - "learning_rate": 1.6475742913990906e-05, - "loss": 39.7446, - "step": 162420 - }, - { - "epoch": 0.6562377533664354, - "grad_norm": 1571.4078369140625, - "learning_rate": 1.647246151953094e-05, - "loss": 68.3066, - "step": 162430 - }, - { - "epoch": 0.6562781546318031, - "grad_norm": 1100.865478515625, - "learning_rate": 1.6469180291319723e-05, - "loss": 52.9918, - "step": 162440 - }, - { - "epoch": 0.6563185558971707, - "grad_norm": 888.1741943359375, - "learning_rate": 1.6465899229421223e-05, - "loss": 84.3379, - "step": 162450 - }, - { - "epoch": 0.6563589571625383, - "grad_norm": 887.6325073242188, - "learning_rate": 1.6462618333899422e-05, - "loss": 66.6163, - "step": 162460 - }, - { - "epoch": 0.656399358427906, - "grad_norm": 631.6770629882812, - "learning_rate": 1.645933760481827e-05, - "loss": 55.3771, - "step": 162470 - }, - { - "epoch": 0.6564397596932736, - "grad_norm": 2138.152587890625, - "learning_rate": 1.645605704224172e-05, - "loss": 51.0784, - "step": 162480 - }, - { - "epoch": 0.6564801609586413, - "grad_norm": 785.101318359375, - "learning_rate": 1.6452776646233742e-05, - "loss": 40.2111, - "step": 162490 - }, - { - "epoch": 0.6565205622240089, - "grad_norm": 830.0779418945312, - "learning_rate": 1.6449496416858284e-05, - "loss": 56.9483, - "step": 162500 - }, - { - "epoch": 0.6565609634893765, - "grad_norm": 341.7655334472656, - "learning_rate": 1.644621635417929e-05, - "loss": 68.2469, - "step": 162510 - }, - { - "epoch": 0.6566013647547441, - "grad_norm": 357.12286376953125, - "learning_rate": 1.644293645826072e-05, - "loss": 47.8341, - "step": 162520 - }, - { - "epoch": 0.6566417660201117, - "grad_norm": 1889.9239501953125, - "learning_rate": 1.6439656729166508e-05, - "loss": 68.8827, - "step": 162530 - }, - { - "epoch": 0.6566821672854793, - "grad_norm": 347.9149475097656, - "learning_rate": 1.6436377166960597e-05, - "loss": 29.565, - "step": 162540 - }, - { - "epoch": 0.656722568550847, - "grad_norm": 763.558837890625, - "learning_rate": 1.643309777170692e-05, - "loss": 52.3415, - "step": 162550 - }, - { - "epoch": 0.6567629698162146, - "grad_norm": 651.407470703125, - "learning_rate": 1.6429818543469406e-05, - "loss": 53.0294, - "step": 162560 - }, - { - "epoch": 0.6568033710815823, - "grad_norm": 874.4526977539062, - "learning_rate": 1.6426539482311998e-05, - "loss": 56.1281, - "step": 162570 - }, - { - "epoch": 0.6568437723469499, - "grad_norm": 1284.8436279296875, - "learning_rate": 1.642326058829861e-05, - "loss": 61.655, - "step": 162580 - }, - { - "epoch": 0.6568841736123175, - "grad_norm": 1306.5478515625, - "learning_rate": 1.641998186149318e-05, - "loss": 43.2158, - "step": 162590 - }, - { - "epoch": 0.6569245748776852, - "grad_norm": 624.8187255859375, - "learning_rate": 1.6416703301959622e-05, - "loss": 47.5994, - "step": 162600 - }, - { - "epoch": 0.6569649761430528, - "grad_norm": 691.6862182617188, - "learning_rate": 1.6413424909761846e-05, - "loss": 53.6551, - "step": 162610 - }, - { - "epoch": 0.6570053774084205, - "grad_norm": 1279.94140625, - "learning_rate": 1.6410146684963777e-05, - "loss": 87.3749, - "step": 162620 - }, - { - "epoch": 0.6570457786737881, - "grad_norm": 1330.4853515625, - "learning_rate": 1.6406868627629323e-05, - "loss": 91.1492, - "step": 162630 - }, - { - "epoch": 0.6570861799391557, - "grad_norm": 769.7970581054688, - "learning_rate": 1.6403590737822376e-05, - "loss": 61.354, - "step": 162640 - }, - { - "epoch": 0.6571265812045233, - "grad_norm": 352.4034423828125, - "learning_rate": 1.6400313015606865e-05, - "loss": 51.5814, - "step": 162650 - }, - { - "epoch": 0.6571669824698909, - "grad_norm": 1086.769775390625, - "learning_rate": 1.6397035461046672e-05, - "loss": 39.5388, - "step": 162660 - }, - { - "epoch": 0.6572073837352586, - "grad_norm": 900.6954345703125, - "learning_rate": 1.6393758074205708e-05, - "loss": 39.8533, - "step": 162670 - }, - { - "epoch": 0.6572477850006262, - "grad_norm": 918.2764892578125, - "learning_rate": 1.6390480855147862e-05, - "loss": 54.463, - "step": 162680 - }, - { - "epoch": 0.6572881862659938, - "grad_norm": 147.70904541015625, - "learning_rate": 1.638720380393702e-05, - "loss": 66.0242, - "step": 162690 - }, - { - "epoch": 0.6573285875313615, - "grad_norm": 796.7276611328125, - "learning_rate": 1.6383926920637077e-05, - "loss": 50.0915, - "step": 162700 - }, - { - "epoch": 0.6573689887967291, - "grad_norm": 727.0776977539062, - "learning_rate": 1.6380650205311917e-05, - "loss": 82.3581, - "step": 162710 - }, - { - "epoch": 0.6574093900620968, - "grad_norm": 623.9905395507812, - "learning_rate": 1.637737365802541e-05, - "loss": 61.5921, - "step": 162720 - }, - { - "epoch": 0.6574497913274644, - "grad_norm": 728.1505126953125, - "learning_rate": 1.637409727884145e-05, - "loss": 44.927, - "step": 162730 - }, - { - "epoch": 0.657490192592832, - "grad_norm": 607.5389404296875, - "learning_rate": 1.6370821067823893e-05, - "loss": 53.0443, - "step": 162740 - }, - { - "epoch": 0.6575305938581997, - "grad_norm": 762.04541015625, - "learning_rate": 1.6367545025036636e-05, - "loss": 63.8751, - "step": 162750 - }, - { - "epoch": 0.6575709951235673, - "grad_norm": 425.23284912109375, - "learning_rate": 1.6364269150543532e-05, - "loss": 50.8446, - "step": 162760 - }, - { - "epoch": 0.657611396388935, - "grad_norm": 722.2022094726562, - "learning_rate": 1.636099344440844e-05, - "loss": 56.3553, - "step": 162770 - }, - { - "epoch": 0.6576517976543025, - "grad_norm": 1616.810791015625, - "learning_rate": 1.635771790669523e-05, - "loss": 45.0226, - "step": 162780 - }, - { - "epoch": 0.6576921989196701, - "grad_norm": 447.7799377441406, - "learning_rate": 1.6354442537467757e-05, - "loss": 67.5927, - "step": 162790 - }, - { - "epoch": 0.6577326001850378, - "grad_norm": 717.3369140625, - "learning_rate": 1.635116733678988e-05, - "loss": 37.616, - "step": 162800 - }, - { - "epoch": 0.6577730014504054, - "grad_norm": 862.1865844726562, - "learning_rate": 1.6347892304725454e-05, - "loss": 62.0235, - "step": 162810 - }, - { - "epoch": 0.657813402715773, - "grad_norm": 460.8524169921875, - "learning_rate": 1.634461744133831e-05, - "loss": 58.7466, - "step": 162820 - }, - { - "epoch": 0.6578538039811407, - "grad_norm": 198.9490509033203, - "learning_rate": 1.6341342746692317e-05, - "loss": 53.2849, - "step": 162830 - }, - { - "epoch": 0.6578942052465083, - "grad_norm": 498.9598693847656, - "learning_rate": 1.6338068220851306e-05, - "loss": 51.7222, - "step": 162840 - }, - { - "epoch": 0.657934606511876, - "grad_norm": 794.1101684570312, - "learning_rate": 1.63347938638791e-05, - "loss": 49.5184, - "step": 162850 - }, - { - "epoch": 0.6579750077772436, - "grad_norm": 499.0705261230469, - "learning_rate": 1.633151967583956e-05, - "loss": 38.8184, - "step": 162860 - }, - { - "epoch": 0.6580154090426112, - "grad_norm": 963.583984375, - "learning_rate": 1.6328245656796503e-05, - "loss": 60.0164, - "step": 162870 - }, - { - "epoch": 0.6580558103079789, - "grad_norm": 996.2311401367188, - "learning_rate": 1.6324971806813767e-05, - "loss": 44.6584, - "step": 162880 - }, - { - "epoch": 0.6580962115733465, - "grad_norm": 632.038330078125, - "learning_rate": 1.632169812595517e-05, - "loss": 36.6395, - "step": 162890 - }, - { - "epoch": 0.6581366128387142, - "grad_norm": 457.18304443359375, - "learning_rate": 1.6318424614284524e-05, - "loss": 55.2279, - "step": 162900 - }, - { - "epoch": 0.6581770141040817, - "grad_norm": 841.80029296875, - "learning_rate": 1.6315151271865672e-05, - "loss": 61.0696, - "step": 162910 - }, - { - "epoch": 0.6582174153694493, - "grad_norm": 1407.9495849609375, - "learning_rate": 1.6311878098762417e-05, - "loss": 35.4202, - "step": 162920 - }, - { - "epoch": 0.658257816634817, - "grad_norm": 456.8638610839844, - "learning_rate": 1.6308605095038566e-05, - "loss": 56.0512, - "step": 162930 - }, - { - "epoch": 0.6582982179001846, - "grad_norm": 626.9701538085938, - "learning_rate": 1.6305332260757936e-05, - "loss": 43.3455, - "step": 162940 - }, - { - "epoch": 0.6583386191655523, - "grad_norm": 555.2235107421875, - "learning_rate": 1.6302059595984327e-05, - "loss": 42.8999, - "step": 162950 - }, - { - "epoch": 0.6583790204309199, - "grad_norm": 300.74713134765625, - "learning_rate": 1.629878710078155e-05, - "loss": 55.2905, - "step": 162960 - }, - { - "epoch": 0.6584194216962875, - "grad_norm": 1130.7760009765625, - "learning_rate": 1.6295514775213398e-05, - "loss": 41.7524, - "step": 162970 - }, - { - "epoch": 0.6584598229616552, - "grad_norm": 816.3352661132812, - "learning_rate": 1.629224261934366e-05, - "loss": 61.8027, - "step": 162980 - }, - { - "epoch": 0.6585002242270228, - "grad_norm": 655.1786499023438, - "learning_rate": 1.628897063323614e-05, - "loss": 24.371, - "step": 162990 - }, - { - "epoch": 0.6585406254923905, - "grad_norm": 2886.60498046875, - "learning_rate": 1.6285698816954624e-05, - "loss": 69.9516, - "step": 163000 - }, - { - "epoch": 0.6585810267577581, - "grad_norm": 1917.0316162109375, - "learning_rate": 1.628242717056289e-05, - "loss": 67.4653, - "step": 163010 - }, - { - "epoch": 0.6586214280231257, - "grad_norm": 971.503662109375, - "learning_rate": 1.6279155694124732e-05, - "loss": 41.1, - "step": 163020 - }, - { - "epoch": 0.6586618292884933, - "grad_norm": 797.81005859375, - "learning_rate": 1.6275884387703918e-05, - "loss": 52.8087, - "step": 163030 - }, - { - "epoch": 0.6587022305538609, - "grad_norm": 1382.03955078125, - "learning_rate": 1.6272613251364237e-05, - "loss": 49.5148, - "step": 163040 - }, - { - "epoch": 0.6587426318192285, - "grad_norm": 612.2030639648438, - "learning_rate": 1.6269342285169453e-05, - "loss": 30.3748, - "step": 163050 - }, - { - "epoch": 0.6587830330845962, - "grad_norm": 528.9710693359375, - "learning_rate": 1.6266071489183327e-05, - "loss": 63.2863, - "step": 163060 - }, - { - "epoch": 0.6588234343499638, - "grad_norm": 707.3026123046875, - "learning_rate": 1.626280086346964e-05, - "loss": 64.9225, - "step": 163070 - }, - { - "epoch": 0.6588638356153315, - "grad_norm": 1027.89892578125, - "learning_rate": 1.6259530408092154e-05, - "loss": 57.8081, - "step": 163080 - }, - { - "epoch": 0.6589042368806991, - "grad_norm": 432.06866455078125, - "learning_rate": 1.625626012311461e-05, - "loss": 47.481, - "step": 163090 - }, - { - "epoch": 0.6589446381460667, - "grad_norm": 501.8659362792969, - "learning_rate": 1.6252990008600784e-05, - "loss": 54.4411, - "step": 163100 - }, - { - "epoch": 0.6589850394114344, - "grad_norm": 1181.19677734375, - "learning_rate": 1.6249720064614417e-05, - "loss": 70.2707, - "step": 163110 - }, - { - "epoch": 0.659025440676802, - "grad_norm": 874.8756103515625, - "learning_rate": 1.6246450291219266e-05, - "loss": 75.9436, - "step": 163120 - }, - { - "epoch": 0.6590658419421697, - "grad_norm": 452.441162109375, - "learning_rate": 1.624318068847907e-05, - "loss": 31.4531, - "step": 163130 - }, - { - "epoch": 0.6591062432075373, - "grad_norm": 625.8572998046875, - "learning_rate": 1.623991125645758e-05, - "loss": 34.9913, - "step": 163140 - }, - { - "epoch": 0.6591466444729049, - "grad_norm": 631.9246215820312, - "learning_rate": 1.623664199521853e-05, - "loss": 41.2059, - "step": 163150 - }, - { - "epoch": 0.6591870457382725, - "grad_norm": 444.54461669921875, - "learning_rate": 1.6233372904825656e-05, - "loss": 48.9469, - "step": 163160 - }, - { - "epoch": 0.6592274470036401, - "grad_norm": 860.9658813476562, - "learning_rate": 1.623010398534268e-05, - "loss": 38.0498, - "step": 163170 - }, - { - "epoch": 0.6592678482690078, - "grad_norm": 545.662109375, - "learning_rate": 1.6226835236833354e-05, - "loss": 44.3886, - "step": 163180 - }, - { - "epoch": 0.6593082495343754, - "grad_norm": 326.8611145019531, - "learning_rate": 1.622356665936138e-05, - "loss": 69.7939, - "step": 163190 - }, - { - "epoch": 0.659348650799743, - "grad_norm": 728.9844360351562, - "learning_rate": 1.6220298252990502e-05, - "loss": 62.5377, - "step": 163200 - }, - { - "epoch": 0.6593890520651107, - "grad_norm": 428.49652099609375, - "learning_rate": 1.621703001778443e-05, - "loss": 37.8547, - "step": 163210 - }, - { - "epoch": 0.6594294533304783, - "grad_norm": 528.7867431640625, - "learning_rate": 1.621376195380688e-05, - "loss": 72.5068, - "step": 163220 - }, - { - "epoch": 0.659469854595846, - "grad_norm": 865.8905639648438, - "learning_rate": 1.6210494061121562e-05, - "loss": 52.1617, - "step": 163230 - }, - { - "epoch": 0.6595102558612136, - "grad_norm": 629.0953979492188, - "learning_rate": 1.620722633979219e-05, - "loss": 59.1401, - "step": 163240 - }, - { - "epoch": 0.6595506571265812, - "grad_norm": 538.1380615234375, - "learning_rate": 1.6203958789882456e-05, - "loss": 49.7746, - "step": 163250 - }, - { - "epoch": 0.6595910583919489, - "grad_norm": 967.8890380859375, - "learning_rate": 1.6200691411456084e-05, - "loss": 55.0573, - "step": 163260 - }, - { - "epoch": 0.6596314596573165, - "grad_norm": 1064.715576171875, - "learning_rate": 1.6197424204576757e-05, - "loss": 52.5899, - "step": 163270 - }, - { - "epoch": 0.6596718609226842, - "grad_norm": 542.53466796875, - "learning_rate": 1.6194157169308182e-05, - "loss": 55.8167, - "step": 163280 - }, - { - "epoch": 0.6597122621880517, - "grad_norm": 432.77374267578125, - "learning_rate": 1.619089030571405e-05, - "loss": 37.0786, - "step": 163290 - }, - { - "epoch": 0.6597526634534193, - "grad_norm": 581.6807250976562, - "learning_rate": 1.6187623613858038e-05, - "loss": 44.2656, - "step": 163300 - }, - { - "epoch": 0.659793064718787, - "grad_norm": 0.0, - "learning_rate": 1.6184357093803847e-05, - "loss": 45.494, - "step": 163310 - }, - { - "epoch": 0.6598334659841546, - "grad_norm": 466.3706359863281, - "learning_rate": 1.6181090745615147e-05, - "loss": 32.6534, - "step": 163320 - }, - { - "epoch": 0.6598738672495222, - "grad_norm": 1236.4456787109375, - "learning_rate": 1.617782456935563e-05, - "loss": 50.6887, - "step": 163330 - }, - { - "epoch": 0.6599142685148899, - "grad_norm": 404.6134948730469, - "learning_rate": 1.6174558565088965e-05, - "loss": 58.2112, - "step": 163340 - }, - { - "epoch": 0.6599546697802575, - "grad_norm": 314.0501708984375, - "learning_rate": 1.6171292732878812e-05, - "loss": 18.9486, - "step": 163350 - }, - { - "epoch": 0.6599950710456252, - "grad_norm": 915.3804931640625, - "learning_rate": 1.6168027072788867e-05, - "loss": 55.3569, - "step": 163360 - }, - { - "epoch": 0.6600354723109928, - "grad_norm": 1853.2662353515625, - "learning_rate": 1.616476158488278e-05, - "loss": 68.6005, - "step": 163370 - }, - { - "epoch": 0.6600758735763604, - "grad_norm": 1049.0845947265625, - "learning_rate": 1.6161496269224208e-05, - "loss": 55.2401, - "step": 163380 - }, - { - "epoch": 0.6601162748417281, - "grad_norm": 969.3112182617188, - "learning_rate": 1.6158231125876823e-05, - "loss": 47.6416, - "step": 163390 - }, - { - "epoch": 0.6601566761070957, - "grad_norm": 595.2536010742188, - "learning_rate": 1.6154966154904265e-05, - "loss": 52.834, - "step": 163400 - }, - { - "epoch": 0.6601970773724634, - "grad_norm": 7205.93701171875, - "learning_rate": 1.6151701356370203e-05, - "loss": 108.3844, - "step": 163410 - }, - { - "epoch": 0.6602374786378309, - "grad_norm": 1252.0986328125, - "learning_rate": 1.614843673033828e-05, - "loss": 53.7415, - "step": 163420 - }, - { - "epoch": 0.6602778799031985, - "grad_norm": 825.3316650390625, - "learning_rate": 1.614517227687213e-05, - "loss": 38.9342, - "step": 163430 - }, - { - "epoch": 0.6603182811685662, - "grad_norm": 818.31689453125, - "learning_rate": 1.6141907996035415e-05, - "loss": 49.7294, - "step": 163440 - }, - { - "epoch": 0.6603586824339338, - "grad_norm": 455.1985168457031, - "learning_rate": 1.6138643887891763e-05, - "loss": 52.1195, - "step": 163450 - }, - { - "epoch": 0.6603990836993014, - "grad_norm": 499.15533447265625, - "learning_rate": 1.613537995250481e-05, - "loss": 62.1534, - "step": 163460 - }, - { - "epoch": 0.6604394849646691, - "grad_norm": 266.7299499511719, - "learning_rate": 1.6132116189938185e-05, - "loss": 43.204, - "step": 163470 - }, - { - "epoch": 0.6604798862300367, - "grad_norm": 527.0379028320312, - "learning_rate": 1.612885260025552e-05, - "loss": 49.2214, - "step": 163480 - }, - { - "epoch": 0.6605202874954044, - "grad_norm": 542.5224609375, - "learning_rate": 1.6125589183520445e-05, - "loss": 40.1502, - "step": 163490 - }, - { - "epoch": 0.660560688760772, - "grad_norm": 489.45208740234375, - "learning_rate": 1.612232593979658e-05, - "loss": 54.8088, - "step": 163500 - }, - { - "epoch": 0.6606010900261396, - "grad_norm": 346.20880126953125, - "learning_rate": 1.611906286914753e-05, - "loss": 25.9498, - "step": 163510 - }, - { - "epoch": 0.6606414912915073, - "grad_norm": 526.3746337890625, - "learning_rate": 1.611579997163693e-05, - "loss": 41.5305, - "step": 163520 - }, - { - "epoch": 0.6606818925568749, - "grad_norm": 589.9901123046875, - "learning_rate": 1.611253724732839e-05, - "loss": 66.6984, - "step": 163530 - }, - { - "epoch": 0.6607222938222426, - "grad_norm": 324.912841796875, - "learning_rate": 1.6109274696285495e-05, - "loss": 54.046, - "step": 163540 - }, - { - "epoch": 0.6607626950876101, - "grad_norm": 948.199462890625, - "learning_rate": 1.6106012318571877e-05, - "loss": 43.5441, - "step": 163550 - }, - { - "epoch": 0.6608030963529777, - "grad_norm": 554.431884765625, - "learning_rate": 1.610275011425113e-05, - "loss": 42.9645, - "step": 163560 - }, - { - "epoch": 0.6608434976183454, - "grad_norm": 974.4641723632812, - "learning_rate": 1.6099488083386847e-05, - "loss": 59.781, - "step": 163570 - }, - { - "epoch": 0.660883898883713, - "grad_norm": 727.3328247070312, - "learning_rate": 1.6096226226042632e-05, - "loss": 56.2628, - "step": 163580 - }, - { - "epoch": 0.6609243001490807, - "grad_norm": 553.086669921875, - "learning_rate": 1.6092964542282058e-05, - "loss": 46.6205, - "step": 163590 - }, - { - "epoch": 0.6609647014144483, - "grad_norm": 425.8204345703125, - "learning_rate": 1.6089703032168733e-05, - "loss": 53.4261, - "step": 163600 - }, - { - "epoch": 0.6610051026798159, - "grad_norm": 550.3936157226562, - "learning_rate": 1.608644169576624e-05, - "loss": 39.1422, - "step": 163610 - }, - { - "epoch": 0.6610455039451836, - "grad_norm": 986.0363159179688, - "learning_rate": 1.6083180533138143e-05, - "loss": 65.5904, - "step": 163620 - }, - { - "epoch": 0.6610859052105512, - "grad_norm": 0.0, - "learning_rate": 1.6079919544348045e-05, - "loss": 39.4999, - "step": 163630 - }, - { - "epoch": 0.6611263064759189, - "grad_norm": 246.7885284423828, - "learning_rate": 1.60766587294595e-05, - "loss": 62.9314, - "step": 163640 - }, - { - "epoch": 0.6611667077412865, - "grad_norm": 728.0462036132812, - "learning_rate": 1.607339808853609e-05, - "loss": 55.5237, - "step": 163650 - }, - { - "epoch": 0.6612071090066541, - "grad_norm": 824.4031982421875, - "learning_rate": 1.607013762164138e-05, - "loss": 44.8297, - "step": 163660 - }, - { - "epoch": 0.6612475102720217, - "grad_norm": 863.5546875, - "learning_rate": 1.606687732883893e-05, - "loss": 37.9698, - "step": 163670 - }, - { - "epoch": 0.6612879115373893, - "grad_norm": 554.3681640625, - "learning_rate": 1.606361721019231e-05, - "loss": 55.0306, - "step": 163680 - }, - { - "epoch": 0.661328312802757, - "grad_norm": 908.3670043945312, - "learning_rate": 1.6060357265765073e-05, - "loss": 46.0023, - "step": 163690 - }, - { - "epoch": 0.6613687140681246, - "grad_norm": 517.4747924804688, - "learning_rate": 1.6057097495620767e-05, - "loss": 33.2749, - "step": 163700 - }, - { - "epoch": 0.6614091153334922, - "grad_norm": 377.3349914550781, - "learning_rate": 1.605383789982296e-05, - "loss": 49.5776, - "step": 163710 - }, - { - "epoch": 0.6614495165988599, - "grad_norm": 910.6222534179688, - "learning_rate": 1.605057847843518e-05, - "loss": 60.544, - "step": 163720 - }, - { - "epoch": 0.6614899178642275, - "grad_norm": 997.0488891601562, - "learning_rate": 1.604731923152099e-05, - "loss": 41.9783, - "step": 163730 - }, - { - "epoch": 0.6615303191295951, - "grad_norm": 610.9208374023438, - "learning_rate": 1.604406015914392e-05, - "loss": 37.3855, - "step": 163740 - }, - { - "epoch": 0.6615707203949628, - "grad_norm": 587.4974365234375, - "learning_rate": 1.6040801261367493e-05, - "loss": 44.2066, - "step": 163750 - }, - { - "epoch": 0.6616111216603304, - "grad_norm": 286.9742431640625, - "learning_rate": 1.6037542538255274e-05, - "loss": 41.9279, - "step": 163760 - }, - { - "epoch": 0.6616515229256981, - "grad_norm": 296.52197265625, - "learning_rate": 1.6034283989870773e-05, - "loss": 36.0075, - "step": 163770 - }, - { - "epoch": 0.6616919241910657, - "grad_norm": 228.6723175048828, - "learning_rate": 1.603102561627751e-05, - "loss": 43.1621, - "step": 163780 - }, - { - "epoch": 0.6617323254564333, - "grad_norm": 981.3435668945312, - "learning_rate": 1.6027767417539036e-05, - "loss": 55.8184, - "step": 163790 - }, - { - "epoch": 0.6617727267218009, - "grad_norm": 515.3609008789062, - "learning_rate": 1.6024509393718844e-05, - "loss": 50.0376, - "step": 163800 - }, - { - "epoch": 0.6618131279871685, - "grad_norm": 612.1990966796875, - "learning_rate": 1.6021251544880467e-05, - "loss": 88.1613, - "step": 163810 - }, - { - "epoch": 0.6618535292525362, - "grad_norm": 493.67962646484375, - "learning_rate": 1.6017993871087418e-05, - "loss": 30.4908, - "step": 163820 - }, - { - "epoch": 0.6618939305179038, - "grad_norm": 489.2311706542969, - "learning_rate": 1.6014736372403198e-05, - "loss": 55.9566, - "step": 163830 - }, - { - "epoch": 0.6619343317832714, - "grad_norm": 676.2103271484375, - "learning_rate": 1.6011479048891324e-05, - "loss": 73.5741, - "step": 163840 - }, - { - "epoch": 0.6619747330486391, - "grad_norm": 909.5650634765625, - "learning_rate": 1.600822190061528e-05, - "loss": 58.0165, - "step": 163850 - }, - { - "epoch": 0.6620151343140067, - "grad_norm": 576.3464965820312, - "learning_rate": 1.6004964927638593e-05, - "loss": 97.8101, - "step": 163860 - }, - { - "epoch": 0.6620555355793744, - "grad_norm": 921.4266357421875, - "learning_rate": 1.6001708130024746e-05, - "loss": 58.1987, - "step": 163870 - }, - { - "epoch": 0.662095936844742, - "grad_norm": 220.7132568359375, - "learning_rate": 1.5998451507837216e-05, - "loss": 44.0783, - "step": 163880 - }, - { - "epoch": 0.6621363381101096, - "grad_norm": 395.4250793457031, - "learning_rate": 1.5995195061139524e-05, - "loss": 37.5821, - "step": 163890 - }, - { - "epoch": 0.6621767393754773, - "grad_norm": 566.3154907226562, - "learning_rate": 1.5991938789995137e-05, - "loss": 42.0082, - "step": 163900 - }, - { - "epoch": 0.6622171406408449, - "grad_norm": 600.4891357421875, - "learning_rate": 1.5988682694467537e-05, - "loss": 31.8918, - "step": 163910 - }, - { - "epoch": 0.6622575419062126, - "grad_norm": 985.5350341796875, - "learning_rate": 1.598542677462021e-05, - "loss": 36.2068, - "step": 163920 - }, - { - "epoch": 0.6622979431715801, - "grad_norm": 311.7273864746094, - "learning_rate": 1.5982171030516623e-05, - "loss": 64.975, - "step": 163930 - }, - { - "epoch": 0.6623383444369477, - "grad_norm": 513.2449340820312, - "learning_rate": 1.5978915462220263e-05, - "loss": 44.5756, - "step": 163940 - }, - { - "epoch": 0.6623787457023154, - "grad_norm": 645.991943359375, - "learning_rate": 1.597566006979459e-05, - "loss": 55.4562, - "step": 163950 - }, - { - "epoch": 0.662419146967683, - "grad_norm": 2931.17724609375, - "learning_rate": 1.5972404853303062e-05, - "loss": 60.1816, - "step": 163960 - }, - { - "epoch": 0.6624595482330506, - "grad_norm": 712.2529907226562, - "learning_rate": 1.5969149812809157e-05, - "loss": 38.7537, - "step": 163970 - }, - { - "epoch": 0.6624999494984183, - "grad_norm": 560.7094116210938, - "learning_rate": 1.5965894948376326e-05, - "loss": 32.3941, - "step": 163980 - }, - { - "epoch": 0.6625403507637859, - "grad_norm": 1348.53857421875, - "learning_rate": 1.5962640260068017e-05, - "loss": 50.6592, - "step": 163990 - }, - { - "epoch": 0.6625807520291536, - "grad_norm": 534.3580932617188, - "learning_rate": 1.5959385747947698e-05, - "loss": 67.778, - "step": 164000 - }, - { - "epoch": 0.6626211532945212, - "grad_norm": 838.6098022460938, - "learning_rate": 1.5956131412078794e-05, - "loss": 40.7363, - "step": 164010 - }, - { - "epoch": 0.6626615545598888, - "grad_norm": 510.424072265625, - "learning_rate": 1.595287725252478e-05, - "loss": 34.0819, - "step": 164020 - }, - { - "epoch": 0.6627019558252565, - "grad_norm": 616.5304565429688, - "learning_rate": 1.5949623269349078e-05, - "loss": 49.7259, - "step": 164030 - }, - { - "epoch": 0.6627423570906241, - "grad_norm": 1309.3828125, - "learning_rate": 1.5946369462615117e-05, - "loss": 38.566, - "step": 164040 - }, - { - "epoch": 0.6627827583559918, - "grad_norm": 338.8128662109375, - "learning_rate": 1.594311583238636e-05, - "loss": 48.4212, - "step": 164050 - }, - { - "epoch": 0.6628231596213593, - "grad_norm": 530.7924194335938, - "learning_rate": 1.593986237872622e-05, - "loss": 54.5951, - "step": 164060 - }, - { - "epoch": 0.6628635608867269, - "grad_norm": 545.570556640625, - "learning_rate": 1.593660910169812e-05, - "loss": 55.6956, - "step": 164070 - }, - { - "epoch": 0.6629039621520946, - "grad_norm": 484.4242248535156, - "learning_rate": 1.59333560013655e-05, - "loss": 52.3372, - "step": 164080 - }, - { - "epoch": 0.6629443634174622, - "grad_norm": 496.9941101074219, - "learning_rate": 1.5930103077791763e-05, - "loss": 61.0413, - "step": 164090 - }, - { - "epoch": 0.6629847646828299, - "grad_norm": 516.7117309570312, - "learning_rate": 1.5926850331040345e-05, - "loss": 53.589, - "step": 164100 - }, - { - "epoch": 0.6630251659481975, - "grad_norm": 843.0413818359375, - "learning_rate": 1.592359776117465e-05, - "loss": 65.3876, - "step": 164110 - }, - { - "epoch": 0.6630655672135651, - "grad_norm": 1167.8038330078125, - "learning_rate": 1.5920345368258084e-05, - "loss": 38.0708, - "step": 164120 - }, - { - "epoch": 0.6631059684789328, - "grad_norm": 349.5180358886719, - "learning_rate": 1.5917093152354062e-05, - "loss": 51.9246, - "step": 164130 - }, - { - "epoch": 0.6631463697443004, - "grad_norm": 799.0294799804688, - "learning_rate": 1.5913841113525992e-05, - "loss": 43.2969, - "step": 164140 - }, - { - "epoch": 0.663186771009668, - "grad_norm": 468.5010986328125, - "learning_rate": 1.5910589251837257e-05, - "loss": 41.2818, - "step": 164150 - }, - { - "epoch": 0.6632271722750357, - "grad_norm": 636.5210571289062, - "learning_rate": 1.5907337567351264e-05, - "loss": 62.2918, - "step": 164160 - }, - { - "epoch": 0.6632675735404033, - "grad_norm": 900.3253784179688, - "learning_rate": 1.5904086060131412e-05, - "loss": 62.7353, - "step": 164170 - }, - { - "epoch": 0.6633079748057709, - "grad_norm": 743.2527465820312, - "learning_rate": 1.5900834730241088e-05, - "loss": 68.0121, - "step": 164180 - }, - { - "epoch": 0.6633483760711385, - "grad_norm": 1014.6226196289062, - "learning_rate": 1.5897583577743676e-05, - "loss": 83.6025, - "step": 164190 - }, - { - "epoch": 0.6633887773365061, - "grad_norm": 551.89697265625, - "learning_rate": 1.5894332602702545e-05, - "loss": 43.9586, - "step": 164200 - }, - { - "epoch": 0.6634291786018738, - "grad_norm": 629.16162109375, - "learning_rate": 1.58910818051811e-05, - "loss": 44.4142, - "step": 164210 - }, - { - "epoch": 0.6634695798672414, - "grad_norm": 652.8489990234375, - "learning_rate": 1.5887831185242703e-05, - "loss": 77.5553, - "step": 164220 - }, - { - "epoch": 0.6635099811326091, - "grad_norm": 487.5631408691406, - "learning_rate": 1.5884580742950723e-05, - "loss": 41.1645, - "step": 164230 - }, - { - "epoch": 0.6635503823979767, - "grad_norm": 501.0959777832031, - "learning_rate": 1.588133047836854e-05, - "loss": 35.929, - "step": 164240 - }, - { - "epoch": 0.6635907836633443, - "grad_norm": 371.644287109375, - "learning_rate": 1.5878080391559508e-05, - "loss": 67.765, - "step": 164250 - }, - { - "epoch": 0.663631184928712, - "grad_norm": 693.4502563476562, - "learning_rate": 1.5874830482587e-05, - "loss": 56.9889, - "step": 164260 - }, - { - "epoch": 0.6636715861940796, - "grad_norm": 373.0396728515625, - "learning_rate": 1.5871580751514374e-05, - "loss": 55.6719, - "step": 164270 - }, - { - "epoch": 0.6637119874594473, - "grad_norm": 658.143798828125, - "learning_rate": 1.586833119840497e-05, - "loss": 55.5467, - "step": 164280 - }, - { - "epoch": 0.6637523887248149, - "grad_norm": 361.97808837890625, - "learning_rate": 1.586508182332216e-05, - "loss": 45.0967, - "step": 164290 - }, - { - "epoch": 0.6637927899901825, - "grad_norm": 669.2239990234375, - "learning_rate": 1.5861832626329282e-05, - "loss": 28.2219, - "step": 164300 - }, - { - "epoch": 0.6638331912555501, - "grad_norm": 783.1472778320312, - "learning_rate": 1.585858360748967e-05, - "loss": 64.7959, - "step": 164310 - }, - { - "epoch": 0.6638735925209177, - "grad_norm": 620.7758178710938, - "learning_rate": 1.585533476686669e-05, - "loss": 46.4694, - "step": 164320 - }, - { - "epoch": 0.6639139937862854, - "grad_norm": 761.25146484375, - "learning_rate": 1.585208610452366e-05, - "loss": 48.2226, - "step": 164330 - }, - { - "epoch": 0.663954395051653, - "grad_norm": 925.231689453125, - "learning_rate": 1.5848837620523927e-05, - "loss": 48.1655, - "step": 164340 - }, - { - "epoch": 0.6639947963170206, - "grad_norm": 602.0315551757812, - "learning_rate": 1.5845589314930813e-05, - "loss": 51.8964, - "step": 164350 - }, - { - "epoch": 0.6640351975823883, - "grad_norm": 693.58544921875, - "learning_rate": 1.584234118780764e-05, - "loss": 46.2405, - "step": 164360 - }, - { - "epoch": 0.6640755988477559, - "grad_norm": 705.6416015625, - "learning_rate": 1.5839093239217754e-05, - "loss": 55.715, - "step": 164370 - }, - { - "epoch": 0.6641160001131236, - "grad_norm": 921.3760375976562, - "learning_rate": 1.5835845469224447e-05, - "loss": 36.3578, - "step": 164380 - }, - { - "epoch": 0.6641564013784912, - "grad_norm": 596.5466918945312, - "learning_rate": 1.5832597877891066e-05, - "loss": 56.3782, - "step": 164390 - }, - { - "epoch": 0.6641968026438588, - "grad_norm": 265.7430725097656, - "learning_rate": 1.58293504652809e-05, - "loss": 37.4067, - "step": 164400 - }, - { - "epoch": 0.6642372039092265, - "grad_norm": 1141.7506103515625, - "learning_rate": 1.582610323145727e-05, - "loss": 50.2969, - "step": 164410 - }, - { - "epoch": 0.6642776051745941, - "grad_norm": 658.0392456054688, - "learning_rate": 1.5822856176483482e-05, - "loss": 58.6385, - "step": 164420 - }, - { - "epoch": 0.6643180064399618, - "grad_norm": 676.5816040039062, - "learning_rate": 1.5819609300422835e-05, - "loss": 46.8305, - "step": 164430 - }, - { - "epoch": 0.6643584077053293, - "grad_norm": 1031.92236328125, - "learning_rate": 1.581636260333863e-05, - "loss": 34.6241, - "step": 164440 - }, - { - "epoch": 0.6643988089706969, - "grad_norm": 756.4705200195312, - "learning_rate": 1.581311608529417e-05, - "loss": 51.8875, - "step": 164450 - }, - { - "epoch": 0.6644392102360646, - "grad_norm": 296.3645935058594, - "learning_rate": 1.5809869746352732e-05, - "loss": 52.9606, - "step": 164460 - }, - { - "epoch": 0.6644796115014322, - "grad_norm": 570.0635375976562, - "learning_rate": 1.5806623586577622e-05, - "loss": 36.714, - "step": 164470 - }, - { - "epoch": 0.6645200127667998, - "grad_norm": 973.0076293945312, - "learning_rate": 1.580337760603212e-05, - "loss": 43.9121, - "step": 164480 - }, - { - "epoch": 0.6645604140321675, - "grad_norm": 760.7753295898438, - "learning_rate": 1.5800131804779504e-05, - "loss": 24.1093, - "step": 164490 - }, - { - "epoch": 0.6646008152975351, - "grad_norm": 1284.3609619140625, - "learning_rate": 1.5796886182883053e-05, - "loss": 36.6988, - "step": 164500 - }, - { - "epoch": 0.6646412165629028, - "grad_norm": 517.6578369140625, - "learning_rate": 1.5793640740406045e-05, - "loss": 46.5038, - "step": 164510 - }, - { - "epoch": 0.6646816178282704, - "grad_norm": 853.7466430664062, - "learning_rate": 1.579039547741175e-05, - "loss": 55.7709, - "step": 164520 - }, - { - "epoch": 0.664722019093638, - "grad_norm": 606.7926635742188, - "learning_rate": 1.578715039396344e-05, - "loss": 44.6481, - "step": 164530 - }, - { - "epoch": 0.6647624203590057, - "grad_norm": 484.31829833984375, - "learning_rate": 1.5783905490124366e-05, - "loss": 51.9917, - "step": 164540 - }, - { - "epoch": 0.6648028216243733, - "grad_norm": 770.72314453125, - "learning_rate": 1.578066076595781e-05, - "loss": 38.291, - "step": 164550 - }, - { - "epoch": 0.664843222889741, - "grad_norm": 862.612060546875, - "learning_rate": 1.577741622152702e-05, - "loss": 54.8366, - "step": 164560 - }, - { - "epoch": 0.6648836241551085, - "grad_norm": 548.0719604492188, - "learning_rate": 1.5774171856895236e-05, - "loss": 50.1739, - "step": 164570 - }, - { - "epoch": 0.6649240254204761, - "grad_norm": 953.9413452148438, - "learning_rate": 1.5770927672125735e-05, - "loss": 39.8906, - "step": 164580 - }, - { - "epoch": 0.6649644266858438, - "grad_norm": 555.2219848632812, - "learning_rate": 1.5767683667281746e-05, - "loss": 42.6911, - "step": 164590 - }, - { - "epoch": 0.6650048279512114, - "grad_norm": 567.25244140625, - "learning_rate": 1.5764439842426515e-05, - "loss": 45.8761, - "step": 164600 - }, - { - "epoch": 0.665045229216579, - "grad_norm": 525.5807495117188, - "learning_rate": 1.576119619762329e-05, - "loss": 42.2324, - "step": 164610 - }, - { - "epoch": 0.6650856304819467, - "grad_norm": 766.9520263671875, - "learning_rate": 1.5757952732935288e-05, - "loss": 53.3623, - "step": 164620 - }, - { - "epoch": 0.6651260317473143, - "grad_norm": 859.6406860351562, - "learning_rate": 1.575470944842577e-05, - "loss": 61.6819, - "step": 164630 - }, - { - "epoch": 0.665166433012682, - "grad_norm": 1102.3270263671875, - "learning_rate": 1.5751466344157947e-05, - "loss": 52.0058, - "step": 164640 - }, - { - "epoch": 0.6652068342780496, - "grad_norm": 599.8565673828125, - "learning_rate": 1.574822342019504e-05, - "loss": 47.8022, - "step": 164650 - }, - { - "epoch": 0.6652472355434172, - "grad_norm": 1298.8262939453125, - "learning_rate": 1.574498067660029e-05, - "loss": 60.007, - "step": 164660 - }, - { - "epoch": 0.6652876368087849, - "grad_norm": 507.6510925292969, - "learning_rate": 1.574173811343691e-05, - "loss": 54.002, - "step": 164670 - }, - { - "epoch": 0.6653280380741525, - "grad_norm": 820.8052978515625, - "learning_rate": 1.5738495730768105e-05, - "loss": 51.6573, - "step": 164680 - }, - { - "epoch": 0.6653684393395202, - "grad_norm": 242.71102905273438, - "learning_rate": 1.5735253528657098e-05, - "loss": 29.9105, - "step": 164690 - }, - { - "epoch": 0.6654088406048877, - "grad_norm": 1396.0880126953125, - "learning_rate": 1.5732011507167084e-05, - "loss": 51.9466, - "step": 164700 - }, - { - "epoch": 0.6654492418702553, - "grad_norm": 748.735595703125, - "learning_rate": 1.5728769666361287e-05, - "loss": 45.9762, - "step": 164710 - }, - { - "epoch": 0.665489643135623, - "grad_norm": 501.557373046875, - "learning_rate": 1.5725528006302898e-05, - "loss": 51.5674, - "step": 164720 - }, - { - "epoch": 0.6655300444009906, - "grad_norm": 1090.5556640625, - "learning_rate": 1.5722286527055106e-05, - "loss": 64.3017, - "step": 164730 - }, - { - "epoch": 0.6655704456663583, - "grad_norm": 632.6434326171875, - "learning_rate": 1.5719045228681125e-05, - "loss": 86.9166, - "step": 164740 - }, - { - "epoch": 0.6656108469317259, - "grad_norm": 746.149658203125, - "learning_rate": 1.5715804111244137e-05, - "loss": 45.3344, - "step": 164750 - }, - { - "epoch": 0.6656512481970935, - "grad_norm": 922.2538452148438, - "learning_rate": 1.5712563174807317e-05, - "loss": 45.3737, - "step": 164760 - }, - { - "epoch": 0.6656916494624612, - "grad_norm": 435.47711181640625, - "learning_rate": 1.570932241943387e-05, - "loss": 44.8212, - "step": 164770 - }, - { - "epoch": 0.6657320507278288, - "grad_norm": 261.5279541015625, - "learning_rate": 1.5706081845186954e-05, - "loss": 40.834, - "step": 164780 - }, - { - "epoch": 0.6657724519931965, - "grad_norm": 903.6303100585938, - "learning_rate": 1.5702841452129765e-05, - "loss": 57.0551, - "step": 164790 - }, - { - "epoch": 0.6658128532585641, - "grad_norm": 539.7006225585938, - "learning_rate": 1.5699601240325474e-05, - "loss": 64.2796, - "step": 164800 - }, - { - "epoch": 0.6658532545239317, - "grad_norm": 385.1630859375, - "learning_rate": 1.5696361209837228e-05, - "loss": 49.2995, - "step": 164810 - }, - { - "epoch": 0.6658936557892993, - "grad_norm": 1175.4351806640625, - "learning_rate": 1.5693121360728223e-05, - "loss": 57.2194, - "step": 164820 - }, - { - "epoch": 0.6659340570546669, - "grad_norm": 844.3092041015625, - "learning_rate": 1.5689881693061607e-05, - "loss": 47.4218, - "step": 164830 - }, - { - "epoch": 0.6659744583200345, - "grad_norm": 1089.0220947265625, - "learning_rate": 1.5686642206900538e-05, - "loss": 53.9814, - "step": 164840 - }, - { - "epoch": 0.6660148595854022, - "grad_norm": 1410.1602783203125, - "learning_rate": 1.5683402902308175e-05, - "loss": 59.7386, - "step": 164850 - }, - { - "epoch": 0.6660552608507698, - "grad_norm": 522.8424682617188, - "learning_rate": 1.5680163779347667e-05, - "loss": 47.6538, - "step": 164860 - }, - { - "epoch": 0.6660956621161375, - "grad_norm": 1108.32080078125, - "learning_rate": 1.5676924838082172e-05, - "loss": 47.7382, - "step": 164870 - }, - { - "epoch": 0.6661360633815051, - "grad_norm": 781.7514038085938, - "learning_rate": 1.5673686078574822e-05, - "loss": 52.2223, - "step": 164880 - }, - { - "epoch": 0.6661764646468727, - "grad_norm": 787.5927124023438, - "learning_rate": 1.5670447500888756e-05, - "loss": 49.3363, - "step": 164890 - }, - { - "epoch": 0.6662168659122404, - "grad_norm": 2937.76708984375, - "learning_rate": 1.5667209105087132e-05, - "loss": 80.4566, - "step": 164900 - }, - { - "epoch": 0.666257267177608, - "grad_norm": 777.5316772460938, - "learning_rate": 1.566397089123306e-05, - "loss": 51.269, - "step": 164910 - }, - { - "epoch": 0.6662976684429757, - "grad_norm": 330.6843566894531, - "learning_rate": 1.5660732859389686e-05, - "loss": 41.2739, - "step": 164920 - }, - { - "epoch": 0.6663380697083433, - "grad_norm": 474.75457763671875, - "learning_rate": 1.565749500962014e-05, - "loss": 63.9933, - "step": 164930 - }, - { - "epoch": 0.666378470973711, - "grad_norm": 295.5137634277344, - "learning_rate": 1.5654257341987532e-05, - "loss": 51.5275, - "step": 164940 - }, - { - "epoch": 0.6664188722390785, - "grad_norm": 1012.8373413085938, - "learning_rate": 1.5651019856554995e-05, - "loss": 75.4704, - "step": 164950 - }, - { - "epoch": 0.6664592735044461, - "grad_norm": 1193.5340576171875, - "learning_rate": 1.5647782553385636e-05, - "loss": 55.0925, - "step": 164960 - }, - { - "epoch": 0.6664996747698138, - "grad_norm": 672.0003662109375, - "learning_rate": 1.564454543254256e-05, - "loss": 41.2695, - "step": 164970 - }, - { - "epoch": 0.6665400760351814, - "grad_norm": 1096.6192626953125, - "learning_rate": 1.56413084940889e-05, - "loss": 80.5824, - "step": 164980 - }, - { - "epoch": 0.666580477300549, - "grad_norm": 518.763671875, - "learning_rate": 1.5638071738087738e-05, - "loss": 42.7408, - "step": 164990 - }, - { - "epoch": 0.6666208785659167, - "grad_norm": 463.8653564453125, - "learning_rate": 1.56348351646022e-05, - "loss": 52.6907, - "step": 165000 - }, - { - "epoch": 0.6666612798312843, - "grad_norm": 1809.75732421875, - "learning_rate": 1.563159877369537e-05, - "loss": 63.8859, - "step": 165010 - }, - { - "epoch": 0.666701681096652, - "grad_norm": 705.6831665039062, - "learning_rate": 1.562836256543034e-05, - "loss": 52.4492, - "step": 165020 - }, - { - "epoch": 0.6667420823620196, - "grad_norm": 1125.3067626953125, - "learning_rate": 1.562512653987021e-05, - "loss": 37.5355, - "step": 165030 - }, - { - "epoch": 0.6667824836273872, - "grad_norm": 869.19482421875, - "learning_rate": 1.562189069707807e-05, - "loss": 40.3621, - "step": 165040 - }, - { - "epoch": 0.6668228848927549, - "grad_norm": 617.2032470703125, - "learning_rate": 1.561865503711698e-05, - "loss": 48.8869, - "step": 165050 - }, - { - "epoch": 0.6668632861581225, - "grad_norm": 846.593017578125, - "learning_rate": 1.5615419560050058e-05, - "loss": 69.1997, - "step": 165060 - }, - { - "epoch": 0.6669036874234902, - "grad_norm": 470.2166748046875, - "learning_rate": 1.5612184265940348e-05, - "loss": 51.828, - "step": 165070 - }, - { - "epoch": 0.6669440886888577, - "grad_norm": 505.6989440917969, - "learning_rate": 1.560894915485095e-05, - "loss": 75.3402, - "step": 165080 - }, - { - "epoch": 0.6669844899542253, - "grad_norm": 548.412841796875, - "learning_rate": 1.5605714226844924e-05, - "loss": 62.3138, - "step": 165090 - }, - { - "epoch": 0.667024891219593, - "grad_norm": 516.51123046875, - "learning_rate": 1.5602479481985333e-05, - "loss": 71.4453, - "step": 165100 - }, - { - "epoch": 0.6670652924849606, - "grad_norm": 919.295166015625, - "learning_rate": 1.5599244920335245e-05, - "loss": 48.0811, - "step": 165110 - }, - { - "epoch": 0.6671056937503282, - "grad_norm": 540.0143432617188, - "learning_rate": 1.5596010541957712e-05, - "loss": 39.6037, - "step": 165120 - }, - { - "epoch": 0.6671460950156959, - "grad_norm": 554.9569702148438, - "learning_rate": 1.5592776346915796e-05, - "loss": 56.8077, - "step": 165130 - }, - { - "epoch": 0.6671864962810635, - "grad_norm": 352.08392333984375, - "learning_rate": 1.5589542335272556e-05, - "loss": 41.5959, - "step": 165140 - }, - { - "epoch": 0.6672268975464312, - "grad_norm": 666.3216552734375, - "learning_rate": 1.5586308507091018e-05, - "loss": 54.632, - "step": 165150 - }, - { - "epoch": 0.6672672988117988, - "grad_norm": 372.5094299316406, - "learning_rate": 1.5583074862434255e-05, - "loss": 41.4742, - "step": 165160 - }, - { - "epoch": 0.6673077000771664, - "grad_norm": 464.4438781738281, - "learning_rate": 1.5579841401365293e-05, - "loss": 68.3877, - "step": 165170 - }, - { - "epoch": 0.6673481013425341, - "grad_norm": 325.2723388671875, - "learning_rate": 1.5576608123947166e-05, - "loss": 44.0116, - "step": 165180 - }, - { - "epoch": 0.6673885026079017, - "grad_norm": 538.7552490234375, - "learning_rate": 1.5573375030242922e-05, - "loss": 43.1703, - "step": 165190 - }, - { - "epoch": 0.6674289038732694, - "grad_norm": 629.1891479492188, - "learning_rate": 1.557014212031559e-05, - "loss": 37.3895, - "step": 165200 - }, - { - "epoch": 0.6674693051386369, - "grad_norm": 873.6672973632812, - "learning_rate": 1.5566909394228178e-05, - "loss": 42.7094, - "step": 165210 - }, - { - "epoch": 0.6675097064040045, - "grad_norm": 1655.4644775390625, - "learning_rate": 1.556367685204374e-05, - "loss": 46.7371, - "step": 165220 - }, - { - "epoch": 0.6675501076693722, - "grad_norm": 1335.8275146484375, - "learning_rate": 1.556044449382526e-05, - "loss": 66.9424, - "step": 165230 - }, - { - "epoch": 0.6675905089347398, - "grad_norm": 473.89788818359375, - "learning_rate": 1.555721231963579e-05, - "loss": 44.3112, - "step": 165240 - }, - { - "epoch": 0.6676309102001075, - "grad_norm": 938.0970458984375, - "learning_rate": 1.5553980329538326e-05, - "loss": 43.0849, - "step": 165250 - }, - { - "epoch": 0.6676713114654751, - "grad_norm": 714.7005004882812, - "learning_rate": 1.555074852359587e-05, - "loss": 42.4088, - "step": 165260 - }, - { - "epoch": 0.6677117127308427, - "grad_norm": 954.3150634765625, - "learning_rate": 1.5547516901871446e-05, - "loss": 48.4536, - "step": 165270 - }, - { - "epoch": 0.6677521139962104, - "grad_norm": 474.6979064941406, - "learning_rate": 1.5544285464428045e-05, - "loss": 46.0709, - "step": 165280 - }, - { - "epoch": 0.667792515261578, - "grad_norm": 850.7158813476562, - "learning_rate": 1.5541054211328663e-05, - "loss": 61.7394, - "step": 165290 - }, - { - "epoch": 0.6678329165269457, - "grad_norm": 373.7199401855469, - "learning_rate": 1.5537823142636305e-05, - "loss": 52.8841, - "step": 165300 - }, - { - "epoch": 0.6678733177923133, - "grad_norm": 1184.4080810546875, - "learning_rate": 1.5534592258413943e-05, - "loss": 55.5253, - "step": 165310 - }, - { - "epoch": 0.6679137190576809, - "grad_norm": 632.0458984375, - "learning_rate": 1.5531361558724587e-05, - "loss": 42.4319, - "step": 165320 - }, - { - "epoch": 0.6679541203230486, - "grad_norm": 733.4188842773438, - "learning_rate": 1.5528131043631215e-05, - "loss": 58.4967, - "step": 165330 - }, - { - "epoch": 0.6679945215884161, - "grad_norm": 483.6113586425781, - "learning_rate": 1.55249007131968e-05, - "loss": 46.9925, - "step": 165340 - }, - { - "epoch": 0.6680349228537837, - "grad_norm": 1163.3182373046875, - "learning_rate": 1.5521670567484325e-05, - "loss": 51.8809, - "step": 165350 - }, - { - "epoch": 0.6680753241191514, - "grad_norm": 1143.9605712890625, - "learning_rate": 1.5518440606556766e-05, - "loss": 72.9315, - "step": 165360 - }, - { - "epoch": 0.668115725384519, - "grad_norm": 1212.5926513671875, - "learning_rate": 1.5515210830477083e-05, - "loss": 47.2623, - "step": 165370 - }, - { - "epoch": 0.6681561266498867, - "grad_norm": 1403.037109375, - "learning_rate": 1.5511981239308256e-05, - "loss": 50.8124, - "step": 165380 - }, - { - "epoch": 0.6681965279152543, - "grad_norm": 3793.583984375, - "learning_rate": 1.5508751833113223e-05, - "loss": 54.1592, - "step": 165390 - }, - { - "epoch": 0.6682369291806219, - "grad_norm": 926.212890625, - "learning_rate": 1.5505522611954975e-05, - "loss": 57.2393, - "step": 165400 - }, - { - "epoch": 0.6682773304459896, - "grad_norm": 1016.1986694335938, - "learning_rate": 1.550229357589645e-05, - "loss": 58.6371, - "step": 165410 - }, - { - "epoch": 0.6683177317113572, - "grad_norm": 705.27001953125, - "learning_rate": 1.5499064725000592e-05, - "loss": 35.6693, - "step": 165420 - }, - { - "epoch": 0.6683581329767249, - "grad_norm": 657.5211181640625, - "learning_rate": 1.549583605933037e-05, - "loss": 38.7974, - "step": 165430 - }, - { - "epoch": 0.6683985342420925, - "grad_norm": 1164.2186279296875, - "learning_rate": 1.549260757894871e-05, - "loss": 34.9751, - "step": 165440 - }, - { - "epoch": 0.6684389355074601, - "grad_norm": 1214.388427734375, - "learning_rate": 1.5489379283918566e-05, - "loss": 63.4225, - "step": 165450 - }, - { - "epoch": 0.6684793367728277, - "grad_norm": 491.2007751464844, - "learning_rate": 1.548615117430286e-05, - "loss": 49.4402, - "step": 165460 - }, - { - "epoch": 0.6685197380381953, - "grad_norm": 851.6419677734375, - "learning_rate": 1.5482923250164537e-05, - "loss": 64.906, - "step": 165470 - }, - { - "epoch": 0.668560139303563, - "grad_norm": 1071.079345703125, - "learning_rate": 1.5479695511566534e-05, - "loss": 50.622, - "step": 165480 - }, - { - "epoch": 0.6686005405689306, - "grad_norm": 417.3287658691406, - "learning_rate": 1.5476467958571767e-05, - "loss": 49.5059, - "step": 165490 - }, - { - "epoch": 0.6686409418342982, - "grad_norm": 607.785400390625, - "learning_rate": 1.547324059124315e-05, - "loss": 34.5434, - "step": 165500 - }, - { - "epoch": 0.6686813430996659, - "grad_norm": 614.81591796875, - "learning_rate": 1.547001340964362e-05, - "loss": 39.9333, - "step": 165510 - }, - { - "epoch": 0.6687217443650335, - "grad_norm": 649.9052734375, - "learning_rate": 1.5466786413836077e-05, - "loss": 62.8206, - "step": 165520 - }, - { - "epoch": 0.6687621456304012, - "grad_norm": 387.1318359375, - "learning_rate": 1.546355960388345e-05, - "loss": 37.9586, - "step": 165530 - }, - { - "epoch": 0.6688025468957688, - "grad_norm": 216.71209716796875, - "learning_rate": 1.5460332979848634e-05, - "loss": 36.5199, - "step": 165540 - }, - { - "epoch": 0.6688429481611364, - "grad_norm": 913.2258911132812, - "learning_rate": 1.5457106541794543e-05, - "loss": 52.0842, - "step": 165550 - }, - { - "epoch": 0.6688833494265041, - "grad_norm": 1081.210205078125, - "learning_rate": 1.5453880289784066e-05, - "loss": 55.7764, - "step": 165560 - }, - { - "epoch": 0.6689237506918717, - "grad_norm": 1004.1875, - "learning_rate": 1.5450654223880117e-05, - "loss": 32.5717, - "step": 165570 - }, - { - "epoch": 0.6689641519572394, - "grad_norm": 1424.0938720703125, - "learning_rate": 1.5447428344145563e-05, - "loss": 67.9024, - "step": 165580 - }, - { - "epoch": 0.6690045532226069, - "grad_norm": 898.0243530273438, - "learning_rate": 1.544420265064333e-05, - "loss": 67.8531, - "step": 165590 - }, - { - "epoch": 0.6690449544879745, - "grad_norm": 912.253662109375, - "learning_rate": 1.544097714343627e-05, - "loss": 43.7589, - "step": 165600 - }, - { - "epoch": 0.6690853557533422, - "grad_norm": 743.2623901367188, - "learning_rate": 1.5437751822587294e-05, - "loss": 46.5631, - "step": 165610 - }, - { - "epoch": 0.6691257570187098, - "grad_norm": 476.3370056152344, - "learning_rate": 1.5434526688159267e-05, - "loss": 51.2162, - "step": 165620 - }, - { - "epoch": 0.6691661582840774, - "grad_norm": 856.005859375, - "learning_rate": 1.5431301740215064e-05, - "loss": 51.07, - "step": 165630 - }, - { - "epoch": 0.6692065595494451, - "grad_norm": 809.520751953125, - "learning_rate": 1.5428076978817562e-05, - "loss": 37.5981, - "step": 165640 - }, - { - "epoch": 0.6692469608148127, - "grad_norm": 729.1454467773438, - "learning_rate": 1.5424852404029634e-05, - "loss": 44.3347, - "step": 165650 - }, - { - "epoch": 0.6692873620801804, - "grad_norm": 524.4779663085938, - "learning_rate": 1.542162801591412e-05, - "loss": 74.8679, - "step": 165660 - }, - { - "epoch": 0.669327763345548, - "grad_norm": 556.4620971679688, - "learning_rate": 1.5418403814533912e-05, - "loss": 53.1046, - "step": 165670 - }, - { - "epoch": 0.6693681646109156, - "grad_norm": 354.8873596191406, - "learning_rate": 1.5415179799951844e-05, - "loss": 51.9128, - "step": 165680 - }, - { - "epoch": 0.6694085658762833, - "grad_norm": 507.58636474609375, - "learning_rate": 1.5411955972230794e-05, - "loss": 43.3807, - "step": 165690 - }, - { - "epoch": 0.6694489671416509, - "grad_norm": 573.6036987304688, - "learning_rate": 1.5408732331433595e-05, - "loss": 59.5482, - "step": 165700 - }, - { - "epoch": 0.6694893684070186, - "grad_norm": 363.93328857421875, - "learning_rate": 1.5405508877623094e-05, - "loss": 49.7304, - "step": 165710 - }, - { - "epoch": 0.6695297696723861, - "grad_norm": 994.6358032226562, - "learning_rate": 1.5402285610862142e-05, - "loss": 41.309, - "step": 165720 - }, - { - "epoch": 0.6695701709377537, - "grad_norm": 0.0, - "learning_rate": 1.539906253121357e-05, - "loss": 43.8188, - "step": 165730 - }, - { - "epoch": 0.6696105722031214, - "grad_norm": 914.8114013671875, - "learning_rate": 1.5395839638740213e-05, - "loss": 52.398, - "step": 165740 - }, - { - "epoch": 0.669650973468489, - "grad_norm": 894.1461181640625, - "learning_rate": 1.539261693350491e-05, - "loss": 48.5215, - "step": 165750 - }, - { - "epoch": 0.6696913747338566, - "grad_norm": 1071.9180908203125, - "learning_rate": 1.538939441557048e-05, - "loss": 42.8886, - "step": 165760 - }, - { - "epoch": 0.6697317759992243, - "grad_norm": 801.8084106445312, - "learning_rate": 1.5386172084999765e-05, - "loss": 57.6356, - "step": 165770 - }, - { - "epoch": 0.6697721772645919, - "grad_norm": 1373.5555419921875, - "learning_rate": 1.5382949941855574e-05, - "loss": 67.3987, - "step": 165780 - }, - { - "epoch": 0.6698125785299596, - "grad_norm": 693.4388427734375, - "learning_rate": 1.5379727986200716e-05, - "loss": 76.9919, - "step": 165790 - }, - { - "epoch": 0.6698529797953272, - "grad_norm": 586.3527221679688, - "learning_rate": 1.5376506218098015e-05, - "loss": 77.7751, - "step": 165800 - }, - { - "epoch": 0.6698933810606948, - "grad_norm": 481.37579345703125, - "learning_rate": 1.537328463761029e-05, - "loss": 44.1322, - "step": 165810 - }, - { - "epoch": 0.6699337823260625, - "grad_norm": 631.0782470703125, - "learning_rate": 1.5370063244800327e-05, - "loss": 60.4053, - "step": 165820 - }, - { - "epoch": 0.6699741835914301, - "grad_norm": 374.3154602050781, - "learning_rate": 1.5366842039730946e-05, - "loss": 58.5419, - "step": 165830 - }, - { - "epoch": 0.6700145848567978, - "grad_norm": 457.1123046875, - "learning_rate": 1.5363621022464924e-05, - "loss": 32.6104, - "step": 165840 - }, - { - "epoch": 0.6700549861221653, - "grad_norm": 947.1151123046875, - "learning_rate": 1.536040019306509e-05, - "loss": 45.5976, - "step": 165850 - }, - { - "epoch": 0.6700953873875329, - "grad_norm": 882.1751708984375, - "learning_rate": 1.535717955159421e-05, - "loss": 41.449, - "step": 165860 - }, - { - "epoch": 0.6701357886529006, - "grad_norm": 547.3237915039062, - "learning_rate": 1.5353959098115072e-05, - "loss": 41.6607, - "step": 165870 - }, - { - "epoch": 0.6701761899182682, - "grad_norm": 774.8670654296875, - "learning_rate": 1.535073883269048e-05, - "loss": 40.8859, - "step": 165880 - }, - { - "epoch": 0.6702165911836359, - "grad_norm": 1032.724365234375, - "learning_rate": 1.534751875538319e-05, - "loss": 63.1162, - "step": 165890 - }, - { - "epoch": 0.6702569924490035, - "grad_norm": 846.3161010742188, - "learning_rate": 1.5344298866256e-05, - "loss": 63.9277, - "step": 165900 - }, - { - "epoch": 0.6702973937143711, - "grad_norm": 950.7833862304688, - "learning_rate": 1.534107916537168e-05, - "loss": 54.6691, - "step": 165910 - }, - { - "epoch": 0.6703377949797388, - "grad_norm": 385.0533752441406, - "learning_rate": 1.533785965279298e-05, - "loss": 82.8562, - "step": 165920 - }, - { - "epoch": 0.6703781962451064, - "grad_norm": 101.061279296875, - "learning_rate": 1.5334640328582688e-05, - "loss": 45.9366, - "step": 165930 - }, - { - "epoch": 0.6704185975104741, - "grad_norm": 896.1965942382812, - "learning_rate": 1.5331421192803565e-05, - "loss": 44.7952, - "step": 165940 - }, - { - "epoch": 0.6704589987758417, - "grad_norm": 1131.50634765625, - "learning_rate": 1.5328202245518347e-05, - "loss": 51.4988, - "step": 165950 - }, - { - "epoch": 0.6704994000412093, - "grad_norm": 504.31109619140625, - "learning_rate": 1.5324983486789818e-05, - "loss": 51.8349, - "step": 165960 - }, - { - "epoch": 0.670539801306577, - "grad_norm": 683.3836059570312, - "learning_rate": 1.532176491668071e-05, - "loss": 42.7658, - "step": 165970 - }, - { - "epoch": 0.6705802025719445, - "grad_norm": 0.0, - "learning_rate": 1.5318546535253785e-05, - "loss": 49.9037, - "step": 165980 - }, - { - "epoch": 0.6706206038373121, - "grad_norm": 881.3528442382812, - "learning_rate": 1.531532834257178e-05, - "loss": 79.0532, - "step": 165990 - }, - { - "epoch": 0.6706610051026798, - "grad_norm": 662.7510986328125, - "learning_rate": 1.5312110338697426e-05, - "loss": 49.7831, - "step": 166000 - }, - { - "epoch": 0.6707014063680474, - "grad_norm": 586.3132934570312, - "learning_rate": 1.5308892523693478e-05, - "loss": 50.3877, - "step": 166010 - }, - { - "epoch": 0.6707418076334151, - "grad_norm": 1294.632568359375, - "learning_rate": 1.5305674897622658e-05, - "loss": 48.3198, - "step": 166020 - }, - { - "epoch": 0.6707822088987827, - "grad_norm": 0.0, - "learning_rate": 1.5302457460547687e-05, - "loss": 29.5519, - "step": 166030 - }, - { - "epoch": 0.6708226101641503, - "grad_norm": 799.5355834960938, - "learning_rate": 1.5299240212531314e-05, - "loss": 54.3518, - "step": 166040 - }, - { - "epoch": 0.670863011429518, - "grad_norm": 284.1693420410156, - "learning_rate": 1.5296023153636235e-05, - "loss": 50.3665, - "step": 166050 - }, - { - "epoch": 0.6709034126948856, - "grad_norm": 651.7603759765625, - "learning_rate": 1.5292806283925193e-05, - "loss": 41.6884, - "step": 166060 - }, - { - "epoch": 0.6709438139602533, - "grad_norm": 915.2215576171875, - "learning_rate": 1.5289589603460885e-05, - "loss": 41.2101, - "step": 166070 - }, - { - "epoch": 0.6709842152256209, - "grad_norm": 562.0250854492188, - "learning_rate": 1.5286373112306018e-05, - "loss": 83.8193, - "step": 166080 - }, - { - "epoch": 0.6710246164909885, - "grad_norm": 884.758544921875, - "learning_rate": 1.528315681052332e-05, - "loss": 38.1881, - "step": 166090 - }, - { - "epoch": 0.6710650177563561, - "grad_norm": 298.85906982421875, - "learning_rate": 1.527994069817548e-05, - "loss": 61.6959, - "step": 166100 - }, - { - "epoch": 0.6711054190217237, - "grad_norm": 886.3592529296875, - "learning_rate": 1.5276724775325192e-05, - "loss": 60.5207, - "step": 166110 - }, - { - "epoch": 0.6711458202870914, - "grad_norm": 724.6257934570312, - "learning_rate": 1.5273509042035172e-05, - "loss": 49.5858, - "step": 166120 - }, - { - "epoch": 0.671186221552459, - "grad_norm": 1337.0560302734375, - "learning_rate": 1.5270293498368093e-05, - "loss": 44.4996, - "step": 166130 - }, - { - "epoch": 0.6712266228178266, - "grad_norm": 970.5552368164062, - "learning_rate": 1.5267078144386654e-05, - "loss": 34.8261, - "step": 166140 - }, - { - "epoch": 0.6712670240831943, - "grad_norm": 754.7394409179688, - "learning_rate": 1.526386298015354e-05, - "loss": 48.2782, - "step": 166150 - }, - { - "epoch": 0.6713074253485619, - "grad_norm": 591.5711669921875, - "learning_rate": 1.5260648005731427e-05, - "loss": 43.9917, - "step": 166160 - }, - { - "epoch": 0.6713478266139296, - "grad_norm": 557.9755859375, - "learning_rate": 1.5257433221182999e-05, - "loss": 44.6998, - "step": 166170 - }, - { - "epoch": 0.6713882278792972, - "grad_norm": 1885.062744140625, - "learning_rate": 1.5254218626570926e-05, - "loss": 45.9201, - "step": 166180 - }, - { - "epoch": 0.6714286291446648, - "grad_norm": 581.98974609375, - "learning_rate": 1.5251004221957865e-05, - "loss": 57.501, - "step": 166190 - }, - { - "epoch": 0.6714690304100325, - "grad_norm": 1093.8936767578125, - "learning_rate": 1.524779000740651e-05, - "loss": 60.8076, - "step": 166200 - }, - { - "epoch": 0.6715094316754001, - "grad_norm": 2516.6455078125, - "learning_rate": 1.5244575982979497e-05, - "loss": 56.7106, - "step": 166210 - }, - { - "epoch": 0.6715498329407678, - "grad_norm": 470.2278137207031, - "learning_rate": 1.5241362148739513e-05, - "loss": 58.4495, - "step": 166220 - }, - { - "epoch": 0.6715902342061353, - "grad_norm": 861.3046875, - "learning_rate": 1.5238148504749195e-05, - "loss": 43.9791, - "step": 166230 - }, - { - "epoch": 0.6716306354715029, - "grad_norm": 960.9512329101562, - "learning_rate": 1.5234935051071192e-05, - "loss": 48.8351, - "step": 166240 - }, - { - "epoch": 0.6716710367368706, - "grad_norm": 472.22882080078125, - "learning_rate": 1.523172178776816e-05, - "loss": 56.9353, - "step": 166250 - }, - { - "epoch": 0.6717114380022382, - "grad_norm": 870.235595703125, - "learning_rate": 1.5228508714902745e-05, - "loss": 65.5524, - "step": 166260 - }, - { - "epoch": 0.6717518392676058, - "grad_norm": 618.8545532226562, - "learning_rate": 1.5225295832537574e-05, - "loss": 43.7606, - "step": 166270 - }, - { - "epoch": 0.6717922405329735, - "grad_norm": 359.6185607910156, - "learning_rate": 1.52220831407353e-05, - "loss": 59.9645, - "step": 166280 - }, - { - "epoch": 0.6718326417983411, - "grad_norm": 848.9765625, - "learning_rate": 1.5218870639558536e-05, - "loss": 47.4422, - "step": 166290 - }, - { - "epoch": 0.6718730430637088, - "grad_norm": 493.81597900390625, - "learning_rate": 1.521565832906994e-05, - "loss": 42.8739, - "step": 166300 - }, - { - "epoch": 0.6719134443290764, - "grad_norm": 546.1114501953125, - "learning_rate": 1.521244620933212e-05, - "loss": 50.4388, - "step": 166310 - }, - { - "epoch": 0.671953845594444, - "grad_norm": 850.0634155273438, - "learning_rate": 1.5209234280407697e-05, - "loss": 39.2991, - "step": 166320 - }, - { - "epoch": 0.6719942468598117, - "grad_norm": 476.33441162109375, - "learning_rate": 1.5206022542359297e-05, - "loss": 43.6006, - "step": 166330 - }, - { - "epoch": 0.6720346481251793, - "grad_norm": 1327.771484375, - "learning_rate": 1.5202810995249527e-05, - "loss": 67.9057, - "step": 166340 - }, - { - "epoch": 0.672075049390547, - "grad_norm": 998.8424072265625, - "learning_rate": 1.5199599639140994e-05, - "loss": 48.7459, - "step": 166350 - }, - { - "epoch": 0.6721154506559145, - "grad_norm": 401.8807067871094, - "learning_rate": 1.5196388474096319e-05, - "loss": 31.3531, - "step": 166360 - }, - { - "epoch": 0.6721558519212821, - "grad_norm": 647.2686157226562, - "learning_rate": 1.5193177500178091e-05, - "loss": 53.1445, - "step": 166370 - }, - { - "epoch": 0.6721962531866498, - "grad_norm": 4298.70654296875, - "learning_rate": 1.5189966717448923e-05, - "loss": 134.2395, - "step": 166380 - }, - { - "epoch": 0.6722366544520174, - "grad_norm": 331.1536865234375, - "learning_rate": 1.5186756125971407e-05, - "loss": 37.2909, - "step": 166390 - }, - { - "epoch": 0.672277055717385, - "grad_norm": 532.593505859375, - "learning_rate": 1.5183545725808127e-05, - "loss": 41.4082, - "step": 166400 - }, - { - "epoch": 0.6723174569827527, - "grad_norm": 53.21999740600586, - "learning_rate": 1.5180335517021682e-05, - "loss": 46.6067, - "step": 166410 - }, - { - "epoch": 0.6723578582481203, - "grad_norm": 404.0959777832031, - "learning_rate": 1.5177125499674638e-05, - "loss": 52.84, - "step": 166420 - }, - { - "epoch": 0.672398259513488, - "grad_norm": 858.9264526367188, - "learning_rate": 1.5173915673829604e-05, - "loss": 57.7387, - "step": 166430 - }, - { - "epoch": 0.6724386607788556, - "grad_norm": 446.1383972167969, - "learning_rate": 1.5170706039549142e-05, - "loss": 56.045, - "step": 166440 - }, - { - "epoch": 0.6724790620442233, - "grad_norm": 436.2591247558594, - "learning_rate": 1.5167496596895814e-05, - "loss": 55.9357, - "step": 166450 - }, - { - "epoch": 0.6725194633095909, - "grad_norm": 609.2459106445312, - "learning_rate": 1.5164287345932216e-05, - "loss": 41.1153, - "step": 166460 - }, - { - "epoch": 0.6725598645749585, - "grad_norm": 379.0639343261719, - "learning_rate": 1.5161078286720898e-05, - "loss": 32.7601, - "step": 166470 - }, - { - "epoch": 0.6726002658403262, - "grad_norm": 392.2806396484375, - "learning_rate": 1.515786941932441e-05, - "loss": 30.7783, - "step": 166480 - }, - { - "epoch": 0.6726406671056937, - "grad_norm": 710.4434814453125, - "learning_rate": 1.515466074380534e-05, - "loss": 42.4171, - "step": 166490 - }, - { - "epoch": 0.6726810683710613, - "grad_norm": 921.6600952148438, - "learning_rate": 1.5151452260226224e-05, - "loss": 43.7955, - "step": 166500 - }, - { - "epoch": 0.672721469636429, - "grad_norm": 666.1463012695312, - "learning_rate": 1.5148243968649617e-05, - "loss": 44.6316, - "step": 166510 - }, - { - "epoch": 0.6727618709017966, - "grad_norm": 567.9951171875, - "learning_rate": 1.5145035869138067e-05, - "loss": 51.5773, - "step": 166520 - }, - { - "epoch": 0.6728022721671643, - "grad_norm": 778.463134765625, - "learning_rate": 1.5141827961754107e-05, - "loss": 65.0105, - "step": 166530 - }, - { - "epoch": 0.6728426734325319, - "grad_norm": 744.7365112304688, - "learning_rate": 1.5138620246560296e-05, - "loss": 47.7923, - "step": 166540 - }, - { - "epoch": 0.6728830746978995, - "grad_norm": 750.9429931640625, - "learning_rate": 1.5135412723619158e-05, - "loss": 55.9812, - "step": 166550 - }, - { - "epoch": 0.6729234759632672, - "grad_norm": 1654.34765625, - "learning_rate": 1.513220539299322e-05, - "loss": 49.7286, - "step": 166560 - }, - { - "epoch": 0.6729638772286348, - "grad_norm": 596.2041625976562, - "learning_rate": 1.512899825474503e-05, - "loss": 35.4972, - "step": 166570 - }, - { - "epoch": 0.6730042784940025, - "grad_norm": 929.0473022460938, - "learning_rate": 1.5125791308937092e-05, - "loss": 41.1769, - "step": 166580 - }, - { - "epoch": 0.6730446797593701, - "grad_norm": 456.9891662597656, - "learning_rate": 1.5122584555631944e-05, - "loss": 51.7045, - "step": 166590 - }, - { - "epoch": 0.6730850810247377, - "grad_norm": 326.9583435058594, - "learning_rate": 1.5119377994892094e-05, - "loss": 45.5811, - "step": 166600 - }, - { - "epoch": 0.6731254822901054, - "grad_norm": 681.9387817382812, - "learning_rate": 1.5116171626780049e-05, - "loss": 60.2909, - "step": 166610 - }, - { - "epoch": 0.6731658835554729, - "grad_norm": 288.6805419921875, - "learning_rate": 1.5112965451358335e-05, - "loss": 45.1375, - "step": 166620 - }, - { - "epoch": 0.6732062848208406, - "grad_norm": 375.949951171875, - "learning_rate": 1.5109759468689449e-05, - "loss": 31.1756, - "step": 166630 - }, - { - "epoch": 0.6732466860862082, - "grad_norm": 271.1240234375, - "learning_rate": 1.5106553678835884e-05, - "loss": 19.8992, - "step": 166640 - }, - { - "epoch": 0.6732870873515758, - "grad_norm": 294.1463928222656, - "learning_rate": 1.5103348081860159e-05, - "loss": 50.6929, - "step": 166650 - }, - { - "epoch": 0.6733274886169435, - "grad_norm": 755.0875244140625, - "learning_rate": 1.5100142677824753e-05, - "loss": 44.0782, - "step": 166660 - }, - { - "epoch": 0.6733678898823111, - "grad_norm": 622.8639526367188, - "learning_rate": 1.5096937466792169e-05, - "loss": 54.3434, - "step": 166670 - }, - { - "epoch": 0.6734082911476788, - "grad_norm": 942.9237060546875, - "learning_rate": 1.5093732448824888e-05, - "loss": 58.158, - "step": 166680 - }, - { - "epoch": 0.6734486924130464, - "grad_norm": 509.4794616699219, - "learning_rate": 1.5090527623985379e-05, - "loss": 59.3872, - "step": 166690 - }, - { - "epoch": 0.673489093678414, - "grad_norm": 747.8128662109375, - "learning_rate": 1.5087322992336147e-05, - "loss": 49.9323, - "step": 166700 - }, - { - "epoch": 0.6735294949437817, - "grad_norm": 1549.0496826171875, - "learning_rate": 1.5084118553939659e-05, - "loss": 41.1457, - "step": 166710 - }, - { - "epoch": 0.6735698962091493, - "grad_norm": 930.0858154296875, - "learning_rate": 1.5080914308858374e-05, - "loss": 82.8616, - "step": 166720 - }, - { - "epoch": 0.673610297474517, - "grad_norm": 2211.052734375, - "learning_rate": 1.5077710257154782e-05, - "loss": 81.3171, - "step": 166730 - }, - { - "epoch": 0.6736506987398845, - "grad_norm": 1658.010498046875, - "learning_rate": 1.5074506398891328e-05, - "loss": 54.205, - "step": 166740 - }, - { - "epoch": 0.6736911000052521, - "grad_norm": 371.7207336425781, - "learning_rate": 1.5071302734130489e-05, - "loss": 67.5018, - "step": 166750 - }, - { - "epoch": 0.6737315012706198, - "grad_norm": 1372.85546875, - "learning_rate": 1.506809926293471e-05, - "loss": 54.2526, - "step": 166760 - }, - { - "epoch": 0.6737719025359874, - "grad_norm": 307.7871398925781, - "learning_rate": 1.506489598536645e-05, - "loss": 42.5487, - "step": 166770 - }, - { - "epoch": 0.673812303801355, - "grad_norm": 225.70396423339844, - "learning_rate": 1.5061692901488162e-05, - "loss": 53.6776, - "step": 166780 - }, - { - "epoch": 0.6738527050667227, - "grad_norm": 2758.591064453125, - "learning_rate": 1.5058490011362286e-05, - "loss": 50.1884, - "step": 166790 - }, - { - "epoch": 0.6738931063320903, - "grad_norm": 283.5628967285156, - "learning_rate": 1.505528731505126e-05, - "loss": 72.4942, - "step": 166800 - }, - { - "epoch": 0.673933507597458, - "grad_norm": 885.5732421875, - "learning_rate": 1.5052084812617533e-05, - "loss": 32.8235, - "step": 166810 - }, - { - "epoch": 0.6739739088628256, - "grad_norm": 592.6634521484375, - "learning_rate": 1.5048882504123529e-05, - "loss": 26.64, - "step": 166820 - }, - { - "epoch": 0.6740143101281932, - "grad_norm": 448.51971435546875, - "learning_rate": 1.5045680389631686e-05, - "loss": 59.0059, - "step": 166830 - }, - { - "epoch": 0.6740547113935609, - "grad_norm": 760.6209106445312, - "learning_rate": 1.5042478469204435e-05, - "loss": 45.268, - "step": 166840 - }, - { - "epoch": 0.6740951126589285, - "grad_norm": 541.5360107421875, - "learning_rate": 1.5039276742904185e-05, - "loss": 62.0476, - "step": 166850 - }, - { - "epoch": 0.6741355139242962, - "grad_norm": 1311.8394775390625, - "learning_rate": 1.5036075210793368e-05, - "loss": 86.5055, - "step": 166860 - }, - { - "epoch": 0.6741759151896637, - "grad_norm": 871.006591796875, - "learning_rate": 1.5032873872934394e-05, - "loss": 66.7284, - "step": 166870 - }, - { - "epoch": 0.6742163164550313, - "grad_norm": 750.0230712890625, - "learning_rate": 1.5029672729389669e-05, - "loss": 34.7836, - "step": 166880 - }, - { - "epoch": 0.674256717720399, - "grad_norm": 1440.93994140625, - "learning_rate": 1.5026471780221612e-05, - "loss": 44.9238, - "step": 166890 - }, - { - "epoch": 0.6742971189857666, - "grad_norm": 331.9103698730469, - "learning_rate": 1.5023271025492618e-05, - "loss": 45.4969, - "step": 166900 - }, - { - "epoch": 0.6743375202511342, - "grad_norm": 1073.5389404296875, - "learning_rate": 1.5020070465265098e-05, - "loss": 53.6601, - "step": 166910 - }, - { - "epoch": 0.6743779215165019, - "grad_norm": 854.6211547851562, - "learning_rate": 1.5016870099601444e-05, - "loss": 55.5576, - "step": 166920 - }, - { - "epoch": 0.6744183227818695, - "grad_norm": 1300.90478515625, - "learning_rate": 1.501366992856404e-05, - "loss": 40.2496, - "step": 166930 - }, - { - "epoch": 0.6744587240472372, - "grad_norm": 401.09375, - "learning_rate": 1.501046995221529e-05, - "loss": 31.3694, - "step": 166940 - }, - { - "epoch": 0.6744991253126048, - "grad_norm": 386.5203857421875, - "learning_rate": 1.500727017061756e-05, - "loss": 47.9808, - "step": 166950 - }, - { - "epoch": 0.6745395265779724, - "grad_norm": 535.3987426757812, - "learning_rate": 1.5004070583833251e-05, - "loss": 62.2983, - "step": 166960 - }, - { - "epoch": 0.6745799278433401, - "grad_norm": 1989.7027587890625, - "learning_rate": 1.5000871191924731e-05, - "loss": 70.537, - "step": 166970 - }, - { - "epoch": 0.6746203291087077, - "grad_norm": 396.22125244140625, - "learning_rate": 1.4997671994954371e-05, - "loss": 51.7699, - "step": 166980 - }, - { - "epoch": 0.6746607303740754, - "grad_norm": 453.8977355957031, - "learning_rate": 1.499447299298455e-05, - "loss": 32.1538, - "step": 166990 - }, - { - "epoch": 0.6747011316394429, - "grad_norm": 654.4794311523438, - "learning_rate": 1.4991274186077632e-05, - "loss": 49.3818, - "step": 167000 - }, - { - "epoch": 0.6747415329048105, - "grad_norm": 682.2568359375, - "learning_rate": 1.4988075574295968e-05, - "loss": 68.9815, - "step": 167010 - }, - { - "epoch": 0.6747819341701782, - "grad_norm": 1162.35791015625, - "learning_rate": 1.4984877157701932e-05, - "loss": 72.4098, - "step": 167020 - }, - { - "epoch": 0.6748223354355458, - "grad_norm": 1012.3003540039062, - "learning_rate": 1.4981678936357863e-05, - "loss": 44.8387, - "step": 167030 - }, - { - "epoch": 0.6748627367009135, - "grad_norm": 378.43292236328125, - "learning_rate": 1.4978480910326132e-05, - "loss": 74.7357, - "step": 167040 - }, - { - "epoch": 0.6749031379662811, - "grad_norm": 393.8338317871094, - "learning_rate": 1.4975283079669072e-05, - "loss": 31.4734, - "step": 167050 - }, - { - "epoch": 0.6749435392316487, - "grad_norm": 662.2154541015625, - "learning_rate": 1.4972085444449018e-05, - "loss": 33.0518, - "step": 167060 - }, - { - "epoch": 0.6749839404970164, - "grad_norm": 748.7392578125, - "learning_rate": 1.4968888004728338e-05, - "loss": 44.0402, - "step": 167070 - }, - { - "epoch": 0.675024341762384, - "grad_norm": 1040.579833984375, - "learning_rate": 1.4965690760569346e-05, - "loss": 54.7236, - "step": 167080 - }, - { - "epoch": 0.6750647430277517, - "grad_norm": 2075.803955078125, - "learning_rate": 1.4962493712034373e-05, - "loss": 70.8729, - "step": 167090 - }, - { - "epoch": 0.6751051442931193, - "grad_norm": 787.3817138671875, - "learning_rate": 1.4959296859185754e-05, - "loss": 44.2748, - "step": 167100 - }, - { - "epoch": 0.6751455455584869, - "grad_norm": 816.3740844726562, - "learning_rate": 1.4956100202085809e-05, - "loss": 52.7223, - "step": 167110 - }, - { - "epoch": 0.6751859468238546, - "grad_norm": 268.7018127441406, - "learning_rate": 1.4952903740796873e-05, - "loss": 39.8699, - "step": 167120 - }, - { - "epoch": 0.6752263480892221, - "grad_norm": 341.5850524902344, - "learning_rate": 1.4949707475381247e-05, - "loss": 61.8581, - "step": 167130 - }, - { - "epoch": 0.6752667493545897, - "grad_norm": 373.7789611816406, - "learning_rate": 1.4946511405901236e-05, - "loss": 43.0861, - "step": 167140 - }, - { - "epoch": 0.6753071506199574, - "grad_norm": 1607.9439697265625, - "learning_rate": 1.4943315532419177e-05, - "loss": 41.1233, - "step": 167150 - }, - { - "epoch": 0.675347551885325, - "grad_norm": 417.06597900390625, - "learning_rate": 1.4940119854997354e-05, - "loss": 28.2353, - "step": 167160 - }, - { - "epoch": 0.6753879531506927, - "grad_norm": 493.3361511230469, - "learning_rate": 1.4936924373698066e-05, - "loss": 52.0371, - "step": 167170 - }, - { - "epoch": 0.6754283544160603, - "grad_norm": 429.33819580078125, - "learning_rate": 1.4933729088583626e-05, - "loss": 54.8033, - "step": 167180 - }, - { - "epoch": 0.675468755681428, - "grad_norm": 731.7124633789062, - "learning_rate": 1.4930533999716317e-05, - "loss": 52.8938, - "step": 167190 - }, - { - "epoch": 0.6755091569467956, - "grad_norm": 807.2184448242188, - "learning_rate": 1.4927339107158437e-05, - "loss": 68.7912, - "step": 167200 - }, - { - "epoch": 0.6755495582121632, - "grad_norm": 594.211669921875, - "learning_rate": 1.4924144410972265e-05, - "loss": 51.8672, - "step": 167210 - }, - { - "epoch": 0.6755899594775309, - "grad_norm": 1601.8602294921875, - "learning_rate": 1.4920949911220078e-05, - "loss": 61.9253, - "step": 167220 - }, - { - "epoch": 0.6756303607428985, - "grad_norm": 216.67095947265625, - "learning_rate": 1.4917755607964168e-05, - "loss": 43.7638, - "step": 167230 - }, - { - "epoch": 0.6756707620082661, - "grad_norm": 0.0, - "learning_rate": 1.4914561501266805e-05, - "loss": 41.52, - "step": 167240 - }, - { - "epoch": 0.6757111632736337, - "grad_norm": 675.5355224609375, - "learning_rate": 1.4911367591190248e-05, - "loss": 38.7212, - "step": 167250 - }, - { - "epoch": 0.6757515645390013, - "grad_norm": 790.0083618164062, - "learning_rate": 1.4908173877796783e-05, - "loss": 49.3089, - "step": 167260 - }, - { - "epoch": 0.675791965804369, - "grad_norm": 1060.16796875, - "learning_rate": 1.490498036114866e-05, - "loss": 71.76, - "step": 167270 - }, - { - "epoch": 0.6758323670697366, - "grad_norm": 1394.681640625, - "learning_rate": 1.4901787041308146e-05, - "loss": 43.9718, - "step": 167280 - }, - { - "epoch": 0.6758727683351042, - "grad_norm": 860.03466796875, - "learning_rate": 1.4898593918337494e-05, - "loss": 42.2888, - "step": 167290 - }, - { - "epoch": 0.6759131696004719, - "grad_norm": 1046.5, - "learning_rate": 1.4895400992298942e-05, - "loss": 40.4044, - "step": 167300 - }, - { - "epoch": 0.6759535708658395, - "grad_norm": 387.24810791015625, - "learning_rate": 1.4892208263254762e-05, - "loss": 54.466, - "step": 167310 - }, - { - "epoch": 0.6759939721312072, - "grad_norm": 700.537353515625, - "learning_rate": 1.4889015731267186e-05, - "loss": 39.7297, - "step": 167320 - }, - { - "epoch": 0.6760343733965748, - "grad_norm": 465.908935546875, - "learning_rate": 1.4885823396398441e-05, - "loss": 43.5209, - "step": 167330 - }, - { - "epoch": 0.6760747746619424, - "grad_norm": 840.498291015625, - "learning_rate": 1.4882631258710788e-05, - "loss": 35.8289, - "step": 167340 - }, - { - "epoch": 0.6761151759273101, - "grad_norm": 1104.419189453125, - "learning_rate": 1.4879439318266442e-05, - "loss": 65.706, - "step": 167350 - }, - { - "epoch": 0.6761555771926777, - "grad_norm": 435.9366760253906, - "learning_rate": 1.4876247575127641e-05, - "loss": 63.7672, - "step": 167360 - }, - { - "epoch": 0.6761959784580454, - "grad_norm": 1329.9017333984375, - "learning_rate": 1.4873056029356608e-05, - "loss": 76.5193, - "step": 167370 - }, - { - "epoch": 0.6762363797234129, - "grad_norm": 357.02081298828125, - "learning_rate": 1.486986468101555e-05, - "loss": 55.5462, - "step": 167380 - }, - { - "epoch": 0.6762767809887805, - "grad_norm": 468.8736267089844, - "learning_rate": 1.4866673530166703e-05, - "loss": 48.5541, - "step": 167390 - }, - { - "epoch": 0.6763171822541482, - "grad_norm": 670.1433715820312, - "learning_rate": 1.4863482576872275e-05, - "loss": 55.1696, - "step": 167400 - }, - { - "epoch": 0.6763575835195158, - "grad_norm": 289.0069580078125, - "learning_rate": 1.486029182119446e-05, - "loss": 44.4765, - "step": 167410 - }, - { - "epoch": 0.6763979847848834, - "grad_norm": 803.2169799804688, - "learning_rate": 1.4857101263195491e-05, - "loss": 38.6828, - "step": 167420 - }, - { - "epoch": 0.6764383860502511, - "grad_norm": 852.9284057617188, - "learning_rate": 1.4853910902937548e-05, - "loss": 36.3954, - "step": 167430 - }, - { - "epoch": 0.6764787873156187, - "grad_norm": 875.1856689453125, - "learning_rate": 1.485072074048284e-05, - "loss": 57.8835, - "step": 167440 - }, - { - "epoch": 0.6765191885809864, - "grad_norm": 692.950927734375, - "learning_rate": 1.4847530775893554e-05, - "loss": 102.5225, - "step": 167450 - }, - { - "epoch": 0.676559589846354, - "grad_norm": 897.9561767578125, - "learning_rate": 1.4844341009231876e-05, - "loss": 65.091, - "step": 167460 - }, - { - "epoch": 0.6765999911117216, - "grad_norm": 800.497802734375, - "learning_rate": 1.4841151440560009e-05, - "loss": 29.9811, - "step": 167470 - }, - { - "epoch": 0.6766403923770893, - "grad_norm": 734.5826416015625, - "learning_rate": 1.4837962069940114e-05, - "loss": 57.3711, - "step": 167480 - }, - { - "epoch": 0.6766807936424569, - "grad_norm": 756.9467163085938, - "learning_rate": 1.4834772897434388e-05, - "loss": 48.2266, - "step": 167490 - }, - { - "epoch": 0.6767211949078246, - "grad_norm": 449.9790344238281, - "learning_rate": 1.4831583923104999e-05, - "loss": 62.687, - "step": 167500 - }, - { - "epoch": 0.6767615961731921, - "grad_norm": 1147.2373046875, - "learning_rate": 1.4828395147014106e-05, - "loss": 46.7687, - "step": 167510 - }, - { - "epoch": 0.6768019974385597, - "grad_norm": 546.6746826171875, - "learning_rate": 1.4825206569223899e-05, - "loss": 51.0683, - "step": 167520 - }, - { - "epoch": 0.6768423987039274, - "grad_norm": 871.5731201171875, - "learning_rate": 1.4822018189796525e-05, - "loss": 49.3357, - "step": 167530 - }, - { - "epoch": 0.676882799969295, - "grad_norm": 690.7603759765625, - "learning_rate": 1.4818830008794143e-05, - "loss": 40.3889, - "step": 167540 - }, - { - "epoch": 0.6769232012346627, - "grad_norm": 792.1572265625, - "learning_rate": 1.4815642026278913e-05, - "loss": 46.8522, - "step": 167550 - }, - { - "epoch": 0.6769636025000303, - "grad_norm": 448.63714599609375, - "learning_rate": 1.4812454242312979e-05, - "loss": 35.1668, - "step": 167560 - }, - { - "epoch": 0.6770040037653979, - "grad_norm": 594.5196533203125, - "learning_rate": 1.4809266656958504e-05, - "loss": 57.1279, - "step": 167570 - }, - { - "epoch": 0.6770444050307656, - "grad_norm": 346.8206481933594, - "learning_rate": 1.4806079270277623e-05, - "loss": 41.4994, - "step": 167580 - }, - { - "epoch": 0.6770848062961332, - "grad_norm": 435.75714111328125, - "learning_rate": 1.4802892082332461e-05, - "loss": 37.8179, - "step": 167590 - }, - { - "epoch": 0.6771252075615009, - "grad_norm": 921.9247436523438, - "learning_rate": 1.4799705093185181e-05, - "loss": 74.8439, - "step": 167600 - }, - { - "epoch": 0.6771656088268685, - "grad_norm": 453.77178955078125, - "learning_rate": 1.47965183028979e-05, - "loss": 51.5178, - "step": 167610 - }, - { - "epoch": 0.6772060100922361, - "grad_norm": 590.6558837890625, - "learning_rate": 1.4793331711532744e-05, - "loss": 48.1793, - "step": 167620 - }, - { - "epoch": 0.6772464113576038, - "grad_norm": 971.6336059570312, - "learning_rate": 1.4790145319151846e-05, - "loss": 48.0082, - "step": 167630 - }, - { - "epoch": 0.6772868126229713, - "grad_norm": 933.4710693359375, - "learning_rate": 1.4786959125817312e-05, - "loss": 42.4729, - "step": 167640 - }, - { - "epoch": 0.6773272138883389, - "grad_norm": 1059.8695068359375, - "learning_rate": 1.4783773131591278e-05, - "loss": 49.6168, - "step": 167650 - }, - { - "epoch": 0.6773676151537066, - "grad_norm": 796.0709228515625, - "learning_rate": 1.4780587336535844e-05, - "loss": 46.6943, - "step": 167660 - }, - { - "epoch": 0.6774080164190742, - "grad_norm": 361.644775390625, - "learning_rate": 1.4777401740713112e-05, - "loss": 36.3145, - "step": 167670 - }, - { - "epoch": 0.6774484176844419, - "grad_norm": 745.7164916992188, - "learning_rate": 1.4774216344185205e-05, - "loss": 55.0647, - "step": 167680 - }, - { - "epoch": 0.6774888189498095, - "grad_norm": 143.7425079345703, - "learning_rate": 1.477103114701422e-05, - "loss": 43.7669, - "step": 167690 - }, - { - "epoch": 0.6775292202151771, - "grad_norm": 663.1755981445312, - "learning_rate": 1.4767846149262237e-05, - "loss": 46.7611, - "step": 167700 - }, - { - "epoch": 0.6775696214805448, - "grad_norm": 362.8015441894531, - "learning_rate": 1.476466135099137e-05, - "loss": 39.5467, - "step": 167710 - }, - { - "epoch": 0.6776100227459124, - "grad_norm": 1011.8582153320312, - "learning_rate": 1.476147675226369e-05, - "loss": 52.9852, - "step": 167720 - }, - { - "epoch": 0.6776504240112801, - "grad_norm": 1140.3988037109375, - "learning_rate": 1.47582923531413e-05, - "loss": 68.2397, - "step": 167730 - }, - { - "epoch": 0.6776908252766477, - "grad_norm": 1081.307373046875, - "learning_rate": 1.4755108153686275e-05, - "loss": 67.8517, - "step": 167740 - }, - { - "epoch": 0.6777312265420153, - "grad_norm": 416.0245361328125, - "learning_rate": 1.475192415396068e-05, - "loss": 51.0669, - "step": 167750 - }, - { - "epoch": 0.677771627807383, - "grad_norm": 911.7064819335938, - "learning_rate": 1.474874035402661e-05, - "loss": 58.926, - "step": 167760 - }, - { - "epoch": 0.6778120290727505, - "grad_norm": 510.8608093261719, - "learning_rate": 1.4745556753946125e-05, - "loss": 64.8826, - "step": 167770 - }, - { - "epoch": 0.6778524303381182, - "grad_norm": 1057.5894775390625, - "learning_rate": 1.4742373353781285e-05, - "loss": 57.3264, - "step": 167780 - }, - { - "epoch": 0.6778928316034858, - "grad_norm": 416.422119140625, - "learning_rate": 1.4739190153594157e-05, - "loss": 27.1778, - "step": 167790 - }, - { - "epoch": 0.6779332328688534, - "grad_norm": 879.224365234375, - "learning_rate": 1.4736007153446801e-05, - "loss": 59.9546, - "step": 167800 - }, - { - "epoch": 0.6779736341342211, - "grad_norm": 1042.2764892578125, - "learning_rate": 1.4732824353401273e-05, - "loss": 60.1474, - "step": 167810 - }, - { - "epoch": 0.6780140353995887, - "grad_norm": 612.871826171875, - "learning_rate": 1.4729641753519618e-05, - "loss": 50.6535, - "step": 167820 - }, - { - "epoch": 0.6780544366649564, - "grad_norm": 663.3670654296875, - "learning_rate": 1.472645935386388e-05, - "loss": 46.683, - "step": 167830 - }, - { - "epoch": 0.678094837930324, - "grad_norm": 787.3726196289062, - "learning_rate": 1.4723277154496111e-05, - "loss": 56.9814, - "step": 167840 - }, - { - "epoch": 0.6781352391956916, - "grad_norm": 1040.9354248046875, - "learning_rate": 1.472009515547835e-05, - "loss": 60.3822, - "step": 167850 - }, - { - "epoch": 0.6781756404610593, - "grad_norm": 708.2203979492188, - "learning_rate": 1.4716913356872614e-05, - "loss": 59.3495, - "step": 167860 - }, - { - "epoch": 0.6782160417264269, - "grad_norm": 677.75048828125, - "learning_rate": 1.4713731758740956e-05, - "loss": 34.9022, - "step": 167870 - }, - { - "epoch": 0.6782564429917946, - "grad_norm": 1900.8795166015625, - "learning_rate": 1.4710550361145386e-05, - "loss": 39.3471, - "step": 167880 - }, - { - "epoch": 0.6782968442571621, - "grad_norm": 246.22775268554688, - "learning_rate": 1.470736916414794e-05, - "loss": 30.3667, - "step": 167890 - }, - { - "epoch": 0.6783372455225297, - "grad_norm": 410.8074035644531, - "learning_rate": 1.4704188167810635e-05, - "loss": 43.8699, - "step": 167900 - }, - { - "epoch": 0.6783776467878974, - "grad_norm": 580.0462646484375, - "learning_rate": 1.4701007372195469e-05, - "loss": 61.4522, - "step": 167910 - }, - { - "epoch": 0.678418048053265, - "grad_norm": 951.4273681640625, - "learning_rate": 1.4697826777364477e-05, - "loss": 83.201, - "step": 167920 - }, - { - "epoch": 0.6784584493186326, - "grad_norm": 613.5919189453125, - "learning_rate": 1.4694646383379657e-05, - "loss": 55.9393, - "step": 167930 - }, - { - "epoch": 0.6784988505840003, - "grad_norm": 1002.1031494140625, - "learning_rate": 1.4691466190303e-05, - "loss": 31.6006, - "step": 167940 - }, - { - "epoch": 0.6785392518493679, - "grad_norm": 1075.826904296875, - "learning_rate": 1.4688286198196524e-05, - "loss": 50.4907, - "step": 167950 - }, - { - "epoch": 0.6785796531147356, - "grad_norm": 880.4722290039062, - "learning_rate": 1.4685106407122218e-05, - "loss": 30.88, - "step": 167960 - }, - { - "epoch": 0.6786200543801032, - "grad_norm": 406.95733642578125, - "learning_rate": 1.4681926817142071e-05, - "loss": 49.4461, - "step": 167970 - }, - { - "epoch": 0.6786604556454708, - "grad_norm": 1066.5130615234375, - "learning_rate": 1.4678747428318079e-05, - "loss": 73.1735, - "step": 167980 - }, - { - "epoch": 0.6787008569108385, - "grad_norm": 76.85258483886719, - "learning_rate": 1.4675568240712206e-05, - "loss": 38.3394, - "step": 167990 - }, - { - "epoch": 0.6787412581762061, - "grad_norm": 2073.328125, - "learning_rate": 1.467238925438646e-05, - "loss": 76.5933, - "step": 168000 - }, - { - "epoch": 0.6787816594415738, - "grad_norm": 911.6731567382812, - "learning_rate": 1.4669210469402789e-05, - "loss": 74.6866, - "step": 168010 - }, - { - "epoch": 0.6788220607069413, - "grad_norm": 502.549072265625, - "learning_rate": 1.466603188582319e-05, - "loss": 36.4497, - "step": 168020 - }, - { - "epoch": 0.6788624619723089, - "grad_norm": 222.66180419921875, - "learning_rate": 1.4662853503709617e-05, - "loss": 78.5684, - "step": 168030 - }, - { - "epoch": 0.6789028632376766, - "grad_norm": 624.6636352539062, - "learning_rate": 1.4659675323124036e-05, - "loss": 48.2924, - "step": 168040 - }, - { - "epoch": 0.6789432645030442, - "grad_norm": 322.1091003417969, - "learning_rate": 1.4656497344128412e-05, - "loss": 40.4815, - "step": 168050 - }, - { - "epoch": 0.6789836657684118, - "grad_norm": 1130.6031494140625, - "learning_rate": 1.4653319566784696e-05, - "loss": 69.0325, - "step": 168060 - }, - { - "epoch": 0.6790240670337795, - "grad_norm": 454.5316467285156, - "learning_rate": 1.4650141991154832e-05, - "loss": 52.8761, - "step": 168070 - }, - { - "epoch": 0.6790644682991471, - "grad_norm": 980.3739624023438, - "learning_rate": 1.464696461730079e-05, - "loss": 50.6851, - "step": 168080 - }, - { - "epoch": 0.6791048695645148, - "grad_norm": 1091.614990234375, - "learning_rate": 1.464378744528449e-05, - "loss": 65.8454, - "step": 168090 - }, - { - "epoch": 0.6791452708298824, - "grad_norm": 1084.71337890625, - "learning_rate": 1.4640610475167898e-05, - "loss": 78.1262, - "step": 168100 - }, - { - "epoch": 0.67918567209525, - "grad_norm": 667.9674682617188, - "learning_rate": 1.4637433707012938e-05, - "loss": 45.5906, - "step": 168110 - }, - { - "epoch": 0.6792260733606177, - "grad_norm": 946.9363403320312, - "learning_rate": 1.4634257140881536e-05, - "loss": 54.872, - "step": 168120 - }, - { - "epoch": 0.6792664746259853, - "grad_norm": 640.0570068359375, - "learning_rate": 1.4631080776835629e-05, - "loss": 41.9037, - "step": 168130 - }, - { - "epoch": 0.679306875891353, - "grad_norm": 352.1270446777344, - "learning_rate": 1.4627904614937143e-05, - "loss": 54.49, - "step": 168140 - }, - { - "epoch": 0.6793472771567205, - "grad_norm": 1415.2705078125, - "learning_rate": 1.4624728655247995e-05, - "loss": 43.0648, - "step": 168150 - }, - { - "epoch": 0.6793876784220881, - "grad_norm": 572.370849609375, - "learning_rate": 1.462155289783011e-05, - "loss": 42.223, - "step": 168160 - }, - { - "epoch": 0.6794280796874558, - "grad_norm": 970.8530883789062, - "learning_rate": 1.4618377342745381e-05, - "loss": 57.9264, - "step": 168170 - }, - { - "epoch": 0.6794684809528234, - "grad_norm": 1352.0662841796875, - "learning_rate": 1.461520199005574e-05, - "loss": 48.2247, - "step": 168180 - }, - { - "epoch": 0.6795088822181911, - "grad_norm": 375.2728576660156, - "learning_rate": 1.4612026839823084e-05, - "loss": 43.6525, - "step": 168190 - }, - { - "epoch": 0.6795492834835587, - "grad_norm": 560.6547241210938, - "learning_rate": 1.4608851892109304e-05, - "loss": 39.3269, - "step": 168200 - }, - { - "epoch": 0.6795896847489263, - "grad_norm": 558.0577392578125, - "learning_rate": 1.4605677146976315e-05, - "loss": 51.4779, - "step": 168210 - }, - { - "epoch": 0.679630086014294, - "grad_norm": 431.2486572265625, - "learning_rate": 1.4602502604486001e-05, - "loss": 53.5672, - "step": 168220 - }, - { - "epoch": 0.6796704872796616, - "grad_norm": 616.7715454101562, - "learning_rate": 1.4599328264700247e-05, - "loss": 36.0276, - "step": 168230 - }, - { - "epoch": 0.6797108885450293, - "grad_norm": 1291.5537109375, - "learning_rate": 1.4596154127680947e-05, - "loss": 43.0771, - "step": 168240 - }, - { - "epoch": 0.6797512898103969, - "grad_norm": 1298.318115234375, - "learning_rate": 1.4592980193489975e-05, - "loss": 53.2581, - "step": 168250 - }, - { - "epoch": 0.6797916910757645, - "grad_norm": 845.0426635742188, - "learning_rate": 1.458980646218921e-05, - "loss": 50.5599, - "step": 168260 - }, - { - "epoch": 0.6798320923411322, - "grad_norm": 592.9050903320312, - "learning_rate": 1.458663293384053e-05, - "loss": 60.6831, - "step": 168270 - }, - { - "epoch": 0.6798724936064997, - "grad_norm": 488.923095703125, - "learning_rate": 1.4583459608505801e-05, - "loss": 43.6718, - "step": 168280 - }, - { - "epoch": 0.6799128948718673, - "grad_norm": 390.5216369628906, - "learning_rate": 1.45802864862469e-05, - "loss": 46.2525, - "step": 168290 - }, - { - "epoch": 0.679953296137235, - "grad_norm": 662.7907104492188, - "learning_rate": 1.4577113567125669e-05, - "loss": 53.4482, - "step": 168300 - }, - { - "epoch": 0.6799936974026026, - "grad_norm": 282.6667785644531, - "learning_rate": 1.4573940851203974e-05, - "loss": 38.2593, - "step": 168310 - }, - { - "epoch": 0.6800340986679703, - "grad_norm": 614.1632080078125, - "learning_rate": 1.4570768338543672e-05, - "loss": 43.634, - "step": 168320 - }, - { - "epoch": 0.6800744999333379, - "grad_norm": 550.2304077148438, - "learning_rate": 1.4567596029206607e-05, - "loss": 68.1899, - "step": 168330 - }, - { - "epoch": 0.6801149011987055, - "grad_norm": 692.3876953125, - "learning_rate": 1.456442392325463e-05, - "loss": 66.2251, - "step": 168340 - }, - { - "epoch": 0.6801553024640732, - "grad_norm": 389.5172424316406, - "learning_rate": 1.4561252020749591e-05, - "loss": 56.9403, - "step": 168350 - }, - { - "epoch": 0.6801957037294408, - "grad_norm": 726.8670043945312, - "learning_rate": 1.455808032175331e-05, - "loss": 59.1227, - "step": 168360 - }, - { - "epoch": 0.6802361049948085, - "grad_norm": 1539.648193359375, - "learning_rate": 1.4554908826327625e-05, - "loss": 50.6336, - "step": 168370 - }, - { - "epoch": 0.6802765062601761, - "grad_norm": 463.2121276855469, - "learning_rate": 1.4551737534534383e-05, - "loss": 36.2807, - "step": 168380 - }, - { - "epoch": 0.6803169075255437, - "grad_norm": 535.5940551757812, - "learning_rate": 1.4548566446435378e-05, - "loss": 48.5567, - "step": 168390 - }, - { - "epoch": 0.6803573087909114, - "grad_norm": 514.473388671875, - "learning_rate": 1.4545395562092468e-05, - "loss": 26.0429, - "step": 168400 - }, - { - "epoch": 0.6803977100562789, - "grad_norm": 1582.9013671875, - "learning_rate": 1.4542224881567434e-05, - "loss": 32.9378, - "step": 168410 - }, - { - "epoch": 0.6804381113216466, - "grad_norm": 2122.857177734375, - "learning_rate": 1.453905440492213e-05, - "loss": 49.2891, - "step": 168420 - }, - { - "epoch": 0.6804785125870142, - "grad_norm": 377.3238220214844, - "learning_rate": 1.4535884132218342e-05, - "loss": 43.1437, - "step": 168430 - }, - { - "epoch": 0.6805189138523818, - "grad_norm": 919.3121948242188, - "learning_rate": 1.4532714063517871e-05, - "loss": 78.3128, - "step": 168440 - }, - { - "epoch": 0.6805593151177495, - "grad_norm": 673.9483642578125, - "learning_rate": 1.4529544198882544e-05, - "loss": 75.3447, - "step": 168450 - }, - { - "epoch": 0.6805997163831171, - "grad_norm": 832.7345581054688, - "learning_rate": 1.4526374538374132e-05, - "loss": 59.8041, - "step": 168460 - }, - { - "epoch": 0.6806401176484848, - "grad_norm": 1186.2255859375, - "learning_rate": 1.4523205082054442e-05, - "loss": 52.9427, - "step": 168470 - }, - { - "epoch": 0.6806805189138524, - "grad_norm": 488.5611572265625, - "learning_rate": 1.452003582998526e-05, - "loss": 36.2347, - "step": 168480 - }, - { - "epoch": 0.68072092017922, - "grad_norm": 1167.7965087890625, - "learning_rate": 1.4516866782228378e-05, - "loss": 41.4017, - "step": 168490 - }, - { - "epoch": 0.6807613214445877, - "grad_norm": 2220.746337890625, - "learning_rate": 1.4513697938845572e-05, - "loss": 52.8377, - "step": 168500 - }, - { - "epoch": 0.6808017227099553, - "grad_norm": 1385.3912353515625, - "learning_rate": 1.4510529299898634e-05, - "loss": 49.8378, - "step": 168510 - }, - { - "epoch": 0.680842123975323, - "grad_norm": 735.9927978515625, - "learning_rate": 1.4507360865449319e-05, - "loss": 50.0806, - "step": 168520 - }, - { - "epoch": 0.6808825252406905, - "grad_norm": 548.2672729492188, - "learning_rate": 1.4504192635559406e-05, - "loss": 60.145, - "step": 168530 - }, - { - "epoch": 0.6809229265060581, - "grad_norm": 212.44097900390625, - "learning_rate": 1.4501024610290658e-05, - "loss": 42.0871, - "step": 168540 - }, - { - "epoch": 0.6809633277714258, - "grad_norm": 632.0999755859375, - "learning_rate": 1.4497856789704844e-05, - "loss": 48.5092, - "step": 168550 - }, - { - "epoch": 0.6810037290367934, - "grad_norm": 508.2789611816406, - "learning_rate": 1.4494689173863726e-05, - "loss": 44.463, - "step": 168560 - }, - { - "epoch": 0.681044130302161, - "grad_norm": 684.1690063476562, - "learning_rate": 1.4491521762829034e-05, - "loss": 47.4132, - "step": 168570 - }, - { - "epoch": 0.6810845315675287, - "grad_norm": 519.2853393554688, - "learning_rate": 1.4488354556662554e-05, - "loss": 48.8163, - "step": 168580 - }, - { - "epoch": 0.6811249328328963, - "grad_norm": 0.0, - "learning_rate": 1.4485187555426005e-05, - "loss": 36.66, - "step": 168590 - }, - { - "epoch": 0.681165334098264, - "grad_norm": 1602.7708740234375, - "learning_rate": 1.4482020759181135e-05, - "loss": 54.6894, - "step": 168600 - }, - { - "epoch": 0.6812057353636316, - "grad_norm": 423.5799255371094, - "learning_rate": 1.4478854167989687e-05, - "loss": 50.2314, - "step": 168610 - }, - { - "epoch": 0.6812461366289992, - "grad_norm": 447.10101318359375, - "learning_rate": 1.4475687781913394e-05, - "loss": 47.8248, - "step": 168620 - }, - { - "epoch": 0.6812865378943669, - "grad_norm": 0.0, - "learning_rate": 1.4472521601013995e-05, - "loss": 40.6153, - "step": 168630 - }, - { - "epoch": 0.6813269391597345, - "grad_norm": 577.5380859375, - "learning_rate": 1.4469355625353198e-05, - "loss": 48.3852, - "step": 168640 - }, - { - "epoch": 0.6813673404251022, - "grad_norm": 1142.417724609375, - "learning_rate": 1.4466189854992735e-05, - "loss": 52.0295, - "step": 168650 - }, - { - "epoch": 0.6814077416904697, - "grad_norm": 444.90838623046875, - "learning_rate": 1.4463024289994322e-05, - "loss": 56.2271, - "step": 168660 - }, - { - "epoch": 0.6814481429558373, - "grad_norm": 688.0332641601562, - "learning_rate": 1.4459858930419689e-05, - "loss": 56.1599, - "step": 168670 - }, - { - "epoch": 0.681488544221205, - "grad_norm": 634.889404296875, - "learning_rate": 1.445669377633051e-05, - "loss": 51.8216, - "step": 168680 - }, - { - "epoch": 0.6815289454865726, - "grad_norm": 318.47491455078125, - "learning_rate": 1.4453528827788531e-05, - "loss": 38.8029, - "step": 168690 - }, - { - "epoch": 0.6815693467519403, - "grad_norm": 428.7381286621094, - "learning_rate": 1.4450364084855433e-05, - "loss": 33.6186, - "step": 168700 - }, - { - "epoch": 0.6816097480173079, - "grad_norm": 1699.4671630859375, - "learning_rate": 1.4447199547592916e-05, - "loss": 77.7904, - "step": 168710 - }, - { - "epoch": 0.6816501492826755, - "grad_norm": 631.262451171875, - "learning_rate": 1.4444035216062684e-05, - "loss": 83.7363, - "step": 168720 - }, - { - "epoch": 0.6816905505480432, - "grad_norm": 686.1653442382812, - "learning_rate": 1.4440871090326404e-05, - "loss": 37.8094, - "step": 168730 - }, - { - "epoch": 0.6817309518134108, - "grad_norm": 547.116943359375, - "learning_rate": 1.4437707170445797e-05, - "loss": 51.7289, - "step": 168740 - }, - { - "epoch": 0.6817713530787785, - "grad_norm": 458.6316223144531, - "learning_rate": 1.443454345648252e-05, - "loss": 45.8365, - "step": 168750 - }, - { - "epoch": 0.6818117543441461, - "grad_norm": 436.25347900390625, - "learning_rate": 1.4431379948498253e-05, - "loss": 42.5683, - "step": 168760 - }, - { - "epoch": 0.6818521556095137, - "grad_norm": 293.4028015136719, - "learning_rate": 1.4428216646554676e-05, - "loss": 45.4803, - "step": 168770 - }, - { - "epoch": 0.6818925568748814, - "grad_norm": 0.0, - "learning_rate": 1.4425053550713458e-05, - "loss": 41.6429, - "step": 168780 - }, - { - "epoch": 0.6819329581402489, - "grad_norm": 1097.0491943359375, - "learning_rate": 1.4421890661036275e-05, - "loss": 54.7557, - "step": 168790 - }, - { - "epoch": 0.6819733594056165, - "grad_norm": 2632.930419921875, - "learning_rate": 1.4418727977584774e-05, - "loss": 50.7967, - "step": 168800 - }, - { - "epoch": 0.6820137606709842, - "grad_norm": 806.8290405273438, - "learning_rate": 1.4415565500420613e-05, - "loss": 49.4605, - "step": 168810 - }, - { - "epoch": 0.6820541619363518, - "grad_norm": 1056.539794921875, - "learning_rate": 1.4412403229605454e-05, - "loss": 55.0219, - "step": 168820 - }, - { - "epoch": 0.6820945632017195, - "grad_norm": 464.64453125, - "learning_rate": 1.4409241165200954e-05, - "loss": 62.6499, - "step": 168830 - }, - { - "epoch": 0.6821349644670871, - "grad_norm": 978.5242919921875, - "learning_rate": 1.4406079307268734e-05, - "loss": 45.2906, - "step": 168840 - }, - { - "epoch": 0.6821753657324547, - "grad_norm": 529.989990234375, - "learning_rate": 1.4402917655870466e-05, - "loss": 33.3289, - "step": 168850 - }, - { - "epoch": 0.6822157669978224, - "grad_norm": 556.6727294921875, - "learning_rate": 1.4399756211067766e-05, - "loss": 35.7993, - "step": 168860 - }, - { - "epoch": 0.68225616826319, - "grad_norm": 943.91357421875, - "learning_rate": 1.4396594972922278e-05, - "loss": 42.8627, - "step": 168870 - }, - { - "epoch": 0.6822965695285577, - "grad_norm": 813.277099609375, - "learning_rate": 1.4393433941495637e-05, - "loss": 48.5582, - "step": 168880 - }, - { - "epoch": 0.6823369707939253, - "grad_norm": 795.2565307617188, - "learning_rate": 1.4390273116849445e-05, - "loss": 47.6735, - "step": 168890 - }, - { - "epoch": 0.6823773720592929, - "grad_norm": 1392.483642578125, - "learning_rate": 1.438711249904536e-05, - "loss": 55.0963, - "step": 168900 - }, - { - "epoch": 0.6824177733246606, - "grad_norm": 1057.4271240234375, - "learning_rate": 1.438395208814497e-05, - "loss": 63.7595, - "step": 168910 - }, - { - "epoch": 0.6824581745900281, - "grad_norm": 1027.0804443359375, - "learning_rate": 1.43807918842099e-05, - "loss": 47.1839, - "step": 168920 - }, - { - "epoch": 0.6824985758553958, - "grad_norm": 1541.439453125, - "learning_rate": 1.437763188730176e-05, - "loss": 56.8558, - "step": 168930 - }, - { - "epoch": 0.6825389771207634, - "grad_norm": 744.2750854492188, - "learning_rate": 1.4374472097482155e-05, - "loss": 48.9892, - "step": 168940 - }, - { - "epoch": 0.682579378386131, - "grad_norm": 217.60009765625, - "learning_rate": 1.4371312514812685e-05, - "loss": 49.317, - "step": 168950 - }, - { - "epoch": 0.6826197796514987, - "grad_norm": 938.9303588867188, - "learning_rate": 1.4368153139354962e-05, - "loss": 36.7838, - "step": 168960 - }, - { - "epoch": 0.6826601809168663, - "grad_norm": 616.41748046875, - "learning_rate": 1.4364993971170553e-05, - "loss": 47.57, - "step": 168970 - }, - { - "epoch": 0.682700582182234, - "grad_norm": 393.9440002441406, - "learning_rate": 1.4361835010321067e-05, - "loss": 59.8542, - "step": 168980 - }, - { - "epoch": 0.6827409834476016, - "grad_norm": 1040.7830810546875, - "learning_rate": 1.435867625686808e-05, - "loss": 46.2593, - "step": 168990 - }, - { - "epoch": 0.6827813847129692, - "grad_norm": 1211.5755615234375, - "learning_rate": 1.4355517710873184e-05, - "loss": 63.6152, - "step": 169000 - }, - { - "epoch": 0.6828217859783369, - "grad_norm": 734.5034790039062, - "learning_rate": 1.4352359372397955e-05, - "loss": 48.3726, - "step": 169010 - }, - { - "epoch": 0.6828621872437045, - "grad_norm": 933.177001953125, - "learning_rate": 1.4349201241503943e-05, - "loss": 72.7957, - "step": 169020 - }, - { - "epoch": 0.6829025885090722, - "grad_norm": 532.6458129882812, - "learning_rate": 1.4346043318252756e-05, - "loss": 47.9236, - "step": 169030 - }, - { - "epoch": 0.6829429897744398, - "grad_norm": 795.7175903320312, - "learning_rate": 1.434288560270593e-05, - "loss": 43.2592, - "step": 169040 - }, - { - "epoch": 0.6829833910398073, - "grad_norm": 717.64697265625, - "learning_rate": 1.4339728094925037e-05, - "loss": 62.2802, - "step": 169050 - }, - { - "epoch": 0.683023792305175, - "grad_norm": 1106.865234375, - "learning_rate": 1.4336570794971643e-05, - "loss": 62.9854, - "step": 169060 - }, - { - "epoch": 0.6830641935705426, - "grad_norm": 452.23504638671875, - "learning_rate": 1.4333413702907267e-05, - "loss": 54.1656, - "step": 169070 - }, - { - "epoch": 0.6831045948359102, - "grad_norm": 1227.8212890625, - "learning_rate": 1.4330256818793508e-05, - "loss": 49.5026, - "step": 169080 - }, - { - "epoch": 0.6831449961012779, - "grad_norm": 637.03466796875, - "learning_rate": 1.4327100142691874e-05, - "loss": 41.3022, - "step": 169090 - }, - { - "epoch": 0.6831853973666455, - "grad_norm": 551.9191284179688, - "learning_rate": 1.4323943674663914e-05, - "loss": 50.8832, - "step": 169100 - }, - { - "epoch": 0.6832257986320132, - "grad_norm": 1289.005859375, - "learning_rate": 1.432078741477117e-05, - "loss": 45.6479, - "step": 169110 - }, - { - "epoch": 0.6832661998973808, - "grad_norm": 875.027099609375, - "learning_rate": 1.4317631363075184e-05, - "loss": 51.9001, - "step": 169120 - }, - { - "epoch": 0.6833066011627484, - "grad_norm": 655.9733276367188, - "learning_rate": 1.4314475519637466e-05, - "loss": 65.7755, - "step": 169130 - }, - { - "epoch": 0.6833470024281161, - "grad_norm": 248.5944061279297, - "learning_rate": 1.4311319884519547e-05, - "loss": 59.2009, - "step": 169140 - }, - { - "epoch": 0.6833874036934837, - "grad_norm": 251.61972045898438, - "learning_rate": 1.4308164457782952e-05, - "loss": 36.2274, - "step": 169150 - }, - { - "epoch": 0.6834278049588514, - "grad_norm": 244.47169494628906, - "learning_rate": 1.4305009239489192e-05, - "loss": 40.4156, - "step": 169160 - }, - { - "epoch": 0.6834682062242189, - "grad_norm": 425.4827575683594, - "learning_rate": 1.4301854229699796e-05, - "loss": 34.5485, - "step": 169170 - }, - { - "epoch": 0.6835086074895865, - "grad_norm": 579.6253662109375, - "learning_rate": 1.4298699428476236e-05, - "loss": 35.4702, - "step": 169180 - }, - { - "epoch": 0.6835490087549542, - "grad_norm": 1097.84716796875, - "learning_rate": 1.4295544835880065e-05, - "loss": 56.0326, - "step": 169190 - }, - { - "epoch": 0.6835894100203218, - "grad_norm": 1029.4461669921875, - "learning_rate": 1.4292390451972745e-05, - "loss": 36.3401, - "step": 169200 - }, - { - "epoch": 0.6836298112856894, - "grad_norm": 428.86077880859375, - "learning_rate": 1.4289236276815787e-05, - "loss": 59.5827, - "step": 169210 - }, - { - "epoch": 0.6836702125510571, - "grad_norm": 933.2636108398438, - "learning_rate": 1.4286082310470692e-05, - "loss": 47.7912, - "step": 169220 - }, - { - "epoch": 0.6837106138164247, - "grad_norm": 396.597412109375, - "learning_rate": 1.428292855299892e-05, - "loss": 49.1154, - "step": 169230 - }, - { - "epoch": 0.6837510150817924, - "grad_norm": 856.8510131835938, - "learning_rate": 1.427977500446199e-05, - "loss": 77.9437, - "step": 169240 - }, - { - "epoch": 0.68379141634716, - "grad_norm": 612.7291259765625, - "learning_rate": 1.4276621664921357e-05, - "loss": 50.0896, - "step": 169250 - }, - { - "epoch": 0.6838318176125276, - "grad_norm": 544.8532104492188, - "learning_rate": 1.4273468534438505e-05, - "loss": 55.9285, - "step": 169260 - }, - { - "epoch": 0.6838722188778953, - "grad_norm": 466.5942077636719, - "learning_rate": 1.4270315613074906e-05, - "loss": 46.574, - "step": 169270 - }, - { - "epoch": 0.6839126201432629, - "grad_norm": 1225.802978515625, - "learning_rate": 1.426716290089204e-05, - "loss": 32.1837, - "step": 169280 - }, - { - "epoch": 0.6839530214086306, - "grad_norm": 1749.95556640625, - "learning_rate": 1.4264010397951335e-05, - "loss": 49.6508, - "step": 169290 - }, - { - "epoch": 0.6839934226739981, - "grad_norm": 439.6357727050781, - "learning_rate": 1.4260858104314297e-05, - "loss": 46.1547, - "step": 169300 - }, - { - "epoch": 0.6840338239393657, - "grad_norm": 879.0947875976562, - "learning_rate": 1.425770602004235e-05, - "loss": 46.7996, - "step": 169310 - }, - { - "epoch": 0.6840742252047334, - "grad_norm": 616.0336303710938, - "learning_rate": 1.4254554145196953e-05, - "loss": 51.1238, - "step": 169320 - }, - { - "epoch": 0.684114626470101, - "grad_norm": 2811.0615234375, - "learning_rate": 1.4251402479839564e-05, - "loss": 37.678, - "step": 169330 - }, - { - "epoch": 0.6841550277354687, - "grad_norm": 554.9786987304688, - "learning_rate": 1.42482510240316e-05, - "loss": 49.2597, - "step": 169340 - }, - { - "epoch": 0.6841954290008363, - "grad_norm": 344.1712341308594, - "learning_rate": 1.4245099777834539e-05, - "loss": 71.6305, - "step": 169350 - }, - { - "epoch": 0.6842358302662039, - "grad_norm": 464.71051025390625, - "learning_rate": 1.4241948741309782e-05, - "loss": 54.1099, - "step": 169360 - }, - { - "epoch": 0.6842762315315716, - "grad_norm": 1372.66064453125, - "learning_rate": 1.4238797914518776e-05, - "loss": 56.0686, - "step": 169370 - }, - { - "epoch": 0.6843166327969392, - "grad_norm": 504.1880798339844, - "learning_rate": 1.4235647297522942e-05, - "loss": 44.2797, - "step": 169380 - }, - { - "epoch": 0.6843570340623069, - "grad_norm": 579.6629638671875, - "learning_rate": 1.4232496890383706e-05, - "loss": 97.3262, - "step": 169390 - }, - { - "epoch": 0.6843974353276745, - "grad_norm": 648.5736694335938, - "learning_rate": 1.42293466931625e-05, - "loss": 63.3319, - "step": 169400 - }, - { - "epoch": 0.6844378365930421, - "grad_norm": 450.7187194824219, - "learning_rate": 1.4226196705920708e-05, - "loss": 73.6804, - "step": 169410 - }, - { - "epoch": 0.6844782378584098, - "grad_norm": 456.9446716308594, - "learning_rate": 1.4223046928719763e-05, - "loss": 48.2182, - "step": 169420 - }, - { - "epoch": 0.6845186391237773, - "grad_norm": 623.4013671875, - "learning_rate": 1.4219897361621065e-05, - "loss": 72.1421, - "step": 169430 - }, - { - "epoch": 0.684559040389145, - "grad_norm": 421.9169006347656, - "learning_rate": 1.421674800468603e-05, - "loss": 28.4017, - "step": 169440 - }, - { - "epoch": 0.6845994416545126, - "grad_norm": 623.2809448242188, - "learning_rate": 1.4213598857976024e-05, - "loss": 45.7994, - "step": 169450 - }, - { - "epoch": 0.6846398429198802, - "grad_norm": 431.9721984863281, - "learning_rate": 1.421044992155248e-05, - "loss": 35.0073, - "step": 169460 - }, - { - "epoch": 0.6846802441852479, - "grad_norm": 415.11993408203125, - "learning_rate": 1.4207301195476757e-05, - "loss": 52.3375, - "step": 169470 - }, - { - "epoch": 0.6847206454506155, - "grad_norm": 1272.1280517578125, - "learning_rate": 1.4204152679810258e-05, - "loss": 51.1304, - "step": 169480 - }, - { - "epoch": 0.6847610467159831, - "grad_norm": 571.7431030273438, - "learning_rate": 1.4201004374614369e-05, - "loss": 75.1111, - "step": 169490 - }, - { - "epoch": 0.6848014479813508, - "grad_norm": 519.0687255859375, - "learning_rate": 1.4197856279950438e-05, - "loss": 25.3569, - "step": 169500 - }, - { - "epoch": 0.6848418492467184, - "grad_norm": 544.158203125, - "learning_rate": 1.4194708395879886e-05, - "loss": 63.012, - "step": 169510 - }, - { - "epoch": 0.6848822505120861, - "grad_norm": 387.5862121582031, - "learning_rate": 1.4191560722464032e-05, - "loss": 34.8872, - "step": 169520 - }, - { - "epoch": 0.6849226517774537, - "grad_norm": 666.9616088867188, - "learning_rate": 1.4188413259764285e-05, - "loss": 42.9067, - "step": 169530 - }, - { - "epoch": 0.6849630530428213, - "grad_norm": 0.0, - "learning_rate": 1.418526600784198e-05, - "loss": 37.349, - "step": 169540 - }, - { - "epoch": 0.685003454308189, - "grad_norm": 418.4957580566406, - "learning_rate": 1.4182118966758481e-05, - "loss": 42.9753, - "step": 169550 - }, - { - "epoch": 0.6850438555735565, - "grad_norm": 254.7620849609375, - "learning_rate": 1.4178972136575153e-05, - "loss": 42.3305, - "step": 169560 - }, - { - "epoch": 0.6850842568389242, - "grad_norm": 1166.2672119140625, - "learning_rate": 1.4175825517353325e-05, - "loss": 51.9849, - "step": 169570 - }, - { - "epoch": 0.6851246581042918, - "grad_norm": 608.7311401367188, - "learning_rate": 1.4172679109154349e-05, - "loss": 38.9989, - "step": 169580 - }, - { - "epoch": 0.6851650593696594, - "grad_norm": 354.56768798828125, - "learning_rate": 1.4169532912039568e-05, - "loss": 40.5726, - "step": 169590 - }, - { - "epoch": 0.6852054606350271, - "grad_norm": 324.2098693847656, - "learning_rate": 1.4166386926070322e-05, - "loss": 46.7577, - "step": 169600 - }, - { - "epoch": 0.6852458619003947, - "grad_norm": 484.006591796875, - "learning_rate": 1.4163241151307937e-05, - "loss": 40.9271, - "step": 169610 - }, - { - "epoch": 0.6852862631657624, - "grad_norm": 694.5880737304688, - "learning_rate": 1.4160095587813757e-05, - "loss": 45.7319, - "step": 169620 - }, - { - "epoch": 0.68532666443113, - "grad_norm": 252.2886962890625, - "learning_rate": 1.4156950235649074e-05, - "loss": 63.6754, - "step": 169630 - }, - { - "epoch": 0.6853670656964976, - "grad_norm": 233.86050415039062, - "learning_rate": 1.4153805094875248e-05, - "loss": 56.9261, - "step": 169640 - }, - { - "epoch": 0.6854074669618653, - "grad_norm": 710.7440795898438, - "learning_rate": 1.4150660165553564e-05, - "loss": 52.6019, - "step": 169650 - }, - { - "epoch": 0.6854478682272329, - "grad_norm": 1143.4447021484375, - "learning_rate": 1.4147515447745349e-05, - "loss": 56.2893, - "step": 169660 - }, - { - "epoch": 0.6854882694926006, - "grad_norm": 590.0914306640625, - "learning_rate": 1.4144370941511915e-05, - "loss": 57.5833, - "step": 169670 - }, - { - "epoch": 0.6855286707579682, - "grad_norm": 617.8530883789062, - "learning_rate": 1.4141226646914541e-05, - "loss": 49.1148, - "step": 169680 - }, - { - "epoch": 0.6855690720233357, - "grad_norm": 1288.4437255859375, - "learning_rate": 1.413808256401456e-05, - "loss": 56.3085, - "step": 169690 - }, - { - "epoch": 0.6856094732887034, - "grad_norm": 1510.7669677734375, - "learning_rate": 1.4134938692873245e-05, - "loss": 50.2129, - "step": 169700 - }, - { - "epoch": 0.685649874554071, - "grad_norm": 490.9001159667969, - "learning_rate": 1.4131795033551892e-05, - "loss": 53.7312, - "step": 169710 - }, - { - "epoch": 0.6856902758194386, - "grad_norm": 1059.6722412109375, - "learning_rate": 1.412865158611179e-05, - "loss": 50.0878, - "step": 169720 - }, - { - "epoch": 0.6857306770848063, - "grad_norm": 717.5321655273438, - "learning_rate": 1.4125508350614236e-05, - "loss": 41.077, - "step": 169730 - }, - { - "epoch": 0.6857710783501739, - "grad_norm": 368.7514343261719, - "learning_rate": 1.412236532712048e-05, - "loss": 59.4592, - "step": 169740 - }, - { - "epoch": 0.6858114796155416, - "grad_norm": 1239.5657958984375, - "learning_rate": 1.4119222515691816e-05, - "loss": 45.6636, - "step": 169750 - }, - { - "epoch": 0.6858518808809092, - "grad_norm": 589.4757690429688, - "learning_rate": 1.4116079916389507e-05, - "loss": 54.7634, - "step": 169760 - }, - { - "epoch": 0.6858922821462768, - "grad_norm": 566.4857177734375, - "learning_rate": 1.4112937529274828e-05, - "loss": 40.5211, - "step": 169770 - }, - { - "epoch": 0.6859326834116445, - "grad_norm": 991.4258422851562, - "learning_rate": 1.4109795354409044e-05, - "loss": 36.051, - "step": 169780 - }, - { - "epoch": 0.6859730846770121, - "grad_norm": 0.0, - "learning_rate": 1.4106653391853386e-05, - "loss": 43.4424, - "step": 169790 - }, - { - "epoch": 0.6860134859423798, - "grad_norm": 680.9237670898438, - "learning_rate": 1.4103511641669152e-05, - "loss": 52.9324, - "step": 169800 - }, - { - "epoch": 0.6860538872077473, - "grad_norm": 287.92718505859375, - "learning_rate": 1.4100370103917554e-05, - "loss": 30.8766, - "step": 169810 - }, - { - "epoch": 0.6860942884731149, - "grad_norm": 353.46734619140625, - "learning_rate": 1.4097228778659854e-05, - "loss": 31.8396, - "step": 169820 - }, - { - "epoch": 0.6861346897384826, - "grad_norm": 1042.081298828125, - "learning_rate": 1.4094087665957304e-05, - "loss": 67.0164, - "step": 169830 - }, - { - "epoch": 0.6861750910038502, - "grad_norm": 489.08367919921875, - "learning_rate": 1.4090946765871104e-05, - "loss": 60.9808, - "step": 169840 - }, - { - "epoch": 0.6862154922692179, - "grad_norm": 874.5740356445312, - "learning_rate": 1.408780607846254e-05, - "loss": 50.7961, - "step": 169850 - }, - { - "epoch": 0.6862558935345855, - "grad_norm": 740.3602905273438, - "learning_rate": 1.4084665603792802e-05, - "loss": 42.8049, - "step": 169860 - }, - { - "epoch": 0.6862962947999531, - "grad_norm": 0.0, - "learning_rate": 1.4081525341923127e-05, - "loss": 42.2607, - "step": 169870 - }, - { - "epoch": 0.6863366960653208, - "grad_norm": 0.0, - "learning_rate": 1.4078385292914736e-05, - "loss": 40.3975, - "step": 169880 - }, - { - "epoch": 0.6863770973306884, - "grad_norm": 1021.9735717773438, - "learning_rate": 1.4075245456828857e-05, - "loss": 42.3641, - "step": 169890 - }, - { - "epoch": 0.686417498596056, - "grad_norm": 699.235595703125, - "learning_rate": 1.4072105833726684e-05, - "loss": 50.5739, - "step": 169900 - }, - { - "epoch": 0.6864578998614237, - "grad_norm": 343.7049560546875, - "learning_rate": 1.4068966423669433e-05, - "loss": 37.3967, - "step": 169910 - }, - { - "epoch": 0.6864983011267913, - "grad_norm": 680.474853515625, - "learning_rate": 1.406582722671831e-05, - "loss": 49.679, - "step": 169920 - }, - { - "epoch": 0.686538702392159, - "grad_norm": 1111.1092529296875, - "learning_rate": 1.406268824293451e-05, - "loss": 57.0753, - "step": 169930 - }, - { - "epoch": 0.6865791036575265, - "grad_norm": 523.4867553710938, - "learning_rate": 1.4059549472379247e-05, - "loss": 44.4473, - "step": 169940 - }, - { - "epoch": 0.6866195049228941, - "grad_norm": 776.1953125, - "learning_rate": 1.405641091511368e-05, - "loss": 53.1783, - "step": 169950 - }, - { - "epoch": 0.6866599061882618, - "grad_norm": 712.0708618164062, - "learning_rate": 1.4053272571199036e-05, - "loss": 95.6826, - "step": 169960 - }, - { - "epoch": 0.6867003074536294, - "grad_norm": 309.5819396972656, - "learning_rate": 1.405013444069647e-05, - "loss": 54.1302, - "step": 169970 - }, - { - "epoch": 0.6867407087189971, - "grad_norm": 682.5097045898438, - "learning_rate": 1.4046996523667166e-05, - "loss": 63.7726, - "step": 169980 - }, - { - "epoch": 0.6867811099843647, - "grad_norm": 0.0, - "learning_rate": 1.4043858820172309e-05, - "loss": 42.362, - "step": 169990 - }, - { - "epoch": 0.6868215112497323, - "grad_norm": 384.0200500488281, - "learning_rate": 1.4040721330273062e-05, - "loss": 49.3784, - "step": 170000 - }, - { - "epoch": 0.6868619125151, - "grad_norm": 550.7702026367188, - "learning_rate": 1.4037584054030606e-05, - "loss": 50.9029, - "step": 170010 - }, - { - "epoch": 0.6869023137804676, - "grad_norm": 1652.8741455078125, - "learning_rate": 1.4034446991506083e-05, - "loss": 54.4098, - "step": 170020 - }, - { - "epoch": 0.6869427150458353, - "grad_norm": 1165.6163330078125, - "learning_rate": 1.4031310142760662e-05, - "loss": 43.6158, - "step": 170030 - }, - { - "epoch": 0.6869831163112029, - "grad_norm": 1255.7396240234375, - "learning_rate": 1.4028173507855493e-05, - "loss": 56.6457, - "step": 170040 - }, - { - "epoch": 0.6870235175765705, - "grad_norm": 379.06463623046875, - "learning_rate": 1.4025037086851733e-05, - "loss": 51.2328, - "step": 170050 - }, - { - "epoch": 0.6870639188419382, - "grad_norm": 446.58673095703125, - "learning_rate": 1.4021900879810526e-05, - "loss": 44.2981, - "step": 170060 - }, - { - "epoch": 0.6871043201073057, - "grad_norm": 889.757080078125, - "learning_rate": 1.4018764886793023e-05, - "loss": 51.626, - "step": 170070 - }, - { - "epoch": 0.6871447213726734, - "grad_norm": 544.6858520507812, - "learning_rate": 1.401562910786034e-05, - "loss": 42.926, - "step": 170080 - }, - { - "epoch": 0.687185122638041, - "grad_norm": 575.81884765625, - "learning_rate": 1.4012493543073623e-05, - "loss": 46.7048, - "step": 170090 - }, - { - "epoch": 0.6872255239034086, - "grad_norm": 798.9896850585938, - "learning_rate": 1.4009358192494016e-05, - "loss": 43.1169, - "step": 170100 - }, - { - "epoch": 0.6872659251687763, - "grad_norm": 314.7311096191406, - "learning_rate": 1.4006223056182604e-05, - "loss": 46.7615, - "step": 170110 - }, - { - "epoch": 0.6873063264341439, - "grad_norm": 587.7617797851562, - "learning_rate": 1.4003088134200553e-05, - "loss": 41.9043, - "step": 170120 - }, - { - "epoch": 0.6873467276995116, - "grad_norm": 593.9027099609375, - "learning_rate": 1.3999953426608941e-05, - "loss": 52.3961, - "step": 170130 - }, - { - "epoch": 0.6873871289648792, - "grad_norm": 1898.5985107421875, - "learning_rate": 1.3996818933468925e-05, - "loss": 67.1244, - "step": 170140 - }, - { - "epoch": 0.6874275302302468, - "grad_norm": 993.4326171875, - "learning_rate": 1.3993684654841574e-05, - "loss": 46.3975, - "step": 170150 - }, - { - "epoch": 0.6874679314956145, - "grad_norm": 220.27293395996094, - "learning_rate": 1.399055059078801e-05, - "loss": 40.9544, - "step": 170160 - }, - { - "epoch": 0.6875083327609821, - "grad_norm": 702.5592651367188, - "learning_rate": 1.3987416741369336e-05, - "loss": 38.6786, - "step": 170170 - }, - { - "epoch": 0.6875487340263498, - "grad_norm": 484.0182800292969, - "learning_rate": 1.3984283106646636e-05, - "loss": 79.0217, - "step": 170180 - }, - { - "epoch": 0.6875891352917174, - "grad_norm": 1016.0274658203125, - "learning_rate": 1.3981149686681008e-05, - "loss": 60.7541, - "step": 170190 - }, - { - "epoch": 0.6876295365570849, - "grad_norm": 781.2263793945312, - "learning_rate": 1.397801648153354e-05, - "loss": 40.7296, - "step": 170200 - }, - { - "epoch": 0.6876699378224526, - "grad_norm": 935.5984497070312, - "learning_rate": 1.3974883491265312e-05, - "loss": 60.1782, - "step": 170210 - }, - { - "epoch": 0.6877103390878202, - "grad_norm": 721.454345703125, - "learning_rate": 1.397175071593741e-05, - "loss": 62.3025, - "step": 170220 - }, - { - "epoch": 0.6877507403531878, - "grad_norm": 550.3572998046875, - "learning_rate": 1.3968618155610913e-05, - "loss": 50.8357, - "step": 170230 - }, - { - "epoch": 0.6877911416185555, - "grad_norm": 467.0953674316406, - "learning_rate": 1.3965485810346874e-05, - "loss": 48.71, - "step": 170240 - }, - { - "epoch": 0.6878315428839231, - "grad_norm": 613.0184326171875, - "learning_rate": 1.3962353680206373e-05, - "loss": 58.2839, - "step": 170250 - }, - { - "epoch": 0.6878719441492908, - "grad_norm": 8435.68359375, - "learning_rate": 1.3959221765250469e-05, - "loss": 84.0615, - "step": 170260 - }, - { - "epoch": 0.6879123454146584, - "grad_norm": 1174.869384765625, - "learning_rate": 1.3956090065540217e-05, - "loss": 50.7304, - "step": 170270 - }, - { - "epoch": 0.687952746680026, - "grad_norm": 838.7512817382812, - "learning_rate": 1.3952958581136688e-05, - "loss": 43.5792, - "step": 170280 - }, - { - "epoch": 0.6879931479453937, - "grad_norm": 585.5516357421875, - "learning_rate": 1.39498273121009e-05, - "loss": 46.7663, - "step": 170290 - }, - { - "epoch": 0.6880335492107613, - "grad_norm": 595.8783569335938, - "learning_rate": 1.3946696258493936e-05, - "loss": 29.3378, - "step": 170300 - }, - { - "epoch": 0.688073950476129, - "grad_norm": 642.0225219726562, - "learning_rate": 1.3943565420376808e-05, - "loss": 30.4124, - "step": 170310 - }, - { - "epoch": 0.6881143517414965, - "grad_norm": 515.0293579101562, - "learning_rate": 1.3940434797810568e-05, - "loss": 29.4962, - "step": 170320 - }, - { - "epoch": 0.6881547530068641, - "grad_norm": 521.1478271484375, - "learning_rate": 1.3937304390856242e-05, - "loss": 47.4596, - "step": 170330 - }, - { - "epoch": 0.6881951542722318, - "grad_norm": 691.7684936523438, - "learning_rate": 1.3934174199574872e-05, - "loss": 65.3109, - "step": 170340 - }, - { - "epoch": 0.6882355555375994, - "grad_norm": 650.1378784179688, - "learning_rate": 1.3931044224027468e-05, - "loss": 50.7096, - "step": 170350 - }, - { - "epoch": 0.688275956802967, - "grad_norm": 544.4131469726562, - "learning_rate": 1.392791446427505e-05, - "loss": 40.2387, - "step": 170360 - }, - { - "epoch": 0.6883163580683347, - "grad_norm": 1095.565185546875, - "learning_rate": 1.3924784920378642e-05, - "loss": 86.7689, - "step": 170370 - }, - { - "epoch": 0.6883567593337023, - "grad_norm": 509.0849304199219, - "learning_rate": 1.3921655592399254e-05, - "loss": 52.0616, - "step": 170380 - }, - { - "epoch": 0.68839716059907, - "grad_norm": 1857.7972412109375, - "learning_rate": 1.3918526480397902e-05, - "loss": 45.2592, - "step": 170390 - }, - { - "epoch": 0.6884375618644376, - "grad_norm": 533.4299926757812, - "learning_rate": 1.3915397584435563e-05, - "loss": 49.5395, - "step": 170400 - }, - { - "epoch": 0.6884779631298052, - "grad_norm": 1264.0267333984375, - "learning_rate": 1.3912268904573277e-05, - "loss": 48.0885, - "step": 170410 - }, - { - "epoch": 0.6885183643951729, - "grad_norm": 870.8333129882812, - "learning_rate": 1.3909140440872004e-05, - "loss": 48.8337, - "step": 170420 - }, - { - "epoch": 0.6885587656605405, - "grad_norm": 267.7932434082031, - "learning_rate": 1.3906012193392748e-05, - "loss": 57.9533, - "step": 170430 - }, - { - "epoch": 0.6885991669259082, - "grad_norm": 2385.821533203125, - "learning_rate": 1.3902884162196508e-05, - "loss": 50.6058, - "step": 170440 - }, - { - "epoch": 0.6886395681912757, - "grad_norm": 1392.8212890625, - "learning_rate": 1.3899756347344234e-05, - "loss": 33.6519, - "step": 170450 - }, - { - "epoch": 0.6886799694566433, - "grad_norm": 1187.458251953125, - "learning_rate": 1.3896628748896945e-05, - "loss": 34.4566, - "step": 170460 - }, - { - "epoch": 0.688720370722011, - "grad_norm": 783.3659057617188, - "learning_rate": 1.3893501366915582e-05, - "loss": 47.9276, - "step": 170470 - }, - { - "epoch": 0.6887607719873786, - "grad_norm": 535.8975830078125, - "learning_rate": 1.3890374201461132e-05, - "loss": 36.1128, - "step": 170480 - }, - { - "epoch": 0.6888011732527463, - "grad_norm": 462.8658752441406, - "learning_rate": 1.3887247252594554e-05, - "loss": 70.0482, - "step": 170490 - }, - { - "epoch": 0.6888415745181139, - "grad_norm": 1207.6224365234375, - "learning_rate": 1.388412052037682e-05, - "loss": 50.3268, - "step": 170500 - }, - { - "epoch": 0.6888819757834815, - "grad_norm": 885.7490234375, - "learning_rate": 1.3880994004868872e-05, - "loss": 36.7898, - "step": 170510 - }, - { - "epoch": 0.6889223770488492, - "grad_norm": 971.1849975585938, - "learning_rate": 1.387786770613167e-05, - "loss": 30.5436, - "step": 170520 - }, - { - "epoch": 0.6889627783142168, - "grad_norm": 850.4481811523438, - "learning_rate": 1.3874741624226162e-05, - "loss": 40.6387, - "step": 170530 - }, - { - "epoch": 0.6890031795795845, - "grad_norm": 1244.720703125, - "learning_rate": 1.3871615759213297e-05, - "loss": 50.7725, - "step": 170540 - }, - { - "epoch": 0.6890435808449521, - "grad_norm": 444.43341064453125, - "learning_rate": 1.386849011115402e-05, - "loss": 42.5565, - "step": 170550 - }, - { - "epoch": 0.6890839821103197, - "grad_norm": 666.0784301757812, - "learning_rate": 1.386536468010924e-05, - "loss": 79.6655, - "step": 170560 - }, - { - "epoch": 0.6891243833756874, - "grad_norm": 687.4263305664062, - "learning_rate": 1.3862239466139926e-05, - "loss": 50.3608, - "step": 170570 - }, - { - "epoch": 0.6891647846410549, - "grad_norm": 901.666015625, - "learning_rate": 1.3859114469306977e-05, - "loss": 38.5541, - "step": 170580 - }, - { - "epoch": 0.6892051859064225, - "grad_norm": 721.3048095703125, - "learning_rate": 1.3855989689671328e-05, - "loss": 67.1086, - "step": 170590 - }, - { - "epoch": 0.6892455871717902, - "grad_norm": 1656.072998046875, - "learning_rate": 1.3852865127293902e-05, - "loss": 90.0558, - "step": 170600 - }, - { - "epoch": 0.6892859884371578, - "grad_norm": 631.8057861328125, - "learning_rate": 1.3849740782235604e-05, - "loss": 58.9932, - "step": 170610 - }, - { - "epoch": 0.6893263897025255, - "grad_norm": 841.3690185546875, - "learning_rate": 1.3846616654557359e-05, - "loss": 51.6805, - "step": 170620 - }, - { - "epoch": 0.6893667909678931, - "grad_norm": 746.2327270507812, - "learning_rate": 1.3843492744320059e-05, - "loss": 35.7624, - "step": 170630 - }, - { - "epoch": 0.6894071922332607, - "grad_norm": 583.4542236328125, - "learning_rate": 1.3840369051584612e-05, - "loss": 66.87, - "step": 170640 - }, - { - "epoch": 0.6894475934986284, - "grad_norm": 435.5360412597656, - "learning_rate": 1.3837245576411911e-05, - "loss": 47.8244, - "step": 170650 - }, - { - "epoch": 0.689487994763996, - "grad_norm": 462.8669738769531, - "learning_rate": 1.3834122318862858e-05, - "loss": 81.1973, - "step": 170660 - }, - { - "epoch": 0.6895283960293637, - "grad_norm": 496.9759826660156, - "learning_rate": 1.3830999278998335e-05, - "loss": 37.6189, - "step": 170670 - }, - { - "epoch": 0.6895687972947313, - "grad_norm": 1232.743408203125, - "learning_rate": 1.3827876456879246e-05, - "loss": 61.267, - "step": 170680 - }, - { - "epoch": 0.689609198560099, - "grad_norm": 288.8763732910156, - "learning_rate": 1.3824753852566447e-05, - "loss": 45.0395, - "step": 170690 - }, - { - "epoch": 0.6896495998254666, - "grad_norm": 925.8347778320312, - "learning_rate": 1.3821631466120821e-05, - "loss": 43.501, - "step": 170700 - }, - { - "epoch": 0.6896900010908341, - "grad_norm": 400.8254699707031, - "learning_rate": 1.381850929760326e-05, - "loss": 55.7822, - "step": 170710 - }, - { - "epoch": 0.6897304023562018, - "grad_norm": 523.3338623046875, - "learning_rate": 1.3815387347074594e-05, - "loss": 34.1284, - "step": 170720 - }, - { - "epoch": 0.6897708036215694, - "grad_norm": 1366.3890380859375, - "learning_rate": 1.3812265614595727e-05, - "loss": 48.7812, - "step": 170730 - }, - { - "epoch": 0.689811204886937, - "grad_norm": 1213.1644287109375, - "learning_rate": 1.3809144100227484e-05, - "loss": 46.902, - "step": 170740 - }, - { - "epoch": 0.6898516061523047, - "grad_norm": 996.2909545898438, - "learning_rate": 1.380602280403076e-05, - "loss": 89.2399, - "step": 170750 - }, - { - "epoch": 0.6898920074176723, - "grad_norm": 497.2900695800781, - "learning_rate": 1.3802901726066375e-05, - "loss": 64.0509, - "step": 170760 - }, - { - "epoch": 0.68993240868304, - "grad_norm": 583.2584838867188, - "learning_rate": 1.3799780866395182e-05, - "loss": 36.3958, - "step": 170770 - }, - { - "epoch": 0.6899728099484076, - "grad_norm": 0.0, - "learning_rate": 1.379666022507804e-05, - "loss": 44.8411, - "step": 170780 - }, - { - "epoch": 0.6900132112137752, - "grad_norm": 1305.6693115234375, - "learning_rate": 1.3793539802175764e-05, - "loss": 56.8153, - "step": 170790 - }, - { - "epoch": 0.6900536124791429, - "grad_norm": 906.8377685546875, - "learning_rate": 1.3790419597749199e-05, - "loss": 55.3632, - "step": 170800 - }, - { - "epoch": 0.6900940137445105, - "grad_norm": 580.7864990234375, - "learning_rate": 1.3787299611859172e-05, - "loss": 53.7129, - "step": 170810 - }, - { - "epoch": 0.6901344150098782, - "grad_norm": 467.542236328125, - "learning_rate": 1.3784179844566515e-05, - "loss": 55.4573, - "step": 170820 - }, - { - "epoch": 0.6901748162752458, - "grad_norm": 1431.475830078125, - "learning_rate": 1.3781060295932046e-05, - "loss": 44.2515, - "step": 170830 - }, - { - "epoch": 0.6902152175406133, - "grad_norm": 1465.8927001953125, - "learning_rate": 1.3777940966016592e-05, - "loss": 45.1074, - "step": 170840 - }, - { - "epoch": 0.690255618805981, - "grad_norm": 598.7523803710938, - "learning_rate": 1.3774821854880948e-05, - "loss": 34.8125, - "step": 170850 - }, - { - "epoch": 0.6902960200713486, - "grad_norm": 1547.8724365234375, - "learning_rate": 1.3771702962585928e-05, - "loss": 60.3331, - "step": 170860 - }, - { - "epoch": 0.6903364213367162, - "grad_norm": 894.2749633789062, - "learning_rate": 1.3768584289192351e-05, - "loss": 57.0751, - "step": 170870 - }, - { - "epoch": 0.6903768226020839, - "grad_norm": 703.2276611328125, - "learning_rate": 1.3765465834760988e-05, - "loss": 69.8606, - "step": 170880 - }, - { - "epoch": 0.6904172238674515, - "grad_norm": 684.1668701171875, - "learning_rate": 1.3762347599352667e-05, - "loss": 52.2663, - "step": 170890 - }, - { - "epoch": 0.6904576251328192, - "grad_norm": 286.8528747558594, - "learning_rate": 1.375922958302815e-05, - "loss": 33.0038, - "step": 170900 - }, - { - "epoch": 0.6904980263981868, - "grad_norm": 626.8790893554688, - "learning_rate": 1.3756111785848258e-05, - "loss": 52.7146, - "step": 170910 - }, - { - "epoch": 0.6905384276635544, - "grad_norm": 390.28375244140625, - "learning_rate": 1.3752994207873743e-05, - "loss": 44.1779, - "step": 170920 - }, - { - "epoch": 0.6905788289289221, - "grad_norm": 2262.665771484375, - "learning_rate": 1.37498768491654e-05, - "loss": 85.5712, - "step": 170930 - }, - { - "epoch": 0.6906192301942897, - "grad_norm": 681.7750854492188, - "learning_rate": 1.3746759709783996e-05, - "loss": 43.9216, - "step": 170940 - }, - { - "epoch": 0.6906596314596574, - "grad_norm": 703.8005981445312, - "learning_rate": 1.3743642789790318e-05, - "loss": 71.855, - "step": 170950 - }, - { - "epoch": 0.6907000327250249, - "grad_norm": 788.2094116210938, - "learning_rate": 1.3740526089245109e-05, - "loss": 61.7674, - "step": 170960 - }, - { - "epoch": 0.6907404339903925, - "grad_norm": 383.26641845703125, - "learning_rate": 1.373740960820914e-05, - "loss": 35.7818, - "step": 170970 - }, - { - "epoch": 0.6907808352557602, - "grad_norm": 1133.83837890625, - "learning_rate": 1.3734293346743168e-05, - "loss": 36.8474, - "step": 170980 - }, - { - "epoch": 0.6908212365211278, - "grad_norm": 877.5737915039062, - "learning_rate": 1.373117730490795e-05, - "loss": 42.4732, - "step": 170990 - }, - { - "epoch": 0.6908616377864955, - "grad_norm": 639.6044921875, - "learning_rate": 1.3728061482764238e-05, - "loss": 48.5779, - "step": 171000 - }, - { - "epoch": 0.6909020390518631, - "grad_norm": 1117.853759765625, - "learning_rate": 1.3724945880372753e-05, - "loss": 53.8607, - "step": 171010 - }, - { - "epoch": 0.6909424403172307, - "grad_norm": 613.6959228515625, - "learning_rate": 1.3721830497794275e-05, - "loss": 44.6761, - "step": 171020 - }, - { - "epoch": 0.6909828415825984, - "grad_norm": 2479.233642578125, - "learning_rate": 1.3718715335089504e-05, - "loss": 81.8502, - "step": 171030 - }, - { - "epoch": 0.691023242847966, - "grad_norm": 857.9766845703125, - "learning_rate": 1.3715600392319186e-05, - "loss": 41.1113, - "step": 171040 - }, - { - "epoch": 0.6910636441133337, - "grad_norm": 8812.421875, - "learning_rate": 1.371248566954406e-05, - "loss": 114.4573, - "step": 171050 - }, - { - "epoch": 0.6911040453787013, - "grad_norm": 1065.7593994140625, - "learning_rate": 1.3709371166824813e-05, - "loss": 36.8817, - "step": 171060 - }, - { - "epoch": 0.6911444466440689, - "grad_norm": 574.299072265625, - "learning_rate": 1.3706256884222213e-05, - "loss": 47.0386, - "step": 171070 - }, - { - "epoch": 0.6911848479094366, - "grad_norm": 650.17724609375, - "learning_rate": 1.3703142821796938e-05, - "loss": 47.3403, - "step": 171080 - }, - { - "epoch": 0.6912252491748041, - "grad_norm": 359.5917053222656, - "learning_rate": 1.3700028979609708e-05, - "loss": 34.8721, - "step": 171090 - }, - { - "epoch": 0.6912656504401717, - "grad_norm": 823.551025390625, - "learning_rate": 1.369691535772123e-05, - "loss": 39.6124, - "step": 171100 - }, - { - "epoch": 0.6913060517055394, - "grad_norm": 408.6918640136719, - "learning_rate": 1.369380195619221e-05, - "loss": 70.1454, - "step": 171110 - }, - { - "epoch": 0.691346452970907, - "grad_norm": 309.99774169921875, - "learning_rate": 1.369068877508335e-05, - "loss": 40.091, - "step": 171120 - }, - { - "epoch": 0.6913868542362747, - "grad_norm": 988.518798828125, - "learning_rate": 1.3687575814455322e-05, - "loss": 55.3994, - "step": 171130 - }, - { - "epoch": 0.6914272555016423, - "grad_norm": 1013.3809814453125, - "learning_rate": 1.368446307436883e-05, - "loss": 42.0952, - "step": 171140 - }, - { - "epoch": 0.6914676567670099, - "grad_norm": 844.95361328125, - "learning_rate": 1.3681350554884559e-05, - "loss": 49.0532, - "step": 171150 - }, - { - "epoch": 0.6915080580323776, - "grad_norm": 673.6221313476562, - "learning_rate": 1.367823825606319e-05, - "loss": 56.2636, - "step": 171160 - }, - { - "epoch": 0.6915484592977452, - "grad_norm": 1622.76806640625, - "learning_rate": 1.3675126177965381e-05, - "loss": 50.5701, - "step": 171170 - }, - { - "epoch": 0.6915888605631129, - "grad_norm": 507.78253173828125, - "learning_rate": 1.3672014320651832e-05, - "loss": 52.1529, - "step": 171180 - }, - { - "epoch": 0.6916292618284805, - "grad_norm": 2060.35693359375, - "learning_rate": 1.3668902684183188e-05, - "loss": 86.6979, - "step": 171190 - }, - { - "epoch": 0.6916696630938481, - "grad_norm": 681.6329956054688, - "learning_rate": 1.366579126862012e-05, - "loss": 47.6263, - "step": 171200 - }, - { - "epoch": 0.6917100643592158, - "grad_norm": 1296.0411376953125, - "learning_rate": 1.36626800740233e-05, - "loss": 65.7112, - "step": 171210 - }, - { - "epoch": 0.6917504656245833, - "grad_norm": 1287.6270751953125, - "learning_rate": 1.3659569100453346e-05, - "loss": 47.9453, - "step": 171220 - }, - { - "epoch": 0.691790866889951, - "grad_norm": 942.5064086914062, - "learning_rate": 1.3656458347970949e-05, - "loss": 53.8814, - "step": 171230 - }, - { - "epoch": 0.6918312681553186, - "grad_norm": 847.1381225585938, - "learning_rate": 1.3653347816636727e-05, - "loss": 34.2508, - "step": 171240 - }, - { - "epoch": 0.6918716694206862, - "grad_norm": 662.107666015625, - "learning_rate": 1.3650237506511331e-05, - "loss": 69.0237, - "step": 171250 - }, - { - "epoch": 0.6919120706860539, - "grad_norm": 1095.3472900390625, - "learning_rate": 1.3647127417655401e-05, - "loss": 45.7568, - "step": 171260 - }, - { - "epoch": 0.6919524719514215, - "grad_norm": 483.9328308105469, - "learning_rate": 1.3644017550129564e-05, - "loss": 40.4629, - "step": 171270 - }, - { - "epoch": 0.6919928732167892, - "grad_norm": 740.6072387695312, - "learning_rate": 1.3640907903994454e-05, - "loss": 36.811, - "step": 171280 - }, - { - "epoch": 0.6920332744821568, - "grad_norm": 743.8606567382812, - "learning_rate": 1.3637798479310698e-05, - "loss": 54.7159, - "step": 171290 - }, - { - "epoch": 0.6920736757475244, - "grad_norm": 2306.760986328125, - "learning_rate": 1.3634689276138904e-05, - "loss": 64.2762, - "step": 171300 - }, - { - "epoch": 0.6921140770128921, - "grad_norm": 525.9327392578125, - "learning_rate": 1.3631580294539692e-05, - "loss": 58.8963, - "step": 171310 - }, - { - "epoch": 0.6921544782782597, - "grad_norm": 743.9605712890625, - "learning_rate": 1.3628471534573686e-05, - "loss": 30.9349, - "step": 171320 - }, - { - "epoch": 0.6921948795436274, - "grad_norm": 478.309326171875, - "learning_rate": 1.3625362996301463e-05, - "loss": 75.0899, - "step": 171330 - }, - { - "epoch": 0.692235280808995, - "grad_norm": 845.4862060546875, - "learning_rate": 1.3622254679783663e-05, - "loss": 38.9251, - "step": 171340 - }, - { - "epoch": 0.6922756820743625, - "grad_norm": 520.022705078125, - "learning_rate": 1.3619146585080845e-05, - "loss": 43.2606, - "step": 171350 - }, - { - "epoch": 0.6923160833397302, - "grad_norm": 795.574462890625, - "learning_rate": 1.3616038712253646e-05, - "loss": 50.8741, - "step": 171360 - }, - { - "epoch": 0.6923564846050978, - "grad_norm": 481.3109130859375, - "learning_rate": 1.3612931061362622e-05, - "loss": 63.0619, - "step": 171370 - }, - { - "epoch": 0.6923968858704654, - "grad_norm": 702.5056762695312, - "learning_rate": 1.3609823632468366e-05, - "loss": 57.0607, - "step": 171380 - }, - { - "epoch": 0.6924372871358331, - "grad_norm": 493.5089111328125, - "learning_rate": 1.3606716425631478e-05, - "loss": 42.103, - "step": 171390 - }, - { - "epoch": 0.6924776884012007, - "grad_norm": 725.418701171875, - "learning_rate": 1.3603609440912507e-05, - "loss": 71.7141, - "step": 171400 - }, - { - "epoch": 0.6925180896665684, - "grad_norm": 417.903076171875, - "learning_rate": 1.3600502678372035e-05, - "loss": 21.3099, - "step": 171410 - }, - { - "epoch": 0.692558490931936, - "grad_norm": 189.60548400878906, - "learning_rate": 1.3597396138070633e-05, - "loss": 47.8742, - "step": 171420 - }, - { - "epoch": 0.6925988921973036, - "grad_norm": 627.2072143554688, - "learning_rate": 1.3594289820068864e-05, - "loss": 53.868, - "step": 171430 - }, - { - "epoch": 0.6926392934626713, - "grad_norm": 681.5715942382812, - "learning_rate": 1.3591183724427286e-05, - "loss": 74.8162, - "step": 171440 - }, - { - "epoch": 0.6926796947280389, - "grad_norm": 150.87960815429688, - "learning_rate": 1.3588077851206468e-05, - "loss": 32.7783, - "step": 171450 - }, - { - "epoch": 0.6927200959934066, - "grad_norm": 918.0814208984375, - "learning_rate": 1.3584972200466937e-05, - "loss": 32.5443, - "step": 171460 - }, - { - "epoch": 0.6927604972587742, - "grad_norm": 623.6495361328125, - "learning_rate": 1.3581866772269247e-05, - "loss": 54.2402, - "step": 171470 - }, - { - "epoch": 0.6928008985241417, - "grad_norm": 858.6541137695312, - "learning_rate": 1.3578761566673954e-05, - "loss": 39.7497, - "step": 171480 - }, - { - "epoch": 0.6928412997895094, - "grad_norm": 875.178466796875, - "learning_rate": 1.3575656583741564e-05, - "loss": 37.1591, - "step": 171490 - }, - { - "epoch": 0.692881701054877, - "grad_norm": 1105.78466796875, - "learning_rate": 1.3572551823532654e-05, - "loss": 47.9929, - "step": 171500 - }, - { - "epoch": 0.6929221023202446, - "grad_norm": 927.5897827148438, - "learning_rate": 1.3569447286107705e-05, - "loss": 58.4196, - "step": 171510 - }, - { - "epoch": 0.6929625035856123, - "grad_norm": 428.81097412109375, - "learning_rate": 1.3566342971527291e-05, - "loss": 61.7513, - "step": 171520 - }, - { - "epoch": 0.6930029048509799, - "grad_norm": 565.3176879882812, - "learning_rate": 1.3563238879851892e-05, - "loss": 44.2183, - "step": 171530 - }, - { - "epoch": 0.6930433061163476, - "grad_norm": 1194.6748046875, - "learning_rate": 1.3560135011142039e-05, - "loss": 59.5806, - "step": 171540 - }, - { - "epoch": 0.6930837073817152, - "grad_norm": 592.6781005859375, - "learning_rate": 1.3557031365458256e-05, - "loss": 33.0638, - "step": 171550 - }, - { - "epoch": 0.6931241086470828, - "grad_norm": 1740.63720703125, - "learning_rate": 1.3553927942861016e-05, - "loss": 57.521, - "step": 171560 - }, - { - "epoch": 0.6931645099124505, - "grad_norm": 603.8390502929688, - "learning_rate": 1.3550824743410866e-05, - "loss": 45.9965, - "step": 171570 - }, - { - "epoch": 0.6932049111778181, - "grad_norm": 387.3785400390625, - "learning_rate": 1.3547721767168272e-05, - "loss": 33.6153, - "step": 171580 - }, - { - "epoch": 0.6932453124431858, - "grad_norm": 1378.163330078125, - "learning_rate": 1.3544619014193737e-05, - "loss": 56.0015, - "step": 171590 - }, - { - "epoch": 0.6932857137085533, - "grad_norm": 0.0, - "learning_rate": 1.3541516484547753e-05, - "loss": 42.2162, - "step": 171600 - }, - { - "epoch": 0.6933261149739209, - "grad_norm": 476.3367919921875, - "learning_rate": 1.3538414178290815e-05, - "loss": 47.5764, - "step": 171610 - }, - { - "epoch": 0.6933665162392886, - "grad_norm": 599.8919677734375, - "learning_rate": 1.3535312095483371e-05, - "loss": 46.6292, - "step": 171620 - }, - { - "epoch": 0.6934069175046562, - "grad_norm": 845.22802734375, - "learning_rate": 1.3532210236185938e-05, - "loss": 32.0494, - "step": 171630 - }, - { - "epoch": 0.6934473187700239, - "grad_norm": 485.71490478515625, - "learning_rate": 1.3529108600458967e-05, - "loss": 37.3266, - "step": 171640 - }, - { - "epoch": 0.6934877200353915, - "grad_norm": 1181.7938232421875, - "learning_rate": 1.3526007188362924e-05, - "loss": 43.7579, - "step": 171650 - }, - { - "epoch": 0.6935281213007591, - "grad_norm": 727.456787109375, - "learning_rate": 1.352290599995829e-05, - "loss": 65.2715, - "step": 171660 - }, - { - "epoch": 0.6935685225661268, - "grad_norm": 999.767578125, - "learning_rate": 1.3519805035305495e-05, - "loss": 33.2939, - "step": 171670 - }, - { - "epoch": 0.6936089238314944, - "grad_norm": 398.591552734375, - "learning_rate": 1.3516704294465027e-05, - "loss": 41.9499, - "step": 171680 - }, - { - "epoch": 0.6936493250968621, - "grad_norm": 462.4875793457031, - "learning_rate": 1.3513603777497313e-05, - "loss": 70.5573, - "step": 171690 - }, - { - "epoch": 0.6936897263622297, - "grad_norm": 1010.8731079101562, - "learning_rate": 1.3510503484462805e-05, - "loss": 88.3927, - "step": 171700 - }, - { - "epoch": 0.6937301276275973, - "grad_norm": 438.3502197265625, - "learning_rate": 1.350740341542195e-05, - "loss": 38.3786, - "step": 171710 - }, - { - "epoch": 0.693770528892965, - "grad_norm": 757.7011108398438, - "learning_rate": 1.3504303570435179e-05, - "loss": 40.7736, - "step": 171720 - }, - { - "epoch": 0.6938109301583325, - "grad_norm": 503.13299560546875, - "learning_rate": 1.350120394956294e-05, - "loss": 44.1673, - "step": 171730 - }, - { - "epoch": 0.6938513314237001, - "grad_norm": 375.2835388183594, - "learning_rate": 1.3498104552865636e-05, - "loss": 61.8349, - "step": 171740 - }, - { - "epoch": 0.6938917326890678, - "grad_norm": 724.8334350585938, - "learning_rate": 1.349500538040371e-05, - "loss": 42.6491, - "step": 171750 - }, - { - "epoch": 0.6939321339544354, - "grad_norm": 1060.08837890625, - "learning_rate": 1.3491906432237577e-05, - "loss": 72.335, - "step": 171760 - }, - { - "epoch": 0.6939725352198031, - "grad_norm": 1090.9224853515625, - "learning_rate": 1.3488807708427662e-05, - "loss": 53.5348, - "step": 171770 - }, - { - "epoch": 0.6940129364851707, - "grad_norm": 1613.0850830078125, - "learning_rate": 1.3485709209034347e-05, - "loss": 56.2584, - "step": 171780 - }, - { - "epoch": 0.6940533377505383, - "grad_norm": 1278.3367919921875, - "learning_rate": 1.3482610934118078e-05, - "loss": 70.852, - "step": 171790 - }, - { - "epoch": 0.694093739015906, - "grad_norm": 647.9417114257812, - "learning_rate": 1.3479512883739232e-05, - "loss": 50.4589, - "step": 171800 - }, - { - "epoch": 0.6941341402812736, - "grad_norm": 942.9307861328125, - "learning_rate": 1.3476415057958209e-05, - "loss": 37.4094, - "step": 171810 - }, - { - "epoch": 0.6941745415466413, - "grad_norm": 480.3348388671875, - "learning_rate": 1.3473317456835421e-05, - "loss": 47.3434, - "step": 171820 - }, - { - "epoch": 0.6942149428120089, - "grad_norm": 502.8962097167969, - "learning_rate": 1.347022008043122e-05, - "loss": 65.3622, - "step": 171830 - }, - { - "epoch": 0.6942553440773765, - "grad_norm": 536.8987426757812, - "learning_rate": 1.3467122928806037e-05, - "loss": 54.2566, - "step": 171840 - }, - { - "epoch": 0.6942957453427442, - "grad_norm": 370.1306457519531, - "learning_rate": 1.3464026002020219e-05, - "loss": 46.8355, - "step": 171850 - }, - { - "epoch": 0.6943361466081117, - "grad_norm": 531.8828735351562, - "learning_rate": 1.346092930013415e-05, - "loss": 60.7364, - "step": 171860 - }, - { - "epoch": 0.6943765478734794, - "grad_norm": 652.61328125, - "learning_rate": 1.345783282320821e-05, - "loss": 54.955, - "step": 171870 - }, - { - "epoch": 0.694416949138847, - "grad_norm": 455.7646484375, - "learning_rate": 1.3454736571302763e-05, - "loss": 32.8962, - "step": 171880 - }, - { - "epoch": 0.6944573504042146, - "grad_norm": 532.4978637695312, - "learning_rate": 1.3451640544478175e-05, - "loss": 58.8792, - "step": 171890 - }, - { - "epoch": 0.6944977516695823, - "grad_norm": 708.6436767578125, - "learning_rate": 1.3448544742794791e-05, - "loss": 43.3127, - "step": 171900 - }, - { - "epoch": 0.6945381529349499, - "grad_norm": 745.49169921875, - "learning_rate": 1.3445449166312977e-05, - "loss": 52.9133, - "step": 171910 - }, - { - "epoch": 0.6945785542003176, - "grad_norm": 1188.466796875, - "learning_rate": 1.3442353815093078e-05, - "loss": 57.6101, - "step": 171920 - }, - { - "epoch": 0.6946189554656852, - "grad_norm": 650.7738647460938, - "learning_rate": 1.3439258689195453e-05, - "loss": 50.3102, - "step": 171930 - }, - { - "epoch": 0.6946593567310528, - "grad_norm": 368.1375732421875, - "learning_rate": 1.3436163788680411e-05, - "loss": 52.3002, - "step": 171940 - }, - { - "epoch": 0.6946997579964205, - "grad_norm": 0.0, - "learning_rate": 1.343306911360833e-05, - "loss": 56.2097, - "step": 171950 - }, - { - "epoch": 0.6947401592617881, - "grad_norm": 676.087890625, - "learning_rate": 1.3429974664039501e-05, - "loss": 47.7845, - "step": 171960 - }, - { - "epoch": 0.6947805605271558, - "grad_norm": 1077.6400146484375, - "learning_rate": 1.3426880440034292e-05, - "loss": 69.1431, - "step": 171970 - }, - { - "epoch": 0.6948209617925234, - "grad_norm": 658.7960205078125, - "learning_rate": 1.3423786441652998e-05, - "loss": 49.965, - "step": 171980 - }, - { - "epoch": 0.6948613630578909, - "grad_norm": 1153.1220703125, - "learning_rate": 1.3420692668955947e-05, - "loss": 45.7714, - "step": 171990 - }, - { - "epoch": 0.6949017643232586, - "grad_norm": 482.1449279785156, - "learning_rate": 1.3417599122003464e-05, - "loss": 68.6874, - "step": 172000 } ], "logging_steps": 10, - "max_steps": 250000, + "max_steps": 500000, "num_input_tokens_seen": 0, "num_train_epochs": 2, - "save_steps": 2000, + "save_steps": 4000, "stateful_callbacks": { "TrainerControl": { "args": { @@ -120427,7 +5627,7 @@ } }, "total_flos": 0.0, - "train_batch_size": 4, + "train_batch_size": 2, "trial_name": null, "trial_params": null }