{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.04694358968884695, "eval_steps": 500, "global_step": 5110000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 0.0004999999998149024, "loss": 8.5332, "step": 5000 }, { "epoch": 0.0, "learning_rate": 0.0004999999990629436, "loss": 8.1061, "step": 10000 }, { "epoch": 0.0, "learning_rate": 0.000499999997732555, "loss": 8.1201, "step": 15000 }, { "epoch": 0.0, "learning_rate": 0.0004999999958237364, "loss": 8.1658, "step": 20000 }, { "epoch": 0.0, "learning_rate": 0.000499999993336488, "loss": 8.2813, "step": 25000 }, { "epoch": 0.0, "learning_rate": 0.0004999999902708098, "loss": 8.3271, "step": 30000 }, { "epoch": 0.0, "learning_rate": 0.0004999999866267018, "loss": 8.3799, "step": 35000 }, { "epoch": 0.0, "learning_rate": 0.0004999999824041639, "loss": 8.4484, "step": 40000 }, { "epoch": 0.0, "learning_rate": 0.0004999999776031961, "loss": 8.4779, "step": 45000 }, { "epoch": 0.0, "learning_rate": 0.0004999999722237985, "loss": 8.4402, "step": 50000 }, { "epoch": 0.0, "learning_rate": 0.0004999999662659712, "loss": 8.4371, "step": 55000 }, { "epoch": 0.0, "learning_rate": 0.0004999999597297141, "loss": 8.4231, "step": 60000 }, { "epoch": 0.0, "learning_rate": 0.0004999999526150273, "loss": 8.3955, "step": 65000 }, { "epoch": 0.0, "learning_rate": 0.0004999999449219107, "loss": 8.4, "step": 70000 }, { "epoch": 0.0, "learning_rate": 0.0004999999366503643, "loss": 8.414, "step": 75000 }, { "epoch": 0.0, "learning_rate": 0.0004999999278003882, "loss": 8.4149, "step": 80000 }, { "epoch": 0.0, "learning_rate": 0.0004999999183719824, "loss": 8.4205, "step": 85000 }, { "epoch": 0.0, "learning_rate": 0.000499999908365147, "loss": 8.4157, "step": 90000 }, { "epoch": 0.0, "learning_rate": 0.0004999998977798819, "loss": 8.4017, "step": 95000 }, { "epoch": 0.0, "learning_rate": 0.0004999998866161871, "loss": 8.4086, "step": 100000 }, { "epoch": 0.0, "learning_rate": 0.0004999998748740628, "loss": 8.3796, "step": 105000 }, { "epoch": 0.0, "learning_rate": 0.000499999862553509, "loss": 8.4095, "step": 110000 }, { "epoch": 0.0, "learning_rate": 0.0004999998496545254, "loss": 8.4944, "step": 115000 }, { "epoch": 0.0, "learning_rate": 0.0004999998361771126, "loss": 8.4752, "step": 120000 }, { "epoch": 0.0, "learning_rate": 0.0004999998221212701, "loss": 8.4344, "step": 125000 }, { "epoch": 0.0, "learning_rate": 0.0004999998074869983, "loss": 8.4137, "step": 130000 }, { "epoch": 0.0, "learning_rate": 0.0004999997922742969, "loss": 8.468, "step": 135000 }, { "epoch": 0.0, "learning_rate": 0.0004999997764831663, "loss": 8.5349, "step": 140000 }, { "epoch": 0.0, "learning_rate": 0.0004999997601136063, "loss": 8.5042, "step": 145000 }, { "epoch": 0.0, "learning_rate": 0.0004999997431656169, "loss": 8.5147, "step": 150000 }, { "epoch": 0.0, "learning_rate": 0.0004999997256391984, "loss": 8.4722, "step": 155000 }, { "epoch": 0.0, "learning_rate": 0.0004999997075343505, "loss": 8.4144, "step": 160000 }, { "epoch": 0.0, "learning_rate": 0.0004999996888510735, "loss": 8.3935, "step": 165000 }, { "epoch": 0.0, "learning_rate": 0.0004999996695893673, "loss": 8.4244, "step": 170000 }, { "epoch": 0.0, "learning_rate": 0.0004999996497492322, "loss": 8.4991, "step": 175000 }, { "epoch": 0.0, "learning_rate": 0.0004999996293306679, "loss": 8.5739, "step": 180000 }, { "epoch": 0.0, "learning_rate": 0.0004999996083336746, "loss": 8.5794, "step": 185000 }, { "epoch": 0.0, "learning_rate": 0.0004999995867582523, "loss": 8.5878, "step": 190000 }, { "epoch": 0.0, "learning_rate": 0.0004999995646044011, "loss": 8.6133, "step": 195000 }, { "epoch": 0.0, "learning_rate": 0.0004999995418721212, "loss": 8.5593, "step": 200000 }, { "epoch": 0.0, "learning_rate": 0.0004999995185614123, "loss": 8.5519, "step": 205000 }, { "epoch": 0.0, "learning_rate": 0.0004999994946722748, "loss": 8.5628, "step": 210000 }, { "epoch": 0.0, "learning_rate": 0.0004999994702047085, "loss": 8.5084, "step": 215000 }, { "epoch": 0.0, "learning_rate": 0.0004999994451587136, "loss": 8.5437, "step": 220000 }, { "epoch": 0.0, "learning_rate": 0.0004999994195342902, "loss": 8.5508, "step": 225000 }, { "epoch": 0.0, "learning_rate": 0.0004999993933314382, "loss": 8.5281, "step": 230000 }, { "epoch": 0.0, "learning_rate": 0.0004999993665501577, "loss": 8.5282, "step": 235000 }, { "epoch": 0.0, "learning_rate": 0.0004999993391904488, "loss": 8.5707, "step": 240000 }, { "epoch": 0.0, "learning_rate": 0.0004999993112523117, "loss": 8.5596, "step": 245000 }, { "epoch": 0.0, "learning_rate": 0.0004999992827357463, "loss": 8.554, "step": 250000 }, { "epoch": 0.0, "learning_rate": 0.0004999992536407527, "loss": 8.5556, "step": 255000 }, { "epoch": 0.0, "learning_rate": 0.0004999992239673309, "loss": 8.5767, "step": 260000 }, { "epoch": 0.0, "learning_rate": 0.000499999193715481, "loss": 8.579, "step": 265000 }, { "epoch": 0.0, "learning_rate": 0.0004999991628852031, "loss": 8.5437, "step": 270000 }, { "epoch": 0.0, "learning_rate": 0.0004999991314764974, "loss": 8.5226, "step": 275000 }, { "epoch": 0.0, "learning_rate": 0.0004999990994893638, "loss": 8.5095, "step": 280000 }, { "epoch": 0.0, "learning_rate": 0.0004999990669238024, "loss": 8.521, "step": 285000 }, { "epoch": 0.0, "learning_rate": 0.0004999990337798134, "loss": 8.5687, "step": 290000 }, { "epoch": 0.0, "learning_rate": 0.0004999990000573966, "loss": 8.567, "step": 295000 }, { "epoch": 0.0, "learning_rate": 0.0004999989657565524, "loss": 8.5399, "step": 300000 }, { "epoch": 0.0, "learning_rate": 0.0004999989308772806, "loss": 8.5336, "step": 305000 }, { "epoch": 0.0, "learning_rate": 0.0004999988954195816, "loss": 8.5427, "step": 310000 }, { "epoch": 0.0, "learning_rate": 0.0004999988593834551, "loss": 8.5369, "step": 315000 }, { "epoch": 0.0, "learning_rate": 0.0004999988227689015, "loss": 8.5183, "step": 320000 }, { "epoch": 0.0, "learning_rate": 0.0004999987855759207, "loss": 8.5165, "step": 325000 }, { "epoch": 0.0, "learning_rate": 0.0004999987478045128, "loss": 8.5009, "step": 330000 }, { "epoch": 0.0, "learning_rate": 0.000499998709454678, "loss": 8.4832, "step": 335000 }, { "epoch": 0.0, "learning_rate": 0.0004999986705264164, "loss": 8.4453, "step": 340000 }, { "epoch": 0.0, "learning_rate": 0.0004999986310197279, "loss": 8.4457, "step": 345000 }, { "epoch": 0.0, "learning_rate": 0.0004999985909346127, "loss": 8.4958, "step": 350000 }, { "epoch": 0.0, "learning_rate": 0.000499998550271071, "loss": 8.5252, "step": 355000 }, { "epoch": 0.0, "learning_rate": 0.0004999985090291027, "loss": 8.526, "step": 360000 }, { "epoch": 0.0, "learning_rate": 0.0004999984672087081, "loss": 8.4962, "step": 365000 }, { "epoch": 0.0, "learning_rate": 0.000499998424809887, "loss": 8.471, "step": 370000 }, { "epoch": 0.0, "learning_rate": 0.0004999983818326398, "loss": 8.4943, "step": 375000 }, { "epoch": 0.0, "learning_rate": 0.0004999983382769665, "loss": 8.5109, "step": 380000 }, { "epoch": 0.0, "learning_rate": 0.0004999982941428673, "loss": 8.5156, "step": 385000 }, { "epoch": 0.0, "learning_rate": 0.0004999982494303419, "loss": 8.4645, "step": 390000 }, { "epoch": 0.0, "learning_rate": 0.0004999982041393909, "loss": 8.4879, "step": 395000 }, { "epoch": 0.0, "learning_rate": 0.0004999981582700142, "loss": 8.5108, "step": 400000 }, { "epoch": 0.0, "learning_rate": 0.0004999981118222117, "loss": 8.545, "step": 405000 }, { "epoch": 0.0, "learning_rate": 0.0004999980647959839, "loss": 8.5518, "step": 410000 }, { "epoch": 0.0, "learning_rate": 0.0004999980171913307, "loss": 8.5067, "step": 415000 }, { "epoch": 0.0, "learning_rate": 0.0004999979690082521, "loss": 8.537, "step": 420000 }, { "epoch": 0.0, "learning_rate": 0.0004999979202467483, "loss": 8.4697, "step": 425000 }, { "epoch": 0.0, "learning_rate": 0.0004999978709068197, "loss": 8.4782, "step": 430000 }, { "epoch": 0.0, "learning_rate": 0.000499997820988466, "loss": 8.4981, "step": 435000 }, { "epoch": 0.0, "learning_rate": 0.0004999977704916876, "loss": 8.5571, "step": 440000 }, { "epoch": 0.0, "learning_rate": 0.0004999977194164844, "loss": 8.514, "step": 445000 }, { "epoch": 0.0, "learning_rate": 0.0004999976677628566, "loss": 8.4959, "step": 450000 }, { "epoch": 0.0, "learning_rate": 0.0004999976155308043, "loss": 8.58, "step": 455000 }, { "epoch": 0.0, "learning_rate": 0.0004999975627203278, "loss": 8.6084, "step": 460000 }, { "epoch": 0.0, "learning_rate": 0.0004999975093314269, "loss": 8.5743, "step": 465000 }, { "epoch": 0.0, "learning_rate": 0.000499997455364102, "loss": 8.5643, "step": 470000 }, { "epoch": 0.0, "learning_rate": 0.0004999974008183532, "loss": 8.4961, "step": 475000 }, { "epoch": 0.0, "learning_rate": 0.0004999973456941804, "loss": 8.4412, "step": 480000 }, { "epoch": 0.0, "learning_rate": 0.000499997289991584, "loss": 8.4485, "step": 485000 }, { "epoch": 0.0, "learning_rate": 0.0004999972337105639, "loss": 8.4707, "step": 490000 }, { "epoch": 0.0, "learning_rate": 0.0004999971768511204, "loss": 8.4722, "step": 495000 }, { "epoch": 0.0, "learning_rate": 0.0004999971194132537, "loss": 8.486, "step": 500000 }, { "epoch": 0.0, "learning_rate": 0.0004999970613969636, "loss": 8.438, "step": 505000 }, { "epoch": 0.0, "learning_rate": 0.0004999970028022505, "loss": 8.4606, "step": 510000 }, { "epoch": 0.0, "learning_rate": 0.0004999969436291146, "loss": 8.518, "step": 515000 }, { "epoch": 0.0, "learning_rate": 0.0004999968838775557, "loss": 8.5148, "step": 520000 }, { "epoch": 0.0, "learning_rate": 0.0004999968235475743, "loss": 8.5136, "step": 525000 }, { "epoch": 0.0, "learning_rate": 0.0004999967626391703, "loss": 8.4632, "step": 530000 }, { "epoch": 0.0, "learning_rate": 0.0004999967011523439, "loss": 8.4725, "step": 535000 }, { "epoch": 0.0, "learning_rate": 0.0004999966390870954, "loss": 8.4696, "step": 540000 }, { "epoch": 0.01, "learning_rate": 0.0004999965764434247, "loss": 8.4397, "step": 545000 }, { "epoch": 0.01, "learning_rate": 0.0004999965132213321, "loss": 8.4486, "step": 550000 }, { "epoch": 0.01, "learning_rate": 0.0004999964494208178, "loss": 8.4202, "step": 555000 }, { "epoch": 0.01, "learning_rate": 0.0004999963850418817, "loss": 8.4795, "step": 560000 }, { "epoch": 0.01, "learning_rate": 0.0004999963200845243, "loss": 8.5227, "step": 565000 }, { "epoch": 0.01, "learning_rate": 0.0004999962545487453, "loss": 8.5248, "step": 570000 }, { "epoch": 0.01, "learning_rate": 0.0004999961884345453, "loss": 8.5308, "step": 575000 }, { "epoch": 0.01, "learning_rate": 0.0004999961217419241, "loss": 8.5287, "step": 580000 }, { "epoch": 0.01, "learning_rate": 0.0004999960544708822, "loss": 8.5622, "step": 585000 }, { "epoch": 0.01, "learning_rate": 0.0004999959866214195, "loss": 8.5303, "step": 590000 }, { "epoch": 0.01, "learning_rate": 0.0004999959181935361, "loss": 8.4332, "step": 595000 }, { "epoch": 0.01, "learning_rate": 0.0004999958491872324, "loss": 8.4176, "step": 600000 }, { "epoch": 0.01, "learning_rate": 0.0004999957796025085, "loss": 8.3863, "step": 605000 }, { "epoch": 0.01, "learning_rate": 0.0004999957094393644, "loss": 8.3405, "step": 610000 }, { "epoch": 0.01, "learning_rate": 0.0004999956386978003, "loss": 8.3725, "step": 615000 }, { "epoch": 0.01, "learning_rate": 0.0004999955673778165, "loss": 8.4165, "step": 620000 }, { "epoch": 0.01, "learning_rate": 0.0004999954954794132, "loss": 8.3808, "step": 625000 }, { "epoch": 0.01, "learning_rate": 0.0004999954230025904, "loss": 8.3515, "step": 630000 }, { "epoch": 0.01, "learning_rate": 0.0004999953499473482, "loss": 8.3555, "step": 635000 }, { "epoch": 0.01, "learning_rate": 0.000499995276313687, "loss": 8.4179, "step": 640000 }, { "epoch": 0.01, "learning_rate": 0.0004999952021016069, "loss": 8.4277, "step": 645000 }, { "epoch": 0.01, "learning_rate": 0.000499995127311108, "loss": 8.4402, "step": 650000 }, { "epoch": 0.01, "learning_rate": 0.0004999950519421905, "loss": 8.4472, "step": 655000 }, { "epoch": 0.01, "learning_rate": 0.0004999949759948546, "loss": 8.4497, "step": 660000 }, { "epoch": 0.01, "learning_rate": 0.0004999948994691005, "loss": 8.4436, "step": 665000 }, { "epoch": 0.01, "learning_rate": 0.0004999948223649283, "loss": 8.4619, "step": 670000 }, { "epoch": 0.01, "learning_rate": 0.0004999947446823382, "loss": 8.4528, "step": 675000 }, { "epoch": 0.01, "learning_rate": 0.0004999946664213305, "loss": 8.4476, "step": 680000 }, { "epoch": 0.01, "learning_rate": 0.0004999945875819051, "loss": 8.5146, "step": 685000 }, { "epoch": 0.01, "learning_rate": 0.0004999945081640625, "loss": 8.5345, "step": 690000 }, { "epoch": 0.01, "learning_rate": 0.0004999944281678027, "loss": 8.5706, "step": 695000 }, { "epoch": 0.01, "learning_rate": 0.000499994347593126, "loss": 8.5314, "step": 700000 }, { "epoch": 0.01, "learning_rate": 0.0004999942664400324, "loss": 8.5054, "step": 705000 }, { "epoch": 0.01, "learning_rate": 0.0004999941847085223, "loss": 8.5103, "step": 710000 }, { "epoch": 0.01, "learning_rate": 0.0004999941023985958, "loss": 8.5057, "step": 715000 }, { "epoch": 0.01, "learning_rate": 0.000499994019510253, "loss": 8.4851, "step": 720000 }, { "epoch": 0.01, "learning_rate": 0.0004999939360434942, "loss": 8.4865, "step": 725000 }, { "epoch": 0.01, "learning_rate": 0.0004999938519983196, "loss": 8.4787, "step": 730000 }, { "epoch": 0.01, "learning_rate": 0.0004999937673747293, "loss": 8.4496, "step": 735000 }, { "epoch": 0.01, "learning_rate": 0.0004999936821727237, "loss": 8.4326, "step": 740000 }, { "epoch": 0.01, "learning_rate": 0.0004999935963923027, "loss": 8.387, "step": 745000 }, { "epoch": 0.01, "learning_rate": 0.0004999935100334667, "loss": 8.4004, "step": 750000 }, { "epoch": 0.01, "learning_rate": 0.000499993423096216, "loss": 8.4077, "step": 755000 }, { "epoch": 0.01, "learning_rate": 0.0004999933355805504, "loss": 8.3867, "step": 760000 }, { "epoch": 0.01, "learning_rate": 0.0004999932474864706, "loss": 8.4258, "step": 765000 }, { "epoch": 0.01, "learning_rate": 0.0004999931588139764, "loss": 8.4821, "step": 770000 }, { "epoch": 0.01, "learning_rate": 0.0004999930695630682, "loss": 8.4397, "step": 775000 }, { "epoch": 0.01, "learning_rate": 0.0004999929797337462, "loss": 8.5221, "step": 780000 }, { "epoch": 0.01, "learning_rate": 0.0004999928893260105, "loss": 8.4864, "step": 785000 }, { "epoch": 0.01, "learning_rate": 0.0004999927983398616, "loss": 8.4338, "step": 790000 }, { "epoch": 0.01, "learning_rate": 0.0004999927067752993, "loss": 8.4292, "step": 795000 }, { "epoch": 0.01, "learning_rate": 0.0004999926146323241, "loss": 8.3973, "step": 800000 }, { "epoch": 0.01, "learning_rate": 0.0004999925219109361, "loss": 8.4031, "step": 805000 }, { "epoch": 0.01, "learning_rate": 0.0004999924286111355, "loss": 8.3967, "step": 810000 }, { "epoch": 0.01, "learning_rate": 0.0004999923347329226, "loss": 8.4021, "step": 815000 }, { "epoch": 0.01, "learning_rate": 0.0004999922402762977, "loss": 8.414, "step": 820000 }, { "epoch": 0.01, "learning_rate": 0.0004999921452412606, "loss": 8.4847, "step": 825000 }, { "epoch": 0.01, "learning_rate": 0.000499992049627812, "loss": 8.4377, "step": 830000 }, { "epoch": 0.01, "learning_rate": 0.000499991953435952, "loss": 8.4167, "step": 835000 }, { "epoch": 0.01, "learning_rate": 0.0004999918566656806, "loss": 8.445, "step": 840000 }, { "epoch": 0.01, "learning_rate": 0.0004999917593169984, "loss": 8.4625, "step": 845000 }, { "epoch": 0.01, "learning_rate": 0.0004999916613899052, "loss": 8.459, "step": 850000 }, { "epoch": 0.01, "learning_rate": 0.0004999915628844015, "loss": 8.4831, "step": 855000 }, { "epoch": 0.01, "learning_rate": 0.0004999914638004875, "loss": 8.478, "step": 860000 }, { "epoch": 0.01, "learning_rate": 0.0004999913641381633, "loss": 8.4598, "step": 865000 }, { "epoch": 0.01, "learning_rate": 0.0004999912638974292, "loss": 8.4568, "step": 870000 }, { "epoch": 0.01, "learning_rate": 0.0004999911630782856, "loss": 8.4423, "step": 875000 }, { "epoch": 0.01, "learning_rate": 0.0004999910616807323, "loss": 8.4638, "step": 880000 }, { "epoch": 0.01, "learning_rate": 0.0004999909597047702, "loss": 8.4405, "step": 885000 }, { "epoch": 0.01, "learning_rate": 0.0004999908571503989, "loss": 8.4464, "step": 890000 }, { "epoch": 0.01, "learning_rate": 0.0004999907540176189, "loss": 8.4808, "step": 895000 }, { "epoch": 0.01, "learning_rate": 0.0004999906503064305, "loss": 8.437, "step": 900000 }, { "epoch": 0.01, "learning_rate": 0.0004999905460168339, "loss": 8.4214, "step": 905000 }, { "epoch": 0.01, "learning_rate": 0.0004999904411488293, "loss": 8.4418, "step": 910000 }, { "epoch": 0.01, "learning_rate": 0.0004999903357024169, "loss": 8.5303, "step": 915000 }, { "epoch": 0.01, "learning_rate": 0.0004999902296775971, "loss": 8.4865, "step": 920000 }, { "epoch": 0.01, "learning_rate": 0.00049999012307437, "loss": 8.4779, "step": 925000 }, { "epoch": 0.01, "learning_rate": 0.0004999900158927358, "loss": 8.4884, "step": 930000 }, { "epoch": 0.01, "learning_rate": 0.0004999899081326949, "loss": 8.4641, "step": 935000 }, { "epoch": 0.01, "learning_rate": 0.0004999897997942475, "loss": 8.485, "step": 940000 }, { "epoch": 0.01, "learning_rate": 0.0004999896908773939, "loss": 8.4615, "step": 945000 }, { "epoch": 0.01, "learning_rate": 0.0004999895813821341, "loss": 8.4412, "step": 950000 }, { "epoch": 0.01, "learning_rate": 0.0004999894713084688, "loss": 8.4274, "step": 955000 }, { "epoch": 0.01, "learning_rate": 0.0004999893606563978, "loss": 8.4331, "step": 960000 }, { "epoch": 0.01, "learning_rate": 0.0004999892494259216, "loss": 8.4737, "step": 965000 }, { "epoch": 0.01, "learning_rate": 0.0004999891376170404, "loss": 8.4471, "step": 970000 }, { "epoch": 0.01, "learning_rate": 0.0004999890252297545, "loss": 8.4929, "step": 975000 }, { "epoch": 0.01, "learning_rate": 0.0004999889122640642, "loss": 8.4602, "step": 980000 }, { "epoch": 0.01, "learning_rate": 0.0004999887987199697, "loss": 8.4671, "step": 985000 }, { "epoch": 0.01, "learning_rate": 0.0004999886845974712, "loss": 8.4589, "step": 990000 }, { "epoch": 0.01, "learning_rate": 0.0004999885698965689, "loss": 8.47, "step": 995000 }, { "epoch": 0.01, "learning_rate": 0.0004999884546172634, "loss": 8.5097, "step": 1000000 }, { "epoch": 0.01, "learning_rate": 0.0004999883387595546, "loss": 8.5082, "step": 1005000 }, { "epoch": 0.01, "learning_rate": 0.000499988222323443, "loss": 8.4846, "step": 1010000 }, { "epoch": 0.01, "learning_rate": 0.0004999881053089287, "loss": 8.4732, "step": 1015000 }, { "epoch": 0.01, "learning_rate": 0.0004999879877160121, "loss": 8.443, "step": 1020000 }, { "epoch": 0.01, "learning_rate": 0.0004999878695446934, "loss": 8.4313, "step": 1025000 }, { "epoch": 0.01, "learning_rate": 0.000499987750794973, "loss": 8.4374, "step": 1030000 }, { "epoch": 0.01, "learning_rate": 0.000499987631466851, "loss": 8.4242, "step": 1035000 }, { "epoch": 0.01, "learning_rate": 0.0004999875115603279, "loss": 8.4282, "step": 1040000 }, { "epoch": 0.01, "learning_rate": 0.0004999873910754036, "loss": 8.4442, "step": 1045000 }, { "epoch": 0.01, "learning_rate": 0.0004999872700120788, "loss": 8.5036, "step": 1050000 }, { "epoch": 0.01, "learning_rate": 0.0004999871483703536, "loss": 8.4339, "step": 1055000 }, { "epoch": 0.01, "learning_rate": 0.0004999870261502281, "loss": 8.399, "step": 1060000 }, { "epoch": 0.01, "learning_rate": 0.0004999869033517028, "loss": 8.4156, "step": 1065000 }, { "epoch": 0.01, "learning_rate": 0.000499986779974778, "loss": 8.478, "step": 1070000 }, { "epoch": 0.01, "learning_rate": 0.0004999866560194539, "loss": 8.5151, "step": 1075000 }, { "epoch": 0.01, "learning_rate": 0.000499986531485731, "loss": 8.5111, "step": 1080000 }, { "epoch": 0.01, "learning_rate": 0.0004999864063736091, "loss": 8.5124, "step": 1085000 }, { "epoch": 0.01, "learning_rate": 0.000499986280683089, "loss": 8.4821, "step": 1090000 }, { "epoch": 0.01, "learning_rate": 0.0004999861544141706, "loss": 8.4595, "step": 1095000 }, { "epoch": 0.01, "learning_rate": 0.0004999860275668545, "loss": 8.4339, "step": 1100000 }, { "epoch": 0.01, "learning_rate": 0.0004999859001411409, "loss": 8.3923, "step": 1105000 }, { "epoch": 0.01, "learning_rate": 0.00049998577213703, "loss": 8.3647, "step": 1110000 }, { "epoch": 0.01, "learning_rate": 0.0004999856435545222, "loss": 8.3865, "step": 1115000 }, { "epoch": 0.01, "learning_rate": 0.0004999855143936176, "loss": 8.4136, "step": 1120000 }, { "epoch": 0.01, "learning_rate": 0.0004999853846543169, "loss": 8.4033, "step": 1125000 }, { "epoch": 0.01, "learning_rate": 0.00049998525433662, "loss": 8.4242, "step": 1130000 }, { "epoch": 0.01, "learning_rate": 0.0004999851234405274, "loss": 8.3723, "step": 1135000 }, { "epoch": 0.01, "learning_rate": 0.0004999849919660393, "loss": 8.4118, "step": 1140000 }, { "epoch": 0.01, "learning_rate": 0.0004999848599131562, "loss": 8.3804, "step": 1145000 }, { "epoch": 0.01, "learning_rate": 0.0004999847272818781, "loss": 8.4146, "step": 1150000 }, { "epoch": 0.01, "learning_rate": 0.0004999845940722056, "loss": 8.4656, "step": 1155000 }, { "epoch": 0.01, "learning_rate": 0.0004999844602841388, "loss": 8.4474, "step": 1160000 }, { "epoch": 0.01, "learning_rate": 0.0004999843259176781, "loss": 8.4551, "step": 1165000 }, { "epoch": 0.01, "learning_rate": 0.0004999841909728239, "loss": 8.4472, "step": 1170000 }, { "epoch": 0.01, "learning_rate": 0.0004999840554495763, "loss": 8.4341, "step": 1175000 }, { "epoch": 0.01, "learning_rate": 0.0004999839193479358, "loss": 8.4335, "step": 1180000 }, { "epoch": 0.01, "learning_rate": 0.0004999837826679027, "loss": 8.4043, "step": 1185000 }, { "epoch": 0.01, "learning_rate": 0.0004999836454094771, "loss": 8.3666, "step": 1190000 }, { "epoch": 0.01, "learning_rate": 0.0004999835075726595, "loss": 8.3444, "step": 1195000 }, { "epoch": 0.01, "learning_rate": 0.0004999833691574503, "loss": 8.3216, "step": 1200000 }, { "epoch": 0.01, "learning_rate": 0.0004999832301638497, "loss": 8.3572, "step": 1205000 }, { "epoch": 0.01, "learning_rate": 0.0004999830905918581, "loss": 8.3965, "step": 1210000 }, { "epoch": 0.01, "learning_rate": 0.0004999829504414756, "loss": 8.4237, "step": 1215000 }, { "epoch": 0.01, "learning_rate": 0.0004999828097127029, "loss": 8.3765, "step": 1220000 }, { "epoch": 0.01, "learning_rate": 0.0004999826684055398, "loss": 8.3266, "step": 1225000 }, { "epoch": 0.01, "learning_rate": 0.0004999825265199872, "loss": 8.322, "step": 1230000 }, { "epoch": 0.01, "learning_rate": 0.000499982384056045, "loss": 8.3367, "step": 1235000 }, { "epoch": 0.01, "learning_rate": 0.0004999822410137139, "loss": 8.3544, "step": 1240000 }, { "epoch": 0.01, "learning_rate": 0.0004999820973929939, "loss": 8.3379, "step": 1245000 }, { "epoch": 0.01, "learning_rate": 0.0004999819531938854, "loss": 8.359, "step": 1250000 }, { "epoch": 0.01, "learning_rate": 0.0004999818084163889, "loss": 8.3699, "step": 1255000 }, { "epoch": 0.01, "learning_rate": 0.0004999816630605047, "loss": 8.3825, "step": 1260000 }, { "epoch": 0.01, "learning_rate": 0.0004999815171262328, "loss": 8.3895, "step": 1265000 }, { "epoch": 0.01, "learning_rate": 0.000499981370613574, "loss": 8.3835, "step": 1270000 }, { "epoch": 0.01, "learning_rate": 0.0004999812235225284, "loss": 8.3684, "step": 1275000 }, { "epoch": 0.01, "learning_rate": 0.0004999810758530964, "loss": 8.3881, "step": 1280000 }, { "epoch": 0.01, "learning_rate": 0.0004999809276052783, "loss": 8.376, "step": 1285000 }, { "epoch": 0.01, "learning_rate": 0.0004999807787790746, "loss": 8.4006, "step": 1290000 }, { "epoch": 0.01, "learning_rate": 0.0004999806293744853, "loss": 8.3775, "step": 1295000 }, { "epoch": 0.01, "learning_rate": 0.000499980479391511, "loss": 8.3253, "step": 1300000 }, { "epoch": 0.01, "learning_rate": 0.0004999803288301521, "loss": 8.3663, "step": 1305000 }, { "epoch": 0.01, "learning_rate": 0.0004999801776904088, "loss": 8.4, "step": 1310000 }, { "epoch": 0.01, "learning_rate": 0.0004999800259722815, "loss": 8.3802, "step": 1315000 }, { "epoch": 0.01, "learning_rate": 0.0004999798736757706, "loss": 8.3608, "step": 1320000 }, { "epoch": 0.01, "learning_rate": 0.0004999797208008763, "loss": 8.3663, "step": 1325000 }, { "epoch": 0.01, "learning_rate": 0.0004999795673475992, "loss": 8.3449, "step": 1330000 }, { "epoch": 0.01, "learning_rate": 0.0004999794133159394, "loss": 8.3615, "step": 1335000 }, { "epoch": 0.01, "learning_rate": 0.0004999792587058974, "loss": 8.3849, "step": 1340000 }, { "epoch": 0.01, "learning_rate": 0.0004999791035174736, "loss": 8.3867, "step": 1345000 }, { "epoch": 0.01, "learning_rate": 0.0004999789477506682, "loss": 8.3461, "step": 1350000 }, { "epoch": 0.01, "learning_rate": 0.0004999787914054816, "loss": 8.2994, "step": 1355000 }, { "epoch": 0.01, "learning_rate": 0.0004999786344819144, "loss": 8.3118, "step": 1360000 }, { "epoch": 0.01, "learning_rate": 0.0004999784769799666, "loss": 8.3385, "step": 1365000 }, { "epoch": 0.01, "learning_rate": 0.0004999783188996388, "loss": 8.3052, "step": 1370000 }, { "epoch": 0.01, "learning_rate": 0.0004999781602409313, "loss": 8.3561, "step": 1375000 }, { "epoch": 0.01, "learning_rate": 0.0004999780010038445, "loss": 8.3534, "step": 1380000 }, { "epoch": 0.01, "learning_rate": 0.0004999778411883786, "loss": 8.3714, "step": 1385000 }, { "epoch": 0.01, "learning_rate": 0.0004999776807945342, "loss": 8.3757, "step": 1390000 }, { "epoch": 0.01, "learning_rate": 0.0004999775198223117, "loss": 8.3769, "step": 1395000 }, { "epoch": 0.01, "learning_rate": 0.0004999773582717112, "loss": 8.3468, "step": 1400000 }, { "epoch": 0.01, "learning_rate": 0.0004999771961427332, "loss": 8.3378, "step": 1405000 }, { "epoch": 0.01, "learning_rate": 0.0004999770334353782, "loss": 8.3782, "step": 1410000 }, { "epoch": 0.01, "learning_rate": 0.0004999768701496464, "loss": 8.3934, "step": 1415000 }, { "epoch": 0.01, "learning_rate": 0.0004999767062855384, "loss": 8.3977, "step": 1420000 }, { "epoch": 0.01, "learning_rate": 0.0004999765418430543, "loss": 8.3509, "step": 1425000 }, { "epoch": 0.01, "learning_rate": 0.0004999763768221946, "loss": 8.3453, "step": 1430000 }, { "epoch": 0.01, "learning_rate": 0.0004999762112229598, "loss": 8.2951, "step": 1435000 }, { "epoch": 0.01, "learning_rate": 0.0004999760450453501, "loss": 8.3644, "step": 1440000 }, { "epoch": 0.01, "learning_rate": 0.000499975878289366, "loss": 8.3911, "step": 1445000 }, { "epoch": 0.01, "learning_rate": 0.0004999757109550078, "loss": 8.3925, "step": 1450000 }, { "epoch": 0.01, "learning_rate": 0.000499975543042276, "loss": 8.3537, "step": 1455000 }, { "epoch": 0.01, "learning_rate": 0.0004999753745511709, "loss": 8.379, "step": 1460000 }, { "epoch": 0.01, "learning_rate": 0.0004999752054816929, "loss": 8.3786, "step": 1465000 }, { "epoch": 0.01, "learning_rate": 0.0004999750358338425, "loss": 8.3861, "step": 1470000 }, { "epoch": 0.01, "learning_rate": 0.0004999748656076198, "loss": 8.4382, "step": 1475000 }, { "epoch": 0.01, "learning_rate": 0.0004999746948030256, "loss": 8.4304, "step": 1480000 }, { "epoch": 0.01, "learning_rate": 0.0004999745234200599, "loss": 8.4402, "step": 1485000 }, { "epoch": 0.01, "learning_rate": 0.0004999743514587234, "loss": 8.3865, "step": 1490000 }, { "epoch": 0.01, "learning_rate": 0.0004999741789190165, "loss": 8.4104, "step": 1495000 }, { "epoch": 0.01, "learning_rate": 0.0004999740058009392, "loss": 8.4022, "step": 1500000 }, { "epoch": 0.01, "learning_rate": 0.0004999738321044923, "loss": 8.4134, "step": 1505000 }, { "epoch": 0.01, "learning_rate": 0.0004999736578296762, "loss": 8.4199, "step": 1510000 }, { "epoch": 0.01, "learning_rate": 0.0004999734829764911, "loss": 8.4059, "step": 1515000 }, { "epoch": 0.01, "learning_rate": 0.0004999733075449375, "loss": 8.3262, "step": 1520000 }, { "epoch": 0.01, "learning_rate": 0.0004999731315350158, "loss": 8.2811, "step": 1525000 }, { "epoch": 0.01, "learning_rate": 0.0004999729549467263, "loss": 8.2573, "step": 1530000 }, { "epoch": 0.01, "learning_rate": 0.0004999727777800696, "loss": 8.3032, "step": 1535000 }, { "epoch": 0.01, "learning_rate": 0.0004999726000350461, "loss": 8.3267, "step": 1540000 }, { "epoch": 0.01, "learning_rate": 0.000499972421711656, "loss": 8.3838, "step": 1545000 }, { "epoch": 0.01, "learning_rate": 0.0004999722428098999, "loss": 8.38, "step": 1550000 }, { "epoch": 0.01, "learning_rate": 0.0004999720633297782, "loss": 8.3361, "step": 1555000 }, { "epoch": 0.01, "learning_rate": 0.0004999718832712913, "loss": 8.3276, "step": 1560000 }, { "epoch": 0.01, "learning_rate": 0.0004999717026344394, "loss": 8.294, "step": 1565000 }, { "epoch": 0.01, "learning_rate": 0.0004999715214192233, "loss": 8.3313, "step": 1570000 }, { "epoch": 0.01, "learning_rate": 0.0004999713396256432, "loss": 8.37, "step": 1575000 }, { "epoch": 0.01, "learning_rate": 0.0004999711572536995, "loss": 8.3763, "step": 1580000 }, { "epoch": 0.01, "learning_rate": 0.0004999709743033928, "loss": 8.4021, "step": 1585000 }, { "epoch": 0.01, "learning_rate": 0.0004999707907747233, "loss": 8.3807, "step": 1590000 }, { "epoch": 0.01, "learning_rate": 0.0004999706066676915, "loss": 8.4022, "step": 1595000 }, { "epoch": 0.01, "learning_rate": 0.0004999704219822979, "loss": 8.4175, "step": 1600000 }, { "epoch": 0.01, "learning_rate": 0.0004999702367185429, "loss": 8.3805, "step": 1605000 }, { "epoch": 0.01, "learning_rate": 0.0004999700508764267, "loss": 8.3644, "step": 1610000 }, { "epoch": 0.01, "learning_rate": 0.0004999698644559501, "loss": 8.3482, "step": 1615000 }, { "epoch": 0.01, "learning_rate": 0.0004999696774571134, "loss": 8.3087, "step": 1620000 }, { "epoch": 0.01, "learning_rate": 0.000499969489879917, "loss": 8.2912, "step": 1625000 }, { "epoch": 0.01, "learning_rate": 0.0004999693017243612, "loss": 8.2988, "step": 1630000 }, { "epoch": 0.02, "learning_rate": 0.0004999691129904467, "loss": 8.2964, "step": 1635000 }, { "epoch": 0.02, "learning_rate": 0.0004999689236781737, "loss": 8.3291, "step": 1640000 }, { "epoch": 0.02, "learning_rate": 0.0004999687337875427, "loss": 8.3241, "step": 1645000 }, { "epoch": 0.02, "learning_rate": 0.0004999685433185544, "loss": 8.3738, "step": 1650000 }, { "epoch": 0.02, "learning_rate": 0.0004999683522712088, "loss": 8.3744, "step": 1655000 }, { "epoch": 0.02, "learning_rate": 0.0004999681606455066, "loss": 8.3795, "step": 1660000 }, { "epoch": 0.02, "learning_rate": 0.0004999679684414483, "loss": 8.4056, "step": 1665000 }, { "epoch": 0.02, "learning_rate": 0.0004999677756590342, "loss": 8.3822, "step": 1670000 }, { "epoch": 0.02, "learning_rate": 0.0004999675822982648, "loss": 8.3657, "step": 1675000 }, { "epoch": 0.02, "learning_rate": 0.0004999673883591406, "loss": 8.3292, "step": 1680000 }, { "epoch": 0.02, "learning_rate": 0.0004999671938416619, "loss": 8.3594, "step": 1685000 }, { "epoch": 0.02, "learning_rate": 0.0004999669987458292, "loss": 8.3728, "step": 1690000 }, { "epoch": 0.02, "learning_rate": 0.0004999668030716431, "loss": 8.3581, "step": 1695000 }, { "epoch": 0.02, "learning_rate": 0.0004999666068191039, "loss": 8.3038, "step": 1700000 }, { "epoch": 0.02, "learning_rate": 0.0004999664099882121, "loss": 8.3271, "step": 1705000 }, { "epoch": 0.02, "learning_rate": 0.0004999662125789682, "loss": 8.307, "step": 1710000 }, { "epoch": 0.02, "learning_rate": 0.0004999660145913726, "loss": 8.3064, "step": 1715000 }, { "epoch": 0.02, "learning_rate": 0.0004999658160254258, "loss": 8.3224, "step": 1720000 }, { "epoch": 0.02, "learning_rate": 0.0004999656168811282, "loss": 8.3283, "step": 1725000 }, { "epoch": 0.02, "learning_rate": 0.0004999654171584802, "loss": 8.2952, "step": 1730000 }, { "epoch": 0.02, "learning_rate": 0.0004999652168574825, "loss": 8.263, "step": 1735000 }, { "epoch": 0.02, "learning_rate": 0.0004999650159781353, "loss": 8.2434, "step": 1740000 }, { "epoch": 0.02, "learning_rate": 0.0004999648145204393, "loss": 8.2752, "step": 1745000 }, { "epoch": 0.02, "learning_rate": 0.0004999646124843948, "loss": 8.2715, "step": 1750000 }, { "epoch": 0.02, "learning_rate": 0.0004999644098700023, "loss": 8.2639, "step": 1755000 }, { "epoch": 0.02, "learning_rate": 0.0004999642066772622, "loss": 8.2642, "step": 1760000 }, { "epoch": 0.02, "learning_rate": 0.0004999640029061752, "loss": 8.2577, "step": 1765000 }, { "epoch": 0.02, "learning_rate": 0.0004999637985567415, "loss": 8.2835, "step": 1770000 }, { "epoch": 0.02, "learning_rate": 0.0004999635936289618, "loss": 8.3218, "step": 1775000 }, { "epoch": 0.02, "learning_rate": 0.0004999633881228365, "loss": 8.3525, "step": 1780000 }, { "epoch": 0.02, "learning_rate": 0.0004999631820383659, "loss": 8.3198, "step": 1785000 }, { "epoch": 0.02, "learning_rate": 0.0004999629753755507, "loss": 8.3218, "step": 1790000 }, { "epoch": 0.02, "learning_rate": 0.0004999627681343913, "loss": 8.2785, "step": 1795000 }, { "epoch": 0.02, "learning_rate": 0.0004999625603148882, "loss": 8.2772, "step": 1800000 }, { "epoch": 0.02, "learning_rate": 0.0004999623519170419, "loss": 8.3014, "step": 1805000 }, { "epoch": 0.02, "learning_rate": 0.0004999621429408528, "loss": 8.3073, "step": 1810000 }, { "epoch": 0.02, "learning_rate": 0.0004999619333863214, "loss": 8.2848, "step": 1815000 }, { "epoch": 0.02, "learning_rate": 0.0004999617232534483, "loss": 8.3051, "step": 1820000 }, { "epoch": 0.02, "learning_rate": 0.0004999615125422339, "loss": 8.2821, "step": 1825000 }, { "epoch": 0.02, "learning_rate": 0.0004999613012526786, "loss": 8.2864, "step": 1830000 }, { "epoch": 0.02, "learning_rate": 0.0004999610893847829, "loss": 8.293, "step": 1835000 }, { "epoch": 0.02, "learning_rate": 0.0004999608769385475, "loss": 8.261, "step": 1840000 }, { "epoch": 0.02, "learning_rate": 0.0004999606639139728, "loss": 8.2727, "step": 1845000 }, { "epoch": 0.02, "learning_rate": 0.0004999604503110592, "loss": 8.283, "step": 1850000 }, { "epoch": 0.02, "learning_rate": 0.0004999602361298073, "loss": 8.2857, "step": 1855000 }, { "epoch": 0.02, "learning_rate": 0.0004999600213702174, "loss": 8.3006, "step": 1860000 }, { "epoch": 0.02, "learning_rate": 0.0004999598060322902, "loss": 8.3414, "step": 1865000 }, { "epoch": 0.02, "learning_rate": 0.0004999595901160262, "loss": 8.3064, "step": 1870000 }, { "epoch": 0.02, "learning_rate": 0.0004999593736214257, "loss": 8.2756, "step": 1875000 }, { "epoch": 0.02, "learning_rate": 0.0004999591565484895, "loss": 8.2984, "step": 1880000 }, { "epoch": 0.02, "learning_rate": 0.0004999589388972178, "loss": 8.3053, "step": 1885000 }, { "epoch": 0.02, "learning_rate": 0.0004999587206676113, "loss": 8.2835, "step": 1890000 }, { "epoch": 0.02, "learning_rate": 0.0004999585018596705, "loss": 8.289, "step": 1895000 }, { "epoch": 0.02, "learning_rate": 0.0004999582824733958, "loss": 8.2592, "step": 1900000 }, { "epoch": 0.02, "learning_rate": 0.0004999580625087878, "loss": 8.2264, "step": 1905000 }, { "epoch": 0.02, "learning_rate": 0.000499957841965847, "loss": 8.2443, "step": 1910000 }, { "epoch": 0.02, "learning_rate": 0.0004999576208445738, "loss": 8.244, "step": 1915000 }, { "epoch": 0.02, "learning_rate": 0.0004999573991449687, "loss": 8.2067, "step": 1920000 }, { "epoch": 0.02, "learning_rate": 0.0004999571768670325, "loss": 8.2082, "step": 1925000 }, { "epoch": 0.02, "learning_rate": 0.0004999569540107654, "loss": 8.2183, "step": 1930000 }, { "epoch": 0.02, "learning_rate": 0.0004999567305761681, "loss": 8.2743, "step": 1935000 }, { "epoch": 0.02, "learning_rate": 0.000499956506563241, "loss": 8.2565, "step": 1940000 }, { "epoch": 0.02, "learning_rate": 0.0004999562819719847, "loss": 8.2867, "step": 1945000 }, { "epoch": 0.02, "learning_rate": 0.0004999560568023997, "loss": 8.3107, "step": 1950000 }, { "epoch": 0.02, "learning_rate": 0.0004999558310544865, "loss": 8.3057, "step": 1955000 }, { "epoch": 0.02, "learning_rate": 0.0004999556047282456, "loss": 8.3432, "step": 1960000 }, { "epoch": 0.02, "learning_rate": 0.0004999553778236776, "loss": 8.3441, "step": 1965000 }, { "epoch": 0.02, "learning_rate": 0.000499955150340783, "loss": 8.3092, "step": 1970000 }, { "epoch": 0.02, "learning_rate": 0.0004999549222795622, "loss": 8.2568, "step": 1975000 }, { "epoch": 0.02, "learning_rate": 0.0004999546936400159, "loss": 8.2439, "step": 1980000 }, { "epoch": 0.02, "learning_rate": 0.0004999544644221446, "loss": 8.2598, "step": 1985000 }, { "epoch": 0.02, "learning_rate": 0.0004999542346259488, "loss": 8.2371, "step": 1990000 }, { "epoch": 0.02, "learning_rate": 0.000499954004251429, "loss": 8.2255, "step": 1995000 }, { "epoch": 0.02, "learning_rate": 0.0004999537732985857, "loss": 8.2308, "step": 2000000 }, { "epoch": 0.02, "learning_rate": 0.0004999535417674196, "loss": 8.2345, "step": 2005000 }, { "epoch": 0.02, "learning_rate": 0.000499953309657931, "loss": 8.2342, "step": 2010000 }, { "epoch": 0.02, "learning_rate": 0.0004999530769701207, "loss": 8.2473, "step": 2015000 }, { "epoch": 0.02, "learning_rate": 0.0004999528437039891, "loss": 8.2992, "step": 2020000 }, { "epoch": 0.02, "learning_rate": 0.0004999526098595367, "loss": 8.2668, "step": 2025000 }, { "epoch": 0.02, "learning_rate": 0.000499952375436764, "loss": 8.2865, "step": 2030000 }, { "epoch": 0.02, "learning_rate": 0.0004999521404356718, "loss": 8.2854, "step": 2035000 }, { "epoch": 0.02, "learning_rate": 0.0004999519048562603, "loss": 8.2697, "step": 2040000 }, { "epoch": 0.02, "learning_rate": 0.0004999516686985304, "loss": 8.2916, "step": 2045000 }, { "epoch": 0.02, "learning_rate": 0.0004999514319624823, "loss": 8.2798, "step": 2050000 }, { "epoch": 0.02, "learning_rate": 0.0004999511946481167, "loss": 8.299, "step": 2055000 }, { "epoch": 0.02, "learning_rate": 0.0004999509567554343, "loss": 8.2762, "step": 2060000 }, { "epoch": 0.02, "learning_rate": 0.0004999507182844355, "loss": 8.247, "step": 2065000 }, { "epoch": 0.02, "learning_rate": 0.0004999504792351209, "loss": 8.2446, "step": 2070000 }, { "epoch": 0.02, "learning_rate": 0.0004999502396074908, "loss": 8.2318, "step": 2075000 }, { "epoch": 0.02, "learning_rate": 0.0004999499994015462, "loss": 8.266, "step": 2080000 }, { "epoch": 0.02, "learning_rate": 0.0004999497586172873, "loss": 8.2625, "step": 2085000 }, { "epoch": 0.02, "learning_rate": 0.0004999495172547148, "loss": 8.2462, "step": 2090000 }, { "epoch": 0.02, "learning_rate": 0.0004999492753138293, "loss": 8.2909, "step": 2095000 }, { "epoch": 0.02, "learning_rate": 0.0004999490327946312, "loss": 8.2519, "step": 2100000 }, { "epoch": 0.02, "learning_rate": 0.0004999487896971212, "loss": 8.2138, "step": 2105000 }, { "epoch": 0.02, "learning_rate": 0.0004999485460212998, "loss": 8.236, "step": 2110000 }, { "epoch": 0.02, "learning_rate": 0.0004999483017671676, "loss": 8.2644, "step": 2115000 }, { "epoch": 0.02, "learning_rate": 0.0004999480569347252, "loss": 8.317, "step": 2120000 }, { "epoch": 0.02, "learning_rate": 0.0004999478115239732, "loss": 8.292, "step": 2125000 }, { "epoch": 0.02, "learning_rate": 0.0004999475655349119, "loss": 8.2746, "step": 2130000 }, { "epoch": 0.02, "learning_rate": 0.0004999473189675422, "loss": 8.2593, "step": 2135000 }, { "epoch": 0.02, "learning_rate": 0.0004999470718218645, "loss": 8.2784, "step": 2140000 }, { "epoch": 0.02, "learning_rate": 0.0004999468240978794, "loss": 8.2614, "step": 2145000 }, { "epoch": 0.02, "learning_rate": 0.0004999465757955875, "loss": 8.2859, "step": 2150000 }, { "epoch": 0.02, "learning_rate": 0.0004999463269149892, "loss": 8.2754, "step": 2155000 }, { "epoch": 0.02, "learning_rate": 0.0004999460774560853, "loss": 8.2623, "step": 2160000 }, { "epoch": 0.02, "learning_rate": 0.0004999458274188764, "loss": 8.2901, "step": 2165000 }, { "epoch": 0.02, "learning_rate": 0.0004999455768033628, "loss": 8.2651, "step": 2170000 }, { "epoch": 0.02, "learning_rate": 0.0004999453256095454, "loss": 8.2527, "step": 2175000 }, { "epoch": 0.02, "learning_rate": 0.0004999450738374246, "loss": 8.2246, "step": 2180000 }, { "epoch": 0.02, "learning_rate": 0.000499944821487001, "loss": 8.222, "step": 2185000 }, { "epoch": 0.02, "learning_rate": 0.0004999445685582752, "loss": 8.1857, "step": 2190000 }, { "epoch": 0.02, "learning_rate": 0.0004999443150512479, "loss": 8.1593, "step": 2195000 }, { "epoch": 0.02, "learning_rate": 0.0004999440609659195, "loss": 8.1482, "step": 2200000 }, { "epoch": 0.02, "learning_rate": 0.0004999438063022906, "loss": 8.2034, "step": 2205000 }, { "epoch": 0.02, "learning_rate": 0.0004999435510603619, "loss": 8.2233, "step": 2210000 }, { "epoch": 0.02, "learning_rate": 0.0004999432952401341, "loss": 8.2332, "step": 2215000 }, { "epoch": 0.02, "learning_rate": 0.0004999430388416074, "loss": 8.2519, "step": 2220000 }, { "epoch": 0.02, "learning_rate": 0.0004999427818647827, "loss": 8.2316, "step": 2225000 }, { "epoch": 0.02, "learning_rate": 0.0004999425243096605, "loss": 8.2477, "step": 2230000 }, { "epoch": 0.02, "learning_rate": 0.0004999422661762416, "loss": 8.2251, "step": 2235000 }, { "epoch": 0.02, "learning_rate": 0.0004999420074645262, "loss": 8.2782, "step": 2240000 }, { "epoch": 0.02, "learning_rate": 0.0004999417481745153, "loss": 8.26, "step": 2245000 }, { "epoch": 0.02, "learning_rate": 0.0004999414883062092, "loss": 8.2635, "step": 2250000 }, { "epoch": 0.02, "learning_rate": 0.0004999412278596087, "loss": 8.2708, "step": 2255000 }, { "epoch": 0.02, "learning_rate": 0.0004999409668347142, "loss": 8.2981, "step": 2260000 }, { "epoch": 0.02, "learning_rate": 0.0004999407052315265, "loss": 8.2581, "step": 2265000 }, { "epoch": 0.02, "learning_rate": 0.0004999404430500461, "loss": 8.2532, "step": 2270000 }, { "epoch": 0.02, "learning_rate": 0.0004999401802902737, "loss": 8.2697, "step": 2275000 }, { "epoch": 0.02, "learning_rate": 0.0004999399169522098, "loss": 8.2703, "step": 2280000 }, { "epoch": 0.02, "learning_rate": 0.0004999396530358551, "loss": 8.2604, "step": 2285000 }, { "epoch": 0.02, "learning_rate": 0.0004999393885412101, "loss": 8.2728, "step": 2290000 }, { "epoch": 0.02, "learning_rate": 0.0004999391234682756, "loss": 8.2636, "step": 2295000 }, { "epoch": 0.02, "learning_rate": 0.000499938857817052, "loss": 8.2473, "step": 2300000 }, { "epoch": 0.02, "learning_rate": 0.0004999385915875399, "loss": 8.2496, "step": 2305000 }, { "epoch": 0.02, "learning_rate": 0.0004999383247797401, "loss": 8.2689, "step": 2310000 }, { "epoch": 0.02, "learning_rate": 0.0004999380573936532, "loss": 8.303, "step": 2315000 }, { "epoch": 0.02, "learning_rate": 0.0004999377894292798, "loss": 8.2291, "step": 2320000 }, { "epoch": 0.02, "learning_rate": 0.0004999375208866203, "loss": 8.2713, "step": 2325000 }, { "epoch": 0.02, "learning_rate": 0.0004999372517656756, "loss": 8.3408, "step": 2330000 }, { "epoch": 0.02, "learning_rate": 0.0004999369820664463, "loss": 8.2794, "step": 2335000 }, { "epoch": 0.02, "learning_rate": 0.0004999367117889328, "loss": 8.3002, "step": 2340000 }, { "epoch": 0.02, "learning_rate": 0.0004999364409331358, "loss": 8.2982, "step": 2345000 }, { "epoch": 0.02, "learning_rate": 0.0004999361694990562, "loss": 8.2878, "step": 2350000 }, { "epoch": 0.02, "learning_rate": 0.0004999358974866943, "loss": 8.2482, "step": 2355000 }, { "epoch": 0.02, "learning_rate": 0.0004999356248960509, "loss": 8.2382, "step": 2360000 }, { "epoch": 0.02, "learning_rate": 0.0004999353517271267, "loss": 8.2243, "step": 2365000 }, { "epoch": 0.02, "learning_rate": 0.000499935077979922, "loss": 8.1924, "step": 2370000 }, { "epoch": 0.02, "learning_rate": 0.0004999348036544378, "loss": 8.1962, "step": 2375000 }, { "epoch": 0.02, "learning_rate": 0.0004999345287506745, "loss": 8.1667, "step": 2380000 }, { "epoch": 0.02, "learning_rate": 0.0004999342532686328, "loss": 8.1605, "step": 2385000 }, { "epoch": 0.02, "learning_rate": 0.0004999339772083134, "loss": 8.1692, "step": 2390000 }, { "epoch": 0.02, "learning_rate": 0.000499933700569717, "loss": 8.1645, "step": 2395000 }, { "epoch": 0.02, "learning_rate": 0.000499933423352844, "loss": 8.1408, "step": 2400000 }, { "epoch": 0.02, "learning_rate": 0.0004999331455576953, "loss": 8.134, "step": 2405000 }, { "epoch": 0.02, "learning_rate": 0.0004999328671842712, "loss": 8.1468, "step": 2410000 }, { "epoch": 0.02, "learning_rate": 0.0004999325882325728, "loss": 8.113, "step": 2415000 }, { "epoch": 0.02, "learning_rate": 0.0004999323087026004, "loss": 8.1044, "step": 2420000 }, { "epoch": 0.02, "learning_rate": 0.0004999320285943548, "loss": 8.0701, "step": 2425000 }, { "epoch": 0.02, "learning_rate": 0.0004999317479078366, "loss": 8.0466, "step": 2430000 }, { "epoch": 0.02, "learning_rate": 0.0004999314666430465, "loss": 8.0082, "step": 2435000 }, { "epoch": 0.02, "learning_rate": 0.000499931184799985, "loss": 8.0217, "step": 2440000 }, { "epoch": 0.02, "learning_rate": 0.0004999309023786529, "loss": 8.0033, "step": 2445000 }, { "epoch": 0.02, "learning_rate": 0.0004999306193790509, "loss": 8.0105, "step": 2450000 }, { "epoch": 0.02, "learning_rate": 0.0004999303358011794, "loss": 7.9985, "step": 2455000 }, { "epoch": 0.02, "learning_rate": 0.0004999300516450393, "loss": 8.0164, "step": 2460000 }, { "epoch": 0.02, "learning_rate": 0.0004999297669106312, "loss": 8.0093, "step": 2465000 }, { "epoch": 0.02, "learning_rate": 0.0004999294815979557, "loss": 8.0111, "step": 2470000 }, { "epoch": 0.02, "learning_rate": 0.0004999291957070135, "loss": 8.0337, "step": 2475000 }, { "epoch": 0.02, "learning_rate": 0.0004999289092378053, "loss": 8.0641, "step": 2480000 }, { "epoch": 0.02, "learning_rate": 0.0004999286221903317, "loss": 7.9939, "step": 2485000 }, { "epoch": 0.02, "learning_rate": 0.0004999283345645934, "loss": 8.0228, "step": 2490000 }, { "epoch": 0.02, "learning_rate": 0.000499928046360591, "loss": 8.0458, "step": 2495000 }, { "epoch": 0.02, "learning_rate": 0.0004999277575783253, "loss": 8.0302, "step": 2500000 }, { "epoch": 0.02, "learning_rate": 0.0004999274682177968, "loss": 8.0451, "step": 2505000 }, { "epoch": 0.02, "learning_rate": 0.0004999271782790063, "loss": 8.0334, "step": 2510000 }, { "epoch": 0.02, "learning_rate": 0.0004999268877619545, "loss": 8.0034, "step": 2515000 }, { "epoch": 0.02, "learning_rate": 0.0004999265966666419, "loss": 7.9877, "step": 2520000 }, { "epoch": 0.02, "learning_rate": 0.0004999263049930692, "loss": 8.0222, "step": 2525000 }, { "epoch": 0.02, "learning_rate": 0.0004999260127412374, "loss": 7.993, "step": 2530000 }, { "epoch": 0.02, "learning_rate": 0.0004999257199111468, "loss": 7.9746, "step": 2535000 }, { "epoch": 0.02, "learning_rate": 0.0004999254265027982, "loss": 7.9478, "step": 2540000 }, { "epoch": 0.02, "learning_rate": 0.0004999251325161922, "loss": 7.9584, "step": 2545000 }, { "epoch": 0.02, "learning_rate": 0.0004999248379513296, "loss": 7.9407, "step": 2550000 }, { "epoch": 0.02, "learning_rate": 0.000499924542808211, "loss": 7.8694, "step": 2555000 }, { "epoch": 0.02, "learning_rate": 0.0004999242470868372, "loss": 7.8591, "step": 2560000 }, { "epoch": 0.02, "learning_rate": 0.0004999239507872088, "loss": 7.8295, "step": 2565000 }, { "epoch": 0.02, "learning_rate": 0.0004999236539093266, "loss": 7.7761, "step": 2570000 }, { "epoch": 0.02, "learning_rate": 0.0004999233564531911, "loss": 7.7571, "step": 2575000 }, { "epoch": 0.02, "learning_rate": 0.000499923058418803, "loss": 7.7923, "step": 2580000 }, { "epoch": 0.02, "learning_rate": 0.0004999227598061631, "loss": 7.7985, "step": 2585000 }, { "epoch": 0.02, "learning_rate": 0.000499922460615272, "loss": 7.7888, "step": 2590000 }, { "epoch": 0.02, "learning_rate": 0.0004999221608461306, "loss": 7.7891, "step": 2595000 }, { "epoch": 0.02, "learning_rate": 0.0004999218604987393, "loss": 7.7764, "step": 2600000 }, { "epoch": 0.02, "learning_rate": 0.000499921559573099, "loss": 7.7866, "step": 2605000 }, { "epoch": 0.02, "learning_rate": 0.0004999212580692103, "loss": 7.779, "step": 2610000 }, { "epoch": 0.02, "learning_rate": 0.0004999209559870738, "loss": 7.7859, "step": 2615000 }, { "epoch": 0.02, "learning_rate": 0.0004999206533266905, "loss": 7.7553, "step": 2620000 }, { "epoch": 0.02, "learning_rate": 0.0004999203500880609, "loss": 7.723, "step": 2625000 }, { "epoch": 0.02, "learning_rate": 0.0004999200462711857, "loss": 7.6725, "step": 2630000 }, { "epoch": 0.02, "learning_rate": 0.0004999197418760656, "loss": 7.7191, "step": 2635000 }, { "epoch": 0.02, "learning_rate": 0.0004999194369027014, "loss": 7.7714, "step": 2640000 }, { "epoch": 0.02, "learning_rate": 0.0004999191313510937, "loss": 7.7341, "step": 2645000 }, { "epoch": 0.02, "learning_rate": 0.0004999188252212432, "loss": 7.7491, "step": 2650000 }, { "epoch": 0.02, "learning_rate": 0.0004999185185131507, "loss": 7.7664, "step": 2655000 }, { "epoch": 0.02, "learning_rate": 0.0004999182112268169, "loss": 7.7736, "step": 2660000 }, { "epoch": 0.02, "learning_rate": 0.0004999179033622424, "loss": 7.7262, "step": 2665000 }, { "epoch": 0.02, "learning_rate": 0.0004999175949194281, "loss": 7.6883, "step": 2670000 }, { "epoch": 0.02, "learning_rate": 0.0004999172858983745, "loss": 7.7069, "step": 2675000 }, { "epoch": 0.02, "learning_rate": 0.0004999169762990825, "loss": 7.6417, "step": 2680000 }, { "epoch": 0.02, "learning_rate": 0.0004999166661215527, "loss": 7.6366, "step": 2685000 }, { "epoch": 0.02, "learning_rate": 0.0004999163553657858, "loss": 7.6692, "step": 2690000 }, { "epoch": 0.02, "learning_rate": 0.0004999160440317825, "loss": 7.6494, "step": 2695000 }, { "epoch": 0.02, "learning_rate": 0.0004999157321195438, "loss": 7.6101, "step": 2700000 }, { "epoch": 0.02, "learning_rate": 0.0004999154196290701, "loss": 7.6184, "step": 2705000 }, { "epoch": 0.02, "learning_rate": 0.0004999151065603622, "loss": 7.6237, "step": 2710000 }, { "epoch": 0.02, "learning_rate": 0.0004999147929134209, "loss": 7.6552, "step": 2715000 }, { "epoch": 0.02, "learning_rate": 0.0004999144786882469, "loss": 7.6579, "step": 2720000 }, { "epoch": 0.03, "learning_rate": 0.0004999141638848409, "loss": 7.6001, "step": 2725000 }, { "epoch": 0.03, "learning_rate": 0.0004999138485032035, "loss": 7.5975, "step": 2730000 }, { "epoch": 0.03, "learning_rate": 0.0004999135325433358, "loss": 7.6248, "step": 2735000 }, { "epoch": 0.03, "learning_rate": 0.0004999132160052382, "loss": 7.6358, "step": 2740000 }, { "epoch": 0.03, "learning_rate": 0.0004999128988889115, "loss": 7.5984, "step": 2745000 }, { "epoch": 0.03, "learning_rate": 0.0004999125811943565, "loss": 7.5925, "step": 2750000 }, { "epoch": 0.03, "learning_rate": 0.0004999122629215739, "loss": 7.6375, "step": 2755000 }, { "epoch": 0.03, "learning_rate": 0.0004999119440705644, "loss": 7.6285, "step": 2760000 }, { "epoch": 0.03, "learning_rate": 0.0004999116246413288, "loss": 7.6137, "step": 2765000 }, { "epoch": 0.03, "learning_rate": 0.0004999113046338678, "loss": 7.6145, "step": 2770000 }, { "epoch": 0.03, "learning_rate": 0.0004999109840481822, "loss": 7.6126, "step": 2775000 }, { "epoch": 0.03, "learning_rate": 0.0004999106628842726, "loss": 7.5928, "step": 2780000 }, { "epoch": 0.03, "learning_rate": 0.0004999103411421399, "loss": 7.5962, "step": 2785000 }, { "epoch": 0.03, "learning_rate": 0.0004999100188217848, "loss": 7.5763, "step": 2790000 }, { "epoch": 0.03, "learning_rate": 0.0004999096959232082, "loss": 7.5711, "step": 2795000 }, { "epoch": 0.03, "learning_rate": 0.0004999093724464104, "loss": 7.5754, "step": 2800000 }, { "epoch": 0.03, "learning_rate": 0.0004999090483913926, "loss": 7.5963, "step": 2805000 }, { "epoch": 0.03, "learning_rate": 0.0004999087237581553, "loss": 7.581, "step": 2810000 }, { "epoch": 0.03, "learning_rate": 0.0004999083985466992, "loss": 7.5872, "step": 2815000 }, { "epoch": 0.03, "learning_rate": 0.0004999080727570254, "loss": 7.5772, "step": 2820000 }, { "epoch": 0.03, "learning_rate": 0.0004999077463891343, "loss": 7.5629, "step": 2825000 }, { "epoch": 0.03, "learning_rate": 0.0004999074194430268, "loss": 7.5575, "step": 2830000 }, { "epoch": 0.03, "learning_rate": 0.0004999070919187037, "loss": 7.5157, "step": 2835000 }, { "epoch": 0.03, "learning_rate": 0.0004999067638161657, "loss": 7.5362, "step": 2840000 }, { "epoch": 0.03, "learning_rate": 0.0004999064351354135, "loss": 7.5166, "step": 2845000 }, { "epoch": 0.03, "learning_rate": 0.0004999061058764481, "loss": 7.5412, "step": 2850000 }, { "epoch": 0.03, "learning_rate": 0.0004999057760392699, "loss": 7.5056, "step": 2855000 }, { "epoch": 0.03, "learning_rate": 0.0004999054456238798, "loss": 7.5058, "step": 2860000 }, { "epoch": 0.03, "learning_rate": 0.0004999051146302787, "loss": 7.4933, "step": 2865000 }, { "epoch": 0.03, "learning_rate": 0.0004999047830584674, "loss": 7.4638, "step": 2870000 }, { "epoch": 0.03, "learning_rate": 0.0004999044509084463, "loss": 7.4749, "step": 2875000 }, { "epoch": 0.03, "learning_rate": 0.0004999041181802165, "loss": 7.4796, "step": 2880000 }, { "epoch": 0.03, "learning_rate": 0.0004999037848737787, "loss": 7.4789, "step": 2885000 }, { "epoch": 0.03, "learning_rate": 0.0004999034509891337, "loss": 7.4779, "step": 2890000 }, { "epoch": 0.03, "learning_rate": 0.0004999031165262822, "loss": 7.4957, "step": 2895000 }, { "epoch": 0.03, "learning_rate": 0.000499902781485225, "loss": 7.4947, "step": 2900000 }, { "epoch": 0.03, "learning_rate": 0.0004999024458659628, "loss": 7.5077, "step": 2905000 }, { "epoch": 0.03, "learning_rate": 0.0004999021096684966, "loss": 7.491, "step": 2910000 }, { "epoch": 0.03, "learning_rate": 0.000499901772892827, "loss": 7.4886, "step": 2915000 }, { "epoch": 0.03, "learning_rate": 0.0004999014355389547, "loss": 7.4775, "step": 2920000 }, { "epoch": 0.03, "learning_rate": 0.0004999010976068807, "loss": 7.4578, "step": 2925000 }, { "epoch": 0.03, "learning_rate": 0.0004999007590966056, "loss": 7.4282, "step": 2930000 }, { "epoch": 0.03, "learning_rate": 0.0004999004200081303, "loss": 7.4256, "step": 2935000 }, { "epoch": 0.03, "learning_rate": 0.0004999000803414556, "loss": 7.4379, "step": 2940000 }, { "epoch": 0.03, "learning_rate": 0.000499899740096582, "loss": 7.4527, "step": 2945000 }, { "epoch": 0.03, "learning_rate": 0.0004998993992735107, "loss": 7.4464, "step": 2950000 }, { "epoch": 0.03, "learning_rate": 0.0004998990578722422, "loss": 7.4258, "step": 2955000 }, { "epoch": 0.03, "learning_rate": 0.0004998987158927775, "loss": 7.4279, "step": 2960000 }, { "epoch": 0.03, "learning_rate": 0.0004998983733351172, "loss": 7.4214, "step": 2965000 }, { "epoch": 0.03, "learning_rate": 0.0004998980301992622, "loss": 7.3997, "step": 2970000 }, { "epoch": 0.03, "learning_rate": 0.0004998976864852133, "loss": 7.3827, "step": 2975000 }, { "epoch": 0.03, "learning_rate": 0.0004998973421929711, "loss": 7.3728, "step": 2980000 }, { "epoch": 0.03, "learning_rate": 0.0004998969973225368, "loss": 7.3785, "step": 2985000 }, { "epoch": 0.03, "learning_rate": 0.0004998966518739109, "loss": 7.3772, "step": 2990000 }, { "epoch": 0.03, "learning_rate": 0.0004998963058470941, "loss": 7.371, "step": 2995000 }, { "epoch": 0.03, "learning_rate": 0.0004998959592420874, "loss": 7.375, "step": 3000000 }, { "epoch": 0.03, "learning_rate": 0.0004998956120588916, "loss": 7.3659, "step": 3005000 }, { "epoch": 0.03, "learning_rate": 0.0004998952642975076, "loss": 7.3832, "step": 3010000 }, { "epoch": 0.03, "learning_rate": 0.000499894915957936, "loss": 7.3574, "step": 3015000 }, { "epoch": 0.03, "learning_rate": 0.0004998945670401776, "loss": 7.3203, "step": 3020000 }, { "epoch": 0.03, "learning_rate": 0.0004998942175442332, "loss": 7.3212, "step": 3025000 }, { "epoch": 0.03, "learning_rate": 0.000499893867470104, "loss": 7.3108, "step": 3030000 }, { "epoch": 0.03, "learning_rate": 0.0004998935168177901, "loss": 7.3063, "step": 3035000 }, { "epoch": 0.03, "learning_rate": 0.000499893165587293, "loss": 7.2776, "step": 3040000 }, { "epoch": 0.03, "learning_rate": 0.0004998928137786131, "loss": 7.2842, "step": 3045000 }, { "epoch": 0.03, "learning_rate": 0.0004998924613917513, "loss": 7.2903, "step": 3050000 }, { "epoch": 0.03, "learning_rate": 0.0004998921084267086, "loss": 7.2383, "step": 3055000 }, { "epoch": 0.03, "learning_rate": 0.0004998917548834856, "loss": 7.2406, "step": 3060000 }, { "epoch": 0.03, "learning_rate": 0.0004998914007620832, "loss": 7.2557, "step": 3065000 }, { "epoch": 0.03, "learning_rate": 0.0004998910460625021, "loss": 7.2607, "step": 3070000 }, { "epoch": 0.03, "learning_rate": 0.0004998906907847433, "loss": 7.2446, "step": 3075000 }, { "epoch": 0.03, "learning_rate": 0.0004998903349288077, "loss": 7.2361, "step": 3080000 }, { "epoch": 0.03, "learning_rate": 0.0004998899784946957, "loss": 7.2172, "step": 3085000 }, { "epoch": 0.03, "learning_rate": 0.0004998896214824086, "loss": 7.2064, "step": 3090000 }, { "epoch": 0.03, "learning_rate": 0.000499889263891947, "loss": 7.2123, "step": 3095000 }, { "epoch": 0.03, "learning_rate": 0.0004998889057233117, "loss": 7.165, "step": 3100000 }, { "epoch": 0.03, "learning_rate": 0.0004998885469765036, "loss": 7.1333, "step": 3105000 }, { "epoch": 0.03, "learning_rate": 0.0004998881876515234, "loss": 7.1097, "step": 3110000 }, { "epoch": 0.03, "learning_rate": 0.0004998878277483722, "loss": 7.1224, "step": 3115000 }, { "epoch": 0.03, "learning_rate": 0.0004998874672670505, "loss": 7.1403, "step": 3120000 }, { "epoch": 0.03, "learning_rate": 0.0004998871062075595, "loss": 7.1434, "step": 3125000 }, { "epoch": 0.03, "learning_rate": 0.0004998867445698998, "loss": 7.1053, "step": 3130000 }, { "epoch": 0.03, "learning_rate": 0.0004998863823540723, "loss": 7.0769, "step": 3135000 }, { "epoch": 0.03, "learning_rate": 0.0004998860195600777, "loss": 7.061, "step": 3140000 }, { "epoch": 0.03, "learning_rate": 0.0004998856561879171, "loss": 7.0489, "step": 3145000 }, { "epoch": 0.03, "learning_rate": 0.000499885292237591, "loss": 7.0647, "step": 3150000 }, { "epoch": 0.03, "learning_rate": 0.0004998849277091006, "loss": 7.0588, "step": 3155000 }, { "epoch": 0.03, "learning_rate": 0.0004998845626024465, "loss": 7.0329, "step": 3160000 }, { "epoch": 0.03, "learning_rate": 0.0004998841969176297, "loss": 7.0196, "step": 3165000 }, { "epoch": 0.03, "learning_rate": 0.000499883830654651, "loss": 7.0169, "step": 3170000 }, { "epoch": 0.03, "learning_rate": 0.0004998834638135112, "loss": 7.0068, "step": 3175000 }, { "epoch": 0.03, "learning_rate": 0.000499883096394211, "loss": 6.9748, "step": 3180000 }, { "epoch": 0.03, "learning_rate": 0.0004998827283967517, "loss": 6.9723, "step": 3185000 }, { "epoch": 0.03, "learning_rate": 0.0004998823598211337, "loss": 6.9527, "step": 3190000 }, { "epoch": 0.03, "learning_rate": 0.000499881990667358, "loss": 6.9318, "step": 3195000 }, { "epoch": 0.03, "learning_rate": 0.0004998816209354256, "loss": 6.9324, "step": 3200000 }, { "epoch": 0.03, "learning_rate": 0.0004998812506253371, "loss": 6.9347, "step": 3205000 }, { "epoch": 0.03, "learning_rate": 0.0004998808797370936, "loss": 6.9538, "step": 3210000 }, { "epoch": 0.03, "learning_rate": 0.0004998805082706958, "loss": 6.93, "step": 3215000 }, { "epoch": 0.03, "learning_rate": 0.0004998801362261446, "loss": 6.9095, "step": 3220000 }, { "epoch": 0.03, "learning_rate": 0.0004998797636034408, "loss": 6.8771, "step": 3225000 }, { "epoch": 0.03, "learning_rate": 0.0004998793904025855, "loss": 6.8535, "step": 3230000 }, { "epoch": 0.03, "learning_rate": 0.0004998790166235794, "loss": 6.8399, "step": 3235000 }, { "epoch": 0.03, "learning_rate": 0.0004998786422664232, "loss": 6.8154, "step": 3240000 }, { "epoch": 0.03, "learning_rate": 0.000499878267331118, "loss": 6.8082, "step": 3245000 }, { "epoch": 0.03, "learning_rate": 0.0004998778918176647, "loss": 6.7983, "step": 3250000 }, { "epoch": 0.03, "learning_rate": 0.0004998775157260639, "loss": 6.7802, "step": 3255000 }, { "epoch": 0.03, "learning_rate": 0.0004998771390563167, "loss": 6.7625, "step": 3260000 }, { "epoch": 0.03, "learning_rate": 0.0004998767618084239, "loss": 6.747, "step": 3265000 }, { "epoch": 0.03, "learning_rate": 0.0004998763839823864, "loss": 6.7384, "step": 3270000 }, { "epoch": 0.03, "learning_rate": 0.000499876005578205, "loss": 6.7496, "step": 3275000 }, { "epoch": 0.03, "learning_rate": 0.0004998756265958806, "loss": 6.7399, "step": 3280000 }, { "epoch": 0.03, "learning_rate": 0.0004998752470354143, "loss": 6.7096, "step": 3285000 }, { "epoch": 0.03, "learning_rate": 0.0004998748668968066, "loss": 6.6752, "step": 3290000 }, { "epoch": 0.03, "learning_rate": 0.0004998744861800585, "loss": 6.6532, "step": 3295000 }, { "epoch": 0.03, "learning_rate": 0.0004998741048851711, "loss": 6.6338, "step": 3300000 }, { "epoch": 0.03, "learning_rate": 0.000499873723012145, "loss": 6.5932, "step": 3305000 }, { "epoch": 0.03, "learning_rate": 0.0004998733405609813, "loss": 6.5927, "step": 3310000 }, { "epoch": 0.03, "learning_rate": 0.0004998729575316808, "loss": 6.5595, "step": 3315000 }, { "epoch": 0.03, "learning_rate": 0.0004998725739242443, "loss": 6.5342, "step": 3320000 }, { "epoch": 0.03, "learning_rate": 0.0004998721897386729, "loss": 6.5184, "step": 3325000 }, { "epoch": 0.03, "learning_rate": 0.0004998718049749673, "loss": 6.5091, "step": 3330000 }, { "epoch": 0.03, "learning_rate": 0.0004998714196331284, "loss": 6.4739, "step": 3335000 }, { "epoch": 0.03, "learning_rate": 0.0004998710337131571, "loss": 6.44, "step": 3340000 }, { "epoch": 0.03, "learning_rate": 0.0004998706472150545, "loss": 6.431, "step": 3345000 }, { "epoch": 0.03, "learning_rate": 0.0004998702601388211, "loss": 6.3974, "step": 3350000 }, { "epoch": 0.03, "learning_rate": 0.0004998698724844582, "loss": 6.3681, "step": 3355000 }, { "epoch": 0.03, "learning_rate": 0.0004998694842519664, "loss": 6.3462, "step": 3360000 }, { "epoch": 0.03, "learning_rate": 0.0004998690954413468, "loss": 6.3171, "step": 3365000 }, { "epoch": 0.03, "learning_rate": 0.0004998687060526002, "loss": 6.3039, "step": 3370000 }, { "epoch": 0.03, "learning_rate": 0.0004998683160857275, "loss": 6.2849, "step": 3375000 }, { "epoch": 0.03, "learning_rate": 0.0004998679255407295, "loss": 6.2751, "step": 3380000 }, { "epoch": 0.03, "learning_rate": 0.0004998675344176074, "loss": 6.2425, "step": 3385000 }, { "epoch": 0.03, "learning_rate": 0.0004998671427163619, "loss": 6.2323, "step": 3390000 }, { "epoch": 0.03, "learning_rate": 0.0004998667504369939, "loss": 6.2101, "step": 3395000 }, { "epoch": 0.03, "learning_rate": 0.0004998663575795043, "loss": 6.206, "step": 3400000 }, { "epoch": 0.03, "learning_rate": 0.0004998659641438941, "loss": 6.189, "step": 3405000 }, { "epoch": 0.03, "learning_rate": 0.0004998655701301642, "loss": 6.1647, "step": 3410000 }, { "epoch": 0.03, "learning_rate": 0.0004998651755383154, "loss": 6.159, "step": 3415000 }, { "epoch": 0.03, "learning_rate": 0.0004998647803683488, "loss": 6.1456, "step": 3420000 }, { "epoch": 0.03, "learning_rate": 0.0004998643846202649, "loss": 6.1323, "step": 3425000 }, { "epoch": 0.03, "learning_rate": 0.0004998639882940652, "loss": 6.1335, "step": 3430000 }, { "epoch": 0.03, "learning_rate": 0.0004998635913897502, "loss": 6.1191, "step": 3435000 }, { "epoch": 0.03, "learning_rate": 0.000499863193907321, "loss": 6.0935, "step": 3440000 }, { "epoch": 0.03, "learning_rate": 0.0004998627958467786, "loss": 6.0872, "step": 3445000 }, { "epoch": 0.03, "learning_rate": 0.0004998623972081235, "loss": 6.0752, "step": 3450000 }, { "epoch": 0.03, "learning_rate": 0.0004998619979913571, "loss": 6.0658, "step": 3455000 }, { "epoch": 0.03, "learning_rate": 0.0004998615981964802, "loss": 6.0556, "step": 3460000 }, { "epoch": 0.03, "learning_rate": 0.0004998611978234935, "loss": 6.0453, "step": 3465000 }, { "epoch": 0.03, "learning_rate": 0.0004998607968723981, "loss": 6.0446, "step": 3470000 }, { "epoch": 0.03, "learning_rate": 0.000499860395343195, "loss": 6.0357, "step": 3475000 }, { "epoch": 0.03, "learning_rate": 0.0004998599932358851, "loss": 6.044, "step": 3480000 }, { "epoch": 0.03, "learning_rate": 0.0004998595905504692, "loss": 6.0299, "step": 3485000 }, { "epoch": 0.03, "learning_rate": 0.0004998591872869483, "loss": 6.0288, "step": 3490000 }, { "epoch": 0.03, "learning_rate": 0.0004998587834453233, "loss": 6.0161, "step": 3495000 }, { "epoch": 0.03, "learning_rate": 0.0004998583790255952, "loss": 6.0054, "step": 3500000 }, { "epoch": 0.03, "learning_rate": 0.000499857974027765, "loss": 5.9942, "step": 3505000 }, { "epoch": 0.03, "learning_rate": 0.0004998575684518334, "loss": 5.9857, "step": 3510000 }, { "epoch": 0.03, "learning_rate": 0.0004998571622978016, "loss": 5.9877, "step": 3515000 }, { "epoch": 0.03, "learning_rate": 0.0004998567555656704, "loss": 5.9862, "step": 3520000 }, { "epoch": 0.03, "learning_rate": 0.0004998563482554407, "loss": 5.9698, "step": 3525000 }, { "epoch": 0.03, "learning_rate": 0.0004998559403671136, "loss": 5.9642, "step": 3530000 }, { "epoch": 0.03, "learning_rate": 0.0004998555319006898, "loss": 5.9608, "step": 3535000 }, { "epoch": 0.03, "learning_rate": 0.0004998551228561707, "loss": 5.9667, "step": 3540000 }, { "epoch": 0.03, "learning_rate": 0.0004998547132335566, "loss": 5.9588, "step": 3545000 }, { "epoch": 0.03, "learning_rate": 0.0004998543030328489, "loss": 5.946, "step": 3550000 }, { "epoch": 0.03, "learning_rate": 0.0004998538922540485, "loss": 5.9504, "step": 3555000 }, { "epoch": 0.03, "learning_rate": 0.0004998534808971563, "loss": 5.9391, "step": 3560000 }, { "epoch": 0.03, "learning_rate": 0.0004998530689621733, "loss": 5.9356, "step": 3565000 }, { "epoch": 0.03, "learning_rate": 0.0004998526564491002, "loss": 5.9244, "step": 3570000 }, { "epoch": 0.03, "learning_rate": 0.0004998522433579384, "loss": 5.9272, "step": 3575000 }, { "epoch": 0.03, "learning_rate": 0.0004998518296886885, "loss": 5.925, "step": 3580000 }, { "epoch": 0.03, "learning_rate": 0.0004998514154413515, "loss": 5.9168, "step": 3585000 }, { "epoch": 0.03, "learning_rate": 0.0004998510006159284, "loss": 5.9175, "step": 3590000 }, { "epoch": 0.03, "learning_rate": 0.0004998505852124202, "loss": 5.9058, "step": 3595000 }, { "epoch": 0.03, "learning_rate": 0.000499850169230828, "loss": 5.8985, "step": 3600000 }, { "epoch": 0.03, "learning_rate": 0.0004998497526711524, "loss": 5.9005, "step": 3605000 }, { "epoch": 0.03, "learning_rate": 0.0004998493355333946, "loss": 5.9032, "step": 3610000 }, { "epoch": 0.03, "learning_rate": 0.0004998489178175557, "loss": 5.8991, "step": 3615000 }, { "epoch": 0.03, "learning_rate": 0.0004998484995236363, "loss": 5.8947, "step": 3620000 }, { "epoch": 0.03, "learning_rate": 0.0004998480806516377, "loss": 5.8731, "step": 3625000 }, { "epoch": 0.03, "learning_rate": 0.0004998476612015606, "loss": 5.8891, "step": 3630000 }, { "epoch": 0.03, "learning_rate": 0.0004998472411734063, "loss": 5.8775, "step": 3635000 }, { "epoch": 0.03, "learning_rate": 0.0004998468205671755, "loss": 5.8829, "step": 3640000 }, { "epoch": 0.03, "learning_rate": 0.0004998463993828692, "loss": 5.8646, "step": 3645000 }, { "epoch": 0.03, "learning_rate": 0.0004998459776204884, "loss": 5.8804, "step": 3650000 }, { "epoch": 0.03, "learning_rate": 0.0004998455552800343, "loss": 5.8714, "step": 3655000 }, { "epoch": 0.03, "learning_rate": 0.0004998451323615075, "loss": 5.8705, "step": 3660000 }, { "epoch": 0.03, "learning_rate": 0.0004998447088649092, "loss": 5.866, "step": 3665000 }, { "epoch": 0.03, "learning_rate": 0.0004998442847902404, "loss": 5.8661, "step": 3670000 }, { "epoch": 0.03, "learning_rate": 0.0004998438601375018, "loss": 5.8625, "step": 3675000 }, { "epoch": 0.03, "learning_rate": 0.0004998434349066948, "loss": 5.8624, "step": 3680000 }, { "epoch": 0.03, "learning_rate": 0.0004998430090978202, "loss": 5.8611, "step": 3685000 }, { "epoch": 0.03, "learning_rate": 0.0004998425827108789, "loss": 5.8612, "step": 3690000 }, { "epoch": 0.03, "learning_rate": 0.0004998421557458719, "loss": 5.8462, "step": 3695000 }, { "epoch": 0.03, "learning_rate": 0.0004998417282028005, "loss": 5.8616, "step": 3700000 }, { "epoch": 0.03, "learning_rate": 0.0004998413000816651, "loss": 5.8545, "step": 3705000 }, { "epoch": 0.03, "learning_rate": 0.0004998408713824673, "loss": 5.8612, "step": 3710000 }, { "epoch": 0.03, "learning_rate": 0.0004998404421052077, "loss": 5.8517, "step": 3715000 }, { "epoch": 0.03, "learning_rate": 0.0004998400122498874, "loss": 5.855, "step": 3720000 }, { "epoch": 0.03, "learning_rate": 0.0004998395818165075, "loss": 5.842, "step": 3725000 }, { "epoch": 0.03, "learning_rate": 0.0004998391508050687, "loss": 5.8499, "step": 3730000 }, { "epoch": 0.03, "learning_rate": 0.0004998387192155724, "loss": 5.8411, "step": 3735000 }, { "epoch": 0.03, "learning_rate": 0.0004998382870480193, "loss": 5.8442, "step": 3740000 }, { "epoch": 0.03, "learning_rate": 0.0004998378543024105, "loss": 5.8376, "step": 3745000 }, { "epoch": 0.03, "learning_rate": 0.000499837420978747, "loss": 5.8465, "step": 3750000 }, { "epoch": 0.03, "learning_rate": 0.0004998369870770298, "loss": 5.838, "step": 3755000 }, { "epoch": 0.03, "learning_rate": 0.00049983655259726, "loss": 5.8371, "step": 3760000 }, { "epoch": 0.03, "learning_rate": 0.0004998361175394384, "loss": 5.8365, "step": 3765000 }, { "epoch": 0.03, "learning_rate": 0.0004998356819035662, "loss": 5.8392, "step": 3770000 }, { "epoch": 0.03, "learning_rate": 0.0004998352456896442, "loss": 5.8358, "step": 3775000 }, { "epoch": 0.03, "learning_rate": 0.0004998348088976737, "loss": 5.8399, "step": 3780000 }, { "epoch": 0.03, "learning_rate": 0.0004998343715276554, "loss": 5.8297, "step": 3785000 }, { "epoch": 0.03, "learning_rate": 0.0004998339335795905, "loss": 5.825, "step": 3790000 }, { "epoch": 0.03, "learning_rate": 0.00049983349505348, "loss": 5.8235, "step": 3795000 }, { "epoch": 0.03, "learning_rate": 0.0004998330559493248, "loss": 5.8283, "step": 3800000 }, { "epoch": 0.03, "learning_rate": 0.0004998326162671262, "loss": 5.8185, "step": 3805000 }, { "epoch": 0.04, "learning_rate": 0.0004998321760068848, "loss": 5.8234, "step": 3810000 }, { "epoch": 0.04, "learning_rate": 0.0004998317351686019, "loss": 5.8218, "step": 3815000 }, { "epoch": 0.04, "learning_rate": 0.0004998312937522785, "loss": 5.8146, "step": 3820000 }, { "epoch": 0.04, "learning_rate": 0.0004998308517579155, "loss": 5.8176, "step": 3825000 }, { "epoch": 0.04, "learning_rate": 0.0004998304091855141, "loss": 5.8182, "step": 3830000 }, { "epoch": 0.04, "learning_rate": 0.0004998299660350751, "loss": 5.8208, "step": 3835000 }, { "epoch": 0.04, "learning_rate": 0.0004998295223065998, "loss": 5.8194, "step": 3840000 }, { "epoch": 0.04, "learning_rate": 0.0004998290780000891, "loss": 5.829, "step": 3845000 }, { "epoch": 0.04, "learning_rate": 0.0004998286331155438, "loss": 5.8147, "step": 3850000 }, { "epoch": 0.04, "learning_rate": 0.0004998281876529652, "loss": 5.8143, "step": 3855000 }, { "epoch": 0.04, "learning_rate": 0.0004998277416123543, "loss": 5.8108, "step": 3860000 }, { "epoch": 0.04, "learning_rate": 0.0004998272949937122, "loss": 5.8082, "step": 3865000 }, { "epoch": 0.04, "learning_rate": 0.0004998268477970397, "loss": 5.8181, "step": 3870000 }, { "epoch": 0.04, "learning_rate": 0.0004998264000223379, "loss": 5.8044, "step": 3875000 }, { "epoch": 0.04, "learning_rate": 0.0004998259516696081, "loss": 5.8118, "step": 3880000 }, { "epoch": 0.04, "learning_rate": 0.000499825502738851, "loss": 5.8059, "step": 3885000 }, { "epoch": 0.04, "learning_rate": 0.0004998250532300678, "loss": 5.8096, "step": 3890000 }, { "epoch": 0.04, "learning_rate": 0.0004998246031432595, "loss": 5.8124, "step": 3895000 }, { "epoch": 0.04, "learning_rate": 0.0004998241524784272, "loss": 5.8068, "step": 3900000 }, { "epoch": 0.04, "learning_rate": 0.0004998237012355719, "loss": 5.8133, "step": 3905000 }, { "epoch": 0.04, "learning_rate": 0.0004998232494146947, "loss": 5.8056, "step": 3910000 }, { "epoch": 0.04, "learning_rate": 0.0004998227970157965, "loss": 5.8104, "step": 3915000 }, { "epoch": 0.04, "learning_rate": 0.0004998223440388784, "loss": 5.8119, "step": 3920000 }, { "epoch": 0.04, "learning_rate": 0.0004998218904839416, "loss": 5.8128, "step": 3925000 }, { "epoch": 0.04, "learning_rate": 0.0004998214363509869, "loss": 5.8021, "step": 3930000 }, { "epoch": 0.04, "learning_rate": 0.0004998209816400156, "loss": 5.8091, "step": 3935000 }, { "epoch": 0.04, "learning_rate": 0.0004998205263510286, "loss": 5.7921, "step": 3940000 }, { "epoch": 0.04, "learning_rate": 0.000499820070484027, "loss": 5.8137, "step": 3945000 }, { "epoch": 0.04, "learning_rate": 0.0004998196140390118, "loss": 5.8, "step": 3950000 }, { "epoch": 0.04, "learning_rate": 0.0004998191570159842, "loss": 5.8085, "step": 3955000 }, { "epoch": 0.04, "learning_rate": 0.0004998186994149451, "loss": 5.8102, "step": 3960000 }, { "epoch": 0.04, "learning_rate": 0.0004998182412358955, "loss": 5.801, "step": 3965000 }, { "epoch": 0.04, "learning_rate": 0.0004998177824788367, "loss": 5.7999, "step": 3970000 }, { "epoch": 0.04, "learning_rate": 0.0004998173231437696, "loss": 5.8047, "step": 3975000 }, { "epoch": 0.04, "learning_rate": 0.0004998168632306954, "loss": 5.804, "step": 3980000 }, { "epoch": 0.04, "learning_rate": 0.000499816402739615, "loss": 5.8068, "step": 3985000 }, { "epoch": 0.04, "learning_rate": 0.0004998159416705294, "loss": 5.7973, "step": 3990000 }, { "epoch": 0.04, "learning_rate": 0.00049981548002344, "loss": 5.7941, "step": 3995000 }, { "epoch": 0.04, "learning_rate": 0.0004998150177983476, "loss": 5.7839, "step": 4000000 }, { "epoch": 0.04, "learning_rate": 0.0004998145549952533, "loss": 5.8023, "step": 4005000 }, { "epoch": 0.04, "learning_rate": 0.0004998140916141582, "loss": 5.7993, "step": 4010000 }, { "epoch": 0.04, "learning_rate": 0.0004998136276550634, "loss": 5.7915, "step": 4015000 }, { "epoch": 0.04, "learning_rate": 0.0004998131631179701, "loss": 5.8068, "step": 4020000 }, { "epoch": 0.04, "learning_rate": 0.000499812698002879, "loss": 5.7889, "step": 4025000 }, { "epoch": 0.04, "learning_rate": 0.0004998122323097916, "loss": 5.7873, "step": 4030000 }, { "epoch": 0.04, "learning_rate": 0.0004998117660387086, "loss": 5.7947, "step": 4035000 }, { "epoch": 0.04, "learning_rate": 0.0004998112991896313, "loss": 5.7878, "step": 4040000 }, { "epoch": 0.04, "learning_rate": 0.0004998108317625608, "loss": 5.8034, "step": 4045000 }, { "epoch": 0.04, "learning_rate": 0.0004998103637574981, "loss": 5.7911, "step": 4050000 }, { "epoch": 0.04, "learning_rate": 0.0004998098951744443, "loss": 5.7927, "step": 4055000 }, { "epoch": 0.04, "learning_rate": 0.0004998094260134005, "loss": 5.7973, "step": 4060000 }, { "epoch": 0.04, "learning_rate": 0.0004998089562743678, "loss": 5.8046, "step": 4065000 }, { "epoch": 0.04, "learning_rate": 0.0004998084859573472, "loss": 5.7947, "step": 4070000 }, { "epoch": 0.04, "learning_rate": 0.0004998080150623399, "loss": 5.7934, "step": 4075000 }, { "epoch": 0.04, "learning_rate": 0.0004998075435893467, "loss": 5.7906, "step": 4080000 }, { "epoch": 0.04, "learning_rate": 0.0004998070715383692, "loss": 5.7772, "step": 4085000 }, { "epoch": 0.04, "learning_rate": 0.0004998065989094082, "loss": 5.7962, "step": 4090000 }, { "epoch": 0.04, "learning_rate": 0.0004998061257024647, "loss": 5.802, "step": 4095000 }, { "epoch": 0.04, "learning_rate": 0.00049980565191754, "loss": 5.7967, "step": 4100000 }, { "epoch": 0.04, "learning_rate": 0.0004998051775546351, "loss": 5.7827, "step": 4105000 }, { "epoch": 0.04, "learning_rate": 0.0004998047026137511, "loss": 5.7844, "step": 4110000 }, { "epoch": 0.04, "learning_rate": 0.0004998042270948891, "loss": 5.7964, "step": 4115000 }, { "epoch": 0.04, "learning_rate": 0.0004998037509980502, "loss": 5.8, "step": 4120000 }, { "epoch": 0.04, "learning_rate": 0.0004998032743232355, "loss": 5.7951, "step": 4125000 }, { "epoch": 0.04, "learning_rate": 0.000499802797070446, "loss": 5.8, "step": 4130000 }, { "epoch": 0.04, "learning_rate": 0.0004998023192396831, "loss": 5.7958, "step": 4135000 }, { "epoch": 0.04, "learning_rate": 0.0004998018408309476, "loss": 5.8041, "step": 4140000 }, { "epoch": 0.04, "learning_rate": 0.0004998013618442408, "loss": 5.783, "step": 4145000 }, { "epoch": 0.04, "learning_rate": 0.0004998008822795636, "loss": 5.7961, "step": 4150000 }, { "epoch": 0.04, "learning_rate": 0.0004998004021369174, "loss": 5.7924, "step": 4155000 }, { "epoch": 0.04, "learning_rate": 0.0004997999214163031, "loss": 5.7976, "step": 4160000 }, { "epoch": 0.04, "learning_rate": 0.0004997994401177219, "loss": 5.798, "step": 4165000 }, { "epoch": 0.04, "learning_rate": 0.0004997989582411748, "loss": 5.7874, "step": 4170000 }, { "epoch": 0.04, "learning_rate": 0.000499798475786663, "loss": 5.7848, "step": 4175000 }, { "epoch": 0.04, "learning_rate": 0.0004997979927541876, "loss": 5.787, "step": 4180000 }, { "epoch": 0.04, "learning_rate": 0.0004997975091437497, "loss": 5.7947, "step": 4185000 }, { "epoch": 0.04, "learning_rate": 0.0004997970249553505, "loss": 5.7894, "step": 4190000 }, { "epoch": 0.04, "learning_rate": 0.0004997965401889911, "loss": 5.7835, "step": 4195000 }, { "epoch": 0.04, "learning_rate": 0.0004997960548446725, "loss": 5.7828, "step": 4200000 }, { "epoch": 0.04, "learning_rate": 0.000499795568922396, "loss": 5.7876, "step": 4205000 }, { "epoch": 0.04, "learning_rate": 0.0004997950824221626, "loss": 5.7814, "step": 4210000 }, { "epoch": 0.04, "learning_rate": 0.0004997945953439735, "loss": 5.7835, "step": 4215000 }, { "epoch": 0.04, "learning_rate": 0.0004997941076878297, "loss": 5.7961, "step": 4220000 }, { "epoch": 0.04, "learning_rate": 0.0004997936194537325, "loss": 5.7924, "step": 4225000 }, { "epoch": 0.04, "learning_rate": 0.0004997931306416828, "loss": 5.7789, "step": 4230000 }, { "epoch": 0.04, "learning_rate": 0.000499792641251682, "loss": 5.7848, "step": 4235000 }, { "epoch": 0.04, "learning_rate": 0.0004997921512837311, "loss": 5.7747, "step": 4240000 }, { "epoch": 0.04, "learning_rate": 0.0004997916607378312, "loss": 5.7827, "step": 4245000 }, { "epoch": 0.04, "learning_rate": 0.0004997911696139835, "loss": 5.7856, "step": 4250000 }, { "epoch": 0.04, "learning_rate": 0.0004997906779121892, "loss": 5.783, "step": 4255000 }, { "epoch": 0.04, "learning_rate": 0.0004997901856324493, "loss": 5.7864, "step": 4260000 }, { "epoch": 0.04, "learning_rate": 0.0004997896927747649, "loss": 5.7805, "step": 4265000 }, { "epoch": 0.04, "learning_rate": 0.0004997891993391373, "loss": 5.7904, "step": 4270000 }, { "epoch": 0.04, "learning_rate": 0.0004997887053255676, "loss": 5.7943, "step": 4275000 }, { "epoch": 0.04, "learning_rate": 0.0004997882107340569, "loss": 5.7962, "step": 4280000 }, { "epoch": 0.04, "learning_rate": 0.0004997877155646063, "loss": 5.7891, "step": 4285000 }, { "epoch": 0.04, "learning_rate": 0.0004997872198172171, "loss": 5.7867, "step": 4290000 }, { "epoch": 0.04, "learning_rate": 0.0004997867234918902, "loss": 5.7902, "step": 4295000 }, { "epoch": 0.04, "learning_rate": 0.0004997862265886271, "loss": 5.7774, "step": 4300000 }, { "epoch": 0.04, "learning_rate": 0.0004997857291074286, "loss": 5.7809, "step": 4305000 }, { "epoch": 0.04, "learning_rate": 0.000499785231048296, "loss": 5.7785, "step": 4310000 }, { "epoch": 0.04, "learning_rate": 0.0004997847324112306, "loss": 5.7871, "step": 4315000 }, { "epoch": 0.04, "learning_rate": 0.0004997842331962332, "loss": 5.7785, "step": 4320000 }, { "epoch": 0.04, "learning_rate": 0.0004997837334033054, "loss": 5.7766, "step": 4325000 }, { "epoch": 0.04, "learning_rate": 0.0004997832330324479, "loss": 5.7901, "step": 4330000 }, { "epoch": 0.04, "learning_rate": 0.0004997827320836622, "loss": 5.7853, "step": 4335000 }, { "epoch": 0.04, "learning_rate": 0.0004997822305569493, "loss": 5.7874, "step": 4340000 }, { "epoch": 0.04, "learning_rate": 0.0004997817284523104, "loss": 5.7914, "step": 4345000 }, { "epoch": 0.04, "learning_rate": 0.0004997812257697467, "loss": 5.7902, "step": 4350000 }, { "epoch": 0.04, "learning_rate": 0.0004997807225092593, "loss": 5.7816, "step": 4355000 }, { "epoch": 0.04, "learning_rate": 0.0004997802186708493, "loss": 5.775, "step": 4360000 }, { "epoch": 0.04, "learning_rate": 0.000499779714254518, "loss": 5.7831, "step": 4365000 }, { "epoch": 0.04, "learning_rate": 0.0004997792092602666, "loss": 5.7777, "step": 4370000 }, { "epoch": 0.04, "learning_rate": 0.0004997787036880961, "loss": 5.779, "step": 4375000 }, { "epoch": 0.04, "learning_rate": 0.0004997781975380077, "loss": 5.7796, "step": 4380000 }, { "epoch": 0.04, "learning_rate": 0.0004997776908100028, "loss": 5.7781, "step": 4385000 }, { "epoch": 0.04, "learning_rate": 0.0004997771835040823, "loss": 5.7771, "step": 4390000 }, { "epoch": 0.04, "learning_rate": 0.0004997766756202475, "loss": 5.7827, "step": 4395000 }, { "epoch": 0.04, "learning_rate": 0.0004997761671584995, "loss": 5.783, "step": 4400000 }, { "epoch": 0.04, "learning_rate": 0.0004997756581188395, "loss": 5.7835, "step": 4405000 }, { "epoch": 0.04, "learning_rate": 0.0004997751485012688, "loss": 5.7873, "step": 4410000 }, { "epoch": 0.04, "learning_rate": 0.0004997746383057885, "loss": 5.7744, "step": 4415000 }, { "epoch": 0.04, "learning_rate": 0.0004997741275323997, "loss": 5.7752, "step": 4420000 }, { "epoch": 0.04, "learning_rate": 0.0004997736161811037, "loss": 5.7843, "step": 4425000 }, { "epoch": 0.04, "learning_rate": 0.0004997731042519015, "loss": 5.7788, "step": 4430000 }, { "epoch": 0.04, "learning_rate": 0.0004997725917447945, "loss": 5.7817, "step": 4435000 }, { "epoch": 0.04, "learning_rate": 0.0004997720786597838, "loss": 5.775, "step": 4440000 }, { "epoch": 0.04, "learning_rate": 0.0004997715649968706, "loss": 5.7807, "step": 4445000 }, { "epoch": 0.04, "learning_rate": 0.000499771050756056, "loss": 5.7893, "step": 4450000 }, { "epoch": 0.04, "learning_rate": 0.0004997705359373412, "loss": 5.7742, "step": 4455000 }, { "epoch": 0.04, "learning_rate": 0.0004997700205407276, "loss": 5.7769, "step": 4460000 }, { "epoch": 0.04, "learning_rate": 0.0004997695045662162, "loss": 5.7797, "step": 4465000 }, { "epoch": 0.04, "learning_rate": 0.0004997689880138083, "loss": 5.7801, "step": 4470000 }, { "epoch": 0.04, "learning_rate": 0.000499768470883505, "loss": 5.777, "step": 4475000 }, { "epoch": 0.04, "learning_rate": 0.0004997679531753075, "loss": 5.7898, "step": 4480000 }, { "epoch": 0.04, "learning_rate": 0.000499767434889217, "loss": 5.7771, "step": 4485000 }, { "epoch": 0.04, "learning_rate": 0.0004997669160252348, "loss": 5.7716, "step": 4490000 }, { "epoch": 0.04, "learning_rate": 0.000499766396583362, "loss": 5.7714, "step": 4495000 }, { "epoch": 0.04, "learning_rate": 0.0004997658765635998, "loss": 5.7832, "step": 4500000 }, { "epoch": 0.04, "learning_rate": 0.0004997653559659495, "loss": 5.7738, "step": 4505000 }, { "epoch": 0.04, "learning_rate": 0.0004997648347904122, "loss": 5.7686, "step": 4510000 }, { "epoch": 0.04, "learning_rate": 0.0004997643130369891, "loss": 5.7733, "step": 4515000 }, { "epoch": 0.04, "learning_rate": 0.0004997637907056816, "loss": 5.7771, "step": 4520000 }, { "epoch": 0.04, "learning_rate": 0.0004997632677964906, "loss": 5.7837, "step": 4525000 }, { "epoch": 0.04, "learning_rate": 0.0004997627443094175, "loss": 5.7774, "step": 4530000 }, { "epoch": 0.04, "learning_rate": 0.0004997622202444636, "loss": 5.7821, "step": 4535000 }, { "epoch": 0.04, "learning_rate": 0.0004997616956016298, "loss": 5.761, "step": 4540000 }, { "epoch": 0.04, "learning_rate": 0.0004997611703809177, "loss": 5.7795, "step": 4545000 }, { "epoch": 0.04, "learning_rate": 0.0004997606445823283, "loss": 5.7794, "step": 4550000 }, { "epoch": 0.04, "learning_rate": 0.0004997601182058628, "loss": 5.7741, "step": 4555000 }, { "epoch": 0.04, "learning_rate": 0.0004997595912515224, "loss": 5.7792, "step": 4560000 }, { "epoch": 0.04, "learning_rate": 0.0004997590637193084, "loss": 5.7692, "step": 4565000 }, { "epoch": 0.04, "learning_rate": 0.000499758535609222, "loss": 5.7858, "step": 4570000 }, { "epoch": 0.04, "learning_rate": 0.0004997580069212644, "loss": 5.7781, "step": 4575000 }, { "epoch": 0.04, "learning_rate": 0.000499757477655437, "loss": 5.7855, "step": 4580000 }, { "epoch": 0.04, "learning_rate": 0.0004997569478117407, "loss": 5.7668, "step": 4585000 }, { "epoch": 0.04, "learning_rate": 0.000499756417390177, "loss": 5.7681, "step": 4590000 }, { "epoch": 0.04, "learning_rate": 0.0004997558863907469, "loss": 5.7772, "step": 4595000 }, { "epoch": 0.04, "learning_rate": 0.0004997553548134518, "loss": 5.7633, "step": 4600000 }, { "epoch": 0.04, "learning_rate": 0.0004997548226582929, "loss": 5.7828, "step": 4605000 }, { "epoch": 0.04, "learning_rate": 0.0004997542899252714, "loss": 5.7726, "step": 4610000 }, { "epoch": 0.04, "learning_rate": 0.0004997537566143886, "loss": 5.7748, "step": 4615000 }, { "epoch": 0.04, "learning_rate": 0.0004997532227256456, "loss": 5.7676, "step": 4620000 }, { "epoch": 0.04, "learning_rate": 0.0004997526882590437, "loss": 5.784, "step": 4625000 }, { "epoch": 0.04, "learning_rate": 0.0004997521532145841, "loss": 5.7698, "step": 4630000 }, { "epoch": 0.04, "learning_rate": 0.0004997516175922682, "loss": 5.7794, "step": 4635000 }, { "epoch": 0.04, "learning_rate": 0.0004997510813920972, "loss": 5.7775, "step": 4640000 }, { "epoch": 0.04, "learning_rate": 0.0004997505446140721, "loss": 5.7744, "step": 4645000 }, { "epoch": 0.04, "learning_rate": 0.0004997500072581943, "loss": 5.7749, "step": 4650000 }, { "epoch": 0.04, "learning_rate": 0.0004997494693244652, "loss": 5.7874, "step": 4655000 }, { "epoch": 0.04, "learning_rate": 0.0004997489308128857, "loss": 5.7749, "step": 4660000 }, { "epoch": 0.04, "learning_rate": 0.0004997483917234574, "loss": 5.776, "step": 4665000 }, { "epoch": 0.04, "learning_rate": 0.0004997478520561814, "loss": 5.7645, "step": 4670000 }, { "epoch": 0.04, "learning_rate": 0.0004997473118110589, "loss": 5.7727, "step": 4675000 }, { "epoch": 0.04, "learning_rate": 0.0004997467709880912, "loss": 5.7834, "step": 4680000 }, { "epoch": 0.04, "learning_rate": 0.0004997462295872794, "loss": 5.7788, "step": 4685000 }, { "epoch": 0.04, "learning_rate": 0.000499745687608625, "loss": 5.7777, "step": 4690000 }, { "epoch": 0.04, "learning_rate": 0.0004997451450521292, "loss": 5.7823, "step": 4695000 }, { "epoch": 0.04, "learning_rate": 0.0004997446019177932, "loss": 5.7738, "step": 4700000 }, { "epoch": 0.04, "learning_rate": 0.0004997440582056182, "loss": 5.7861, "step": 4705000 }, { "epoch": 0.04, "learning_rate": 0.0004997435139156055, "loss": 5.7768, "step": 4710000 }, { "epoch": 0.04, "learning_rate": 0.0004997429690477564, "loss": 5.7802, "step": 4715000 }, { "epoch": 0.04, "learning_rate": 0.0004997424236020722, "loss": 5.7791, "step": 4720000 }, { "epoch": 0.04, "learning_rate": 0.000499741877578554, "loss": 5.7771, "step": 4725000 }, { "epoch": 0.04, "learning_rate": 0.0004997413309772032, "loss": 5.7744, "step": 4730000 }, { "epoch": 0.04, "learning_rate": 0.000499740783798021, "loss": 5.7682, "step": 4735000 }, { "epoch": 0.04, "learning_rate": 0.0004997402360410087, "loss": 5.771, "step": 4740000 }, { "epoch": 0.04, "learning_rate": 0.0004997396877061676, "loss": 5.7762, "step": 4745000 }, { "epoch": 0.04, "learning_rate": 0.000499739138793499, "loss": 5.7693, "step": 4750000 }, { "epoch": 0.04, "learning_rate": 0.000499738589303004, "loss": 5.7779, "step": 4755000 }, { "epoch": 0.04, "learning_rate": 0.0004997380392346839, "loss": 5.7696, "step": 4760000 }, { "epoch": 0.04, "learning_rate": 0.0004997374885885402, "loss": 5.7817, "step": 4765000 }, { "epoch": 0.04, "learning_rate": 0.0004997369373645738, "loss": 5.775, "step": 4770000 }, { "epoch": 0.04, "learning_rate": 0.0004997363855627864, "loss": 5.777, "step": 4775000 }, { "epoch": 0.04, "learning_rate": 0.000499735833183179, "loss": 5.7665, "step": 4780000 }, { "epoch": 0.04, "learning_rate": 0.0004997352802257529, "loss": 5.7802, "step": 4785000 }, { "epoch": 0.04, "learning_rate": 0.0004997347266905095, "loss": 5.7794, "step": 4790000 }, { "epoch": 0.04, "learning_rate": 0.00049973417257745, "loss": 5.7793, "step": 4795000 }, { "epoch": 0.04, "learning_rate": 0.0004997336178865756, "loss": 5.7804, "step": 4800000 }, { "epoch": 0.04, "learning_rate": 0.0004997330626178878, "loss": 5.7776, "step": 4805000 }, { "epoch": 0.04, "learning_rate": 0.0004997325067713877, "loss": 5.7697, "step": 4810000 }, { "epoch": 0.04, "learning_rate": 0.0004997319503470765, "loss": 5.7728, "step": 4815000 }, { "epoch": 0.04, "learning_rate": 0.0004997313933449559, "loss": 5.7885, "step": 4820000 }, { "epoch": 0.04, "learning_rate": 0.0004997308357650267, "loss": 5.7713, "step": 4825000 }, { "epoch": 0.04, "learning_rate": 0.0004997302776072905, "loss": 5.792, "step": 4830000 }, { "epoch": 0.04, "learning_rate": 0.0004997297188717486, "loss": 5.7741, "step": 4835000 }, { "epoch": 0.04, "learning_rate": 0.000499729159558402, "loss": 5.7726, "step": 4840000 }, { "epoch": 0.04, "learning_rate": 0.0004997285996672522, "loss": 5.7735, "step": 4845000 }, { "epoch": 0.04, "learning_rate": 0.0004997280391983007, "loss": 5.7674, "step": 4850000 }, { "epoch": 0.04, "learning_rate": 0.0004997274781515484, "loss": 5.7837, "step": 4855000 }, { "epoch": 0.04, "learning_rate": 0.0004997269165269968, "loss": 5.7831, "step": 4860000 }, { "epoch": 0.04, "learning_rate": 0.0004997263543246472, "loss": 5.7744, "step": 4865000 }, { "epoch": 0.04, "learning_rate": 0.0004997257915445009, "loss": 5.7695, "step": 4870000 }, { "epoch": 0.04, "learning_rate": 0.0004997252281865592, "loss": 5.7722, "step": 4875000 }, { "epoch": 0.04, "learning_rate": 0.0004997246642508233, "loss": 5.7746, "step": 4880000 }, { "epoch": 0.04, "learning_rate": 0.0004997240997372946, "loss": 5.7707, "step": 4885000 }, { "epoch": 0.04, "learning_rate": 0.0004997235346459744, "loss": 5.789, "step": 4890000 }, { "epoch": 0.04, "learning_rate": 0.000499722968976864, "loss": 5.7864, "step": 4895000 }, { "epoch": 0.05, "learning_rate": 0.0004997224027299647, "loss": 5.7804, "step": 4900000 }, { "epoch": 0.05, "learning_rate": 0.0004997218359052779, "loss": 5.7705, "step": 4905000 }, { "epoch": 0.05, "learning_rate": 0.0004997212685028048, "loss": 5.7645, "step": 4910000 }, { "epoch": 0.05, "learning_rate": 0.0004997207005225467, "loss": 5.7834, "step": 4915000 }, { "epoch": 0.05, "learning_rate": 0.000499720131964505, "loss": 5.7885, "step": 4920000 }, { "epoch": 0.05, "learning_rate": 0.0004997195628286809, "loss": 5.7779, "step": 4925000 }, { "epoch": 0.05, "learning_rate": 0.000499718993115076, "loss": 5.7758, "step": 4930000 }, { "epoch": 0.05, "learning_rate": 0.0004997184228236912, "loss": 5.7711, "step": 4935000 }, { "epoch": 0.05, "learning_rate": 0.0004997178519545281, "loss": 5.7807, "step": 4940000 }, { "epoch": 0.05, "learning_rate": 0.0004997172805075879, "loss": 5.7835, "step": 4945000 }, { "epoch": 0.05, "learning_rate": 0.0004997167084828721, "loss": 5.7817, "step": 4950000 }, { "epoch": 0.05, "learning_rate": 0.0004997161358803818, "loss": 5.7753, "step": 4955000 }, { "epoch": 0.05, "learning_rate": 0.0004997155627001185, "loss": 5.7697, "step": 4960000 }, { "epoch": 0.05, "learning_rate": 0.0004997149889420832, "loss": 5.7705, "step": 4965000 }, { "epoch": 0.05, "learning_rate": 0.0004997144146062778, "loss": 5.7725, "step": 4970000 }, { "epoch": 0.05, "learning_rate": 0.0004997138396927031, "loss": 5.7804, "step": 4975000 }, { "epoch": 0.05, "learning_rate": 0.0004997132642013607, "loss": 5.7711, "step": 4980000 }, { "epoch": 0.05, "learning_rate": 0.0004997126881322518, "loss": 5.7751, "step": 4985000 }, { "epoch": 0.05, "learning_rate": 0.0004997121114853779, "loss": 5.7851, "step": 4990000 }, { "epoch": 0.05, "learning_rate": 0.0004997115342607402, "loss": 5.7743, "step": 4995000 }, { "epoch": 0.05, "learning_rate": 0.00049971095645834, "loss": 5.7762, "step": 5000000 }, { "epoch": 0.05, "learning_rate": 0.0004997103780781788, "loss": 5.7669, "step": 5005000 }, { "epoch": 0.05, "learning_rate": 0.0004997097991202578, "loss": 5.7694, "step": 5010000 }, { "epoch": 0.05, "learning_rate": 0.0004997092195845784, "loss": 5.7716, "step": 5015000 }, { "epoch": 0.05, "learning_rate": 0.0004997086394711419, "loss": 5.7884, "step": 5020000 }, { "epoch": 0.05, "learning_rate": 0.0004997080587799496, "loss": 5.7788, "step": 5025000 }, { "epoch": 0.05, "learning_rate": 0.000499707477511003, "loss": 5.7753, "step": 5030000 }, { "epoch": 0.05, "learning_rate": 0.0004997068956643034, "loss": 5.7691, "step": 5035000 }, { "epoch": 0.05, "learning_rate": 0.000499706313239852, "loss": 5.781, "step": 5040000 }, { "epoch": 0.05, "learning_rate": 0.0004997057302376502, "loss": 5.7799, "step": 5045000 }, { "epoch": 0.05, "learning_rate": 0.0004997051466576995, "loss": 5.7671, "step": 5050000 }, { "epoch": 0.05, "learning_rate": 0.0004997045625000012, "loss": 5.7771, "step": 5055000 }, { "epoch": 0.05, "learning_rate": 0.0004997039777645565, "loss": 5.7738, "step": 5060000 }, { "epoch": 0.05, "learning_rate": 0.0004997033924513669, "loss": 5.7746, "step": 5065000 }, { "epoch": 0.05, "learning_rate": 0.0004997028065604337, "loss": 5.7758, "step": 5070000 }, { "epoch": 0.05, "learning_rate": 0.0004997022200917582, "loss": 5.7802, "step": 5075000 }, { "epoch": 0.05, "learning_rate": 0.0004997016330453418, "loss": 5.7693, "step": 5080000 }, { "epoch": 0.05, "learning_rate": 0.000499701045421186, "loss": 5.7806, "step": 5085000 }, { "epoch": 0.05, "learning_rate": 0.0004997004572192919, "loss": 5.7844, "step": 5090000 }, { "epoch": 0.05, "learning_rate": 0.000499699868439661, "loss": 5.7856, "step": 5095000 }, { "epoch": 0.05, "learning_rate": 0.0004996992790822949, "loss": 5.7773, "step": 5100000 }, { "epoch": 0.05, "learning_rate": 0.0004996986891471945, "loss": 5.7725, "step": 5105000 }, { "epoch": 0.05, "learning_rate": 0.0004996980986343614, "loss": 5.779, "step": 5110000 } ], "logging_steps": 5000, "max_steps": 326562159, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 5000, "total_flos": 8.278749111273246e+19, "train_batch_size": 12, "trial_name": null, "trial_params": null }