|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.04694358968884695, |
|
"eval_steps": 500, |
|
"global_step": 5110000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999999998149024, |
|
"loss": 8.5332, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999999990629436, |
|
"loss": 8.1061, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.000499999997732555, |
|
"loss": 8.1201, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999999958237364, |
|
"loss": 8.1658, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.000499999993336488, |
|
"loss": 8.2813, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999999902708098, |
|
"loss": 8.3271, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999999866267018, |
|
"loss": 8.3799, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999999824041639, |
|
"loss": 8.4484, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999999776031961, |
|
"loss": 8.4779, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999999722237985, |
|
"loss": 8.4402, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999999662659712, |
|
"loss": 8.4371, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999999597297141, |
|
"loss": 8.4231, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999999526150273, |
|
"loss": 8.3955, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999999449219107, |
|
"loss": 8.4, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999999366503643, |
|
"loss": 8.414, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999999278003882, |
|
"loss": 8.4149, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999999183719824, |
|
"loss": 8.4205, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.000499999908365147, |
|
"loss": 8.4157, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999998977798819, |
|
"loss": 8.4017, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999998866161871, |
|
"loss": 8.4086, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999998748740628, |
|
"loss": 8.3796, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.000499999862553509, |
|
"loss": 8.4095, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999998496545254, |
|
"loss": 8.4944, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999998361771126, |
|
"loss": 8.4752, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999998221212701, |
|
"loss": 8.4344, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999998074869983, |
|
"loss": 8.4137, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999997922742969, |
|
"loss": 8.468, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999997764831663, |
|
"loss": 8.5349, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999997601136063, |
|
"loss": 8.5042, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999997431656169, |
|
"loss": 8.5147, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999997256391984, |
|
"loss": 8.4722, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999997075343505, |
|
"loss": 8.4144, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999996888510735, |
|
"loss": 8.3935, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999996695893673, |
|
"loss": 8.4244, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999996497492322, |
|
"loss": 8.4991, |
|
"step": 175000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999996293306679, |
|
"loss": 8.5739, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999996083336746, |
|
"loss": 8.5794, |
|
"step": 185000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999995867582523, |
|
"loss": 8.5878, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999995646044011, |
|
"loss": 8.6133, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999995418721212, |
|
"loss": 8.5593, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999995185614123, |
|
"loss": 8.5519, |
|
"step": 205000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999994946722748, |
|
"loss": 8.5628, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999994702047085, |
|
"loss": 8.5084, |
|
"step": 215000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999994451587136, |
|
"loss": 8.5437, |
|
"step": 220000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999994195342902, |
|
"loss": 8.5508, |
|
"step": 225000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999993933314382, |
|
"loss": 8.5281, |
|
"step": 230000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999993665501577, |
|
"loss": 8.5282, |
|
"step": 235000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999993391904488, |
|
"loss": 8.5707, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999993112523117, |
|
"loss": 8.5596, |
|
"step": 245000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999992827357463, |
|
"loss": 8.554, |
|
"step": 250000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999992536407527, |
|
"loss": 8.5556, |
|
"step": 255000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999992239673309, |
|
"loss": 8.5767, |
|
"step": 260000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.000499999193715481, |
|
"loss": 8.579, |
|
"step": 265000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999991628852031, |
|
"loss": 8.5437, |
|
"step": 270000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999991314764974, |
|
"loss": 8.5226, |
|
"step": 275000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999990994893638, |
|
"loss": 8.5095, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999990669238024, |
|
"loss": 8.521, |
|
"step": 285000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999990337798134, |
|
"loss": 8.5687, |
|
"step": 290000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999990000573966, |
|
"loss": 8.567, |
|
"step": 295000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999989657565524, |
|
"loss": 8.5399, |
|
"step": 300000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999989308772806, |
|
"loss": 8.5336, |
|
"step": 305000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999988954195816, |
|
"loss": 8.5427, |
|
"step": 310000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999988593834551, |
|
"loss": 8.5369, |
|
"step": 315000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999988227689015, |
|
"loss": 8.5183, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999987855759207, |
|
"loss": 8.5165, |
|
"step": 325000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999987478045128, |
|
"loss": 8.5009, |
|
"step": 330000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.000499998709454678, |
|
"loss": 8.4832, |
|
"step": 335000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999986705264164, |
|
"loss": 8.4453, |
|
"step": 340000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999986310197279, |
|
"loss": 8.4457, |
|
"step": 345000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999985909346127, |
|
"loss": 8.4958, |
|
"step": 350000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.000499998550271071, |
|
"loss": 8.5252, |
|
"step": 355000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999985090291027, |
|
"loss": 8.526, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999984672087081, |
|
"loss": 8.4962, |
|
"step": 365000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.000499998424809887, |
|
"loss": 8.471, |
|
"step": 370000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999983818326398, |
|
"loss": 8.4943, |
|
"step": 375000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999983382769665, |
|
"loss": 8.5109, |
|
"step": 380000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999982941428673, |
|
"loss": 8.5156, |
|
"step": 385000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999982494303419, |
|
"loss": 8.4645, |
|
"step": 390000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999982041393909, |
|
"loss": 8.4879, |
|
"step": 395000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999981582700142, |
|
"loss": 8.5108, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999981118222117, |
|
"loss": 8.545, |
|
"step": 405000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999980647959839, |
|
"loss": 8.5518, |
|
"step": 410000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999980171913307, |
|
"loss": 8.5067, |
|
"step": 415000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999979690082521, |
|
"loss": 8.537, |
|
"step": 420000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999979202467483, |
|
"loss": 8.4697, |
|
"step": 425000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999978709068197, |
|
"loss": 8.4782, |
|
"step": 430000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.000499997820988466, |
|
"loss": 8.4981, |
|
"step": 435000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999977704916876, |
|
"loss": 8.5571, |
|
"step": 440000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999977194164844, |
|
"loss": 8.514, |
|
"step": 445000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999976677628566, |
|
"loss": 8.4959, |
|
"step": 450000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999976155308043, |
|
"loss": 8.58, |
|
"step": 455000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999975627203278, |
|
"loss": 8.6084, |
|
"step": 460000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999975093314269, |
|
"loss": 8.5743, |
|
"step": 465000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.000499997455364102, |
|
"loss": 8.5643, |
|
"step": 470000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999974008183532, |
|
"loss": 8.4961, |
|
"step": 475000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999973456941804, |
|
"loss": 8.4412, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.000499997289991584, |
|
"loss": 8.4485, |
|
"step": 485000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999972337105639, |
|
"loss": 8.4707, |
|
"step": 490000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999971768511204, |
|
"loss": 8.4722, |
|
"step": 495000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999971194132537, |
|
"loss": 8.486, |
|
"step": 500000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999970613969636, |
|
"loss": 8.438, |
|
"step": 505000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999970028022505, |
|
"loss": 8.4606, |
|
"step": 510000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999969436291146, |
|
"loss": 8.518, |
|
"step": 515000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999968838775557, |
|
"loss": 8.5148, |
|
"step": 520000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999968235475743, |
|
"loss": 8.5136, |
|
"step": 525000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999967626391703, |
|
"loss": 8.4632, |
|
"step": 530000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999967011523439, |
|
"loss": 8.4725, |
|
"step": 535000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999966390870954, |
|
"loss": 8.4696, |
|
"step": 540000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999965764434247, |
|
"loss": 8.4397, |
|
"step": 545000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999965132213321, |
|
"loss": 8.4486, |
|
"step": 550000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999964494208178, |
|
"loss": 8.4202, |
|
"step": 555000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999963850418817, |
|
"loss": 8.4795, |
|
"step": 560000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999963200845243, |
|
"loss": 8.5227, |
|
"step": 565000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999962545487453, |
|
"loss": 8.5248, |
|
"step": 570000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999961884345453, |
|
"loss": 8.5308, |
|
"step": 575000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999961217419241, |
|
"loss": 8.5287, |
|
"step": 580000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999960544708822, |
|
"loss": 8.5622, |
|
"step": 585000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999959866214195, |
|
"loss": 8.5303, |
|
"step": 590000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999959181935361, |
|
"loss": 8.4332, |
|
"step": 595000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999958491872324, |
|
"loss": 8.4176, |
|
"step": 600000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999957796025085, |
|
"loss": 8.3863, |
|
"step": 605000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999957094393644, |
|
"loss": 8.3405, |
|
"step": 610000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999956386978003, |
|
"loss": 8.3725, |
|
"step": 615000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999955673778165, |
|
"loss": 8.4165, |
|
"step": 620000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999954954794132, |
|
"loss": 8.3808, |
|
"step": 625000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999954230025904, |
|
"loss": 8.3515, |
|
"step": 630000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999953499473482, |
|
"loss": 8.3555, |
|
"step": 635000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000499995276313687, |
|
"loss": 8.4179, |
|
"step": 640000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999952021016069, |
|
"loss": 8.4277, |
|
"step": 645000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000499995127311108, |
|
"loss": 8.4402, |
|
"step": 650000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999950519421905, |
|
"loss": 8.4472, |
|
"step": 655000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999949759948546, |
|
"loss": 8.4497, |
|
"step": 660000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999948994691005, |
|
"loss": 8.4436, |
|
"step": 665000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999948223649283, |
|
"loss": 8.4619, |
|
"step": 670000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999947446823382, |
|
"loss": 8.4528, |
|
"step": 675000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999946664213305, |
|
"loss": 8.4476, |
|
"step": 680000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999945875819051, |
|
"loss": 8.5146, |
|
"step": 685000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999945081640625, |
|
"loss": 8.5345, |
|
"step": 690000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999944281678027, |
|
"loss": 8.5706, |
|
"step": 695000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000499994347593126, |
|
"loss": 8.5314, |
|
"step": 700000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999942664400324, |
|
"loss": 8.5054, |
|
"step": 705000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999941847085223, |
|
"loss": 8.5103, |
|
"step": 710000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999941023985958, |
|
"loss": 8.5057, |
|
"step": 715000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000499994019510253, |
|
"loss": 8.4851, |
|
"step": 720000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999939360434942, |
|
"loss": 8.4865, |
|
"step": 725000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999938519983196, |
|
"loss": 8.4787, |
|
"step": 730000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999937673747293, |
|
"loss": 8.4496, |
|
"step": 735000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999936821727237, |
|
"loss": 8.4326, |
|
"step": 740000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999935963923027, |
|
"loss": 8.387, |
|
"step": 745000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999935100334667, |
|
"loss": 8.4004, |
|
"step": 750000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000499993423096216, |
|
"loss": 8.4077, |
|
"step": 755000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999933355805504, |
|
"loss": 8.3867, |
|
"step": 760000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999932474864706, |
|
"loss": 8.4258, |
|
"step": 765000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999931588139764, |
|
"loss": 8.4821, |
|
"step": 770000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999930695630682, |
|
"loss": 8.4397, |
|
"step": 775000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999929797337462, |
|
"loss": 8.5221, |
|
"step": 780000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999928893260105, |
|
"loss": 8.4864, |
|
"step": 785000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999927983398616, |
|
"loss": 8.4338, |
|
"step": 790000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999927067752993, |
|
"loss": 8.4292, |
|
"step": 795000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999926146323241, |
|
"loss": 8.3973, |
|
"step": 800000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999925219109361, |
|
"loss": 8.4031, |
|
"step": 805000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999924286111355, |
|
"loss": 8.3967, |
|
"step": 810000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999923347329226, |
|
"loss": 8.4021, |
|
"step": 815000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999922402762977, |
|
"loss": 8.414, |
|
"step": 820000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999921452412606, |
|
"loss": 8.4847, |
|
"step": 825000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000499992049627812, |
|
"loss": 8.4377, |
|
"step": 830000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000499991953435952, |
|
"loss": 8.4167, |
|
"step": 835000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999918566656806, |
|
"loss": 8.445, |
|
"step": 840000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999917593169984, |
|
"loss": 8.4625, |
|
"step": 845000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999916613899052, |
|
"loss": 8.459, |
|
"step": 850000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999915628844015, |
|
"loss": 8.4831, |
|
"step": 855000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999914638004875, |
|
"loss": 8.478, |
|
"step": 860000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999913641381633, |
|
"loss": 8.4598, |
|
"step": 865000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999912638974292, |
|
"loss": 8.4568, |
|
"step": 870000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999911630782856, |
|
"loss": 8.4423, |
|
"step": 875000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999910616807323, |
|
"loss": 8.4638, |
|
"step": 880000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999909597047702, |
|
"loss": 8.4405, |
|
"step": 885000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999908571503989, |
|
"loss": 8.4464, |
|
"step": 890000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999907540176189, |
|
"loss": 8.4808, |
|
"step": 895000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999906503064305, |
|
"loss": 8.437, |
|
"step": 900000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999905460168339, |
|
"loss": 8.4214, |
|
"step": 905000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999904411488293, |
|
"loss": 8.4418, |
|
"step": 910000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999903357024169, |
|
"loss": 8.5303, |
|
"step": 915000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999902296775971, |
|
"loss": 8.4865, |
|
"step": 920000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00049999012307437, |
|
"loss": 8.4779, |
|
"step": 925000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999900158927358, |
|
"loss": 8.4884, |
|
"step": 930000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999899081326949, |
|
"loss": 8.4641, |
|
"step": 935000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999897997942475, |
|
"loss": 8.485, |
|
"step": 940000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999896908773939, |
|
"loss": 8.4615, |
|
"step": 945000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999895813821341, |
|
"loss": 8.4412, |
|
"step": 950000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999894713084688, |
|
"loss": 8.4274, |
|
"step": 955000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999893606563978, |
|
"loss": 8.4331, |
|
"step": 960000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999892494259216, |
|
"loss": 8.4737, |
|
"step": 965000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999891376170404, |
|
"loss": 8.4471, |
|
"step": 970000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999890252297545, |
|
"loss": 8.4929, |
|
"step": 975000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999889122640642, |
|
"loss": 8.4602, |
|
"step": 980000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999887987199697, |
|
"loss": 8.4671, |
|
"step": 985000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999886845974712, |
|
"loss": 8.4589, |
|
"step": 990000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999885698965689, |
|
"loss": 8.47, |
|
"step": 995000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999884546172634, |
|
"loss": 8.5097, |
|
"step": 1000000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999883387595546, |
|
"loss": 8.5082, |
|
"step": 1005000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000499988222323443, |
|
"loss": 8.4846, |
|
"step": 1010000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999881053089287, |
|
"loss": 8.4732, |
|
"step": 1015000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999879877160121, |
|
"loss": 8.443, |
|
"step": 1020000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999878695446934, |
|
"loss": 8.4313, |
|
"step": 1025000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000499987750794973, |
|
"loss": 8.4374, |
|
"step": 1030000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000499987631466851, |
|
"loss": 8.4242, |
|
"step": 1035000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999875115603279, |
|
"loss": 8.4282, |
|
"step": 1040000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999873910754036, |
|
"loss": 8.4442, |
|
"step": 1045000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999872700120788, |
|
"loss": 8.5036, |
|
"step": 1050000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999871483703536, |
|
"loss": 8.4339, |
|
"step": 1055000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999870261502281, |
|
"loss": 8.399, |
|
"step": 1060000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999869033517028, |
|
"loss": 8.4156, |
|
"step": 1065000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000499986779974778, |
|
"loss": 8.478, |
|
"step": 1070000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999866560194539, |
|
"loss": 8.5151, |
|
"step": 1075000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000499986531485731, |
|
"loss": 8.5111, |
|
"step": 1080000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999864063736091, |
|
"loss": 8.5124, |
|
"step": 1085000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000499986280683089, |
|
"loss": 8.4821, |
|
"step": 1090000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999861544141706, |
|
"loss": 8.4595, |
|
"step": 1095000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999860275668545, |
|
"loss": 8.4339, |
|
"step": 1100000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999859001411409, |
|
"loss": 8.3923, |
|
"step": 1105000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00049998577213703, |
|
"loss": 8.3647, |
|
"step": 1110000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999856435545222, |
|
"loss": 8.3865, |
|
"step": 1115000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999855143936176, |
|
"loss": 8.4136, |
|
"step": 1120000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999853846543169, |
|
"loss": 8.4033, |
|
"step": 1125000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00049998525433662, |
|
"loss": 8.4242, |
|
"step": 1130000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999851234405274, |
|
"loss": 8.3723, |
|
"step": 1135000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999849919660393, |
|
"loss": 8.4118, |
|
"step": 1140000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999848599131562, |
|
"loss": 8.3804, |
|
"step": 1145000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999847272818781, |
|
"loss": 8.4146, |
|
"step": 1150000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999845940722056, |
|
"loss": 8.4656, |
|
"step": 1155000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999844602841388, |
|
"loss": 8.4474, |
|
"step": 1160000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999843259176781, |
|
"loss": 8.4551, |
|
"step": 1165000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999841909728239, |
|
"loss": 8.4472, |
|
"step": 1170000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999840554495763, |
|
"loss": 8.4341, |
|
"step": 1175000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999839193479358, |
|
"loss": 8.4335, |
|
"step": 1180000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999837826679027, |
|
"loss": 8.4043, |
|
"step": 1185000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999836454094771, |
|
"loss": 8.3666, |
|
"step": 1190000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999835075726595, |
|
"loss": 8.3444, |
|
"step": 1195000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999833691574503, |
|
"loss": 8.3216, |
|
"step": 1200000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999832301638497, |
|
"loss": 8.3572, |
|
"step": 1205000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999830905918581, |
|
"loss": 8.3965, |
|
"step": 1210000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999829504414756, |
|
"loss": 8.4237, |
|
"step": 1215000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999828097127029, |
|
"loss": 8.3765, |
|
"step": 1220000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999826684055398, |
|
"loss": 8.3266, |
|
"step": 1225000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999825265199872, |
|
"loss": 8.322, |
|
"step": 1230000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000499982384056045, |
|
"loss": 8.3367, |
|
"step": 1235000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999822410137139, |
|
"loss": 8.3544, |
|
"step": 1240000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999820973929939, |
|
"loss": 8.3379, |
|
"step": 1245000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999819531938854, |
|
"loss": 8.359, |
|
"step": 1250000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999818084163889, |
|
"loss": 8.3699, |
|
"step": 1255000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999816630605047, |
|
"loss": 8.3825, |
|
"step": 1260000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999815171262328, |
|
"loss": 8.3895, |
|
"step": 1265000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000499981370613574, |
|
"loss": 8.3835, |
|
"step": 1270000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999812235225284, |
|
"loss": 8.3684, |
|
"step": 1275000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999810758530964, |
|
"loss": 8.3881, |
|
"step": 1280000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999809276052783, |
|
"loss": 8.376, |
|
"step": 1285000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999807787790746, |
|
"loss": 8.4006, |
|
"step": 1290000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999806293744853, |
|
"loss": 8.3775, |
|
"step": 1295000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000499980479391511, |
|
"loss": 8.3253, |
|
"step": 1300000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999803288301521, |
|
"loss": 8.3663, |
|
"step": 1305000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999801776904088, |
|
"loss": 8.4, |
|
"step": 1310000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999800259722815, |
|
"loss": 8.3802, |
|
"step": 1315000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999798736757706, |
|
"loss": 8.3608, |
|
"step": 1320000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999797208008763, |
|
"loss": 8.3663, |
|
"step": 1325000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999795673475992, |
|
"loss": 8.3449, |
|
"step": 1330000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999794133159394, |
|
"loss": 8.3615, |
|
"step": 1335000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999792587058974, |
|
"loss": 8.3849, |
|
"step": 1340000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999791035174736, |
|
"loss": 8.3867, |
|
"step": 1345000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999789477506682, |
|
"loss": 8.3461, |
|
"step": 1350000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999787914054816, |
|
"loss": 8.2994, |
|
"step": 1355000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999786344819144, |
|
"loss": 8.3118, |
|
"step": 1360000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999784769799666, |
|
"loss": 8.3385, |
|
"step": 1365000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999783188996388, |
|
"loss": 8.3052, |
|
"step": 1370000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999781602409313, |
|
"loss": 8.3561, |
|
"step": 1375000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999780010038445, |
|
"loss": 8.3534, |
|
"step": 1380000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999778411883786, |
|
"loss": 8.3714, |
|
"step": 1385000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999776807945342, |
|
"loss": 8.3757, |
|
"step": 1390000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999775198223117, |
|
"loss": 8.3769, |
|
"step": 1395000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999773582717112, |
|
"loss": 8.3468, |
|
"step": 1400000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999771961427332, |
|
"loss": 8.3378, |
|
"step": 1405000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999770334353782, |
|
"loss": 8.3782, |
|
"step": 1410000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999768701496464, |
|
"loss": 8.3934, |
|
"step": 1415000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999767062855384, |
|
"loss": 8.3977, |
|
"step": 1420000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999765418430543, |
|
"loss": 8.3509, |
|
"step": 1425000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999763768221946, |
|
"loss": 8.3453, |
|
"step": 1430000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999762112229598, |
|
"loss": 8.2951, |
|
"step": 1435000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999760450453501, |
|
"loss": 8.3644, |
|
"step": 1440000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000499975878289366, |
|
"loss": 8.3911, |
|
"step": 1445000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999757109550078, |
|
"loss": 8.3925, |
|
"step": 1450000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000499975543042276, |
|
"loss": 8.3537, |
|
"step": 1455000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999753745511709, |
|
"loss": 8.379, |
|
"step": 1460000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999752054816929, |
|
"loss": 8.3786, |
|
"step": 1465000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999750358338425, |
|
"loss": 8.3861, |
|
"step": 1470000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999748656076198, |
|
"loss": 8.4382, |
|
"step": 1475000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999746948030256, |
|
"loss": 8.4304, |
|
"step": 1480000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999745234200599, |
|
"loss": 8.4402, |
|
"step": 1485000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999743514587234, |
|
"loss": 8.3865, |
|
"step": 1490000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999741789190165, |
|
"loss": 8.4104, |
|
"step": 1495000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999740058009392, |
|
"loss": 8.4022, |
|
"step": 1500000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999738321044923, |
|
"loss": 8.4134, |
|
"step": 1505000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999736578296762, |
|
"loss": 8.4199, |
|
"step": 1510000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999734829764911, |
|
"loss": 8.4059, |
|
"step": 1515000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999733075449375, |
|
"loss": 8.3262, |
|
"step": 1520000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999731315350158, |
|
"loss": 8.2811, |
|
"step": 1525000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999729549467263, |
|
"loss": 8.2573, |
|
"step": 1530000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999727777800696, |
|
"loss": 8.3032, |
|
"step": 1535000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999726000350461, |
|
"loss": 8.3267, |
|
"step": 1540000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000499972421711656, |
|
"loss": 8.3838, |
|
"step": 1545000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999722428098999, |
|
"loss": 8.38, |
|
"step": 1550000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999720633297782, |
|
"loss": 8.3361, |
|
"step": 1555000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999718832712913, |
|
"loss": 8.3276, |
|
"step": 1560000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999717026344394, |
|
"loss": 8.294, |
|
"step": 1565000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999715214192233, |
|
"loss": 8.3313, |
|
"step": 1570000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999713396256432, |
|
"loss": 8.37, |
|
"step": 1575000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999711572536995, |
|
"loss": 8.3763, |
|
"step": 1580000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999709743033928, |
|
"loss": 8.4021, |
|
"step": 1585000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999707907747233, |
|
"loss": 8.3807, |
|
"step": 1590000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999706066676915, |
|
"loss": 8.4022, |
|
"step": 1595000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999704219822979, |
|
"loss": 8.4175, |
|
"step": 1600000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999702367185429, |
|
"loss": 8.3805, |
|
"step": 1605000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999700508764267, |
|
"loss": 8.3644, |
|
"step": 1610000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999698644559501, |
|
"loss": 8.3482, |
|
"step": 1615000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999696774571134, |
|
"loss": 8.3087, |
|
"step": 1620000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000499969489879917, |
|
"loss": 8.2912, |
|
"step": 1625000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999693017243612, |
|
"loss": 8.2988, |
|
"step": 1630000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999691129904467, |
|
"loss": 8.2964, |
|
"step": 1635000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999689236781737, |
|
"loss": 8.3291, |
|
"step": 1640000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999687337875427, |
|
"loss": 8.3241, |
|
"step": 1645000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999685433185544, |
|
"loss": 8.3738, |
|
"step": 1650000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999683522712088, |
|
"loss": 8.3744, |
|
"step": 1655000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999681606455066, |
|
"loss": 8.3795, |
|
"step": 1660000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999679684414483, |
|
"loss": 8.4056, |
|
"step": 1665000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999677756590342, |
|
"loss": 8.3822, |
|
"step": 1670000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999675822982648, |
|
"loss": 8.3657, |
|
"step": 1675000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999673883591406, |
|
"loss": 8.3292, |
|
"step": 1680000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999671938416619, |
|
"loss": 8.3594, |
|
"step": 1685000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999669987458292, |
|
"loss": 8.3728, |
|
"step": 1690000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999668030716431, |
|
"loss": 8.3581, |
|
"step": 1695000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999666068191039, |
|
"loss": 8.3038, |
|
"step": 1700000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999664099882121, |
|
"loss": 8.3271, |
|
"step": 1705000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999662125789682, |
|
"loss": 8.307, |
|
"step": 1710000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999660145913726, |
|
"loss": 8.3064, |
|
"step": 1715000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999658160254258, |
|
"loss": 8.3224, |
|
"step": 1720000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999656168811282, |
|
"loss": 8.3283, |
|
"step": 1725000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999654171584802, |
|
"loss": 8.2952, |
|
"step": 1730000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999652168574825, |
|
"loss": 8.263, |
|
"step": 1735000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999650159781353, |
|
"loss": 8.2434, |
|
"step": 1740000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999648145204393, |
|
"loss": 8.2752, |
|
"step": 1745000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999646124843948, |
|
"loss": 8.2715, |
|
"step": 1750000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999644098700023, |
|
"loss": 8.2639, |
|
"step": 1755000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999642066772622, |
|
"loss": 8.2642, |
|
"step": 1760000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999640029061752, |
|
"loss": 8.2577, |
|
"step": 1765000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999637985567415, |
|
"loss": 8.2835, |
|
"step": 1770000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999635936289618, |
|
"loss": 8.3218, |
|
"step": 1775000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999633881228365, |
|
"loss": 8.3525, |
|
"step": 1780000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999631820383659, |
|
"loss": 8.3198, |
|
"step": 1785000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999629753755507, |
|
"loss": 8.3218, |
|
"step": 1790000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999627681343913, |
|
"loss": 8.2785, |
|
"step": 1795000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999625603148882, |
|
"loss": 8.2772, |
|
"step": 1800000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999623519170419, |
|
"loss": 8.3014, |
|
"step": 1805000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999621429408528, |
|
"loss": 8.3073, |
|
"step": 1810000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999619333863214, |
|
"loss": 8.2848, |
|
"step": 1815000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999617232534483, |
|
"loss": 8.3051, |
|
"step": 1820000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999615125422339, |
|
"loss": 8.2821, |
|
"step": 1825000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999613012526786, |
|
"loss": 8.2864, |
|
"step": 1830000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999610893847829, |
|
"loss": 8.293, |
|
"step": 1835000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999608769385475, |
|
"loss": 8.261, |
|
"step": 1840000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999606639139728, |
|
"loss": 8.2727, |
|
"step": 1845000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999604503110592, |
|
"loss": 8.283, |
|
"step": 1850000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999602361298073, |
|
"loss": 8.2857, |
|
"step": 1855000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999600213702174, |
|
"loss": 8.3006, |
|
"step": 1860000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999598060322902, |
|
"loss": 8.3414, |
|
"step": 1865000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999595901160262, |
|
"loss": 8.3064, |
|
"step": 1870000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999593736214257, |
|
"loss": 8.2756, |
|
"step": 1875000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999591565484895, |
|
"loss": 8.2984, |
|
"step": 1880000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999589388972178, |
|
"loss": 8.3053, |
|
"step": 1885000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999587206676113, |
|
"loss": 8.2835, |
|
"step": 1890000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999585018596705, |
|
"loss": 8.289, |
|
"step": 1895000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999582824733958, |
|
"loss": 8.2592, |
|
"step": 1900000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999580625087878, |
|
"loss": 8.2264, |
|
"step": 1905000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.000499957841965847, |
|
"loss": 8.2443, |
|
"step": 1910000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999576208445738, |
|
"loss": 8.244, |
|
"step": 1915000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999573991449687, |
|
"loss": 8.2067, |
|
"step": 1920000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999571768670325, |
|
"loss": 8.2082, |
|
"step": 1925000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999569540107654, |
|
"loss": 8.2183, |
|
"step": 1930000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999567305761681, |
|
"loss": 8.2743, |
|
"step": 1935000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.000499956506563241, |
|
"loss": 8.2565, |
|
"step": 1940000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999562819719847, |
|
"loss": 8.2867, |
|
"step": 1945000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999560568023997, |
|
"loss": 8.3107, |
|
"step": 1950000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999558310544865, |
|
"loss": 8.3057, |
|
"step": 1955000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999556047282456, |
|
"loss": 8.3432, |
|
"step": 1960000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999553778236776, |
|
"loss": 8.3441, |
|
"step": 1965000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.000499955150340783, |
|
"loss": 8.3092, |
|
"step": 1970000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999549222795622, |
|
"loss": 8.2568, |
|
"step": 1975000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999546936400159, |
|
"loss": 8.2439, |
|
"step": 1980000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999544644221446, |
|
"loss": 8.2598, |
|
"step": 1985000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999542346259488, |
|
"loss": 8.2371, |
|
"step": 1990000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.000499954004251429, |
|
"loss": 8.2255, |
|
"step": 1995000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999537732985857, |
|
"loss": 8.2308, |
|
"step": 2000000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999535417674196, |
|
"loss": 8.2345, |
|
"step": 2005000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.000499953309657931, |
|
"loss": 8.2342, |
|
"step": 2010000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999530769701207, |
|
"loss": 8.2473, |
|
"step": 2015000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999528437039891, |
|
"loss": 8.2992, |
|
"step": 2020000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999526098595367, |
|
"loss": 8.2668, |
|
"step": 2025000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.000499952375436764, |
|
"loss": 8.2865, |
|
"step": 2030000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999521404356718, |
|
"loss": 8.2854, |
|
"step": 2035000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999519048562603, |
|
"loss": 8.2697, |
|
"step": 2040000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999516686985304, |
|
"loss": 8.2916, |
|
"step": 2045000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999514319624823, |
|
"loss": 8.2798, |
|
"step": 2050000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999511946481167, |
|
"loss": 8.299, |
|
"step": 2055000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999509567554343, |
|
"loss": 8.2762, |
|
"step": 2060000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999507182844355, |
|
"loss": 8.247, |
|
"step": 2065000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999504792351209, |
|
"loss": 8.2446, |
|
"step": 2070000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999502396074908, |
|
"loss": 8.2318, |
|
"step": 2075000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999499994015462, |
|
"loss": 8.266, |
|
"step": 2080000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999497586172873, |
|
"loss": 8.2625, |
|
"step": 2085000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999495172547148, |
|
"loss": 8.2462, |
|
"step": 2090000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999492753138293, |
|
"loss": 8.2909, |
|
"step": 2095000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999490327946312, |
|
"loss": 8.2519, |
|
"step": 2100000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999487896971212, |
|
"loss": 8.2138, |
|
"step": 2105000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999485460212998, |
|
"loss": 8.236, |
|
"step": 2110000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999483017671676, |
|
"loss": 8.2644, |
|
"step": 2115000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999480569347252, |
|
"loss": 8.317, |
|
"step": 2120000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999478115239732, |
|
"loss": 8.292, |
|
"step": 2125000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999475655349119, |
|
"loss": 8.2746, |
|
"step": 2130000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999473189675422, |
|
"loss": 8.2593, |
|
"step": 2135000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999470718218645, |
|
"loss": 8.2784, |
|
"step": 2140000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999468240978794, |
|
"loss": 8.2614, |
|
"step": 2145000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999465757955875, |
|
"loss": 8.2859, |
|
"step": 2150000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999463269149892, |
|
"loss": 8.2754, |
|
"step": 2155000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999460774560853, |
|
"loss": 8.2623, |
|
"step": 2160000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999458274188764, |
|
"loss": 8.2901, |
|
"step": 2165000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999455768033628, |
|
"loss": 8.2651, |
|
"step": 2170000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999453256095454, |
|
"loss": 8.2527, |
|
"step": 2175000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999450738374246, |
|
"loss": 8.2246, |
|
"step": 2180000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.000499944821487001, |
|
"loss": 8.222, |
|
"step": 2185000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999445685582752, |
|
"loss": 8.1857, |
|
"step": 2190000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999443150512479, |
|
"loss": 8.1593, |
|
"step": 2195000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999440609659195, |
|
"loss": 8.1482, |
|
"step": 2200000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999438063022906, |
|
"loss": 8.2034, |
|
"step": 2205000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999435510603619, |
|
"loss": 8.2233, |
|
"step": 2210000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999432952401341, |
|
"loss": 8.2332, |
|
"step": 2215000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999430388416074, |
|
"loss": 8.2519, |
|
"step": 2220000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999427818647827, |
|
"loss": 8.2316, |
|
"step": 2225000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999425243096605, |
|
"loss": 8.2477, |
|
"step": 2230000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999422661762416, |
|
"loss": 8.2251, |
|
"step": 2235000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999420074645262, |
|
"loss": 8.2782, |
|
"step": 2240000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999417481745153, |
|
"loss": 8.26, |
|
"step": 2245000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999414883062092, |
|
"loss": 8.2635, |
|
"step": 2250000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999412278596087, |
|
"loss": 8.2708, |
|
"step": 2255000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999409668347142, |
|
"loss": 8.2981, |
|
"step": 2260000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999407052315265, |
|
"loss": 8.2581, |
|
"step": 2265000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999404430500461, |
|
"loss": 8.2532, |
|
"step": 2270000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999401802902737, |
|
"loss": 8.2697, |
|
"step": 2275000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999399169522098, |
|
"loss": 8.2703, |
|
"step": 2280000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999396530358551, |
|
"loss": 8.2604, |
|
"step": 2285000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999393885412101, |
|
"loss": 8.2728, |
|
"step": 2290000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999391234682756, |
|
"loss": 8.2636, |
|
"step": 2295000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.000499938857817052, |
|
"loss": 8.2473, |
|
"step": 2300000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999385915875399, |
|
"loss": 8.2496, |
|
"step": 2305000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999383247797401, |
|
"loss": 8.2689, |
|
"step": 2310000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999380573936532, |
|
"loss": 8.303, |
|
"step": 2315000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999377894292798, |
|
"loss": 8.2291, |
|
"step": 2320000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999375208866203, |
|
"loss": 8.2713, |
|
"step": 2325000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999372517656756, |
|
"loss": 8.3408, |
|
"step": 2330000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999369820664463, |
|
"loss": 8.2794, |
|
"step": 2335000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999367117889328, |
|
"loss": 8.3002, |
|
"step": 2340000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999364409331358, |
|
"loss": 8.2982, |
|
"step": 2345000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999361694990562, |
|
"loss": 8.2878, |
|
"step": 2350000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999358974866943, |
|
"loss": 8.2482, |
|
"step": 2355000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999356248960509, |
|
"loss": 8.2382, |
|
"step": 2360000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999353517271267, |
|
"loss": 8.2243, |
|
"step": 2365000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.000499935077979922, |
|
"loss": 8.1924, |
|
"step": 2370000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999348036544378, |
|
"loss": 8.1962, |
|
"step": 2375000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999345287506745, |
|
"loss": 8.1667, |
|
"step": 2380000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999342532686328, |
|
"loss": 8.1605, |
|
"step": 2385000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999339772083134, |
|
"loss": 8.1692, |
|
"step": 2390000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.000499933700569717, |
|
"loss": 8.1645, |
|
"step": 2395000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.000499933423352844, |
|
"loss": 8.1408, |
|
"step": 2400000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999331455576953, |
|
"loss": 8.134, |
|
"step": 2405000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999328671842712, |
|
"loss": 8.1468, |
|
"step": 2410000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999325882325728, |
|
"loss": 8.113, |
|
"step": 2415000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999323087026004, |
|
"loss": 8.1044, |
|
"step": 2420000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999320285943548, |
|
"loss": 8.0701, |
|
"step": 2425000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999317479078366, |
|
"loss": 8.0466, |
|
"step": 2430000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999314666430465, |
|
"loss": 8.0082, |
|
"step": 2435000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.000499931184799985, |
|
"loss": 8.0217, |
|
"step": 2440000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999309023786529, |
|
"loss": 8.0033, |
|
"step": 2445000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999306193790509, |
|
"loss": 8.0105, |
|
"step": 2450000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999303358011794, |
|
"loss": 7.9985, |
|
"step": 2455000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999300516450393, |
|
"loss": 8.0164, |
|
"step": 2460000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999297669106312, |
|
"loss": 8.0093, |
|
"step": 2465000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999294815979557, |
|
"loss": 8.0111, |
|
"step": 2470000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999291957070135, |
|
"loss": 8.0337, |
|
"step": 2475000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999289092378053, |
|
"loss": 8.0641, |
|
"step": 2480000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999286221903317, |
|
"loss": 7.9939, |
|
"step": 2485000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999283345645934, |
|
"loss": 8.0228, |
|
"step": 2490000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.000499928046360591, |
|
"loss": 8.0458, |
|
"step": 2495000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999277575783253, |
|
"loss": 8.0302, |
|
"step": 2500000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999274682177968, |
|
"loss": 8.0451, |
|
"step": 2505000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999271782790063, |
|
"loss": 8.0334, |
|
"step": 2510000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999268877619545, |
|
"loss": 8.0034, |
|
"step": 2515000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999265966666419, |
|
"loss": 7.9877, |
|
"step": 2520000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999263049930692, |
|
"loss": 8.0222, |
|
"step": 2525000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999260127412374, |
|
"loss": 7.993, |
|
"step": 2530000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999257199111468, |
|
"loss": 7.9746, |
|
"step": 2535000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999254265027982, |
|
"loss": 7.9478, |
|
"step": 2540000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999251325161922, |
|
"loss": 7.9584, |
|
"step": 2545000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999248379513296, |
|
"loss": 7.9407, |
|
"step": 2550000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.000499924542808211, |
|
"loss": 7.8694, |
|
"step": 2555000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999242470868372, |
|
"loss": 7.8591, |
|
"step": 2560000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999239507872088, |
|
"loss": 7.8295, |
|
"step": 2565000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999236539093266, |
|
"loss": 7.7761, |
|
"step": 2570000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999233564531911, |
|
"loss": 7.7571, |
|
"step": 2575000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.000499923058418803, |
|
"loss": 7.7923, |
|
"step": 2580000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999227598061631, |
|
"loss": 7.7985, |
|
"step": 2585000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.000499922460615272, |
|
"loss": 7.7888, |
|
"step": 2590000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999221608461306, |
|
"loss": 7.7891, |
|
"step": 2595000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999218604987393, |
|
"loss": 7.7764, |
|
"step": 2600000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.000499921559573099, |
|
"loss": 7.7866, |
|
"step": 2605000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999212580692103, |
|
"loss": 7.779, |
|
"step": 2610000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999209559870738, |
|
"loss": 7.7859, |
|
"step": 2615000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999206533266905, |
|
"loss": 7.7553, |
|
"step": 2620000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999203500880609, |
|
"loss": 7.723, |
|
"step": 2625000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999200462711857, |
|
"loss": 7.6725, |
|
"step": 2630000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999197418760656, |
|
"loss": 7.7191, |
|
"step": 2635000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999194369027014, |
|
"loss": 7.7714, |
|
"step": 2640000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999191313510937, |
|
"loss": 7.7341, |
|
"step": 2645000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999188252212432, |
|
"loss": 7.7491, |
|
"step": 2650000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999185185131507, |
|
"loss": 7.7664, |
|
"step": 2655000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999182112268169, |
|
"loss": 7.7736, |
|
"step": 2660000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999179033622424, |
|
"loss": 7.7262, |
|
"step": 2665000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999175949194281, |
|
"loss": 7.6883, |
|
"step": 2670000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999172858983745, |
|
"loss": 7.7069, |
|
"step": 2675000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999169762990825, |
|
"loss": 7.6417, |
|
"step": 2680000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999166661215527, |
|
"loss": 7.6366, |
|
"step": 2685000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999163553657858, |
|
"loss": 7.6692, |
|
"step": 2690000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999160440317825, |
|
"loss": 7.6494, |
|
"step": 2695000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999157321195438, |
|
"loss": 7.6101, |
|
"step": 2700000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999154196290701, |
|
"loss": 7.6184, |
|
"step": 2705000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999151065603622, |
|
"loss": 7.6237, |
|
"step": 2710000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999147929134209, |
|
"loss": 7.6552, |
|
"step": 2715000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999144786882469, |
|
"loss": 7.6579, |
|
"step": 2720000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004999141638848409, |
|
"loss": 7.6001, |
|
"step": 2725000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004999138485032035, |
|
"loss": 7.5975, |
|
"step": 2730000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004999135325433358, |
|
"loss": 7.6248, |
|
"step": 2735000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004999132160052382, |
|
"loss": 7.6358, |
|
"step": 2740000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004999128988889115, |
|
"loss": 7.5984, |
|
"step": 2745000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004999125811943565, |
|
"loss": 7.5925, |
|
"step": 2750000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004999122629215739, |
|
"loss": 7.6375, |
|
"step": 2755000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004999119440705644, |
|
"loss": 7.6285, |
|
"step": 2760000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004999116246413288, |
|
"loss": 7.6137, |
|
"step": 2765000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004999113046338678, |
|
"loss": 7.6145, |
|
"step": 2770000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004999109840481822, |
|
"loss": 7.6126, |
|
"step": 2775000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004999106628842726, |
|
"loss": 7.5928, |
|
"step": 2780000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004999103411421399, |
|
"loss": 7.5962, |
|
"step": 2785000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004999100188217848, |
|
"loss": 7.5763, |
|
"step": 2790000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004999096959232082, |
|
"loss": 7.5711, |
|
"step": 2795000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004999093724464104, |
|
"loss": 7.5754, |
|
"step": 2800000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004999090483913926, |
|
"loss": 7.5963, |
|
"step": 2805000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004999087237581553, |
|
"loss": 7.581, |
|
"step": 2810000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004999083985466992, |
|
"loss": 7.5872, |
|
"step": 2815000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004999080727570254, |
|
"loss": 7.5772, |
|
"step": 2820000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004999077463891343, |
|
"loss": 7.5629, |
|
"step": 2825000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004999074194430268, |
|
"loss": 7.5575, |
|
"step": 2830000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004999070919187037, |
|
"loss": 7.5157, |
|
"step": 2835000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004999067638161657, |
|
"loss": 7.5362, |
|
"step": 2840000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004999064351354135, |
|
"loss": 7.5166, |
|
"step": 2845000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004999061058764481, |
|
"loss": 7.5412, |
|
"step": 2850000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004999057760392699, |
|
"loss": 7.5056, |
|
"step": 2855000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004999054456238798, |
|
"loss": 7.5058, |
|
"step": 2860000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004999051146302787, |
|
"loss": 7.4933, |
|
"step": 2865000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004999047830584674, |
|
"loss": 7.4638, |
|
"step": 2870000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004999044509084463, |
|
"loss": 7.4749, |
|
"step": 2875000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004999041181802165, |
|
"loss": 7.4796, |
|
"step": 2880000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004999037848737787, |
|
"loss": 7.4789, |
|
"step": 2885000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004999034509891337, |
|
"loss": 7.4779, |
|
"step": 2890000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004999031165262822, |
|
"loss": 7.4957, |
|
"step": 2895000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.000499902781485225, |
|
"loss": 7.4947, |
|
"step": 2900000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004999024458659628, |
|
"loss": 7.5077, |
|
"step": 2905000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004999021096684966, |
|
"loss": 7.491, |
|
"step": 2910000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.000499901772892827, |
|
"loss": 7.4886, |
|
"step": 2915000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004999014355389547, |
|
"loss": 7.4775, |
|
"step": 2920000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004999010976068807, |
|
"loss": 7.4578, |
|
"step": 2925000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004999007590966056, |
|
"loss": 7.4282, |
|
"step": 2930000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004999004200081303, |
|
"loss": 7.4256, |
|
"step": 2935000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004999000803414556, |
|
"loss": 7.4379, |
|
"step": 2940000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.000499899740096582, |
|
"loss": 7.4527, |
|
"step": 2945000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998993992735107, |
|
"loss": 7.4464, |
|
"step": 2950000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998990578722422, |
|
"loss": 7.4258, |
|
"step": 2955000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998987158927775, |
|
"loss": 7.4279, |
|
"step": 2960000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998983733351172, |
|
"loss": 7.4214, |
|
"step": 2965000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998980301992622, |
|
"loss": 7.3997, |
|
"step": 2970000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998976864852133, |
|
"loss": 7.3827, |
|
"step": 2975000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998973421929711, |
|
"loss": 7.3728, |
|
"step": 2980000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998969973225368, |
|
"loss": 7.3785, |
|
"step": 2985000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998966518739109, |
|
"loss": 7.3772, |
|
"step": 2990000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998963058470941, |
|
"loss": 7.371, |
|
"step": 2995000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998959592420874, |
|
"loss": 7.375, |
|
"step": 3000000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998956120588916, |
|
"loss": 7.3659, |
|
"step": 3005000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998952642975076, |
|
"loss": 7.3832, |
|
"step": 3010000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.000499894915957936, |
|
"loss": 7.3574, |
|
"step": 3015000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998945670401776, |
|
"loss": 7.3203, |
|
"step": 3020000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998942175442332, |
|
"loss": 7.3212, |
|
"step": 3025000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.000499893867470104, |
|
"loss": 7.3108, |
|
"step": 3030000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998935168177901, |
|
"loss": 7.3063, |
|
"step": 3035000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.000499893165587293, |
|
"loss": 7.2776, |
|
"step": 3040000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998928137786131, |
|
"loss": 7.2842, |
|
"step": 3045000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998924613917513, |
|
"loss": 7.2903, |
|
"step": 3050000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998921084267086, |
|
"loss": 7.2383, |
|
"step": 3055000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998917548834856, |
|
"loss": 7.2406, |
|
"step": 3060000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998914007620832, |
|
"loss": 7.2557, |
|
"step": 3065000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998910460625021, |
|
"loss": 7.2607, |
|
"step": 3070000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998906907847433, |
|
"loss": 7.2446, |
|
"step": 3075000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998903349288077, |
|
"loss": 7.2361, |
|
"step": 3080000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998899784946957, |
|
"loss": 7.2172, |
|
"step": 3085000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998896214824086, |
|
"loss": 7.2064, |
|
"step": 3090000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.000499889263891947, |
|
"loss": 7.2123, |
|
"step": 3095000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998889057233117, |
|
"loss": 7.165, |
|
"step": 3100000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998885469765036, |
|
"loss": 7.1333, |
|
"step": 3105000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998881876515234, |
|
"loss": 7.1097, |
|
"step": 3110000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998878277483722, |
|
"loss": 7.1224, |
|
"step": 3115000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998874672670505, |
|
"loss": 7.1403, |
|
"step": 3120000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998871062075595, |
|
"loss": 7.1434, |
|
"step": 3125000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998867445698998, |
|
"loss": 7.1053, |
|
"step": 3130000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998863823540723, |
|
"loss": 7.0769, |
|
"step": 3135000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998860195600777, |
|
"loss": 7.061, |
|
"step": 3140000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998856561879171, |
|
"loss": 7.0489, |
|
"step": 3145000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.000499885292237591, |
|
"loss": 7.0647, |
|
"step": 3150000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998849277091006, |
|
"loss": 7.0588, |
|
"step": 3155000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998845626024465, |
|
"loss": 7.0329, |
|
"step": 3160000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998841969176297, |
|
"loss": 7.0196, |
|
"step": 3165000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.000499883830654651, |
|
"loss": 7.0169, |
|
"step": 3170000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998834638135112, |
|
"loss": 7.0068, |
|
"step": 3175000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.000499883096394211, |
|
"loss": 6.9748, |
|
"step": 3180000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998827283967517, |
|
"loss": 6.9723, |
|
"step": 3185000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998823598211337, |
|
"loss": 6.9527, |
|
"step": 3190000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.000499881990667358, |
|
"loss": 6.9318, |
|
"step": 3195000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998816209354256, |
|
"loss": 6.9324, |
|
"step": 3200000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998812506253371, |
|
"loss": 6.9347, |
|
"step": 3205000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998808797370936, |
|
"loss": 6.9538, |
|
"step": 3210000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998805082706958, |
|
"loss": 6.93, |
|
"step": 3215000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998801362261446, |
|
"loss": 6.9095, |
|
"step": 3220000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998797636034408, |
|
"loss": 6.8771, |
|
"step": 3225000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998793904025855, |
|
"loss": 6.8535, |
|
"step": 3230000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998790166235794, |
|
"loss": 6.8399, |
|
"step": 3235000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998786422664232, |
|
"loss": 6.8154, |
|
"step": 3240000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.000499878267331118, |
|
"loss": 6.8082, |
|
"step": 3245000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998778918176647, |
|
"loss": 6.7983, |
|
"step": 3250000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998775157260639, |
|
"loss": 6.7802, |
|
"step": 3255000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998771390563167, |
|
"loss": 6.7625, |
|
"step": 3260000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998767618084239, |
|
"loss": 6.747, |
|
"step": 3265000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998763839823864, |
|
"loss": 6.7384, |
|
"step": 3270000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.000499876005578205, |
|
"loss": 6.7496, |
|
"step": 3275000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998756265958806, |
|
"loss": 6.7399, |
|
"step": 3280000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998752470354143, |
|
"loss": 6.7096, |
|
"step": 3285000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998748668968066, |
|
"loss": 6.6752, |
|
"step": 3290000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998744861800585, |
|
"loss": 6.6532, |
|
"step": 3295000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998741048851711, |
|
"loss": 6.6338, |
|
"step": 3300000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.000499873723012145, |
|
"loss": 6.5932, |
|
"step": 3305000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998733405609813, |
|
"loss": 6.5927, |
|
"step": 3310000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998729575316808, |
|
"loss": 6.5595, |
|
"step": 3315000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998725739242443, |
|
"loss": 6.5342, |
|
"step": 3320000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998721897386729, |
|
"loss": 6.5184, |
|
"step": 3325000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998718049749673, |
|
"loss": 6.5091, |
|
"step": 3330000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998714196331284, |
|
"loss": 6.4739, |
|
"step": 3335000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998710337131571, |
|
"loss": 6.44, |
|
"step": 3340000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998706472150545, |
|
"loss": 6.431, |
|
"step": 3345000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998702601388211, |
|
"loss": 6.3974, |
|
"step": 3350000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998698724844582, |
|
"loss": 6.3681, |
|
"step": 3355000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998694842519664, |
|
"loss": 6.3462, |
|
"step": 3360000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998690954413468, |
|
"loss": 6.3171, |
|
"step": 3365000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998687060526002, |
|
"loss": 6.3039, |
|
"step": 3370000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998683160857275, |
|
"loss": 6.2849, |
|
"step": 3375000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998679255407295, |
|
"loss": 6.2751, |
|
"step": 3380000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998675344176074, |
|
"loss": 6.2425, |
|
"step": 3385000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998671427163619, |
|
"loss": 6.2323, |
|
"step": 3390000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998667504369939, |
|
"loss": 6.2101, |
|
"step": 3395000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998663575795043, |
|
"loss": 6.206, |
|
"step": 3400000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998659641438941, |
|
"loss": 6.189, |
|
"step": 3405000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998655701301642, |
|
"loss": 6.1647, |
|
"step": 3410000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998651755383154, |
|
"loss": 6.159, |
|
"step": 3415000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998647803683488, |
|
"loss": 6.1456, |
|
"step": 3420000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998643846202649, |
|
"loss": 6.1323, |
|
"step": 3425000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998639882940652, |
|
"loss": 6.1335, |
|
"step": 3430000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998635913897502, |
|
"loss": 6.1191, |
|
"step": 3435000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.000499863193907321, |
|
"loss": 6.0935, |
|
"step": 3440000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998627958467786, |
|
"loss": 6.0872, |
|
"step": 3445000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998623972081235, |
|
"loss": 6.0752, |
|
"step": 3450000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998619979913571, |
|
"loss": 6.0658, |
|
"step": 3455000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998615981964802, |
|
"loss": 6.0556, |
|
"step": 3460000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998611978234935, |
|
"loss": 6.0453, |
|
"step": 3465000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998607968723981, |
|
"loss": 6.0446, |
|
"step": 3470000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.000499860395343195, |
|
"loss": 6.0357, |
|
"step": 3475000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998599932358851, |
|
"loss": 6.044, |
|
"step": 3480000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998595905504692, |
|
"loss": 6.0299, |
|
"step": 3485000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998591872869483, |
|
"loss": 6.0288, |
|
"step": 3490000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998587834453233, |
|
"loss": 6.0161, |
|
"step": 3495000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998583790255952, |
|
"loss": 6.0054, |
|
"step": 3500000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.000499857974027765, |
|
"loss": 5.9942, |
|
"step": 3505000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998575684518334, |
|
"loss": 5.9857, |
|
"step": 3510000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998571622978016, |
|
"loss": 5.9877, |
|
"step": 3515000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998567555656704, |
|
"loss": 5.9862, |
|
"step": 3520000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998563482554407, |
|
"loss": 5.9698, |
|
"step": 3525000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998559403671136, |
|
"loss": 5.9642, |
|
"step": 3530000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998555319006898, |
|
"loss": 5.9608, |
|
"step": 3535000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998551228561707, |
|
"loss": 5.9667, |
|
"step": 3540000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998547132335566, |
|
"loss": 5.9588, |
|
"step": 3545000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998543030328489, |
|
"loss": 5.946, |
|
"step": 3550000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998538922540485, |
|
"loss": 5.9504, |
|
"step": 3555000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998534808971563, |
|
"loss": 5.9391, |
|
"step": 3560000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998530689621733, |
|
"loss": 5.9356, |
|
"step": 3565000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998526564491002, |
|
"loss": 5.9244, |
|
"step": 3570000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998522433579384, |
|
"loss": 5.9272, |
|
"step": 3575000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998518296886885, |
|
"loss": 5.925, |
|
"step": 3580000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998514154413515, |
|
"loss": 5.9168, |
|
"step": 3585000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998510006159284, |
|
"loss": 5.9175, |
|
"step": 3590000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998505852124202, |
|
"loss": 5.9058, |
|
"step": 3595000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.000499850169230828, |
|
"loss": 5.8985, |
|
"step": 3600000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998497526711524, |
|
"loss": 5.9005, |
|
"step": 3605000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998493355333946, |
|
"loss": 5.9032, |
|
"step": 3610000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998489178175557, |
|
"loss": 5.8991, |
|
"step": 3615000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998484995236363, |
|
"loss": 5.8947, |
|
"step": 3620000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998480806516377, |
|
"loss": 5.8731, |
|
"step": 3625000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998476612015606, |
|
"loss": 5.8891, |
|
"step": 3630000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998472411734063, |
|
"loss": 5.8775, |
|
"step": 3635000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998468205671755, |
|
"loss": 5.8829, |
|
"step": 3640000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998463993828692, |
|
"loss": 5.8646, |
|
"step": 3645000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998459776204884, |
|
"loss": 5.8804, |
|
"step": 3650000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998455552800343, |
|
"loss": 5.8714, |
|
"step": 3655000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998451323615075, |
|
"loss": 5.8705, |
|
"step": 3660000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998447088649092, |
|
"loss": 5.866, |
|
"step": 3665000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998442847902404, |
|
"loss": 5.8661, |
|
"step": 3670000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998438601375018, |
|
"loss": 5.8625, |
|
"step": 3675000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998434349066948, |
|
"loss": 5.8624, |
|
"step": 3680000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998430090978202, |
|
"loss": 5.8611, |
|
"step": 3685000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998425827108789, |
|
"loss": 5.8612, |
|
"step": 3690000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998421557458719, |
|
"loss": 5.8462, |
|
"step": 3695000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998417282028005, |
|
"loss": 5.8616, |
|
"step": 3700000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998413000816651, |
|
"loss": 5.8545, |
|
"step": 3705000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998408713824673, |
|
"loss": 5.8612, |
|
"step": 3710000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998404421052077, |
|
"loss": 5.8517, |
|
"step": 3715000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998400122498874, |
|
"loss": 5.855, |
|
"step": 3720000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998395818165075, |
|
"loss": 5.842, |
|
"step": 3725000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998391508050687, |
|
"loss": 5.8499, |
|
"step": 3730000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998387192155724, |
|
"loss": 5.8411, |
|
"step": 3735000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998382870480193, |
|
"loss": 5.8442, |
|
"step": 3740000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998378543024105, |
|
"loss": 5.8376, |
|
"step": 3745000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.000499837420978747, |
|
"loss": 5.8465, |
|
"step": 3750000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998369870770298, |
|
"loss": 5.838, |
|
"step": 3755000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00049983655259726, |
|
"loss": 5.8371, |
|
"step": 3760000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998361175394384, |
|
"loss": 5.8365, |
|
"step": 3765000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998356819035662, |
|
"loss": 5.8392, |
|
"step": 3770000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998352456896442, |
|
"loss": 5.8358, |
|
"step": 3775000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998348088976737, |
|
"loss": 5.8399, |
|
"step": 3780000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998343715276554, |
|
"loss": 5.8297, |
|
"step": 3785000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998339335795905, |
|
"loss": 5.825, |
|
"step": 3790000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00049983349505348, |
|
"loss": 5.8235, |
|
"step": 3795000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998330559493248, |
|
"loss": 5.8283, |
|
"step": 3800000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998326162671262, |
|
"loss": 5.8185, |
|
"step": 3805000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998321760068848, |
|
"loss": 5.8234, |
|
"step": 3810000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998317351686019, |
|
"loss": 5.8218, |
|
"step": 3815000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998312937522785, |
|
"loss": 5.8146, |
|
"step": 3820000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998308517579155, |
|
"loss": 5.8176, |
|
"step": 3825000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998304091855141, |
|
"loss": 5.8182, |
|
"step": 3830000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998299660350751, |
|
"loss": 5.8208, |
|
"step": 3835000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998295223065998, |
|
"loss": 5.8194, |
|
"step": 3840000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998290780000891, |
|
"loss": 5.829, |
|
"step": 3845000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998286331155438, |
|
"loss": 5.8147, |
|
"step": 3850000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998281876529652, |
|
"loss": 5.8143, |
|
"step": 3855000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998277416123543, |
|
"loss": 5.8108, |
|
"step": 3860000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998272949937122, |
|
"loss": 5.8082, |
|
"step": 3865000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998268477970397, |
|
"loss": 5.8181, |
|
"step": 3870000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998264000223379, |
|
"loss": 5.8044, |
|
"step": 3875000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998259516696081, |
|
"loss": 5.8118, |
|
"step": 3880000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000499825502738851, |
|
"loss": 5.8059, |
|
"step": 3885000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998250532300678, |
|
"loss": 5.8096, |
|
"step": 3890000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998246031432595, |
|
"loss": 5.8124, |
|
"step": 3895000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998241524784272, |
|
"loss": 5.8068, |
|
"step": 3900000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998237012355719, |
|
"loss": 5.8133, |
|
"step": 3905000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998232494146947, |
|
"loss": 5.8056, |
|
"step": 3910000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998227970157965, |
|
"loss": 5.8104, |
|
"step": 3915000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998223440388784, |
|
"loss": 5.8119, |
|
"step": 3920000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998218904839416, |
|
"loss": 5.8128, |
|
"step": 3925000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998214363509869, |
|
"loss": 5.8021, |
|
"step": 3930000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998209816400156, |
|
"loss": 5.8091, |
|
"step": 3935000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998205263510286, |
|
"loss": 5.7921, |
|
"step": 3940000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000499820070484027, |
|
"loss": 5.8137, |
|
"step": 3945000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998196140390118, |
|
"loss": 5.8, |
|
"step": 3950000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998191570159842, |
|
"loss": 5.8085, |
|
"step": 3955000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998186994149451, |
|
"loss": 5.8102, |
|
"step": 3960000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998182412358955, |
|
"loss": 5.801, |
|
"step": 3965000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998177824788367, |
|
"loss": 5.7999, |
|
"step": 3970000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998173231437696, |
|
"loss": 5.8047, |
|
"step": 3975000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998168632306954, |
|
"loss": 5.804, |
|
"step": 3980000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000499816402739615, |
|
"loss": 5.8068, |
|
"step": 3985000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998159416705294, |
|
"loss": 5.7973, |
|
"step": 3990000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00049981548002344, |
|
"loss": 5.7941, |
|
"step": 3995000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998150177983476, |
|
"loss": 5.7839, |
|
"step": 4000000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998145549952533, |
|
"loss": 5.8023, |
|
"step": 4005000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998140916141582, |
|
"loss": 5.7993, |
|
"step": 4010000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998136276550634, |
|
"loss": 5.7915, |
|
"step": 4015000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998131631179701, |
|
"loss": 5.8068, |
|
"step": 4020000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000499812698002879, |
|
"loss": 5.7889, |
|
"step": 4025000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998122323097916, |
|
"loss": 5.7873, |
|
"step": 4030000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998117660387086, |
|
"loss": 5.7947, |
|
"step": 4035000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998112991896313, |
|
"loss": 5.7878, |
|
"step": 4040000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998108317625608, |
|
"loss": 5.8034, |
|
"step": 4045000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998103637574981, |
|
"loss": 5.7911, |
|
"step": 4050000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998098951744443, |
|
"loss": 5.7927, |
|
"step": 4055000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998094260134005, |
|
"loss": 5.7973, |
|
"step": 4060000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998089562743678, |
|
"loss": 5.8046, |
|
"step": 4065000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998084859573472, |
|
"loss": 5.7947, |
|
"step": 4070000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998080150623399, |
|
"loss": 5.7934, |
|
"step": 4075000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998075435893467, |
|
"loss": 5.7906, |
|
"step": 4080000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998070715383692, |
|
"loss": 5.7772, |
|
"step": 4085000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998065989094082, |
|
"loss": 5.7962, |
|
"step": 4090000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998061257024647, |
|
"loss": 5.802, |
|
"step": 4095000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00049980565191754, |
|
"loss": 5.7967, |
|
"step": 4100000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998051775546351, |
|
"loss": 5.7827, |
|
"step": 4105000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998047026137511, |
|
"loss": 5.7844, |
|
"step": 4110000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998042270948891, |
|
"loss": 5.7964, |
|
"step": 4115000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998037509980502, |
|
"loss": 5.8, |
|
"step": 4120000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998032743232355, |
|
"loss": 5.7951, |
|
"step": 4125000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000499802797070446, |
|
"loss": 5.8, |
|
"step": 4130000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998023192396831, |
|
"loss": 5.7958, |
|
"step": 4135000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998018408309476, |
|
"loss": 5.8041, |
|
"step": 4140000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998013618442408, |
|
"loss": 5.783, |
|
"step": 4145000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998008822795636, |
|
"loss": 5.7961, |
|
"step": 4150000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998004021369174, |
|
"loss": 5.7924, |
|
"step": 4155000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997999214163031, |
|
"loss": 5.7976, |
|
"step": 4160000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997994401177219, |
|
"loss": 5.798, |
|
"step": 4165000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997989582411748, |
|
"loss": 5.7874, |
|
"step": 4170000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000499798475786663, |
|
"loss": 5.7848, |
|
"step": 4175000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997979927541876, |
|
"loss": 5.787, |
|
"step": 4180000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997975091437497, |
|
"loss": 5.7947, |
|
"step": 4185000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997970249553505, |
|
"loss": 5.7894, |
|
"step": 4190000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997965401889911, |
|
"loss": 5.7835, |
|
"step": 4195000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997960548446725, |
|
"loss": 5.7828, |
|
"step": 4200000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000499795568922396, |
|
"loss": 5.7876, |
|
"step": 4205000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997950824221626, |
|
"loss": 5.7814, |
|
"step": 4210000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997945953439735, |
|
"loss": 5.7835, |
|
"step": 4215000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997941076878297, |
|
"loss": 5.7961, |
|
"step": 4220000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997936194537325, |
|
"loss": 5.7924, |
|
"step": 4225000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997931306416828, |
|
"loss": 5.7789, |
|
"step": 4230000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000499792641251682, |
|
"loss": 5.7848, |
|
"step": 4235000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997921512837311, |
|
"loss": 5.7747, |
|
"step": 4240000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997916607378312, |
|
"loss": 5.7827, |
|
"step": 4245000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997911696139835, |
|
"loss": 5.7856, |
|
"step": 4250000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997906779121892, |
|
"loss": 5.783, |
|
"step": 4255000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997901856324493, |
|
"loss": 5.7864, |
|
"step": 4260000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997896927747649, |
|
"loss": 5.7805, |
|
"step": 4265000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997891993391373, |
|
"loss": 5.7904, |
|
"step": 4270000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997887053255676, |
|
"loss": 5.7943, |
|
"step": 4275000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997882107340569, |
|
"loss": 5.7962, |
|
"step": 4280000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997877155646063, |
|
"loss": 5.7891, |
|
"step": 4285000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997872198172171, |
|
"loss": 5.7867, |
|
"step": 4290000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997867234918902, |
|
"loss": 5.7902, |
|
"step": 4295000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997862265886271, |
|
"loss": 5.7774, |
|
"step": 4300000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997857291074286, |
|
"loss": 5.7809, |
|
"step": 4305000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000499785231048296, |
|
"loss": 5.7785, |
|
"step": 4310000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997847324112306, |
|
"loss": 5.7871, |
|
"step": 4315000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997842331962332, |
|
"loss": 5.7785, |
|
"step": 4320000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997837334033054, |
|
"loss": 5.7766, |
|
"step": 4325000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997832330324479, |
|
"loss": 5.7901, |
|
"step": 4330000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997827320836622, |
|
"loss": 5.7853, |
|
"step": 4335000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997822305569493, |
|
"loss": 5.7874, |
|
"step": 4340000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997817284523104, |
|
"loss": 5.7914, |
|
"step": 4345000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997812257697467, |
|
"loss": 5.7902, |
|
"step": 4350000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997807225092593, |
|
"loss": 5.7816, |
|
"step": 4355000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997802186708493, |
|
"loss": 5.775, |
|
"step": 4360000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000499779714254518, |
|
"loss": 5.7831, |
|
"step": 4365000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997792092602666, |
|
"loss": 5.7777, |
|
"step": 4370000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997787036880961, |
|
"loss": 5.779, |
|
"step": 4375000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997781975380077, |
|
"loss": 5.7796, |
|
"step": 4380000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997776908100028, |
|
"loss": 5.7781, |
|
"step": 4385000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997771835040823, |
|
"loss": 5.7771, |
|
"step": 4390000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997766756202475, |
|
"loss": 5.7827, |
|
"step": 4395000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997761671584995, |
|
"loss": 5.783, |
|
"step": 4400000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997756581188395, |
|
"loss": 5.7835, |
|
"step": 4405000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997751485012688, |
|
"loss": 5.7873, |
|
"step": 4410000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997746383057885, |
|
"loss": 5.7744, |
|
"step": 4415000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997741275323997, |
|
"loss": 5.7752, |
|
"step": 4420000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997736161811037, |
|
"loss": 5.7843, |
|
"step": 4425000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997731042519015, |
|
"loss": 5.7788, |
|
"step": 4430000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997725917447945, |
|
"loss": 5.7817, |
|
"step": 4435000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997720786597838, |
|
"loss": 5.775, |
|
"step": 4440000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997715649968706, |
|
"loss": 5.7807, |
|
"step": 4445000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000499771050756056, |
|
"loss": 5.7893, |
|
"step": 4450000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997705359373412, |
|
"loss": 5.7742, |
|
"step": 4455000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997700205407276, |
|
"loss": 5.7769, |
|
"step": 4460000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997695045662162, |
|
"loss": 5.7797, |
|
"step": 4465000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997689880138083, |
|
"loss": 5.7801, |
|
"step": 4470000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000499768470883505, |
|
"loss": 5.777, |
|
"step": 4475000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997679531753075, |
|
"loss": 5.7898, |
|
"step": 4480000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000499767434889217, |
|
"loss": 5.7771, |
|
"step": 4485000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997669160252348, |
|
"loss": 5.7716, |
|
"step": 4490000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000499766396583362, |
|
"loss": 5.7714, |
|
"step": 4495000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997658765635998, |
|
"loss": 5.7832, |
|
"step": 4500000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997653559659495, |
|
"loss": 5.7738, |
|
"step": 4505000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997648347904122, |
|
"loss": 5.7686, |
|
"step": 4510000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997643130369891, |
|
"loss": 5.7733, |
|
"step": 4515000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997637907056816, |
|
"loss": 5.7771, |
|
"step": 4520000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997632677964906, |
|
"loss": 5.7837, |
|
"step": 4525000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997627443094175, |
|
"loss": 5.7774, |
|
"step": 4530000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997622202444636, |
|
"loss": 5.7821, |
|
"step": 4535000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997616956016298, |
|
"loss": 5.761, |
|
"step": 4540000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997611703809177, |
|
"loss": 5.7795, |
|
"step": 4545000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997606445823283, |
|
"loss": 5.7794, |
|
"step": 4550000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997601182058628, |
|
"loss": 5.7741, |
|
"step": 4555000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997595912515224, |
|
"loss": 5.7792, |
|
"step": 4560000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997590637193084, |
|
"loss": 5.7692, |
|
"step": 4565000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000499758535609222, |
|
"loss": 5.7858, |
|
"step": 4570000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997580069212644, |
|
"loss": 5.7781, |
|
"step": 4575000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000499757477655437, |
|
"loss": 5.7855, |
|
"step": 4580000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997569478117407, |
|
"loss": 5.7668, |
|
"step": 4585000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000499756417390177, |
|
"loss": 5.7681, |
|
"step": 4590000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997558863907469, |
|
"loss": 5.7772, |
|
"step": 4595000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997553548134518, |
|
"loss": 5.7633, |
|
"step": 4600000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997548226582929, |
|
"loss": 5.7828, |
|
"step": 4605000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997542899252714, |
|
"loss": 5.7726, |
|
"step": 4610000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997537566143886, |
|
"loss": 5.7748, |
|
"step": 4615000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997532227256456, |
|
"loss": 5.7676, |
|
"step": 4620000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997526882590437, |
|
"loss": 5.784, |
|
"step": 4625000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997521532145841, |
|
"loss": 5.7698, |
|
"step": 4630000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997516175922682, |
|
"loss": 5.7794, |
|
"step": 4635000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997510813920972, |
|
"loss": 5.7775, |
|
"step": 4640000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997505446140721, |
|
"loss": 5.7744, |
|
"step": 4645000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997500072581943, |
|
"loss": 5.7749, |
|
"step": 4650000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997494693244652, |
|
"loss": 5.7874, |
|
"step": 4655000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997489308128857, |
|
"loss": 5.7749, |
|
"step": 4660000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997483917234574, |
|
"loss": 5.776, |
|
"step": 4665000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997478520561814, |
|
"loss": 5.7645, |
|
"step": 4670000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997473118110589, |
|
"loss": 5.7727, |
|
"step": 4675000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997467709880912, |
|
"loss": 5.7834, |
|
"step": 4680000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997462295872794, |
|
"loss": 5.7788, |
|
"step": 4685000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000499745687608625, |
|
"loss": 5.7777, |
|
"step": 4690000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997451450521292, |
|
"loss": 5.7823, |
|
"step": 4695000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997446019177932, |
|
"loss": 5.7738, |
|
"step": 4700000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997440582056182, |
|
"loss": 5.7861, |
|
"step": 4705000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997435139156055, |
|
"loss": 5.7768, |
|
"step": 4710000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997429690477564, |
|
"loss": 5.7802, |
|
"step": 4715000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997424236020722, |
|
"loss": 5.7791, |
|
"step": 4720000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000499741877578554, |
|
"loss": 5.7771, |
|
"step": 4725000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997413309772032, |
|
"loss": 5.7744, |
|
"step": 4730000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000499740783798021, |
|
"loss": 5.7682, |
|
"step": 4735000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997402360410087, |
|
"loss": 5.771, |
|
"step": 4740000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997396877061676, |
|
"loss": 5.7762, |
|
"step": 4745000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000499739138793499, |
|
"loss": 5.7693, |
|
"step": 4750000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000499738589303004, |
|
"loss": 5.7779, |
|
"step": 4755000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997380392346839, |
|
"loss": 5.7696, |
|
"step": 4760000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997374885885402, |
|
"loss": 5.7817, |
|
"step": 4765000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997369373645738, |
|
"loss": 5.775, |
|
"step": 4770000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997363855627864, |
|
"loss": 5.777, |
|
"step": 4775000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000499735833183179, |
|
"loss": 5.7665, |
|
"step": 4780000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997352802257529, |
|
"loss": 5.7802, |
|
"step": 4785000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997347266905095, |
|
"loss": 5.7794, |
|
"step": 4790000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00049973417257745, |
|
"loss": 5.7793, |
|
"step": 4795000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997336178865756, |
|
"loss": 5.7804, |
|
"step": 4800000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997330626178878, |
|
"loss": 5.7776, |
|
"step": 4805000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997325067713877, |
|
"loss": 5.7697, |
|
"step": 4810000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997319503470765, |
|
"loss": 5.7728, |
|
"step": 4815000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997313933449559, |
|
"loss": 5.7885, |
|
"step": 4820000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997308357650267, |
|
"loss": 5.7713, |
|
"step": 4825000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997302776072905, |
|
"loss": 5.792, |
|
"step": 4830000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997297188717486, |
|
"loss": 5.7741, |
|
"step": 4835000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000499729159558402, |
|
"loss": 5.7726, |
|
"step": 4840000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997285996672522, |
|
"loss": 5.7735, |
|
"step": 4845000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997280391983007, |
|
"loss": 5.7674, |
|
"step": 4850000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997274781515484, |
|
"loss": 5.7837, |
|
"step": 4855000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997269165269968, |
|
"loss": 5.7831, |
|
"step": 4860000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997263543246472, |
|
"loss": 5.7744, |
|
"step": 4865000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997257915445009, |
|
"loss": 5.7695, |
|
"step": 4870000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997252281865592, |
|
"loss": 5.7722, |
|
"step": 4875000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997246642508233, |
|
"loss": 5.7746, |
|
"step": 4880000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997240997372946, |
|
"loss": 5.7707, |
|
"step": 4885000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004997235346459744, |
|
"loss": 5.789, |
|
"step": 4890000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000499722968976864, |
|
"loss": 5.7864, |
|
"step": 4895000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004997224027299647, |
|
"loss": 5.7804, |
|
"step": 4900000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004997218359052779, |
|
"loss": 5.7705, |
|
"step": 4905000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004997212685028048, |
|
"loss": 5.7645, |
|
"step": 4910000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004997207005225467, |
|
"loss": 5.7834, |
|
"step": 4915000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.000499720131964505, |
|
"loss": 5.7885, |
|
"step": 4920000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004997195628286809, |
|
"loss": 5.7779, |
|
"step": 4925000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.000499718993115076, |
|
"loss": 5.7758, |
|
"step": 4930000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004997184228236912, |
|
"loss": 5.7711, |
|
"step": 4935000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004997178519545281, |
|
"loss": 5.7807, |
|
"step": 4940000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004997172805075879, |
|
"loss": 5.7835, |
|
"step": 4945000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004997167084828721, |
|
"loss": 5.7817, |
|
"step": 4950000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004997161358803818, |
|
"loss": 5.7753, |
|
"step": 4955000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004997155627001185, |
|
"loss": 5.7697, |
|
"step": 4960000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004997149889420832, |
|
"loss": 5.7705, |
|
"step": 4965000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004997144146062778, |
|
"loss": 5.7725, |
|
"step": 4970000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004997138396927031, |
|
"loss": 5.7804, |
|
"step": 4975000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004997132642013607, |
|
"loss": 5.7711, |
|
"step": 4980000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004997126881322518, |
|
"loss": 5.7751, |
|
"step": 4985000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004997121114853779, |
|
"loss": 5.7851, |
|
"step": 4990000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004997115342607402, |
|
"loss": 5.7743, |
|
"step": 4995000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00049971095645834, |
|
"loss": 5.7762, |
|
"step": 5000000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004997103780781788, |
|
"loss": 5.7669, |
|
"step": 5005000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004997097991202578, |
|
"loss": 5.7694, |
|
"step": 5010000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004997092195845784, |
|
"loss": 5.7716, |
|
"step": 5015000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004997086394711419, |
|
"loss": 5.7884, |
|
"step": 5020000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004997080587799496, |
|
"loss": 5.7788, |
|
"step": 5025000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.000499707477511003, |
|
"loss": 5.7753, |
|
"step": 5030000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004997068956643034, |
|
"loss": 5.7691, |
|
"step": 5035000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.000499706313239852, |
|
"loss": 5.781, |
|
"step": 5040000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004997057302376502, |
|
"loss": 5.7799, |
|
"step": 5045000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004997051466576995, |
|
"loss": 5.7671, |
|
"step": 5050000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004997045625000012, |
|
"loss": 5.7771, |
|
"step": 5055000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004997039777645565, |
|
"loss": 5.7738, |
|
"step": 5060000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004997033924513669, |
|
"loss": 5.7746, |
|
"step": 5065000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004997028065604337, |
|
"loss": 5.7758, |
|
"step": 5070000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004997022200917582, |
|
"loss": 5.7802, |
|
"step": 5075000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004997016330453418, |
|
"loss": 5.7693, |
|
"step": 5080000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.000499701045421186, |
|
"loss": 5.7806, |
|
"step": 5085000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004997004572192919, |
|
"loss": 5.7844, |
|
"step": 5090000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.000499699868439661, |
|
"loss": 5.7856, |
|
"step": 5095000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004996992790822949, |
|
"loss": 5.7773, |
|
"step": 5100000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004996986891471945, |
|
"loss": 5.7725, |
|
"step": 5105000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004996980986343614, |
|
"loss": 5.779, |
|
"step": 5110000 |
|
} |
|
], |
|
"logging_steps": 5000, |
|
"max_steps": 326562159, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 5000, |
|
"total_flos": 8.278749111273246e+19, |
|
"train_batch_size": 12, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|