{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 29.0,
"eval_steps": 500,
"global_step": 969209,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 7.5e-05,
"loss": 29.5796,
"step": 500
},
{
"epoch": 0.03,
"learning_rate": 0.00015,
"loss": 5.0487,
"step": 1000
},
{
"epoch": 0.04,
"learning_rate": 0.000225,
"loss": 4.5358,
"step": 1500
},
{
"epoch": 0.06,
"learning_rate": 0.0003,
"loss": 3.0616,
"step": 2000
},
{
"epoch": 0.07,
"learning_rate": 0.00029985009444050245,
"loss": 2.5781,
"step": 2500
},
{
"epoch": 0.09,
"learning_rate": 0.00029970018888100493,
"loss": 2.3254,
"step": 3000
},
{
"epoch": 0.1,
"learning_rate": 0.0002995502833215074,
"loss": 2.2124,
"step": 3500
},
{
"epoch": 0.12,
"learning_rate": 0.0002994003777620099,
"loss": 2.1224,
"step": 4000
},
{
"epoch": 0.13,
"learning_rate": 0.00029925047220251237,
"loss": 2.0343,
"step": 4500
},
{
"epoch": 0.15,
"learning_rate": 0.0002991005666430149,
"loss": 2.0035,
"step": 5000
},
{
"epoch": 0.16,
"learning_rate": 0.00029895066108351733,
"loss": 1.9224,
"step": 5500
},
{
"epoch": 0.18,
"learning_rate": 0.00029880075552401986,
"loss": 1.9103,
"step": 6000
},
{
"epoch": 0.19,
"learning_rate": 0.00029865084996452234,
"loss": 1.9074,
"step": 6500
},
{
"epoch": 0.21,
"learning_rate": 0.0002985009444050248,
"loss": 1.8261,
"step": 7000
},
{
"epoch": 0.22,
"learning_rate": 0.0002983510388455273,
"loss": 1.818,
"step": 7500
},
{
"epoch": 0.24,
"learning_rate": 0.0002982011332860298,
"loss": 1.7873,
"step": 8000
},
{
"epoch": 0.25,
"learning_rate": 0.00029805122772653226,
"loss": 1.7779,
"step": 8500
},
{
"epoch": 0.27,
"learning_rate": 0.00029790132216703474,
"loss": 1.7728,
"step": 9000
},
{
"epoch": 0.28,
"learning_rate": 0.0002977514166075372,
"loss": 1.782,
"step": 9500
},
{
"epoch": 0.3,
"learning_rate": 0.0002976015110480397,
"loss": 1.7174,
"step": 10000
},
{
"epoch": 0.31,
"learning_rate": 0.0002974516054885422,
"loss": 1.7356,
"step": 10500
},
{
"epoch": 0.33,
"learning_rate": 0.00029730169992904466,
"loss": 1.7125,
"step": 11000
},
{
"epoch": 0.34,
"learning_rate": 0.0002971517943695472,
"loss": 1.7017,
"step": 11500
},
{
"epoch": 0.36,
"learning_rate": 0.0002970018888100496,
"loss": 1.6844,
"step": 12000
},
{
"epoch": 0.37,
"learning_rate": 0.00029685198325055215,
"loss": 1.6783,
"step": 12500
},
{
"epoch": 0.39,
"learning_rate": 0.00029670207769105463,
"loss": 1.6738,
"step": 13000
},
{
"epoch": 0.4,
"learning_rate": 0.0002965521721315571,
"loss": 1.6553,
"step": 13500
},
{
"epoch": 0.42,
"learning_rate": 0.0002964022665720596,
"loss": 1.6784,
"step": 14000
},
{
"epoch": 0.43,
"learning_rate": 0.00029625236101256207,
"loss": 1.6356,
"step": 14500
},
{
"epoch": 0.45,
"learning_rate": 0.00029610245545306455,
"loss": 1.6308,
"step": 15000
},
{
"epoch": 0.46,
"learning_rate": 0.00029595254989356703,
"loss": 1.6176,
"step": 15500
},
{
"epoch": 0.48,
"learning_rate": 0.0002958026443340695,
"loss": 1.5953,
"step": 16000
},
{
"epoch": 0.49,
"learning_rate": 0.000295652738774572,
"loss": 1.6242,
"step": 16500
},
{
"epoch": 0.51,
"learning_rate": 0.00029550283321507447,
"loss": 1.5879,
"step": 17000
},
{
"epoch": 0.52,
"learning_rate": 0.00029535292765557695,
"loss": 1.626,
"step": 17500
},
{
"epoch": 0.54,
"learning_rate": 0.0002952030220960795,
"loss": 1.5977,
"step": 18000
},
{
"epoch": 0.55,
"learning_rate": 0.0002950531165365819,
"loss": 1.5809,
"step": 18500
},
{
"epoch": 0.57,
"learning_rate": 0.00029490321097708444,
"loss": 1.5621,
"step": 19000
},
{
"epoch": 0.58,
"learning_rate": 0.0002947533054175869,
"loss": 1.5742,
"step": 19500
},
{
"epoch": 0.6,
"learning_rate": 0.00029460339985808935,
"loss": 1.5925,
"step": 20000
},
{
"epoch": 0.61,
"learning_rate": 0.0002944534942985919,
"loss": 1.5554,
"step": 20500
},
{
"epoch": 0.63,
"learning_rate": 0.0002943035887390943,
"loss": 1.5545,
"step": 21000
},
{
"epoch": 0.64,
"learning_rate": 0.00029415368317959684,
"loss": 1.5381,
"step": 21500
},
{
"epoch": 0.66,
"learning_rate": 0.0002940037776200993,
"loss": 1.5622,
"step": 22000
},
{
"epoch": 0.67,
"learning_rate": 0.0002938538720606018,
"loss": 1.5643,
"step": 22500
},
{
"epoch": 0.69,
"learning_rate": 0.0002937039665011043,
"loss": 1.5613,
"step": 23000
},
{
"epoch": 0.7,
"learning_rate": 0.00029355406094160676,
"loss": 1.519,
"step": 23500
},
{
"epoch": 0.72,
"learning_rate": 0.00029340415538210924,
"loss": 1.5043,
"step": 24000
},
{
"epoch": 0.73,
"learning_rate": 0.0002932542498226117,
"loss": 1.5179,
"step": 24500
},
{
"epoch": 0.75,
"learning_rate": 0.0002931043442631142,
"loss": 1.5246,
"step": 25000
},
{
"epoch": 0.76,
"learning_rate": 0.00029295443870361673,
"loss": 1.5201,
"step": 25500
},
{
"epoch": 0.78,
"learning_rate": 0.00029280453314411915,
"loss": 1.5007,
"step": 26000
},
{
"epoch": 0.79,
"learning_rate": 0.00029265462758462163,
"loss": 1.5055,
"step": 26500
},
{
"epoch": 0.81,
"learning_rate": 0.00029250472202512417,
"loss": 1.469,
"step": 27000
},
{
"epoch": 0.82,
"learning_rate": 0.0002923548164656266,
"loss": 1.5371,
"step": 27500
},
{
"epoch": 0.84,
"learning_rate": 0.0002922049109061291,
"loss": 1.5006,
"step": 28000
},
{
"epoch": 0.85,
"learning_rate": 0.0002920550053466316,
"loss": 1.4942,
"step": 28500
},
{
"epoch": 0.87,
"learning_rate": 0.0002919050997871341,
"loss": 1.4926,
"step": 29000
},
{
"epoch": 0.88,
"learning_rate": 0.00029175519422763657,
"loss": 1.4735,
"step": 29500
},
{
"epoch": 0.9,
"learning_rate": 0.00029160528866813905,
"loss": 1.5184,
"step": 30000
},
{
"epoch": 0.91,
"learning_rate": 0.0002914553831086415,
"loss": 1.4969,
"step": 30500
},
{
"epoch": 0.93,
"learning_rate": 0.000291305477549144,
"loss": 1.4843,
"step": 31000
},
{
"epoch": 0.94,
"learning_rate": 0.0002911555719896465,
"loss": 1.4722,
"step": 31500
},
{
"epoch": 0.96,
"learning_rate": 0.00029100566643014896,
"loss": 1.4597,
"step": 32000
},
{
"epoch": 0.97,
"learning_rate": 0.00029085576087065144,
"loss": 1.4646,
"step": 32500
},
{
"epoch": 0.99,
"learning_rate": 0.0002907058553111539,
"loss": 1.5,
"step": 33000
},
{
"epoch": 1.0,
"learning_rate": 0.00029055594975165646,
"loss": 1.4608,
"step": 33500
},
{
"epoch": 1.02,
"learning_rate": 0.0002904060441921589,
"loss": 1.4222,
"step": 34000
},
{
"epoch": 1.03,
"learning_rate": 0.0002902561386326614,
"loss": 1.4288,
"step": 34500
},
{
"epoch": 1.05,
"learning_rate": 0.0002901062330731639,
"loss": 1.4357,
"step": 35000
},
{
"epoch": 1.06,
"learning_rate": 0.0002899563275136664,
"loss": 1.4275,
"step": 35500
},
{
"epoch": 1.08,
"learning_rate": 0.00028980642195416885,
"loss": 1.3738,
"step": 36000
},
{
"epoch": 1.09,
"learning_rate": 0.00028965651639467133,
"loss": 1.434,
"step": 36500
},
{
"epoch": 1.11,
"learning_rate": 0.0002895066108351738,
"loss": 1.4138,
"step": 37000
},
{
"epoch": 1.12,
"learning_rate": 0.0002893567052756763,
"loss": 1.4139,
"step": 37500
},
{
"epoch": 1.14,
"learning_rate": 0.00028920679971617877,
"loss": 1.3977,
"step": 38000
},
{
"epoch": 1.15,
"learning_rate": 0.00028905689415668125,
"loss": 1.4193,
"step": 38500
},
{
"epoch": 1.17,
"learning_rate": 0.00028890698859718373,
"loss": 1.4216,
"step": 39000
},
{
"epoch": 1.18,
"learning_rate": 0.0002887570830376862,
"loss": 1.376,
"step": 39500
},
{
"epoch": 1.2,
"learning_rate": 0.00028860717747818874,
"loss": 1.4231,
"step": 40000
},
{
"epoch": 1.21,
"learning_rate": 0.00028845727191869117,
"loss": 1.3734,
"step": 40500
},
{
"epoch": 1.23,
"learning_rate": 0.0002883073663591937,
"loss": 1.3961,
"step": 41000
},
{
"epoch": 1.24,
"learning_rate": 0.0002881574607996962,
"loss": 1.4046,
"step": 41500
},
{
"epoch": 1.26,
"learning_rate": 0.00028800755524019866,
"loss": 1.4058,
"step": 42000
},
{
"epoch": 1.27,
"learning_rate": 0.00028785764968070114,
"loss": 1.3845,
"step": 42500
},
{
"epoch": 1.29,
"learning_rate": 0.0002877077441212036,
"loss": 1.4175,
"step": 43000
},
{
"epoch": 1.3,
"learning_rate": 0.0002875578385617061,
"loss": 1.388,
"step": 43500
},
{
"epoch": 1.32,
"learning_rate": 0.0002874079330022086,
"loss": 1.3935,
"step": 44000
},
{
"epoch": 1.33,
"learning_rate": 0.00028725802744271106,
"loss": 1.3668,
"step": 44500
},
{
"epoch": 1.35,
"learning_rate": 0.00028710812188321354,
"loss": 1.4078,
"step": 45000
},
{
"epoch": 1.36,
"learning_rate": 0.000286958216323716,
"loss": 1.3631,
"step": 45500
},
{
"epoch": 1.38,
"learning_rate": 0.0002868083107642185,
"loss": 1.405,
"step": 46000
},
{
"epoch": 1.39,
"learning_rate": 0.00028665840520472103,
"loss": 1.3793,
"step": 46500
},
{
"epoch": 1.41,
"learning_rate": 0.00028650849964522346,
"loss": 1.421,
"step": 47000
},
{
"epoch": 1.42,
"learning_rate": 0.000286358594085726,
"loss": 1.4005,
"step": 47500
},
{
"epoch": 1.44,
"learning_rate": 0.00028620868852622847,
"loss": 1.3874,
"step": 48000
},
{
"epoch": 1.45,
"learning_rate": 0.00028605878296673095,
"loss": 1.43,
"step": 48500
},
{
"epoch": 1.47,
"learning_rate": 0.00028590887740723343,
"loss": 1.3763,
"step": 49000
},
{
"epoch": 1.48,
"learning_rate": 0.0002857589718477359,
"loss": 1.3586,
"step": 49500
},
{
"epoch": 1.5,
"learning_rate": 0.0002856090662882384,
"loss": 1.3848,
"step": 50000
},
{
"epoch": 1.51,
"learning_rate": 0.00028545916072874087,
"loss": 1.3846,
"step": 50500
},
{
"epoch": 1.53,
"learning_rate": 0.00028530925516924335,
"loss": 1.3709,
"step": 51000
},
{
"epoch": 1.54,
"learning_rate": 0.00028515934960974583,
"loss": 1.38,
"step": 51500
},
{
"epoch": 1.56,
"learning_rate": 0.0002850094440502483,
"loss": 1.3563,
"step": 52000
},
{
"epoch": 1.57,
"learning_rate": 0.0002848595384907508,
"loss": 1.3732,
"step": 52500
},
{
"epoch": 1.59,
"learning_rate": 0.0002847096329312533,
"loss": 1.3564,
"step": 53000
},
{
"epoch": 1.6,
"learning_rate": 0.00028455972737175575,
"loss": 1.3822,
"step": 53500
},
{
"epoch": 1.62,
"learning_rate": 0.0002844098218122583,
"loss": 1.3864,
"step": 54000
},
{
"epoch": 1.63,
"learning_rate": 0.00028425991625276076,
"loss": 1.345,
"step": 54500
},
{
"epoch": 1.65,
"learning_rate": 0.00028411001069326324,
"loss": 1.3678,
"step": 55000
},
{
"epoch": 1.66,
"learning_rate": 0.0002839601051337657,
"loss": 1.3789,
"step": 55500
},
{
"epoch": 1.68,
"learning_rate": 0.00028381019957426814,
"loss": 1.3545,
"step": 56000
},
{
"epoch": 1.69,
"learning_rate": 0.0002836602940147707,
"loss": 1.3542,
"step": 56500
},
{
"epoch": 1.71,
"learning_rate": 0.00028351038845527316,
"loss": 1.3543,
"step": 57000
},
{
"epoch": 1.72,
"learning_rate": 0.00028336048289577564,
"loss": 1.3892,
"step": 57500
},
{
"epoch": 1.74,
"learning_rate": 0.0002832105773362781,
"loss": 1.3512,
"step": 58000
},
{
"epoch": 1.75,
"learning_rate": 0.0002830606717767806,
"loss": 1.3513,
"step": 58500
},
{
"epoch": 1.77,
"learning_rate": 0.0002829107662172831,
"loss": 1.3483,
"step": 59000
},
{
"epoch": 1.78,
"learning_rate": 0.00028276086065778556,
"loss": 1.3846,
"step": 59500
},
{
"epoch": 1.8,
"learning_rate": 0.00028261095509828803,
"loss": 1.3375,
"step": 60000
},
{
"epoch": 1.81,
"learning_rate": 0.00028246104953879057,
"loss": 1.3498,
"step": 60500
},
{
"epoch": 1.83,
"learning_rate": 0.000282311143979293,
"loss": 1.3392,
"step": 61000
},
{
"epoch": 1.84,
"learning_rate": 0.0002821612384197955,
"loss": 1.3646,
"step": 61500
},
{
"epoch": 1.86,
"learning_rate": 0.000282011332860298,
"loss": 1.3565,
"step": 62000
},
{
"epoch": 1.87,
"learning_rate": 0.00028186142730080043,
"loss": 1.3866,
"step": 62500
},
{
"epoch": 1.89,
"learning_rate": 0.00028171152174130297,
"loss": 1.3409,
"step": 63000
},
{
"epoch": 1.9,
"learning_rate": 0.00028156161618180545,
"loss": 1.3469,
"step": 63500
},
{
"epoch": 1.91,
"learning_rate": 0.0002814117106223079,
"loss": 1.3585,
"step": 64000
},
{
"epoch": 1.93,
"learning_rate": 0.0002812618050628104,
"loss": 1.3729,
"step": 64500
},
{
"epoch": 1.94,
"learning_rate": 0.0002811118995033129,
"loss": 1.3655,
"step": 65000
},
{
"epoch": 1.96,
"learning_rate": 0.00028096199394381536,
"loss": 1.3744,
"step": 65500
},
{
"epoch": 1.97,
"learning_rate": 0.00028081208838431784,
"loss": 1.3685,
"step": 66000
},
{
"epoch": 1.99,
"learning_rate": 0.0002806621828248203,
"loss": 1.3334,
"step": 66500
},
{
"epoch": 2.0,
"learning_rate": 0.00028051227726532286,
"loss": 1.3497,
"step": 67000
},
{
"epoch": 2.02,
"learning_rate": 0.0002803623717058253,
"loss": 1.2964,
"step": 67500
},
{
"epoch": 2.03,
"learning_rate": 0.00028021246614632776,
"loss": 1.2764,
"step": 68000
},
{
"epoch": 2.05,
"learning_rate": 0.0002800625605868303,
"loss": 1.3203,
"step": 68500
},
{
"epoch": 2.06,
"learning_rate": 0.0002799126550273327,
"loss": 1.3164,
"step": 69000
},
{
"epoch": 2.08,
"learning_rate": 0.00027976274946783525,
"loss": 1.3063,
"step": 69500
},
{
"epoch": 2.09,
"learning_rate": 0.00027961284390833773,
"loss": 1.2936,
"step": 70000
},
{
"epoch": 2.11,
"learning_rate": 0.0002794629383488402,
"loss": 1.2928,
"step": 70500
},
{
"epoch": 2.12,
"learning_rate": 0.0002793130327893427,
"loss": 1.2756,
"step": 71000
},
{
"epoch": 2.14,
"learning_rate": 0.0002791631272298452,
"loss": 1.2719,
"step": 71500
},
{
"epoch": 2.15,
"learning_rate": 0.00027901322167034765,
"loss": 1.2868,
"step": 72000
},
{
"epoch": 2.17,
"learning_rate": 0.00027886331611085013,
"loss": 1.3048,
"step": 72500
},
{
"epoch": 2.18,
"learning_rate": 0.0002787134105513526,
"loss": 1.3064,
"step": 73000
},
{
"epoch": 2.2,
"learning_rate": 0.0002785635049918551,
"loss": 1.2907,
"step": 73500
},
{
"epoch": 2.21,
"learning_rate": 0.00027841359943235757,
"loss": 1.2787,
"step": 74000
},
{
"epoch": 2.23,
"learning_rate": 0.00027826369387286005,
"loss": 1.2823,
"step": 74500
},
{
"epoch": 2.24,
"learning_rate": 0.0002781137883133626,
"loss": 1.2931,
"step": 75000
},
{
"epoch": 2.26,
"learning_rate": 0.000277963882753865,
"loss": 1.3014,
"step": 75500
},
{
"epoch": 2.27,
"learning_rate": 0.00027781397719436754,
"loss": 1.2898,
"step": 76000
},
{
"epoch": 2.29,
"learning_rate": 0.00027766407163487,
"loss": 1.3193,
"step": 76500
},
{
"epoch": 2.3,
"learning_rate": 0.0002775141660753725,
"loss": 1.2956,
"step": 77000
},
{
"epoch": 2.32,
"learning_rate": 0.000277364260515875,
"loss": 1.3065,
"step": 77500
},
{
"epoch": 2.33,
"learning_rate": 0.00027721435495637746,
"loss": 1.2659,
"step": 78000
},
{
"epoch": 2.35,
"learning_rate": 0.00027706444939687994,
"loss": 1.2881,
"step": 78500
},
{
"epoch": 2.36,
"learning_rate": 0.0002769145438373824,
"loss": 1.3188,
"step": 79000
},
{
"epoch": 2.38,
"learning_rate": 0.0002767646382778849,
"loss": 1.3035,
"step": 79500
},
{
"epoch": 2.39,
"learning_rate": 0.0002766147327183874,
"loss": 1.3019,
"step": 80000
},
{
"epoch": 2.41,
"learning_rate": 0.00027646482715888986,
"loss": 1.2784,
"step": 80500
},
{
"epoch": 2.42,
"learning_rate": 0.00027631492159939234,
"loss": 1.2879,
"step": 81000
},
{
"epoch": 2.44,
"learning_rate": 0.00027616501603989487,
"loss": 1.2775,
"step": 81500
},
{
"epoch": 2.45,
"learning_rate": 0.0002760151104803973,
"loss": 1.2574,
"step": 82000
},
{
"epoch": 2.47,
"learning_rate": 0.00027586520492089983,
"loss": 1.2695,
"step": 82500
},
{
"epoch": 2.48,
"learning_rate": 0.0002757152993614023,
"loss": 1.2832,
"step": 83000
},
{
"epoch": 2.5,
"learning_rate": 0.0002755653938019048,
"loss": 1.2812,
"step": 83500
},
{
"epoch": 2.51,
"learning_rate": 0.00027541548824240727,
"loss": 1.2741,
"step": 84000
},
{
"epoch": 2.53,
"learning_rate": 0.00027526558268290975,
"loss": 1.269,
"step": 84500
},
{
"epoch": 2.54,
"learning_rate": 0.00027511567712341223,
"loss": 1.2734,
"step": 85000
},
{
"epoch": 2.56,
"learning_rate": 0.0002749657715639147,
"loss": 1.3023,
"step": 85500
},
{
"epoch": 2.57,
"learning_rate": 0.0002748158660044172,
"loss": 1.2889,
"step": 86000
},
{
"epoch": 2.59,
"learning_rate": 0.00027466596044491967,
"loss": 1.2759,
"step": 86500
},
{
"epoch": 2.6,
"learning_rate": 0.00027451605488542215,
"loss": 1.3041,
"step": 87000
},
{
"epoch": 2.62,
"learning_rate": 0.0002743661493259246,
"loss": 1.3103,
"step": 87500
},
{
"epoch": 2.63,
"learning_rate": 0.00027421624376642716,
"loss": 1.2742,
"step": 88000
},
{
"epoch": 2.65,
"learning_rate": 0.0002740663382069296,
"loss": 1.2909,
"step": 88500
},
{
"epoch": 2.66,
"learning_rate": 0.0002739164326474321,
"loss": 1.2887,
"step": 89000
},
{
"epoch": 2.68,
"learning_rate": 0.0002737665270879346,
"loss": 1.2792,
"step": 89500
},
{
"epoch": 2.69,
"learning_rate": 0.0002736166215284371,
"loss": 1.2846,
"step": 90000
},
{
"epoch": 2.71,
"learning_rate": 0.00027346671596893956,
"loss": 1.2781,
"step": 90500
},
{
"epoch": 2.72,
"learning_rate": 0.00027331681040944204,
"loss": 1.2864,
"step": 91000
},
{
"epoch": 2.74,
"learning_rate": 0.0002731669048499445,
"loss": 1.2685,
"step": 91500
},
{
"epoch": 2.75,
"learning_rate": 0.000273016999290447,
"loss": 1.2737,
"step": 92000
},
{
"epoch": 2.77,
"learning_rate": 0.0002728670937309495,
"loss": 1.305,
"step": 92500
},
{
"epoch": 2.78,
"learning_rate": 0.00027271718817145196,
"loss": 1.273,
"step": 93000
},
{
"epoch": 2.8,
"learning_rate": 0.00027256728261195444,
"loss": 1.2815,
"step": 93500
},
{
"epoch": 2.81,
"learning_rate": 0.0002724173770524569,
"loss": 1.2647,
"step": 94000
},
{
"epoch": 2.83,
"learning_rate": 0.0002722674714929594,
"loss": 1.2633,
"step": 94500
},
{
"epoch": 2.84,
"learning_rate": 0.0002721175659334619,
"loss": 1.2868,
"step": 95000
},
{
"epoch": 2.86,
"learning_rate": 0.0002719676603739644,
"loss": 1.3155,
"step": 95500
},
{
"epoch": 2.87,
"learning_rate": 0.00027181775481446683,
"loss": 1.2758,
"step": 96000
},
{
"epoch": 2.89,
"learning_rate": 0.00027166784925496937,
"loss": 1.2902,
"step": 96500
},
{
"epoch": 2.9,
"learning_rate": 0.00027151794369547185,
"loss": 1.2891,
"step": 97000
},
{
"epoch": 2.92,
"learning_rate": 0.00027136803813597427,
"loss": 1.2694,
"step": 97500
},
{
"epoch": 2.93,
"learning_rate": 0.0002712181325764768,
"loss": 1.2873,
"step": 98000
},
{
"epoch": 2.95,
"learning_rate": 0.0002710682270169793,
"loss": 1.2561,
"step": 98500
},
{
"epoch": 2.96,
"learning_rate": 0.00027091832145748176,
"loss": 1.2444,
"step": 99000
},
{
"epoch": 2.98,
"learning_rate": 0.00027076841589798424,
"loss": 1.2628,
"step": 99500
},
{
"epoch": 2.99,
"learning_rate": 0.0002706185103384867,
"loss": 1.2574,
"step": 100000
},
{
"epoch": 3.01,
"learning_rate": 0.0002704686047789892,
"loss": 1.2172,
"step": 100500
},
{
"epoch": 3.02,
"learning_rate": 0.0002703186992194917,
"loss": 1.2282,
"step": 101000
},
{
"epoch": 3.04,
"learning_rate": 0.00027016879365999416,
"loss": 1.2293,
"step": 101500
},
{
"epoch": 3.05,
"learning_rate": 0.0002700188881004967,
"loss": 1.2209,
"step": 102000
},
{
"epoch": 3.07,
"learning_rate": 0.0002698689825409991,
"loss": 1.2102,
"step": 102500
},
{
"epoch": 3.08,
"learning_rate": 0.0002697190769815016,
"loss": 1.2473,
"step": 103000
},
{
"epoch": 3.1,
"learning_rate": 0.00026956917142200413,
"loss": 1.2036,
"step": 103500
},
{
"epoch": 3.11,
"learning_rate": 0.00026941926586250656,
"loss": 1.2114,
"step": 104000
},
{
"epoch": 3.13,
"learning_rate": 0.0002692693603030091,
"loss": 1.2356,
"step": 104500
},
{
"epoch": 3.14,
"learning_rate": 0.0002691194547435116,
"loss": 1.202,
"step": 105000
},
{
"epoch": 3.16,
"learning_rate": 0.00026896954918401405,
"loss": 1.1923,
"step": 105500
},
{
"epoch": 3.17,
"learning_rate": 0.00026881964362451653,
"loss": 1.2245,
"step": 106000
},
{
"epoch": 3.19,
"learning_rate": 0.000268669738065019,
"loss": 1.23,
"step": 106500
},
{
"epoch": 3.2,
"learning_rate": 0.0002685198325055215,
"loss": 1.2165,
"step": 107000
},
{
"epoch": 3.22,
"learning_rate": 0.00026836992694602397,
"loss": 1.2066,
"step": 107500
},
{
"epoch": 3.23,
"learning_rate": 0.00026822002138652645,
"loss": 1.2418,
"step": 108000
},
{
"epoch": 3.25,
"learning_rate": 0.000268070115827029,
"loss": 1.2099,
"step": 108500
},
{
"epoch": 3.26,
"learning_rate": 0.0002679202102675314,
"loss": 1.2304,
"step": 109000
},
{
"epoch": 3.28,
"learning_rate": 0.0002677703047080339,
"loss": 1.2137,
"step": 109500
},
{
"epoch": 3.29,
"learning_rate": 0.0002676203991485364,
"loss": 1.2338,
"step": 110000
},
{
"epoch": 3.31,
"learning_rate": 0.00026747049358903885,
"loss": 1.2007,
"step": 110500
},
{
"epoch": 3.32,
"learning_rate": 0.0002673205880295414,
"loss": 1.2158,
"step": 111000
},
{
"epoch": 3.34,
"learning_rate": 0.00026717068247004386,
"loss": 1.2081,
"step": 111500
},
{
"epoch": 3.35,
"learning_rate": 0.00026702077691054634,
"loss": 1.2243,
"step": 112000
},
{
"epoch": 3.37,
"learning_rate": 0.0002668708713510488,
"loss": 1.2122,
"step": 112500
},
{
"epoch": 3.38,
"learning_rate": 0.0002667209657915513,
"loss": 1.199,
"step": 113000
},
{
"epoch": 3.4,
"learning_rate": 0.0002665710602320538,
"loss": 1.2292,
"step": 113500
},
{
"epoch": 3.41,
"learning_rate": 0.00026642115467255626,
"loss": 1.2217,
"step": 114000
},
{
"epoch": 3.43,
"learning_rate": 0.00026627124911305874,
"loss": 1.2275,
"step": 114500
},
{
"epoch": 3.44,
"learning_rate": 0.0002661213435535612,
"loss": 1.2443,
"step": 115000
},
{
"epoch": 3.46,
"learning_rate": 0.0002659714379940637,
"loss": 1.231,
"step": 115500
},
{
"epoch": 3.47,
"learning_rate": 0.0002658215324345662,
"loss": 1.2468,
"step": 116000
},
{
"epoch": 3.49,
"learning_rate": 0.0002656716268750687,
"loss": 1.2063,
"step": 116500
},
{
"epoch": 3.5,
"learning_rate": 0.00026552172131557114,
"loss": 1.2138,
"step": 117000
},
{
"epoch": 3.52,
"learning_rate": 0.00026537181575607367,
"loss": 1.2046,
"step": 117500
},
{
"epoch": 3.53,
"learning_rate": 0.00026522191019657615,
"loss": 1.206,
"step": 118000
},
{
"epoch": 3.55,
"learning_rate": 0.00026507200463707863,
"loss": 1.2186,
"step": 118500
},
{
"epoch": 3.56,
"learning_rate": 0.0002649220990775811,
"loss": 1.1986,
"step": 119000
},
{
"epoch": 3.58,
"learning_rate": 0.0002647721935180836,
"loss": 1.2548,
"step": 119500
},
{
"epoch": 3.59,
"learning_rate": 0.00026462228795858607,
"loss": 1.2416,
"step": 120000
},
{
"epoch": 3.61,
"learning_rate": 0.00026447238239908855,
"loss": 1.2324,
"step": 120500
},
{
"epoch": 3.62,
"learning_rate": 0.00026432247683959103,
"loss": 1.1915,
"step": 121000
},
{
"epoch": 3.64,
"learning_rate": 0.0002641725712800935,
"loss": 1.2168,
"step": 121500
},
{
"epoch": 3.65,
"learning_rate": 0.000264022665720596,
"loss": 1.23,
"step": 122000
},
{
"epoch": 3.67,
"learning_rate": 0.00026387276016109847,
"loss": 1.1844,
"step": 122500
},
{
"epoch": 3.68,
"learning_rate": 0.000263722854601601,
"loss": 1.2306,
"step": 123000
},
{
"epoch": 3.7,
"learning_rate": 0.0002635729490421034,
"loss": 1.2315,
"step": 123500
},
{
"epoch": 3.71,
"learning_rate": 0.00026342304348260596,
"loss": 1.2321,
"step": 124000
},
{
"epoch": 3.73,
"learning_rate": 0.00026327313792310844,
"loss": 1.1979,
"step": 124500
},
{
"epoch": 3.74,
"learning_rate": 0.0002631232323636109,
"loss": 1.2148,
"step": 125000
},
{
"epoch": 3.76,
"learning_rate": 0.0002629733268041134,
"loss": 1.2141,
"step": 125500
},
{
"epoch": 3.77,
"learning_rate": 0.0002628234212446159,
"loss": 1.2422,
"step": 126000
},
{
"epoch": 3.79,
"learning_rate": 0.00026267351568511836,
"loss": 1.199,
"step": 126500
},
{
"epoch": 3.8,
"learning_rate": 0.00026252361012562084,
"loss": 1.2199,
"step": 127000
},
{
"epoch": 3.81,
"learning_rate": 0.0002623737045661233,
"loss": 1.1985,
"step": 127500
},
{
"epoch": 3.83,
"learning_rate": 0.0002622237990066258,
"loss": 1.1969,
"step": 128000
},
{
"epoch": 3.84,
"learning_rate": 0.0002620738934471283,
"loss": 1.209,
"step": 128500
},
{
"epoch": 3.86,
"learning_rate": 0.00026192398788763075,
"loss": 1.2148,
"step": 129000
},
{
"epoch": 3.87,
"learning_rate": 0.00026177408232813323,
"loss": 1.2142,
"step": 129500
},
{
"epoch": 3.89,
"learning_rate": 0.0002616241767686357,
"loss": 1.1933,
"step": 130000
},
{
"epoch": 3.9,
"learning_rate": 0.00026147427120913825,
"loss": 1.1997,
"step": 130500
},
{
"epoch": 3.92,
"learning_rate": 0.00026132436564964067,
"loss": 1.2063,
"step": 131000
},
{
"epoch": 3.93,
"learning_rate": 0.0002611744600901432,
"loss": 1.1821,
"step": 131500
},
{
"epoch": 3.95,
"learning_rate": 0.0002610245545306457,
"loss": 1.1942,
"step": 132000
},
{
"epoch": 3.96,
"learning_rate": 0.00026087464897114817,
"loss": 1.19,
"step": 132500
},
{
"epoch": 3.98,
"learning_rate": 0.00026072474341165065,
"loss": 1.2073,
"step": 133000
},
{
"epoch": 3.99,
"learning_rate": 0.0002605748378521531,
"loss": 1.2181,
"step": 133500
},
{
"epoch": 4.01,
"learning_rate": 0.0002604249322926556,
"loss": 1.1659,
"step": 134000
},
{
"epoch": 4.02,
"learning_rate": 0.0002602750267331581,
"loss": 1.1496,
"step": 134500
},
{
"epoch": 4.04,
"learning_rate": 0.00026012512117366056,
"loss": 1.1468,
"step": 135000
},
{
"epoch": 4.05,
"learning_rate": 0.00025997521561416304,
"loss": 1.1483,
"step": 135500
},
{
"epoch": 4.07,
"learning_rate": 0.0002598253100546655,
"loss": 1.1771,
"step": 136000
},
{
"epoch": 4.08,
"learning_rate": 0.000259675404495168,
"loss": 1.1612,
"step": 136500
},
{
"epoch": 4.1,
"learning_rate": 0.00025952549893567054,
"loss": 1.1715,
"step": 137000
},
{
"epoch": 4.11,
"learning_rate": 0.00025937559337617296,
"loss": 1.1637,
"step": 137500
},
{
"epoch": 4.13,
"learning_rate": 0.0002592256878166755,
"loss": 1.1611,
"step": 138000
},
{
"epoch": 4.14,
"learning_rate": 0.000259075782257178,
"loss": 1.1684,
"step": 138500
},
{
"epoch": 4.16,
"learning_rate": 0.0002589258766976804,
"loss": 1.1652,
"step": 139000
},
{
"epoch": 4.17,
"learning_rate": 0.00025877597113818293,
"loss": 1.1482,
"step": 139500
},
{
"epoch": 4.19,
"learning_rate": 0.0002586260655786854,
"loss": 1.163,
"step": 140000
},
{
"epoch": 4.2,
"learning_rate": 0.0002584761600191879,
"loss": 1.1449,
"step": 140500
},
{
"epoch": 4.22,
"learning_rate": 0.00025832625445969037,
"loss": 1.1691,
"step": 141000
},
{
"epoch": 4.23,
"learning_rate": 0.00025817634890019285,
"loss": 1.1522,
"step": 141500
},
{
"epoch": 4.25,
"learning_rate": 0.00025802644334069533,
"loss": 1.178,
"step": 142000
},
{
"epoch": 4.26,
"learning_rate": 0.0002578765377811978,
"loss": 1.1656,
"step": 142500
},
{
"epoch": 4.28,
"learning_rate": 0.0002577266322217003,
"loss": 1.1786,
"step": 143000
},
{
"epoch": 4.29,
"learning_rate": 0.0002575767266622028,
"loss": 1.1548,
"step": 143500
},
{
"epoch": 4.31,
"learning_rate": 0.00025742682110270525,
"loss": 1.1459,
"step": 144000
},
{
"epoch": 4.32,
"learning_rate": 0.00025727691554320773,
"loss": 1.1766,
"step": 144500
},
{
"epoch": 4.34,
"learning_rate": 0.00025712700998371026,
"loss": 1.1512,
"step": 145000
},
{
"epoch": 4.35,
"learning_rate": 0.0002569771044242127,
"loss": 1.1965,
"step": 145500
},
{
"epoch": 4.37,
"learning_rate": 0.0002568271988647152,
"loss": 1.1772,
"step": 146000
},
{
"epoch": 4.38,
"learning_rate": 0.0002566772933052177,
"loss": 1.1584,
"step": 146500
},
{
"epoch": 4.4,
"learning_rate": 0.0002565273877457202,
"loss": 1.1792,
"step": 147000
},
{
"epoch": 4.41,
"learning_rate": 0.00025637748218622266,
"loss": 1.142,
"step": 147500
},
{
"epoch": 4.43,
"learning_rate": 0.00025622757662672514,
"loss": 1.1759,
"step": 148000
},
{
"epoch": 4.44,
"learning_rate": 0.0002560776710672276,
"loss": 1.1563,
"step": 148500
},
{
"epoch": 4.46,
"learning_rate": 0.0002559277655077301,
"loss": 1.1672,
"step": 149000
},
{
"epoch": 4.47,
"learning_rate": 0.0002557778599482326,
"loss": 1.1661,
"step": 149500
},
{
"epoch": 4.49,
"learning_rate": 0.0002556279543887351,
"loss": 1.1787,
"step": 150000
},
{
"epoch": 4.5,
"learning_rate": 0.00025547804882923754,
"loss": 1.1436,
"step": 150500
},
{
"epoch": 4.52,
"learning_rate": 0.00025532814326974,
"loss": 1.1812,
"step": 151000
},
{
"epoch": 4.53,
"learning_rate": 0.00025517823771024255,
"loss": 1.1749,
"step": 151500
},
{
"epoch": 4.55,
"learning_rate": 0.000255028332150745,
"loss": 1.1869,
"step": 152000
},
{
"epoch": 4.56,
"learning_rate": 0.0002548784265912475,
"loss": 1.1506,
"step": 152500
},
{
"epoch": 4.58,
"learning_rate": 0.00025472852103175,
"loss": 1.1592,
"step": 153000
},
{
"epoch": 4.59,
"learning_rate": 0.00025457861547225247,
"loss": 1.1639,
"step": 153500
},
{
"epoch": 4.61,
"learning_rate": 0.00025442870991275495,
"loss": 1.1498,
"step": 154000
},
{
"epoch": 4.62,
"learning_rate": 0.00025427880435325743,
"loss": 1.1832,
"step": 154500
},
{
"epoch": 4.64,
"learning_rate": 0.0002541288987937599,
"loss": 1.1682,
"step": 155000
},
{
"epoch": 4.65,
"learning_rate": 0.0002539789932342624,
"loss": 1.1543,
"step": 155500
},
{
"epoch": 4.67,
"learning_rate": 0.00025382908767476487,
"loss": 1.1572,
"step": 156000
},
{
"epoch": 4.68,
"learning_rate": 0.00025367918211526735,
"loss": 1.1744,
"step": 156500
},
{
"epoch": 4.7,
"learning_rate": 0.0002535292765557698,
"loss": 1.1685,
"step": 157000
},
{
"epoch": 4.71,
"learning_rate": 0.0002533793709962723,
"loss": 1.1561,
"step": 157500
},
{
"epoch": 4.73,
"learning_rate": 0.00025322946543677484,
"loss": 1.1457,
"step": 158000
},
{
"epoch": 4.74,
"learning_rate": 0.00025307955987727726,
"loss": 1.1987,
"step": 158500
},
{
"epoch": 4.76,
"learning_rate": 0.0002529296543177798,
"loss": 1.1663,
"step": 159000
},
{
"epoch": 4.77,
"learning_rate": 0.0002527797487582823,
"loss": 1.164,
"step": 159500
},
{
"epoch": 4.79,
"learning_rate": 0.00025262984319878476,
"loss": 1.1853,
"step": 160000
},
{
"epoch": 4.8,
"learning_rate": 0.00025247993763928724,
"loss": 1.1601,
"step": 160500
},
{
"epoch": 4.82,
"learning_rate": 0.0002523300320797897,
"loss": 1.1691,
"step": 161000
},
{
"epoch": 4.83,
"learning_rate": 0.0002521801265202922,
"loss": 1.1645,
"step": 161500
},
{
"epoch": 4.85,
"learning_rate": 0.0002520302209607947,
"loss": 1.139,
"step": 162000
},
{
"epoch": 4.86,
"learning_rate": 0.00025188031540129716,
"loss": 1.162,
"step": 162500
},
{
"epoch": 4.88,
"learning_rate": 0.00025173040984179963,
"loss": 1.1697,
"step": 163000
},
{
"epoch": 4.89,
"learning_rate": 0.0002515805042823021,
"loss": 1.159,
"step": 163500
},
{
"epoch": 4.91,
"learning_rate": 0.0002514305987228046,
"loss": 1.1414,
"step": 164000
},
{
"epoch": 4.92,
"learning_rate": 0.00025128069316330713,
"loss": 1.1484,
"step": 164500
},
{
"epoch": 4.94,
"learning_rate": 0.00025113078760380955,
"loss": 1.1821,
"step": 165000
},
{
"epoch": 4.95,
"learning_rate": 0.0002509808820443121,
"loss": 1.1406,
"step": 165500
},
{
"epoch": 4.97,
"learning_rate": 0.0002508309764848145,
"loss": 1.1538,
"step": 166000
},
{
"epoch": 4.98,
"learning_rate": 0.00025068107092531705,
"loss": 1.1307,
"step": 166500
},
{
"epoch": 5.0,
"learning_rate": 0.0002505311653658195,
"loss": 1.1903,
"step": 167000
},
{
"epoch": 5.01,
"learning_rate": 0.000250381259806322,
"loss": 1.124,
"step": 167500
},
{
"epoch": 5.03,
"learning_rate": 0.0002502313542468245,
"loss": 1.1119,
"step": 168000
},
{
"epoch": 5.04,
"learning_rate": 0.00025008144868732696,
"loss": 1.121,
"step": 168500
},
{
"epoch": 5.06,
"learning_rate": 0.00024993154312782944,
"loss": 1.1289,
"step": 169000
},
{
"epoch": 5.07,
"learning_rate": 0.0002497816375683319,
"loss": 1.0986,
"step": 169500
},
{
"epoch": 5.09,
"learning_rate": 0.0002496317320088344,
"loss": 1.1228,
"step": 170000
},
{
"epoch": 5.1,
"learning_rate": 0.0002494818264493369,
"loss": 1.1112,
"step": 170500
},
{
"epoch": 5.12,
"learning_rate": 0.00024933192088983936,
"loss": 1.1248,
"step": 171000
},
{
"epoch": 5.13,
"learning_rate": 0.00024918201533034184,
"loss": 1.1139,
"step": 171500
},
{
"epoch": 5.15,
"learning_rate": 0.0002490321097708444,
"loss": 1.1427,
"step": 172000
},
{
"epoch": 5.16,
"learning_rate": 0.0002488822042113468,
"loss": 1.1168,
"step": 172500
},
{
"epoch": 5.18,
"learning_rate": 0.00024873229865184933,
"loss": 1.1151,
"step": 173000
},
{
"epoch": 5.19,
"learning_rate": 0.0002485823930923518,
"loss": 1.1027,
"step": 173500
},
{
"epoch": 5.21,
"learning_rate": 0.0002484324875328543,
"loss": 1.1284,
"step": 174000
},
{
"epoch": 5.22,
"learning_rate": 0.00024828258197335677,
"loss": 1.1148,
"step": 174500
},
{
"epoch": 5.24,
"learning_rate": 0.00024813267641385925,
"loss": 1.1379,
"step": 175000
},
{
"epoch": 5.25,
"learning_rate": 0.00024798277085436173,
"loss": 1.1542,
"step": 175500
},
{
"epoch": 5.27,
"learning_rate": 0.0002478328652948642,
"loss": 1.1037,
"step": 176000
},
{
"epoch": 5.28,
"learning_rate": 0.0002476829597353667,
"loss": 1.1284,
"step": 176500
},
{
"epoch": 5.3,
"learning_rate": 0.00024753305417586917,
"loss": 1.1461,
"step": 177000
},
{
"epoch": 5.31,
"learning_rate": 0.00024738314861637165,
"loss": 1.1157,
"step": 177500
},
{
"epoch": 5.33,
"learning_rate": 0.00024723324305687413,
"loss": 1.1337,
"step": 178000
},
{
"epoch": 5.34,
"learning_rate": 0.00024708333749737666,
"loss": 1.1273,
"step": 178500
},
{
"epoch": 5.36,
"learning_rate": 0.0002469334319378791,
"loss": 1.1226,
"step": 179000
},
{
"epoch": 5.37,
"learning_rate": 0.0002467835263783816,
"loss": 1.1079,
"step": 179500
},
{
"epoch": 5.39,
"learning_rate": 0.0002466336208188841,
"loss": 1.1279,
"step": 180000
},
{
"epoch": 5.4,
"learning_rate": 0.00024648371525938653,
"loss": 1.1123,
"step": 180500
},
{
"epoch": 5.42,
"learning_rate": 0.00024633380969988906,
"loss": 1.0985,
"step": 181000
},
{
"epoch": 5.43,
"learning_rate": 0.00024618390414039154,
"loss": 1.1128,
"step": 181500
},
{
"epoch": 5.45,
"learning_rate": 0.000246033998580894,
"loss": 1.1228,
"step": 182000
},
{
"epoch": 5.46,
"learning_rate": 0.0002458840930213965,
"loss": 1.0973,
"step": 182500
},
{
"epoch": 5.48,
"learning_rate": 0.000245734187461899,
"loss": 1.1238,
"step": 183000
},
{
"epoch": 5.49,
"learning_rate": 0.00024558428190240146,
"loss": 1.1275,
"step": 183500
},
{
"epoch": 5.51,
"learning_rate": 0.00024543437634290394,
"loss": 1.1188,
"step": 184000
},
{
"epoch": 5.52,
"learning_rate": 0.0002452844707834064,
"loss": 1.1296,
"step": 184500
},
{
"epoch": 5.54,
"learning_rate": 0.00024513456522390895,
"loss": 1.1172,
"step": 185000
},
{
"epoch": 5.55,
"learning_rate": 0.0002449846596644114,
"loss": 1.1052,
"step": 185500
},
{
"epoch": 5.57,
"learning_rate": 0.0002448347541049139,
"loss": 1.1292,
"step": 186000
},
{
"epoch": 5.58,
"learning_rate": 0.0002446848485454164,
"loss": 1.0994,
"step": 186500
},
{
"epoch": 5.6,
"learning_rate": 0.0002445349429859188,
"loss": 1.1049,
"step": 187000
},
{
"epoch": 5.61,
"learning_rate": 0.00024438503742642135,
"loss": 1.1302,
"step": 187500
},
{
"epoch": 5.63,
"learning_rate": 0.00024423513186692383,
"loss": 1.1094,
"step": 188000
},
{
"epoch": 5.64,
"learning_rate": 0.0002440852263074263,
"loss": 1.1281,
"step": 188500
},
{
"epoch": 5.66,
"learning_rate": 0.0002439353207479288,
"loss": 1.1279,
"step": 189000
},
{
"epoch": 5.67,
"learning_rate": 0.00024378541518843127,
"loss": 1.1285,
"step": 189500
},
{
"epoch": 5.69,
"learning_rate": 0.00024363550962893375,
"loss": 1.1303,
"step": 190000
},
{
"epoch": 5.7,
"learning_rate": 0.00024348560406943625,
"loss": 1.0965,
"step": 190500
},
{
"epoch": 5.71,
"learning_rate": 0.0002433356985099387,
"loss": 1.1304,
"step": 191000
},
{
"epoch": 5.73,
"learning_rate": 0.0002431857929504412,
"loss": 1.1189,
"step": 191500
},
{
"epoch": 5.74,
"learning_rate": 0.0002430358873909437,
"loss": 1.1174,
"step": 192000
},
{
"epoch": 5.76,
"learning_rate": 0.00024288598183144614,
"loss": 1.1054,
"step": 192500
},
{
"epoch": 5.77,
"learning_rate": 0.00024273607627194865,
"loss": 1.1295,
"step": 193000
},
{
"epoch": 5.79,
"learning_rate": 0.00024258617071245113,
"loss": 1.1369,
"step": 193500
},
{
"epoch": 5.8,
"learning_rate": 0.0002424362651529536,
"loss": 1.1273,
"step": 194000
},
{
"epoch": 5.82,
"learning_rate": 0.0002422863595934561,
"loss": 1.1262,
"step": 194500
},
{
"epoch": 5.83,
"learning_rate": 0.0002421364540339586,
"loss": 1.1287,
"step": 195000
},
{
"epoch": 5.85,
"learning_rate": 0.00024198654847446105,
"loss": 1.1213,
"step": 195500
},
{
"epoch": 5.86,
"learning_rate": 0.00024183664291496356,
"loss": 1.1241,
"step": 196000
},
{
"epoch": 5.88,
"learning_rate": 0.00024168673735546604,
"loss": 1.1434,
"step": 196500
},
{
"epoch": 5.89,
"learning_rate": 0.00024153683179596854,
"loss": 1.1403,
"step": 197000
},
{
"epoch": 5.91,
"learning_rate": 0.000241386926236471,
"loss": 1.1259,
"step": 197500
},
{
"epoch": 5.92,
"learning_rate": 0.00024123702067697347,
"loss": 1.1449,
"step": 198000
},
{
"epoch": 5.94,
"learning_rate": 0.00024108711511747598,
"loss": 1.1108,
"step": 198500
},
{
"epoch": 5.95,
"learning_rate": 0.00024093720955797843,
"loss": 1.1183,
"step": 199000
},
{
"epoch": 5.97,
"learning_rate": 0.00024078730399848094,
"loss": 1.1448,
"step": 199500
},
{
"epoch": 5.98,
"learning_rate": 0.00024063739843898342,
"loss": 1.0985,
"step": 200000
},
{
"epoch": 6.0,
"learning_rate": 0.0002404874928794859,
"loss": 1.0981,
"step": 200500
},
{
"epoch": 6.01,
"learning_rate": 0.00024033758731998838,
"loss": 1.0913,
"step": 201000
},
{
"epoch": 6.03,
"learning_rate": 0.00024018768176049089,
"loss": 1.0394,
"step": 201500
},
{
"epoch": 6.04,
"learning_rate": 0.00024003777620099334,
"loss": 1.0508,
"step": 202000
},
{
"epoch": 6.06,
"learning_rate": 0.00023988787064149584,
"loss": 1.0817,
"step": 202500
},
{
"epoch": 6.07,
"learning_rate": 0.00023973796508199832,
"loss": 1.0739,
"step": 203000
},
{
"epoch": 6.09,
"learning_rate": 0.00023958805952250083,
"loss": 1.0685,
"step": 203500
},
{
"epoch": 6.1,
"learning_rate": 0.00023943815396300328,
"loss": 1.0605,
"step": 204000
},
{
"epoch": 6.12,
"learning_rate": 0.00023928824840350576,
"loss": 1.0902,
"step": 204500
},
{
"epoch": 6.13,
"learning_rate": 0.00023913834284400827,
"loss": 1.0964,
"step": 205000
},
{
"epoch": 6.15,
"learning_rate": 0.00023898843728451072,
"loss": 1.0949,
"step": 205500
},
{
"epoch": 6.16,
"learning_rate": 0.00023883853172501323,
"loss": 1.0703,
"step": 206000
},
{
"epoch": 6.18,
"learning_rate": 0.0002386886261655157,
"loss": 1.0941,
"step": 206500
},
{
"epoch": 6.19,
"learning_rate": 0.0002385387206060182,
"loss": 1.0928,
"step": 207000
},
{
"epoch": 6.21,
"learning_rate": 0.00023838881504652067,
"loss": 1.0928,
"step": 207500
},
{
"epoch": 6.22,
"learning_rate": 0.00023823890948702317,
"loss": 1.0852,
"step": 208000
},
{
"epoch": 6.24,
"learning_rate": 0.00023808900392752563,
"loss": 1.078,
"step": 208500
},
{
"epoch": 6.25,
"learning_rate": 0.00023793909836802813,
"loss": 1.0864,
"step": 209000
},
{
"epoch": 6.27,
"learning_rate": 0.0002377891928085306,
"loss": 1.1053,
"step": 209500
},
{
"epoch": 6.28,
"learning_rate": 0.00023763928724903306,
"loss": 1.0701,
"step": 210000
},
{
"epoch": 6.3,
"learning_rate": 0.00023748938168953557,
"loss": 1.0588,
"step": 210500
},
{
"epoch": 6.31,
"learning_rate": 0.00023733947613003805,
"loss": 1.0793,
"step": 211000
},
{
"epoch": 6.33,
"learning_rate": 0.00023718957057054053,
"loss": 1.0722,
"step": 211500
},
{
"epoch": 6.34,
"learning_rate": 0.000237039665011043,
"loss": 1.083,
"step": 212000
},
{
"epoch": 6.36,
"learning_rate": 0.00023688975945154552,
"loss": 1.0791,
"step": 212500
},
{
"epoch": 6.37,
"learning_rate": 0.00023673985389204797,
"loss": 1.0621,
"step": 213000
},
{
"epoch": 6.39,
"learning_rate": 0.00023658994833255048,
"loss": 1.0754,
"step": 213500
},
{
"epoch": 6.4,
"learning_rate": 0.00023644004277305296,
"loss": 1.0701,
"step": 214000
},
{
"epoch": 6.42,
"learning_rate": 0.00023629013721355546,
"loss": 1.1036,
"step": 214500
},
{
"epoch": 6.43,
"learning_rate": 0.00023614023165405791,
"loss": 1.077,
"step": 215000
},
{
"epoch": 6.45,
"learning_rate": 0.00023599032609456042,
"loss": 1.0959,
"step": 215500
},
{
"epoch": 6.46,
"learning_rate": 0.0002358404205350629,
"loss": 1.099,
"step": 216000
},
{
"epoch": 6.48,
"learning_rate": 0.00023569051497556535,
"loss": 1.0847,
"step": 216500
},
{
"epoch": 6.49,
"learning_rate": 0.00023554060941606786,
"loss": 1.0704,
"step": 217000
},
{
"epoch": 6.51,
"learning_rate": 0.00023539070385657034,
"loss": 1.0949,
"step": 217500
},
{
"epoch": 6.52,
"learning_rate": 0.00023524079829707282,
"loss": 1.043,
"step": 218000
},
{
"epoch": 6.54,
"learning_rate": 0.0002350908927375753,
"loss": 1.0851,
"step": 218500
},
{
"epoch": 6.55,
"learning_rate": 0.0002349409871780778,
"loss": 1.0905,
"step": 219000
},
{
"epoch": 6.57,
"learning_rate": 0.00023479108161858026,
"loss": 1.1033,
"step": 219500
},
{
"epoch": 6.58,
"learning_rate": 0.00023464117605908276,
"loss": 1.0843,
"step": 220000
},
{
"epoch": 6.6,
"learning_rate": 0.00023449127049958524,
"loss": 1.0866,
"step": 220500
},
{
"epoch": 6.61,
"learning_rate": 0.00023434136494008775,
"loss": 1.083,
"step": 221000
},
{
"epoch": 6.63,
"learning_rate": 0.0002341914593805902,
"loss": 1.0626,
"step": 221500
},
{
"epoch": 6.64,
"learning_rate": 0.00023404155382109268,
"loss": 1.0883,
"step": 222000
},
{
"epoch": 6.66,
"learning_rate": 0.0002338916482615952,
"loss": 1.0814,
"step": 222500
},
{
"epoch": 6.67,
"learning_rate": 0.00023374174270209764,
"loss": 1.0786,
"step": 223000
},
{
"epoch": 6.69,
"learning_rate": 0.00023359183714260015,
"loss": 1.0918,
"step": 223500
},
{
"epoch": 6.7,
"learning_rate": 0.00023344193158310263,
"loss": 1.1123,
"step": 224000
},
{
"epoch": 6.72,
"learning_rate": 0.0002332920260236051,
"loss": 1.0977,
"step": 224500
},
{
"epoch": 6.73,
"learning_rate": 0.0002331421204641076,
"loss": 1.0824,
"step": 225000
},
{
"epoch": 6.75,
"learning_rate": 0.0002329922149046101,
"loss": 1.0794,
"step": 225500
},
{
"epoch": 6.76,
"learning_rate": 0.00023284230934511255,
"loss": 1.0786,
"step": 226000
},
{
"epoch": 6.78,
"learning_rate": 0.00023269240378561505,
"loss": 1.0803,
"step": 226500
},
{
"epoch": 6.79,
"learning_rate": 0.00023254249822611753,
"loss": 1.0705,
"step": 227000
},
{
"epoch": 6.81,
"learning_rate": 0.00023239259266662004,
"loss": 1.0924,
"step": 227500
},
{
"epoch": 6.82,
"learning_rate": 0.0002322426871071225,
"loss": 1.114,
"step": 228000
},
{
"epoch": 6.84,
"learning_rate": 0.00023209278154762497,
"loss": 1.084,
"step": 228500
},
{
"epoch": 6.85,
"learning_rate": 0.00023194287598812745,
"loss": 1.0792,
"step": 229000
},
{
"epoch": 6.87,
"learning_rate": 0.00023179297042862993,
"loss": 1.0865,
"step": 229500
},
{
"epoch": 6.88,
"learning_rate": 0.00023164306486913244,
"loss": 1.0994,
"step": 230000
},
{
"epoch": 6.9,
"learning_rate": 0.0002314931593096349,
"loss": 1.0989,
"step": 230500
},
{
"epoch": 6.91,
"learning_rate": 0.0002313432537501374,
"loss": 1.0918,
"step": 231000
},
{
"epoch": 6.93,
"learning_rate": 0.00023119334819063987,
"loss": 1.0877,
"step": 231500
},
{
"epoch": 6.94,
"learning_rate": 0.00023104344263114238,
"loss": 1.1128,
"step": 232000
},
{
"epoch": 6.96,
"learning_rate": 0.00023089353707164483,
"loss": 1.1044,
"step": 232500
},
{
"epoch": 6.97,
"learning_rate": 0.00023074363151214734,
"loss": 1.1081,
"step": 233000
},
{
"epoch": 6.99,
"learning_rate": 0.00023059372595264982,
"loss": 1.1013,
"step": 233500
},
{
"epoch": 7.0,
"learning_rate": 0.00023044382039315227,
"loss": 1.0976,
"step": 234000
},
{
"epoch": 7.02,
"learning_rate": 0.00023029391483365478,
"loss": 1.0356,
"step": 234500
},
{
"epoch": 7.03,
"learning_rate": 0.00023014400927415726,
"loss": 1.0288,
"step": 235000
},
{
"epoch": 7.05,
"learning_rate": 0.00022999410371465974,
"loss": 1.0486,
"step": 235500
},
{
"epoch": 7.06,
"learning_rate": 0.00022984419815516222,
"loss": 1.025,
"step": 236000
},
{
"epoch": 7.08,
"learning_rate": 0.00022969429259566472,
"loss": 1.0447,
"step": 236500
},
{
"epoch": 7.09,
"learning_rate": 0.00022954438703616718,
"loss": 1.0225,
"step": 237000
},
{
"epoch": 7.11,
"learning_rate": 0.00022939448147666968,
"loss": 1.0404,
"step": 237500
},
{
"epoch": 7.12,
"learning_rate": 0.00022924457591717216,
"loss": 1.0592,
"step": 238000
},
{
"epoch": 7.14,
"learning_rate": 0.00022909467035767467,
"loss": 1.0669,
"step": 238500
},
{
"epoch": 7.15,
"learning_rate": 0.00022894476479817712,
"loss": 1.0136,
"step": 239000
},
{
"epoch": 7.17,
"learning_rate": 0.0002287948592386796,
"loss": 1.0531,
"step": 239500
},
{
"epoch": 7.18,
"learning_rate": 0.0002286449536791821,
"loss": 1.0587,
"step": 240000
},
{
"epoch": 7.2,
"learning_rate": 0.00022849504811968456,
"loss": 1.0354,
"step": 240500
},
{
"epoch": 7.21,
"learning_rate": 0.00022834514256018707,
"loss": 1.0317,
"step": 241000
},
{
"epoch": 7.23,
"learning_rate": 0.00022819523700068955,
"loss": 1.049,
"step": 241500
},
{
"epoch": 7.24,
"learning_rate": 0.00022804533144119203,
"loss": 1.0428,
"step": 242000
},
{
"epoch": 7.26,
"learning_rate": 0.0002278954258816945,
"loss": 1.0389,
"step": 242500
},
{
"epoch": 7.27,
"learning_rate": 0.000227745520322197,
"loss": 1.0639,
"step": 243000
},
{
"epoch": 7.29,
"learning_rate": 0.00022759561476269947,
"loss": 1.055,
"step": 243500
},
{
"epoch": 7.3,
"learning_rate": 0.00022744570920320197,
"loss": 1.0589,
"step": 244000
},
{
"epoch": 7.32,
"learning_rate": 0.00022729580364370445,
"loss": 1.0759,
"step": 244500
},
{
"epoch": 7.33,
"learning_rate": 0.00022714589808420696,
"loss": 1.0566,
"step": 245000
},
{
"epoch": 7.35,
"learning_rate": 0.0002269959925247094,
"loss": 1.0576,
"step": 245500
},
{
"epoch": 7.36,
"learning_rate": 0.0002268460869652119,
"loss": 1.0446,
"step": 246000
},
{
"epoch": 7.38,
"learning_rate": 0.00022669618140571437,
"loss": 1.0614,
"step": 246500
},
{
"epoch": 7.39,
"learning_rate": 0.00022654627584621685,
"loss": 1.0674,
"step": 247000
},
{
"epoch": 7.41,
"learning_rate": 0.00022639637028671936,
"loss": 1.0605,
"step": 247500
},
{
"epoch": 7.42,
"learning_rate": 0.0002262464647272218,
"loss": 1.0616,
"step": 248000
},
{
"epoch": 7.44,
"learning_rate": 0.00022609655916772432,
"loss": 1.0473,
"step": 248500
},
{
"epoch": 7.45,
"learning_rate": 0.0002259466536082268,
"loss": 1.0488,
"step": 249000
},
{
"epoch": 7.47,
"learning_rate": 0.0002257967480487293,
"loss": 1.0662,
"step": 249500
},
{
"epoch": 7.48,
"learning_rate": 0.00022564684248923175,
"loss": 1.0475,
"step": 250000
},
{
"epoch": 7.5,
"learning_rate": 0.00022549693692973426,
"loss": 1.0448,
"step": 250500
},
{
"epoch": 7.51,
"learning_rate": 0.00022534703137023674,
"loss": 1.0371,
"step": 251000
},
{
"epoch": 7.53,
"learning_rate": 0.0002251971258107392,
"loss": 1.047,
"step": 251500
},
{
"epoch": 7.54,
"learning_rate": 0.0002250472202512417,
"loss": 1.0632,
"step": 252000
},
{
"epoch": 7.56,
"learning_rate": 0.00022489731469174418,
"loss": 1.0787,
"step": 252500
},
{
"epoch": 7.57,
"learning_rate": 0.00022474740913224666,
"loss": 1.0454,
"step": 253000
},
{
"epoch": 7.59,
"learning_rate": 0.00022459750357274914,
"loss": 1.0528,
"step": 253500
},
{
"epoch": 7.6,
"learning_rate": 0.00022444759801325164,
"loss": 1.0629,
"step": 254000
},
{
"epoch": 7.61,
"learning_rate": 0.0002242976924537541,
"loss": 1.0607,
"step": 254500
},
{
"epoch": 7.63,
"learning_rate": 0.0002241477868942566,
"loss": 1.0579,
"step": 255000
},
{
"epoch": 7.64,
"learning_rate": 0.00022399788133475908,
"loss": 1.057,
"step": 255500
},
{
"epoch": 7.66,
"learning_rate": 0.0002238479757752616,
"loss": 1.0746,
"step": 256000
},
{
"epoch": 7.67,
"learning_rate": 0.00022369807021576404,
"loss": 1.0578,
"step": 256500
},
{
"epoch": 7.69,
"learning_rate": 0.00022354816465626655,
"loss": 1.0726,
"step": 257000
},
{
"epoch": 7.7,
"learning_rate": 0.00022339825909676903,
"loss": 1.0731,
"step": 257500
},
{
"epoch": 7.72,
"learning_rate": 0.00022324835353727148,
"loss": 1.0652,
"step": 258000
},
{
"epoch": 7.73,
"learning_rate": 0.000223098447977774,
"loss": 1.0546,
"step": 258500
},
{
"epoch": 7.75,
"learning_rate": 0.00022294854241827647,
"loss": 1.049,
"step": 259000
},
{
"epoch": 7.76,
"learning_rate": 0.00022279863685877895,
"loss": 1.0479,
"step": 259500
},
{
"epoch": 7.78,
"learning_rate": 0.00022264873129928143,
"loss": 1.057,
"step": 260000
},
{
"epoch": 7.79,
"learning_rate": 0.00022249882573978393,
"loss": 1.0626,
"step": 260500
},
{
"epoch": 7.81,
"learning_rate": 0.00022234892018028639,
"loss": 1.0556,
"step": 261000
},
{
"epoch": 7.82,
"learning_rate": 0.0002221990146207889,
"loss": 1.0423,
"step": 261500
},
{
"epoch": 7.84,
"learning_rate": 0.00022204910906129137,
"loss": 1.0678,
"step": 262000
},
{
"epoch": 7.85,
"learning_rate": 0.00022189920350179388,
"loss": 1.0418,
"step": 262500
},
{
"epoch": 7.87,
"learning_rate": 0.00022174929794229633,
"loss": 1.0566,
"step": 263000
},
{
"epoch": 7.88,
"learning_rate": 0.0002215993923827988,
"loss": 1.0467,
"step": 263500
},
{
"epoch": 7.9,
"learning_rate": 0.00022144948682330132,
"loss": 1.0397,
"step": 264000
},
{
"epoch": 7.91,
"learning_rate": 0.00022129958126380377,
"loss": 1.0795,
"step": 264500
},
{
"epoch": 7.93,
"learning_rate": 0.00022114967570430628,
"loss": 1.05,
"step": 265000
},
{
"epoch": 7.94,
"learning_rate": 0.00022099977014480873,
"loss": 1.0511,
"step": 265500
},
{
"epoch": 7.96,
"learning_rate": 0.00022084986458531123,
"loss": 1.0666,
"step": 266000
},
{
"epoch": 7.97,
"learning_rate": 0.00022069995902581371,
"loss": 1.0601,
"step": 266500
},
{
"epoch": 7.99,
"learning_rate": 0.00022055005346631622,
"loss": 1.0815,
"step": 267000
},
{
"epoch": 8.0,
"learning_rate": 0.00022040014790681867,
"loss": 1.0539,
"step": 267500
},
{
"epoch": 8.02,
"learning_rate": 0.00022025024234732118,
"loss": 1.0139,
"step": 268000
},
{
"epoch": 8.03,
"learning_rate": 0.00022010033678782366,
"loss": 1.0292,
"step": 268500
},
{
"epoch": 8.05,
"learning_rate": 0.00021995043122832614,
"loss": 1.0055,
"step": 269000
},
{
"epoch": 8.06,
"learning_rate": 0.00021980052566882862,
"loss": 1.0114,
"step": 269500
},
{
"epoch": 8.08,
"learning_rate": 0.0002196506201093311,
"loss": 1.0318,
"step": 270000
},
{
"epoch": 8.09,
"learning_rate": 0.00021950071454983358,
"loss": 1.0284,
"step": 270500
},
{
"epoch": 8.11,
"learning_rate": 0.00021935080899033606,
"loss": 1.0077,
"step": 271000
},
{
"epoch": 8.12,
"learning_rate": 0.00021920090343083856,
"loss": 1.0129,
"step": 271500
},
{
"epoch": 8.14,
"learning_rate": 0.00021905099787134102,
"loss": 1.0188,
"step": 272000
},
{
"epoch": 8.15,
"learning_rate": 0.00021890109231184352,
"loss": 1.008,
"step": 272500
},
{
"epoch": 8.17,
"learning_rate": 0.000218751186752346,
"loss": 1.0319,
"step": 273000
},
{
"epoch": 8.18,
"learning_rate": 0.0002186012811928485,
"loss": 1.0362,
"step": 273500
},
{
"epoch": 8.2,
"learning_rate": 0.00021845137563335096,
"loss": 1.0303,
"step": 274000
},
{
"epoch": 8.21,
"learning_rate": 0.00021830147007385347,
"loss": 1.0276,
"step": 274500
},
{
"epoch": 8.23,
"learning_rate": 0.00021815156451435595,
"loss": 1.0256,
"step": 275000
},
{
"epoch": 8.24,
"learning_rate": 0.0002180016589548584,
"loss": 1.0215,
"step": 275500
},
{
"epoch": 8.26,
"learning_rate": 0.0002178517533953609,
"loss": 1.012,
"step": 276000
},
{
"epoch": 8.27,
"learning_rate": 0.0002177018478358634,
"loss": 1.0229,
"step": 276500
},
{
"epoch": 8.29,
"learning_rate": 0.00021755194227636587,
"loss": 1.0079,
"step": 277000
},
{
"epoch": 8.3,
"learning_rate": 0.00021740203671686835,
"loss": 1.0275,
"step": 277500
},
{
"epoch": 8.32,
"learning_rate": 0.00021725213115737085,
"loss": 1.0022,
"step": 278000
},
{
"epoch": 8.33,
"learning_rate": 0.0002171022255978733,
"loss": 1.0108,
"step": 278500
},
{
"epoch": 8.35,
"learning_rate": 0.0002169523200383758,
"loss": 1.0248,
"step": 279000
},
{
"epoch": 8.36,
"learning_rate": 0.0002168024144788783,
"loss": 1.025,
"step": 279500
},
{
"epoch": 8.38,
"learning_rate": 0.0002166525089193808,
"loss": 1.0151,
"step": 280000
},
{
"epoch": 8.39,
"learning_rate": 0.00021650260335988325,
"loss": 1.0081,
"step": 280500
},
{
"epoch": 8.41,
"learning_rate": 0.00021635269780038573,
"loss": 1.0151,
"step": 281000
},
{
"epoch": 8.42,
"learning_rate": 0.00021620279224088824,
"loss": 1.0098,
"step": 281500
},
{
"epoch": 8.44,
"learning_rate": 0.0002160528866813907,
"loss": 1.0255,
"step": 282000
},
{
"epoch": 8.45,
"learning_rate": 0.0002159029811218932,
"loss": 1.0216,
"step": 282500
},
{
"epoch": 8.47,
"learning_rate": 0.00021575307556239565,
"loss": 1.0285,
"step": 283000
},
{
"epoch": 8.48,
"learning_rate": 0.00021560317000289815,
"loss": 1.0195,
"step": 283500
},
{
"epoch": 8.5,
"learning_rate": 0.00021545326444340063,
"loss": 1.0269,
"step": 284000
},
{
"epoch": 8.51,
"learning_rate": 0.00021530335888390314,
"loss": 1.0348,
"step": 284500
},
{
"epoch": 8.53,
"learning_rate": 0.0002151534533244056,
"loss": 1.0037,
"step": 285000
},
{
"epoch": 8.54,
"learning_rate": 0.0002150035477649081,
"loss": 1.0407,
"step": 285500
},
{
"epoch": 8.56,
"learning_rate": 0.00021485364220541058,
"loss": 1.0273,
"step": 286000
},
{
"epoch": 8.57,
"learning_rate": 0.00021470373664591306,
"loss": 1.0197,
"step": 286500
},
{
"epoch": 8.59,
"learning_rate": 0.00021455383108641554,
"loss": 1.0156,
"step": 287000
},
{
"epoch": 8.6,
"learning_rate": 0.00021440392552691802,
"loss": 1.0333,
"step": 287500
},
{
"epoch": 8.62,
"learning_rate": 0.0002142540199674205,
"loss": 1.0113,
"step": 288000
},
{
"epoch": 8.63,
"learning_rate": 0.00021410411440792298,
"loss": 1.0421,
"step": 288500
},
{
"epoch": 8.65,
"learning_rate": 0.00021395420884842548,
"loss": 1.0202,
"step": 289000
},
{
"epoch": 8.66,
"learning_rate": 0.00021380430328892794,
"loss": 1.0197,
"step": 289500
},
{
"epoch": 8.68,
"learning_rate": 0.00021365439772943044,
"loss": 1.0391,
"step": 290000
},
{
"epoch": 8.69,
"learning_rate": 0.00021350449216993292,
"loss": 1.0372,
"step": 290500
},
{
"epoch": 8.71,
"learning_rate": 0.00021335458661043543,
"loss": 1.0432,
"step": 291000
},
{
"epoch": 8.72,
"learning_rate": 0.00021320468105093788,
"loss": 1.0229,
"step": 291500
},
{
"epoch": 8.74,
"learning_rate": 0.0002130547754914404,
"loss": 1.0282,
"step": 292000
},
{
"epoch": 8.75,
"learning_rate": 0.00021290486993194287,
"loss": 1.0188,
"step": 292500
},
{
"epoch": 8.77,
"learning_rate": 0.00021275496437244532,
"loss": 1.0605,
"step": 293000
},
{
"epoch": 8.78,
"learning_rate": 0.00021260505881294783,
"loss": 1.0407,
"step": 293500
},
{
"epoch": 8.8,
"learning_rate": 0.0002124551532534503,
"loss": 1.0132,
"step": 294000
},
{
"epoch": 8.81,
"learning_rate": 0.00021230524769395279,
"loss": 1.0232,
"step": 294500
},
{
"epoch": 8.83,
"learning_rate": 0.00021215534213445527,
"loss": 1.0321,
"step": 295000
},
{
"epoch": 8.84,
"learning_rate": 0.00021200543657495777,
"loss": 1.0252,
"step": 295500
},
{
"epoch": 8.86,
"learning_rate": 0.00021185553101546022,
"loss": 1.0205,
"step": 296000
},
{
"epoch": 8.87,
"learning_rate": 0.00021170562545596273,
"loss": 1.0223,
"step": 296500
},
{
"epoch": 8.89,
"learning_rate": 0.0002115557198964652,
"loss": 1.0235,
"step": 297000
},
{
"epoch": 8.9,
"learning_rate": 0.00021140581433696772,
"loss": 1.0271,
"step": 297500
},
{
"epoch": 8.92,
"learning_rate": 0.00021125590877747017,
"loss": 1.0192,
"step": 298000
},
{
"epoch": 8.93,
"learning_rate": 0.00021110600321797268,
"loss": 1.0331,
"step": 298500
},
{
"epoch": 8.95,
"learning_rate": 0.00021095609765847516,
"loss": 1.0256,
"step": 299000
},
{
"epoch": 8.96,
"learning_rate": 0.0002108061920989776,
"loss": 1.035,
"step": 299500
},
{
"epoch": 8.98,
"learning_rate": 0.00021065628653948011,
"loss": 1.0241,
"step": 300000
},
{
"epoch": 8.99,
"learning_rate": 0.00021050638097998257,
"loss": 1.0195,
"step": 300500
},
{
"epoch": 9.01,
"learning_rate": 0.00021035647542048507,
"loss": 1.0039,
"step": 301000
},
{
"epoch": 9.02,
"learning_rate": 0.00021020656986098755,
"loss": 1.0019,
"step": 301500
},
{
"epoch": 9.04,
"learning_rate": 0.00021005666430149006,
"loss": 0.9851,
"step": 302000
},
{
"epoch": 9.05,
"learning_rate": 0.0002099067587419925,
"loss": 0.9638,
"step": 302500
},
{
"epoch": 9.07,
"learning_rate": 0.00020975685318249502,
"loss": 0.996,
"step": 303000
},
{
"epoch": 9.08,
"learning_rate": 0.0002096069476229975,
"loss": 0.9858,
"step": 303500
},
{
"epoch": 9.1,
"learning_rate": 0.00020945704206349998,
"loss": 0.9689,
"step": 304000
},
{
"epoch": 9.11,
"learning_rate": 0.00020930713650400246,
"loss": 0.9925,
"step": 304500
},
{
"epoch": 9.13,
"learning_rate": 0.00020915723094450494,
"loss": 0.9896,
"step": 305000
},
{
"epoch": 9.14,
"learning_rate": 0.00020900732538500742,
"loss": 0.9741,
"step": 305500
},
{
"epoch": 9.16,
"learning_rate": 0.0002088574198255099,
"loss": 0.9792,
"step": 306000
},
{
"epoch": 9.17,
"learning_rate": 0.0002087075142660124,
"loss": 0.9736,
"step": 306500
},
{
"epoch": 9.19,
"learning_rate": 0.00020855760870651486,
"loss": 0.9702,
"step": 307000
},
{
"epoch": 9.2,
"learning_rate": 0.00020840770314701736,
"loss": 0.9688,
"step": 307500
},
{
"epoch": 9.22,
"learning_rate": 0.00020825779758751984,
"loss": 0.9854,
"step": 308000
},
{
"epoch": 9.23,
"learning_rate": 0.00020810789202802235,
"loss": 0.9834,
"step": 308500
},
{
"epoch": 9.25,
"learning_rate": 0.0002079579864685248,
"loss": 0.9926,
"step": 309000
},
{
"epoch": 9.26,
"learning_rate": 0.0002078080809090273,
"loss": 1.0002,
"step": 309500
},
{
"epoch": 9.28,
"learning_rate": 0.0002076581753495298,
"loss": 1.0092,
"step": 310000
},
{
"epoch": 9.29,
"learning_rate": 0.00020750826979003227,
"loss": 0.9872,
"step": 310500
},
{
"epoch": 9.31,
"learning_rate": 0.00020735836423053475,
"loss": 0.9807,
"step": 311000
},
{
"epoch": 9.32,
"learning_rate": 0.00020720845867103723,
"loss": 0.9805,
"step": 311500
},
{
"epoch": 9.34,
"learning_rate": 0.0002070585531115397,
"loss": 0.9743,
"step": 312000
},
{
"epoch": 9.35,
"learning_rate": 0.00020690864755204218,
"loss": 1.0109,
"step": 312500
},
{
"epoch": 9.37,
"learning_rate": 0.0002067587419925447,
"loss": 1.0164,
"step": 313000
},
{
"epoch": 9.38,
"learning_rate": 0.00020660883643304714,
"loss": 0.9788,
"step": 313500
},
{
"epoch": 9.4,
"learning_rate": 0.00020645893087354965,
"loss": 0.9881,
"step": 314000
},
{
"epoch": 9.41,
"learning_rate": 0.00020630902531405213,
"loss": 0.9781,
"step": 314500
},
{
"epoch": 9.43,
"learning_rate": 0.00020615911975455464,
"loss": 0.9985,
"step": 315000
},
{
"epoch": 9.44,
"learning_rate": 0.0002060092141950571,
"loss": 0.9937,
"step": 315500
},
{
"epoch": 9.46,
"learning_rate": 0.0002058593086355596,
"loss": 0.9835,
"step": 316000
},
{
"epoch": 9.47,
"learning_rate": 0.00020570940307606208,
"loss": 0.9791,
"step": 316500
},
{
"epoch": 9.49,
"learning_rate": 0.00020555949751656453,
"loss": 0.9924,
"step": 317000
},
{
"epoch": 9.5,
"learning_rate": 0.00020540959195706703,
"loss": 0.9722,
"step": 317500
},
{
"epoch": 9.51,
"learning_rate": 0.0002052596863975695,
"loss": 1.0106,
"step": 318000
},
{
"epoch": 9.53,
"learning_rate": 0.000205109780838072,
"loss": 1.0008,
"step": 318500
},
{
"epoch": 9.54,
"learning_rate": 0.00020495987527857447,
"loss": 0.9777,
"step": 319000
},
{
"epoch": 9.56,
"learning_rate": 0.00020480996971907698,
"loss": 1.0173,
"step": 319500
},
{
"epoch": 9.57,
"learning_rate": 0.00020466006415957943,
"loss": 0.9985,
"step": 320000
},
{
"epoch": 9.59,
"learning_rate": 0.00020451015860008194,
"loss": 1.0091,
"step": 320500
},
{
"epoch": 9.6,
"learning_rate": 0.00020436025304058442,
"loss": 1.002,
"step": 321000
},
{
"epoch": 9.62,
"learning_rate": 0.0002042103474810869,
"loss": 1.0042,
"step": 321500
},
{
"epoch": 9.63,
"learning_rate": 0.00020406044192158938,
"loss": 0.9925,
"step": 322000
},
{
"epoch": 9.65,
"learning_rate": 0.00020391053636209188,
"loss": 0.9938,
"step": 322500
},
{
"epoch": 9.66,
"learning_rate": 0.00020376063080259434,
"loss": 0.9866,
"step": 323000
},
{
"epoch": 9.68,
"learning_rate": 0.00020361072524309682,
"loss": 0.9834,
"step": 323500
},
{
"epoch": 9.69,
"learning_rate": 0.00020346081968359932,
"loss": 0.994,
"step": 324000
},
{
"epoch": 9.71,
"learning_rate": 0.00020331091412410178,
"loss": 0.9763,
"step": 324500
},
{
"epoch": 9.72,
"learning_rate": 0.00020316100856460428,
"loss": 0.9965,
"step": 325000
},
{
"epoch": 9.74,
"learning_rate": 0.00020301110300510676,
"loss": 0.9956,
"step": 325500
},
{
"epoch": 9.75,
"learning_rate": 0.00020286119744560927,
"loss": 1.0259,
"step": 326000
},
{
"epoch": 9.77,
"learning_rate": 0.00020271129188611172,
"loss": 1.0093,
"step": 326500
},
{
"epoch": 9.78,
"learning_rate": 0.00020256138632661423,
"loss": 1.0012,
"step": 327000
},
{
"epoch": 9.8,
"learning_rate": 0.0002024114807671167,
"loss": 0.9822,
"step": 327500
},
{
"epoch": 9.81,
"learning_rate": 0.0002022615752076192,
"loss": 1.0148,
"step": 328000
},
{
"epoch": 9.83,
"learning_rate": 0.00020211166964812167,
"loss": 1.0118,
"step": 328500
},
{
"epoch": 9.84,
"learning_rate": 0.00020196176408862415,
"loss": 1.0119,
"step": 329000
},
{
"epoch": 9.86,
"learning_rate": 0.00020181185852912663,
"loss": 0.9932,
"step": 329500
},
{
"epoch": 9.87,
"learning_rate": 0.0002016619529696291,
"loss": 0.9958,
"step": 330000
},
{
"epoch": 9.89,
"learning_rate": 0.0002015120474101316,
"loss": 0.987,
"step": 330500
},
{
"epoch": 9.9,
"learning_rate": 0.00020136214185063406,
"loss": 0.9916,
"step": 331000
},
{
"epoch": 9.92,
"learning_rate": 0.00020121223629113657,
"loss": 0.9936,
"step": 331500
},
{
"epoch": 9.93,
"learning_rate": 0.00020106233073163905,
"loss": 0.9892,
"step": 332000
},
{
"epoch": 9.95,
"learning_rate": 0.00020091242517214156,
"loss": 0.9884,
"step": 332500
},
{
"epoch": 9.96,
"learning_rate": 0.000200762519612644,
"loss": 0.9871,
"step": 333000
},
{
"epoch": 9.98,
"learning_rate": 0.00020061261405314652,
"loss": 0.9911,
"step": 333500
},
{
"epoch": 9.99,
"learning_rate": 0.000200462708493649,
"loss": 1.005,
"step": 334000
},
{
"epoch": 10.01,
"learning_rate": 0.00020031280293415145,
"loss": 0.9681,
"step": 334500
},
{
"epoch": 10.02,
"learning_rate": 0.00020016289737465395,
"loss": 0.941,
"step": 335000
},
{
"epoch": 10.04,
"learning_rate": 0.00020001299181515643,
"loss": 0.9515,
"step": 335500
},
{
"epoch": 10.05,
"learning_rate": 0.0001998630862556589,
"loss": 0.9457,
"step": 336000
},
{
"epoch": 10.07,
"learning_rate": 0.0001997131806961614,
"loss": 0.9595,
"step": 336500
},
{
"epoch": 10.08,
"learning_rate": 0.0001995632751366639,
"loss": 0.9628,
"step": 337000
},
{
"epoch": 10.1,
"learning_rate": 0.00019941336957716635,
"loss": 0.9668,
"step": 337500
},
{
"epoch": 10.11,
"learning_rate": 0.00019926346401766886,
"loss": 0.9714,
"step": 338000
},
{
"epoch": 10.13,
"learning_rate": 0.00019911355845817134,
"loss": 0.9626,
"step": 338500
},
{
"epoch": 10.14,
"learning_rate": 0.00019896365289867382,
"loss": 0.9666,
"step": 339000
},
{
"epoch": 10.16,
"learning_rate": 0.0001988137473391763,
"loss": 0.9846,
"step": 339500
},
{
"epoch": 10.17,
"learning_rate": 0.0001986638417796788,
"loss": 0.9748,
"step": 340000
},
{
"epoch": 10.19,
"learning_rate": 0.00019851393622018126,
"loss": 0.9658,
"step": 340500
},
{
"epoch": 10.2,
"learning_rate": 0.00019836403066068374,
"loss": 0.9439,
"step": 341000
},
{
"epoch": 10.22,
"learning_rate": 0.00019821412510118624,
"loss": 0.9577,
"step": 341500
},
{
"epoch": 10.23,
"learning_rate": 0.0001980642195416887,
"loss": 0.9623,
"step": 342000
},
{
"epoch": 10.25,
"learning_rate": 0.0001979143139821912,
"loss": 0.9688,
"step": 342500
},
{
"epoch": 10.26,
"learning_rate": 0.00019776440842269368,
"loss": 0.9634,
"step": 343000
},
{
"epoch": 10.28,
"learning_rate": 0.0001976145028631962,
"loss": 0.9597,
"step": 343500
},
{
"epoch": 10.29,
"learning_rate": 0.00019746459730369864,
"loss": 0.9426,
"step": 344000
},
{
"epoch": 10.31,
"learning_rate": 0.00019731469174420115,
"loss": 0.9573,
"step": 344500
},
{
"epoch": 10.32,
"learning_rate": 0.00019716478618470363,
"loss": 0.9767,
"step": 345000
},
{
"epoch": 10.34,
"learning_rate": 0.0001970148806252061,
"loss": 0.9874,
"step": 345500
},
{
"epoch": 10.35,
"learning_rate": 0.00019686497506570859,
"loss": 0.9517,
"step": 346000
},
{
"epoch": 10.37,
"learning_rate": 0.00019671506950621107,
"loss": 0.9413,
"step": 346500
},
{
"epoch": 10.38,
"learning_rate": 0.00019656516394671354,
"loss": 0.9585,
"step": 347000
},
{
"epoch": 10.4,
"learning_rate": 0.00019641525838721602,
"loss": 0.9686,
"step": 347500
},
{
"epoch": 10.41,
"learning_rate": 0.00019626535282771853,
"loss": 0.9761,
"step": 348000
},
{
"epoch": 10.43,
"learning_rate": 0.00019611544726822098,
"loss": 0.9653,
"step": 348500
},
{
"epoch": 10.44,
"learning_rate": 0.0001959655417087235,
"loss": 0.9879,
"step": 349000
},
{
"epoch": 10.46,
"learning_rate": 0.00019581563614922597,
"loss": 0.9566,
"step": 349500
},
{
"epoch": 10.47,
"learning_rate": 0.00019566573058972848,
"loss": 0.9423,
"step": 350000
},
{
"epoch": 10.49,
"learning_rate": 0.00019551582503023093,
"loss": 0.961,
"step": 350500
},
{
"epoch": 10.5,
"learning_rate": 0.00019536591947073344,
"loss": 0.9736,
"step": 351000
},
{
"epoch": 10.52,
"learning_rate": 0.00019521601391123591,
"loss": 0.9619,
"step": 351500
},
{
"epoch": 10.53,
"learning_rate": 0.0001950661083517384,
"loss": 0.9978,
"step": 352000
},
{
"epoch": 10.55,
"learning_rate": 0.00019491620279224087,
"loss": 0.9906,
"step": 352500
},
{
"epoch": 10.56,
"learning_rate": 0.00019476629723274335,
"loss": 0.9671,
"step": 353000
},
{
"epoch": 10.58,
"learning_rate": 0.00019461639167324583,
"loss": 0.9574,
"step": 353500
},
{
"epoch": 10.59,
"learning_rate": 0.0001944664861137483,
"loss": 0.9554,
"step": 354000
},
{
"epoch": 10.61,
"learning_rate": 0.00019431658055425082,
"loss": 0.9729,
"step": 354500
},
{
"epoch": 10.62,
"learning_rate": 0.00019416667499475327,
"loss": 0.9697,
"step": 355000
},
{
"epoch": 10.64,
"learning_rate": 0.00019401676943525578,
"loss": 0.9532,
"step": 355500
},
{
"epoch": 10.65,
"learning_rate": 0.00019386686387575826,
"loss": 0.9498,
"step": 356000
},
{
"epoch": 10.67,
"learning_rate": 0.00019371695831626074,
"loss": 0.9542,
"step": 356500
},
{
"epoch": 10.68,
"learning_rate": 0.00019356705275676322,
"loss": 0.9728,
"step": 357000
},
{
"epoch": 10.7,
"learning_rate": 0.00019341714719726572,
"loss": 0.9584,
"step": 357500
},
{
"epoch": 10.71,
"learning_rate": 0.00019326724163776818,
"loss": 0.9891,
"step": 358000
},
{
"epoch": 10.73,
"learning_rate": 0.00019311733607827066,
"loss": 0.9618,
"step": 358500
},
{
"epoch": 10.74,
"learning_rate": 0.00019296743051877316,
"loss": 0.9722,
"step": 359000
},
{
"epoch": 10.76,
"learning_rate": 0.00019281752495927561,
"loss": 0.9755,
"step": 359500
},
{
"epoch": 10.77,
"learning_rate": 0.00019266761939977812,
"loss": 0.9829,
"step": 360000
},
{
"epoch": 10.79,
"learning_rate": 0.0001925177138402806,
"loss": 0.9789,
"step": 360500
},
{
"epoch": 10.8,
"learning_rate": 0.0001923678082807831,
"loss": 0.9987,
"step": 361000
},
{
"epoch": 10.82,
"learning_rate": 0.00019221790272128556,
"loss": 0.9822,
"step": 361500
},
{
"epoch": 10.83,
"learning_rate": 0.00019206799716178807,
"loss": 0.9569,
"step": 362000
},
{
"epoch": 10.85,
"learning_rate": 0.00019191809160229055,
"loss": 0.9708,
"step": 362500
},
{
"epoch": 10.86,
"learning_rate": 0.00019176818604279303,
"loss": 0.9615,
"step": 363000
},
{
"epoch": 10.88,
"learning_rate": 0.0001916182804832955,
"loss": 0.9736,
"step": 363500
},
{
"epoch": 10.89,
"learning_rate": 0.000191468374923798,
"loss": 0.9682,
"step": 364000
},
{
"epoch": 10.91,
"learning_rate": 0.00019131846936430046,
"loss": 0.9811,
"step": 364500
},
{
"epoch": 10.92,
"learning_rate": 0.00019116856380480294,
"loss": 0.9694,
"step": 365000
},
{
"epoch": 10.94,
"learning_rate": 0.00019101865824530545,
"loss": 0.9779,
"step": 365500
},
{
"epoch": 10.95,
"learning_rate": 0.0001908687526858079,
"loss": 0.9342,
"step": 366000
},
{
"epoch": 10.97,
"learning_rate": 0.0001907188471263104,
"loss": 0.9632,
"step": 366500
},
{
"epoch": 10.98,
"learning_rate": 0.0001905689415668129,
"loss": 0.9736,
"step": 367000
},
{
"epoch": 11.0,
"learning_rate": 0.0001904190360073154,
"loss": 0.985,
"step": 367500
},
{
"epoch": 11.01,
"learning_rate": 0.00019026913044781785,
"loss": 0.9425,
"step": 368000
},
{
"epoch": 11.03,
"learning_rate": 0.00019011922488832036,
"loss": 0.9354,
"step": 368500
},
{
"epoch": 11.04,
"learning_rate": 0.00018996931932882283,
"loss": 0.9302,
"step": 369000
},
{
"epoch": 11.06,
"learning_rate": 0.00018981941376932531,
"loss": 0.9397,
"step": 369500
},
{
"epoch": 11.07,
"learning_rate": 0.0001896695082098278,
"loss": 0.9397,
"step": 370000
},
{
"epoch": 11.09,
"learning_rate": 0.00018951960265033027,
"loss": 0.9249,
"step": 370500
},
{
"epoch": 11.1,
"learning_rate": 0.00018936969709083275,
"loss": 0.9421,
"step": 371000
},
{
"epoch": 11.12,
"learning_rate": 0.00018921979153133523,
"loss": 0.9346,
"step": 371500
},
{
"epoch": 11.13,
"learning_rate": 0.00018906988597183774,
"loss": 0.9198,
"step": 372000
},
{
"epoch": 11.15,
"learning_rate": 0.0001889199804123402,
"loss": 0.9489,
"step": 372500
},
{
"epoch": 11.16,
"learning_rate": 0.0001887700748528427,
"loss": 0.9155,
"step": 373000
},
{
"epoch": 11.18,
"learning_rate": 0.00018862016929334518,
"loss": 0.9275,
"step": 373500
},
{
"epoch": 11.19,
"learning_rate": 0.00018847026373384766,
"loss": 0.9397,
"step": 374000
},
{
"epoch": 11.21,
"learning_rate": 0.00018832035817435014,
"loss": 0.9401,
"step": 374500
},
{
"epoch": 11.22,
"learning_rate": 0.00018817045261485264,
"loss": 0.9301,
"step": 375000
},
{
"epoch": 11.24,
"learning_rate": 0.0001880205470553551,
"loss": 0.9641,
"step": 375500
},
{
"epoch": 11.25,
"learning_rate": 0.00018787064149585758,
"loss": 0.9419,
"step": 376000
},
{
"epoch": 11.27,
"learning_rate": 0.00018772073593636008,
"loss": 0.9402,
"step": 376500
},
{
"epoch": 11.28,
"learning_rate": 0.00018757083037686253,
"loss": 0.9323,
"step": 377000
},
{
"epoch": 11.3,
"learning_rate": 0.00018742092481736504,
"loss": 0.9392,
"step": 377500
},
{
"epoch": 11.31,
"learning_rate": 0.00018727101925786752,
"loss": 0.9338,
"step": 378000
},
{
"epoch": 11.33,
"learning_rate": 0.00018712111369837003,
"loss": 0.9176,
"step": 378500
},
{
"epoch": 11.34,
"learning_rate": 0.00018697120813887248,
"loss": 0.9402,
"step": 379000
},
{
"epoch": 11.36,
"learning_rate": 0.00018682130257937499,
"loss": 0.9319,
"step": 379500
},
{
"epoch": 11.37,
"learning_rate": 0.00018667139701987747,
"loss": 0.9298,
"step": 380000
},
{
"epoch": 11.39,
"learning_rate": 0.00018652149146037995,
"loss": 0.9318,
"step": 380500
},
{
"epoch": 11.4,
"learning_rate": 0.00018637158590088243,
"loss": 0.9349,
"step": 381000
},
{
"epoch": 11.41,
"learning_rate": 0.00018622168034138493,
"loss": 0.9365,
"step": 381500
},
{
"epoch": 11.43,
"learning_rate": 0.00018607177478188738,
"loss": 0.9404,
"step": 382000
},
{
"epoch": 11.44,
"learning_rate": 0.00018592186922238986,
"loss": 0.9246,
"step": 382500
},
{
"epoch": 11.46,
"learning_rate": 0.00018577196366289237,
"loss": 0.948,
"step": 383000
},
{
"epoch": 11.47,
"learning_rate": 0.00018562205810339482,
"loss": 0.9426,
"step": 383500
},
{
"epoch": 11.49,
"learning_rate": 0.00018547215254389733,
"loss": 0.9429,
"step": 384000
},
{
"epoch": 11.5,
"learning_rate": 0.0001853222469843998,
"loss": 0.9294,
"step": 384500
},
{
"epoch": 11.52,
"learning_rate": 0.00018517234142490232,
"loss": 0.9402,
"step": 385000
},
{
"epoch": 11.53,
"learning_rate": 0.00018502243586540477,
"loss": 0.9434,
"step": 385500
},
{
"epoch": 11.55,
"learning_rate": 0.00018487253030590727,
"loss": 0.9512,
"step": 386000
},
{
"epoch": 11.56,
"learning_rate": 0.00018472262474640975,
"loss": 0.9245,
"step": 386500
},
{
"epoch": 11.58,
"learning_rate": 0.00018457271918691223,
"loss": 0.9354,
"step": 387000
},
{
"epoch": 11.59,
"learning_rate": 0.0001844228136274147,
"loss": 0.9534,
"step": 387500
},
{
"epoch": 11.61,
"learning_rate": 0.0001842729080679172,
"loss": 0.9501,
"step": 388000
},
{
"epoch": 11.62,
"learning_rate": 0.00018412300250841967,
"loss": 0.9676,
"step": 388500
},
{
"epoch": 11.64,
"learning_rate": 0.00018397309694892215,
"loss": 0.9375,
"step": 389000
},
{
"epoch": 11.65,
"learning_rate": 0.00018382319138942466,
"loss": 0.9536,
"step": 389500
},
{
"epoch": 11.67,
"learning_rate": 0.0001836732858299271,
"loss": 0.9287,
"step": 390000
},
{
"epoch": 11.68,
"learning_rate": 0.00018352338027042962,
"loss": 0.9583,
"step": 390500
},
{
"epoch": 11.7,
"learning_rate": 0.0001833734747109321,
"loss": 0.9223,
"step": 391000
},
{
"epoch": 11.71,
"learning_rate": 0.0001832235691514346,
"loss": 0.9448,
"step": 391500
},
{
"epoch": 11.73,
"learning_rate": 0.00018307366359193706,
"loss": 0.928,
"step": 392000
},
{
"epoch": 11.74,
"learning_rate": 0.00018292375803243956,
"loss": 0.9339,
"step": 392500
},
{
"epoch": 11.76,
"learning_rate": 0.00018277385247294202,
"loss": 0.9231,
"step": 393000
},
{
"epoch": 11.77,
"learning_rate": 0.00018262394691344452,
"loss": 0.9589,
"step": 393500
},
{
"epoch": 11.79,
"learning_rate": 0.000182474041353947,
"loss": 0.9459,
"step": 394000
},
{
"epoch": 11.8,
"learning_rate": 0.00018232413579444945,
"loss": 0.9415,
"step": 394500
},
{
"epoch": 11.82,
"learning_rate": 0.00018217423023495196,
"loss": 0.9438,
"step": 395000
},
{
"epoch": 11.83,
"learning_rate": 0.00018202432467545444,
"loss": 0.943,
"step": 395500
},
{
"epoch": 11.85,
"learning_rate": 0.00018187441911595695,
"loss": 0.9526,
"step": 396000
},
{
"epoch": 11.86,
"learning_rate": 0.0001817245135564594,
"loss": 0.9404,
"step": 396500
},
{
"epoch": 11.88,
"learning_rate": 0.0001815746079969619,
"loss": 0.9394,
"step": 397000
},
{
"epoch": 11.89,
"learning_rate": 0.00018142470243746439,
"loss": 0.9484,
"step": 397500
},
{
"epoch": 11.91,
"learning_rate": 0.00018127479687796687,
"loss": 0.9396,
"step": 398000
},
{
"epoch": 11.92,
"learning_rate": 0.00018112489131846934,
"loss": 0.9565,
"step": 398500
},
{
"epoch": 11.94,
"learning_rate": 0.00018097498575897185,
"loss": 0.9344,
"step": 399000
},
{
"epoch": 11.95,
"learning_rate": 0.0001808250801994743,
"loss": 0.946,
"step": 399500
},
{
"epoch": 11.97,
"learning_rate": 0.00018067517463997678,
"loss": 0.9487,
"step": 400000
},
{
"epoch": 11.98,
"learning_rate": 0.0001805252690804793,
"loss": 0.9571,
"step": 400500
},
{
"epoch": 12.0,
"learning_rate": 0.00018037536352098174,
"loss": 0.931,
"step": 401000
},
{
"epoch": 12.01,
"learning_rate": 0.00018022545796148425,
"loss": 0.9103,
"step": 401500
},
{
"epoch": 12.03,
"learning_rate": 0.00018007555240198673,
"loss": 0.9013,
"step": 402000
},
{
"epoch": 12.04,
"learning_rate": 0.00017992564684248924,
"loss": 0.8962,
"step": 402500
},
{
"epoch": 12.06,
"learning_rate": 0.0001797757412829917,
"loss": 0.9058,
"step": 403000
},
{
"epoch": 12.07,
"learning_rate": 0.0001796258357234942,
"loss": 0.8948,
"step": 403500
},
{
"epoch": 12.09,
"learning_rate": 0.00017947593016399667,
"loss": 0.9097,
"step": 404000
},
{
"epoch": 12.1,
"learning_rate": 0.00017932602460449915,
"loss": 0.9052,
"step": 404500
},
{
"epoch": 12.12,
"learning_rate": 0.00017917611904500163,
"loss": 0.9029,
"step": 405000
},
{
"epoch": 12.13,
"learning_rate": 0.00017902621348550414,
"loss": 0.9128,
"step": 405500
},
{
"epoch": 12.15,
"learning_rate": 0.0001788763079260066,
"loss": 0.8998,
"step": 406000
},
{
"epoch": 12.16,
"learning_rate": 0.00017872640236650907,
"loss": 0.9071,
"step": 406500
},
{
"epoch": 12.18,
"learning_rate": 0.00017857649680701158,
"loss": 0.9057,
"step": 407000
},
{
"epoch": 12.19,
"learning_rate": 0.00017842659124751403,
"loss": 0.9013,
"step": 407500
},
{
"epoch": 12.21,
"learning_rate": 0.00017827668568801654,
"loss": 0.909,
"step": 408000
},
{
"epoch": 12.22,
"learning_rate": 0.00017812678012851902,
"loss": 0.9145,
"step": 408500
},
{
"epoch": 12.24,
"learning_rate": 0.00017797687456902152,
"loss": 0.9153,
"step": 409000
},
{
"epoch": 12.25,
"learning_rate": 0.00017782696900952398,
"loss": 0.8972,
"step": 409500
},
{
"epoch": 12.27,
"learning_rate": 0.00017767706345002648,
"loss": 0.9036,
"step": 410000
},
{
"epoch": 12.28,
"learning_rate": 0.00017752715789052894,
"loss": 0.8955,
"step": 410500
},
{
"epoch": 12.3,
"learning_rate": 0.00017737725233103144,
"loss": 0.91,
"step": 411000
},
{
"epoch": 12.31,
"learning_rate": 0.00017722734677153392,
"loss": 0.9116,
"step": 411500
},
{
"epoch": 12.33,
"learning_rate": 0.00017707744121203637,
"loss": 0.9182,
"step": 412000
},
{
"epoch": 12.34,
"learning_rate": 0.00017692753565253888,
"loss": 0.9106,
"step": 412500
},
{
"epoch": 12.36,
"learning_rate": 0.00017677763009304136,
"loss": 0.9066,
"step": 413000
},
{
"epoch": 12.37,
"learning_rate": 0.00017662772453354387,
"loss": 0.8996,
"step": 413500
},
{
"epoch": 12.39,
"learning_rate": 0.00017647781897404632,
"loss": 0.9269,
"step": 414000
},
{
"epoch": 12.4,
"learning_rate": 0.00017632791341454883,
"loss": 0.9236,
"step": 414500
},
{
"epoch": 12.42,
"learning_rate": 0.0001761780078550513,
"loss": 0.9161,
"step": 415000
},
{
"epoch": 12.43,
"learning_rate": 0.00017602810229555378,
"loss": 0.9081,
"step": 415500
},
{
"epoch": 12.45,
"learning_rate": 0.00017587819673605626,
"loss": 0.8983,
"step": 416000
},
{
"epoch": 12.46,
"learning_rate": 0.00017572829117655877,
"loss": 0.928,
"step": 416500
},
{
"epoch": 12.48,
"learning_rate": 0.00017557838561706122,
"loss": 0.9179,
"step": 417000
},
{
"epoch": 12.49,
"learning_rate": 0.0001754284800575637,
"loss": 0.9006,
"step": 417500
},
{
"epoch": 12.51,
"learning_rate": 0.0001752785744980662,
"loss": 0.916,
"step": 418000
},
{
"epoch": 12.52,
"learning_rate": 0.00017512866893856866,
"loss": 0.8964,
"step": 418500
},
{
"epoch": 12.54,
"learning_rate": 0.00017497876337907117,
"loss": 0.9402,
"step": 419000
},
{
"epoch": 12.55,
"learning_rate": 0.00017482885781957365,
"loss": 0.8994,
"step": 419500
},
{
"epoch": 12.57,
"learning_rate": 0.00017467895226007616,
"loss": 0.9059,
"step": 420000
},
{
"epoch": 12.58,
"learning_rate": 0.0001745290467005786,
"loss": 0.9007,
"step": 420500
},
{
"epoch": 12.6,
"learning_rate": 0.00017437914114108111,
"loss": 0.9265,
"step": 421000
},
{
"epoch": 12.61,
"learning_rate": 0.0001742292355815836,
"loss": 0.9151,
"step": 421500
},
{
"epoch": 12.63,
"learning_rate": 0.00017407933002208607,
"loss": 0.9292,
"step": 422000
},
{
"epoch": 12.64,
"learning_rate": 0.00017392942446258855,
"loss": 0.9086,
"step": 422500
},
{
"epoch": 12.66,
"learning_rate": 0.00017377951890309106,
"loss": 0.9122,
"step": 423000
},
{
"epoch": 12.67,
"learning_rate": 0.0001736296133435935,
"loss": 0.922,
"step": 423500
},
{
"epoch": 12.69,
"learning_rate": 0.000173479707784096,
"loss": 0.9351,
"step": 424000
},
{
"epoch": 12.7,
"learning_rate": 0.0001733298022245985,
"loss": 0.9161,
"step": 424500
},
{
"epoch": 12.72,
"learning_rate": 0.00017317989666510095,
"loss": 0.9264,
"step": 425000
},
{
"epoch": 12.73,
"learning_rate": 0.00017302999110560346,
"loss": 0.9007,
"step": 425500
},
{
"epoch": 12.75,
"learning_rate": 0.00017288008554610594,
"loss": 0.916,
"step": 426000
},
{
"epoch": 12.76,
"learning_rate": 0.00017273017998660844,
"loss": 0.9292,
"step": 426500
},
{
"epoch": 12.78,
"learning_rate": 0.0001725802744271109,
"loss": 0.9327,
"step": 427000
},
{
"epoch": 12.79,
"learning_rate": 0.0001724303688676134,
"loss": 0.9064,
"step": 427500
},
{
"epoch": 12.81,
"learning_rate": 0.00017228046330811585,
"loss": 0.9498,
"step": 428000
},
{
"epoch": 12.82,
"learning_rate": 0.00017213055774861836,
"loss": 0.9193,
"step": 428500
},
{
"epoch": 12.84,
"learning_rate": 0.00017198065218912084,
"loss": 0.9282,
"step": 429000
},
{
"epoch": 12.85,
"learning_rate": 0.0001718307466296233,
"loss": 0.9219,
"step": 429500
},
{
"epoch": 12.87,
"learning_rate": 0.0001716808410701258,
"loss": 0.9236,
"step": 430000
},
{
"epoch": 12.88,
"learning_rate": 0.00017153093551062828,
"loss": 0.9184,
"step": 430500
},
{
"epoch": 12.9,
"learning_rate": 0.00017138102995113079,
"loss": 0.9215,
"step": 431000
},
{
"epoch": 12.91,
"learning_rate": 0.00017123112439163324,
"loss": 0.9065,
"step": 431500
},
{
"epoch": 12.93,
"learning_rate": 0.00017108121883213575,
"loss": 0.9278,
"step": 432000
},
{
"epoch": 12.94,
"learning_rate": 0.00017093131327263823,
"loss": 0.9045,
"step": 432500
},
{
"epoch": 12.96,
"learning_rate": 0.0001707814077131407,
"loss": 0.9374,
"step": 433000
},
{
"epoch": 12.97,
"learning_rate": 0.00017063150215364318,
"loss": 0.9255,
"step": 433500
},
{
"epoch": 12.99,
"learning_rate": 0.0001704815965941457,
"loss": 0.9168,
"step": 434000
},
{
"epoch": 13.0,
"learning_rate": 0.00017033169103464814,
"loss": 0.9244,
"step": 434500
},
{
"epoch": 13.02,
"learning_rate": 0.00017018178547515065,
"loss": 0.8795,
"step": 435000
},
{
"epoch": 13.03,
"learning_rate": 0.00017003187991565313,
"loss": 0.8548,
"step": 435500
},
{
"epoch": 13.05,
"learning_rate": 0.00016988197435615558,
"loss": 0.887,
"step": 436000
},
{
"epoch": 13.06,
"learning_rate": 0.0001697320687966581,
"loss": 0.8818,
"step": 436500
},
{
"epoch": 13.08,
"learning_rate": 0.00016958216323716057,
"loss": 0.8991,
"step": 437000
},
{
"epoch": 13.09,
"learning_rate": 0.00016943225767766307,
"loss": 0.8855,
"step": 437500
},
{
"epoch": 13.11,
"learning_rate": 0.00016928235211816553,
"loss": 0.8916,
"step": 438000
},
{
"epoch": 13.12,
"learning_rate": 0.00016913244655866803,
"loss": 0.873,
"step": 438500
},
{
"epoch": 13.14,
"learning_rate": 0.0001689825409991705,
"loss": 0.87,
"step": 439000
},
{
"epoch": 13.15,
"learning_rate": 0.000168832635439673,
"loss": 0.8893,
"step": 439500
},
{
"epoch": 13.17,
"learning_rate": 0.00016868272988017547,
"loss": 0.8951,
"step": 440000
},
{
"epoch": 13.18,
"learning_rate": 0.00016853282432067798,
"loss": 0.9063,
"step": 440500
},
{
"epoch": 13.2,
"learning_rate": 0.00016838291876118043,
"loss": 0.8915,
"step": 441000
},
{
"epoch": 13.21,
"learning_rate": 0.0001682330132016829,
"loss": 0.8849,
"step": 441500
},
{
"epoch": 13.23,
"learning_rate": 0.00016808310764218542,
"loss": 0.8909,
"step": 442000
},
{
"epoch": 13.24,
"learning_rate": 0.00016793320208268787,
"loss": 0.9017,
"step": 442500
},
{
"epoch": 13.26,
"learning_rate": 0.00016778329652319038,
"loss": 0.8993,
"step": 443000
},
{
"epoch": 13.27,
"learning_rate": 0.00016763339096369286,
"loss": 0.8929,
"step": 443500
},
{
"epoch": 13.29,
"learning_rate": 0.00016748348540419536,
"loss": 0.8822,
"step": 444000
},
{
"epoch": 13.3,
"learning_rate": 0.00016733357984469782,
"loss": 0.8886,
"step": 444500
},
{
"epoch": 13.31,
"learning_rate": 0.00016718367428520032,
"loss": 0.8884,
"step": 445000
},
{
"epoch": 13.33,
"learning_rate": 0.00016703376872570277,
"loss": 0.8747,
"step": 445500
},
{
"epoch": 13.34,
"learning_rate": 0.00016688386316620528,
"loss": 0.8814,
"step": 446000
},
{
"epoch": 13.36,
"learning_rate": 0.00016673395760670776,
"loss": 0.8915,
"step": 446500
},
{
"epoch": 13.37,
"learning_rate": 0.00016658405204721027,
"loss": 0.8929,
"step": 447000
},
{
"epoch": 13.39,
"learning_rate": 0.00016643414648771272,
"loss": 0.874,
"step": 447500
},
{
"epoch": 13.4,
"learning_rate": 0.0001662842409282152,
"loss": 0.8707,
"step": 448000
},
{
"epoch": 13.42,
"learning_rate": 0.0001661343353687177,
"loss": 0.9065,
"step": 448500
},
{
"epoch": 13.43,
"learning_rate": 0.00016598442980922016,
"loss": 0.8994,
"step": 449000
},
{
"epoch": 13.45,
"learning_rate": 0.00016583452424972267,
"loss": 0.8945,
"step": 449500
},
{
"epoch": 13.46,
"learning_rate": 0.00016568461869022514,
"loss": 0.8753,
"step": 450000
},
{
"epoch": 13.48,
"learning_rate": 0.00016553471313072762,
"loss": 0.894,
"step": 450500
},
{
"epoch": 13.49,
"learning_rate": 0.0001653848075712301,
"loss": 0.8811,
"step": 451000
},
{
"epoch": 13.51,
"learning_rate": 0.0001652349020117326,
"loss": 0.8717,
"step": 451500
},
{
"epoch": 13.52,
"learning_rate": 0.00016508499645223506,
"loss": 0.9009,
"step": 452000
},
{
"epoch": 13.54,
"learning_rate": 0.00016493509089273757,
"loss": 0.901,
"step": 452500
},
{
"epoch": 13.55,
"learning_rate": 0.00016478518533324005,
"loss": 0.9092,
"step": 453000
},
{
"epoch": 13.57,
"learning_rate": 0.0001646352797737425,
"loss": 0.892,
"step": 453500
},
{
"epoch": 13.58,
"learning_rate": 0.000164485374214245,
"loss": 0.8891,
"step": 454000
},
{
"epoch": 13.6,
"learning_rate": 0.0001643354686547475,
"loss": 0.8965,
"step": 454500
},
{
"epoch": 13.61,
"learning_rate": 0.00016418556309525,
"loss": 0.8944,
"step": 455000
},
{
"epoch": 13.63,
"learning_rate": 0.00016403565753575245,
"loss": 0.905,
"step": 455500
},
{
"epoch": 13.64,
"learning_rate": 0.00016388575197625495,
"loss": 0.8966,
"step": 456000
},
{
"epoch": 13.66,
"learning_rate": 0.00016373584641675743,
"loss": 0.8835,
"step": 456500
},
{
"epoch": 13.67,
"learning_rate": 0.0001635859408572599,
"loss": 0.908,
"step": 457000
},
{
"epoch": 13.69,
"learning_rate": 0.0001634360352977624,
"loss": 0.9004,
"step": 457500
},
{
"epoch": 13.7,
"learning_rate": 0.0001632861297382649,
"loss": 0.8757,
"step": 458000
},
{
"epoch": 13.72,
"learning_rate": 0.00016313622417876735,
"loss": 0.8926,
"step": 458500
},
{
"epoch": 13.73,
"learning_rate": 0.00016298631861926986,
"loss": 0.884,
"step": 459000
},
{
"epoch": 13.75,
"learning_rate": 0.00016283641305977234,
"loss": 0.8753,
"step": 459500
},
{
"epoch": 13.76,
"learning_rate": 0.0001626865075002748,
"loss": 0.8954,
"step": 460000
},
{
"epoch": 13.78,
"learning_rate": 0.0001625366019407773,
"loss": 0.8842,
"step": 460500
},
{
"epoch": 13.79,
"learning_rate": 0.00016238669638127978,
"loss": 0.8919,
"step": 461000
},
{
"epoch": 13.81,
"learning_rate": 0.00016223679082178228,
"loss": 0.9095,
"step": 461500
},
{
"epoch": 13.82,
"learning_rate": 0.00016208688526228474,
"loss": 0.9141,
"step": 462000
},
{
"epoch": 13.84,
"learning_rate": 0.00016193697970278724,
"loss": 0.8925,
"step": 462500
},
{
"epoch": 13.85,
"learning_rate": 0.0001617870741432897,
"loss": 0.9005,
"step": 463000
},
{
"epoch": 13.87,
"learning_rate": 0.0001616371685837922,
"loss": 0.9009,
"step": 463500
},
{
"epoch": 13.88,
"learning_rate": 0.00016148726302429468,
"loss": 0.8958,
"step": 464000
},
{
"epoch": 13.9,
"learning_rate": 0.0001613373574647972,
"loss": 0.8925,
"step": 464500
},
{
"epoch": 13.91,
"learning_rate": 0.00016118745190529964,
"loss": 0.8725,
"step": 465000
},
{
"epoch": 13.93,
"learning_rate": 0.00016103754634580212,
"loss": 0.8792,
"step": 465500
},
{
"epoch": 13.94,
"learning_rate": 0.00016088764078630463,
"loss": 0.8859,
"step": 466000
},
{
"epoch": 13.96,
"learning_rate": 0.00016073773522680708,
"loss": 0.8717,
"step": 466500
},
{
"epoch": 13.97,
"learning_rate": 0.00016058782966730958,
"loss": 0.8951,
"step": 467000
},
{
"epoch": 13.99,
"learning_rate": 0.00016043792410781206,
"loss": 0.9077,
"step": 467500
},
{
"epoch": 14.0,
"learning_rate": 0.00016028801854831454,
"loss": 0.8778,
"step": 468000
},
{
"epoch": 14.02,
"learning_rate": 0.00016013811298881702,
"loss": 0.8637,
"step": 468500
},
{
"epoch": 14.03,
"learning_rate": 0.00015998820742931953,
"loss": 0.8469,
"step": 469000
},
{
"epoch": 14.05,
"learning_rate": 0.00015983830186982198,
"loss": 0.848,
"step": 469500
},
{
"epoch": 14.06,
"learning_rate": 0.0001596883963103245,
"loss": 0.8591,
"step": 470000
},
{
"epoch": 14.08,
"learning_rate": 0.00015953849075082697,
"loss": 0.8624,
"step": 470500
},
{
"epoch": 14.09,
"learning_rate": 0.00015938858519132942,
"loss": 0.8571,
"step": 471000
},
{
"epoch": 14.11,
"learning_rate": 0.00015923867963183193,
"loss": 0.8636,
"step": 471500
},
{
"epoch": 14.12,
"learning_rate": 0.0001590887740723344,
"loss": 0.8616,
"step": 472000
},
{
"epoch": 14.14,
"learning_rate": 0.00015893886851283691,
"loss": 0.8613,
"step": 472500
},
{
"epoch": 14.15,
"learning_rate": 0.00015878896295333937,
"loss": 0.866,
"step": 473000
},
{
"epoch": 14.17,
"learning_rate": 0.00015863905739384187,
"loss": 0.8645,
"step": 473500
},
{
"epoch": 14.18,
"learning_rate": 0.00015848915183434435,
"loss": 0.8551,
"step": 474000
},
{
"epoch": 14.2,
"learning_rate": 0.00015833924627484683,
"loss": 0.8524,
"step": 474500
},
{
"epoch": 14.21,
"learning_rate": 0.0001581893407153493,
"loss": 0.8559,
"step": 475000
},
{
"epoch": 14.23,
"learning_rate": 0.00015803943515585182,
"loss": 0.853,
"step": 475500
},
{
"epoch": 14.24,
"learning_rate": 0.00015788952959635427,
"loss": 0.8808,
"step": 476000
},
{
"epoch": 14.26,
"learning_rate": 0.00015773962403685678,
"loss": 0.8758,
"step": 476500
},
{
"epoch": 14.27,
"learning_rate": 0.00015758971847735926,
"loss": 0.8381,
"step": 477000
},
{
"epoch": 14.29,
"learning_rate": 0.0001574398129178617,
"loss": 0.853,
"step": 477500
},
{
"epoch": 14.3,
"learning_rate": 0.00015728990735836422,
"loss": 0.8603,
"step": 478000
},
{
"epoch": 14.32,
"learning_rate": 0.0001571400017988667,
"loss": 0.8534,
"step": 478500
},
{
"epoch": 14.33,
"learning_rate": 0.0001569900962393692,
"loss": 0.8671,
"step": 479000
},
{
"epoch": 14.35,
"learning_rate": 0.00015684019067987165,
"loss": 0.8505,
"step": 479500
},
{
"epoch": 14.36,
"learning_rate": 0.00015669028512037416,
"loss": 0.881,
"step": 480000
},
{
"epoch": 14.38,
"learning_rate": 0.00015654037956087664,
"loss": 0.8569,
"step": 480500
},
{
"epoch": 14.39,
"learning_rate": 0.00015639047400137912,
"loss": 0.8722,
"step": 481000
},
{
"epoch": 14.41,
"learning_rate": 0.0001562405684418816,
"loss": 0.8788,
"step": 481500
},
{
"epoch": 14.42,
"learning_rate": 0.0001560906628823841,
"loss": 0.8657,
"step": 482000
},
{
"epoch": 14.44,
"learning_rate": 0.00015594075732288656,
"loss": 0.8841,
"step": 482500
},
{
"epoch": 14.45,
"learning_rate": 0.00015579085176338904,
"loss": 0.8502,
"step": 483000
},
{
"epoch": 14.47,
"learning_rate": 0.00015564094620389155,
"loss": 0.8658,
"step": 483500
},
{
"epoch": 14.48,
"learning_rate": 0.000155491040644394,
"loss": 0.8626,
"step": 484000
},
{
"epoch": 14.5,
"learning_rate": 0.0001553411350848965,
"loss": 0.8678,
"step": 484500
},
{
"epoch": 14.51,
"learning_rate": 0.00015519122952539898,
"loss": 0.8488,
"step": 485000
},
{
"epoch": 14.53,
"learning_rate": 0.00015504132396590146,
"loss": 0.8703,
"step": 485500
},
{
"epoch": 14.54,
"learning_rate": 0.00015489141840640394,
"loss": 0.86,
"step": 486000
},
{
"epoch": 14.56,
"learning_rate": 0.00015474151284690645,
"loss": 0.8576,
"step": 486500
},
{
"epoch": 14.57,
"learning_rate": 0.0001545916072874089,
"loss": 0.8711,
"step": 487000
},
{
"epoch": 14.59,
"learning_rate": 0.0001544417017279114,
"loss": 0.8735,
"step": 487500
},
{
"epoch": 14.6,
"learning_rate": 0.0001542917961684139,
"loss": 0.8646,
"step": 488000
},
{
"epoch": 14.62,
"learning_rate": 0.0001541418906089164,
"loss": 0.8547,
"step": 488500
},
{
"epoch": 14.63,
"learning_rate": 0.00015399198504941885,
"loss": 0.8827,
"step": 489000
},
{
"epoch": 14.65,
"learning_rate": 0.00015384207948992133,
"loss": 0.8632,
"step": 489500
},
{
"epoch": 14.66,
"learning_rate": 0.00015369217393042383,
"loss": 0.8675,
"step": 490000
},
{
"epoch": 14.68,
"learning_rate": 0.00015354226837092629,
"loss": 0.866,
"step": 490500
},
{
"epoch": 14.69,
"learning_rate": 0.0001533923628114288,
"loss": 0.8879,
"step": 491000
},
{
"epoch": 14.71,
"learning_rate": 0.00015324245725193127,
"loss": 0.878,
"step": 491500
},
{
"epoch": 14.72,
"learning_rate": 0.00015309255169243375,
"loss": 0.8786,
"step": 492000
},
{
"epoch": 14.74,
"learning_rate": 0.00015294264613293623,
"loss": 0.8686,
"step": 492500
},
{
"epoch": 14.75,
"learning_rate": 0.00015279274057343874,
"loss": 0.8693,
"step": 493000
},
{
"epoch": 14.77,
"learning_rate": 0.0001526428350139412,
"loss": 0.8709,
"step": 493500
},
{
"epoch": 14.78,
"learning_rate": 0.0001524929294544437,
"loss": 0.8924,
"step": 494000
},
{
"epoch": 14.8,
"learning_rate": 0.00015234302389494618,
"loss": 0.8855,
"step": 494500
},
{
"epoch": 14.81,
"learning_rate": 0.00015219311833544863,
"loss": 0.8685,
"step": 495000
},
{
"epoch": 14.83,
"learning_rate": 0.00015204321277595114,
"loss": 0.8675,
"step": 495500
},
{
"epoch": 14.84,
"learning_rate": 0.00015189330721645362,
"loss": 0.8694,
"step": 496000
},
{
"epoch": 14.86,
"learning_rate": 0.00015174340165695612,
"loss": 0.8793,
"step": 496500
},
{
"epoch": 14.87,
"learning_rate": 0.00015159349609745857,
"loss": 0.8753,
"step": 497000
},
{
"epoch": 14.89,
"learning_rate": 0.00015144359053796108,
"loss": 0.8651,
"step": 497500
},
{
"epoch": 14.9,
"learning_rate": 0.00015129368497846356,
"loss": 0.8686,
"step": 498000
},
{
"epoch": 14.92,
"learning_rate": 0.00015114377941896604,
"loss": 0.8725,
"step": 498500
},
{
"epoch": 14.93,
"learning_rate": 0.00015099387385946852,
"loss": 0.8623,
"step": 499000
},
{
"epoch": 14.95,
"learning_rate": 0.00015084396829997103,
"loss": 0.8735,
"step": 499500
},
{
"epoch": 14.96,
"learning_rate": 0.00015069406274047348,
"loss": 0.886,
"step": 500000
},
{
"epoch": 14.98,
"learning_rate": 0.00015054415718097599,
"loss": 0.8643,
"step": 500500
},
{
"epoch": 14.99,
"learning_rate": 0.00015039425162147847,
"loss": 0.8696,
"step": 501000
},
{
"epoch": 15.01,
"learning_rate": 0.00015024434606198092,
"loss": 0.8534,
"step": 501500
},
{
"epoch": 15.02,
"learning_rate": 0.00015009444050248342,
"loss": 0.8438,
"step": 502000
},
{
"epoch": 15.04,
"learning_rate": 0.0001499445349429859,
"loss": 0.8503,
"step": 502500
},
{
"epoch": 15.05,
"learning_rate": 0.00014979462938348838,
"loss": 0.8536,
"step": 503000
},
{
"epoch": 15.07,
"learning_rate": 0.0001496447238239909,
"loss": 0.8408,
"step": 503500
},
{
"epoch": 15.08,
"learning_rate": 0.00014949481826449334,
"loss": 0.842,
"step": 504000
},
{
"epoch": 15.1,
"learning_rate": 0.00014934491270499582,
"loss": 0.841,
"step": 504500
},
{
"epoch": 15.11,
"learning_rate": 0.00014919500714549833,
"loss": 0.8277,
"step": 505000
},
{
"epoch": 15.13,
"learning_rate": 0.0001490451015860008,
"loss": 0.8472,
"step": 505500
},
{
"epoch": 15.14,
"learning_rate": 0.0001488951960265033,
"loss": 0.8271,
"step": 506000
},
{
"epoch": 15.16,
"learning_rate": 0.00014874529046700577,
"loss": 0.8331,
"step": 506500
},
{
"epoch": 15.17,
"learning_rate": 0.00014859538490750825,
"loss": 0.8436,
"step": 507000
},
{
"epoch": 15.19,
"learning_rate": 0.00014844547934801075,
"loss": 0.8388,
"step": 507500
},
{
"epoch": 15.2,
"learning_rate": 0.00014829557378851323,
"loss": 0.8495,
"step": 508000
},
{
"epoch": 15.21,
"learning_rate": 0.0001481456682290157,
"loss": 0.8436,
"step": 508500
},
{
"epoch": 15.23,
"learning_rate": 0.0001479957626695182,
"loss": 0.834,
"step": 509000
},
{
"epoch": 15.24,
"learning_rate": 0.00014784585711002067,
"loss": 0.8232,
"step": 509500
},
{
"epoch": 15.26,
"learning_rate": 0.00014769595155052315,
"loss": 0.8357,
"step": 510000
},
{
"epoch": 15.27,
"learning_rate": 0.00014754604599102563,
"loss": 0.8504,
"step": 510500
},
{
"epoch": 15.29,
"learning_rate": 0.0001473961404315281,
"loss": 0.8328,
"step": 511000
},
{
"epoch": 15.3,
"learning_rate": 0.00014724623487203062,
"loss": 0.8639,
"step": 511500
},
{
"epoch": 15.32,
"learning_rate": 0.0001470963293125331,
"loss": 0.8546,
"step": 512000
},
{
"epoch": 15.33,
"learning_rate": 0.00014694642375303558,
"loss": 0.8487,
"step": 512500
},
{
"epoch": 15.35,
"learning_rate": 0.00014679651819353806,
"loss": 0.8203,
"step": 513000
},
{
"epoch": 15.36,
"learning_rate": 0.00014664661263404054,
"loss": 0.8422,
"step": 513500
},
{
"epoch": 15.38,
"learning_rate": 0.00014649670707454304,
"loss": 0.8515,
"step": 514000
},
{
"epoch": 15.39,
"learning_rate": 0.00014634680151504552,
"loss": 0.8362,
"step": 514500
},
{
"epoch": 15.41,
"learning_rate": 0.000146196895955548,
"loss": 0.8327,
"step": 515000
},
{
"epoch": 15.42,
"learning_rate": 0.00014604699039605048,
"loss": 0.8561,
"step": 515500
},
{
"epoch": 15.44,
"learning_rate": 0.00014589708483655296,
"loss": 0.8647,
"step": 516000
},
{
"epoch": 15.45,
"learning_rate": 0.00014574717927705544,
"loss": 0.8487,
"step": 516500
},
{
"epoch": 15.47,
"learning_rate": 0.00014559727371755792,
"loss": 0.8379,
"step": 517000
},
{
"epoch": 15.48,
"learning_rate": 0.0001454473681580604,
"loss": 0.8607,
"step": 517500
},
{
"epoch": 15.5,
"learning_rate": 0.00014529746259856288,
"loss": 0.8548,
"step": 518000
},
{
"epoch": 15.51,
"learning_rate": 0.00014514755703906538,
"loss": 0.8504,
"step": 518500
},
{
"epoch": 15.53,
"learning_rate": 0.00014499765147956786,
"loss": 0.8485,
"step": 519000
},
{
"epoch": 15.54,
"learning_rate": 0.00014484774592007034,
"loss": 0.8419,
"step": 519500
},
{
"epoch": 15.56,
"learning_rate": 0.00014469784036057282,
"loss": 0.8509,
"step": 520000
},
{
"epoch": 15.57,
"learning_rate": 0.0001445479348010753,
"loss": 0.8377,
"step": 520500
},
{
"epoch": 15.59,
"learning_rate": 0.0001443980292415778,
"loss": 0.8417,
"step": 521000
},
{
"epoch": 15.6,
"learning_rate": 0.00014424812368208026,
"loss": 0.8422,
"step": 521500
},
{
"epoch": 15.62,
"learning_rate": 0.00014409821812258274,
"loss": 0.8228,
"step": 522000
},
{
"epoch": 15.63,
"learning_rate": 0.00014394831256308525,
"loss": 0.8544,
"step": 522500
},
{
"epoch": 15.65,
"learning_rate": 0.00014379840700358773,
"loss": 0.8495,
"step": 523000
},
{
"epoch": 15.66,
"learning_rate": 0.0001436485014440902,
"loss": 0.8498,
"step": 523500
},
{
"epoch": 15.68,
"learning_rate": 0.0001434985958845927,
"loss": 0.8513,
"step": 524000
},
{
"epoch": 15.69,
"learning_rate": 0.00014334869032509517,
"loss": 0.8416,
"step": 524500
},
{
"epoch": 15.71,
"learning_rate": 0.00014319878476559767,
"loss": 0.8563,
"step": 525000
},
{
"epoch": 15.72,
"learning_rate": 0.00014304887920610015,
"loss": 0.8463,
"step": 525500
},
{
"epoch": 15.74,
"learning_rate": 0.00014289897364660263,
"loss": 0.8371,
"step": 526000
},
{
"epoch": 15.75,
"learning_rate": 0.0001427490680871051,
"loss": 0.8415,
"step": 526500
},
{
"epoch": 15.77,
"learning_rate": 0.0001425991625276076,
"loss": 0.8303,
"step": 527000
},
{
"epoch": 15.78,
"learning_rate": 0.00014244925696811007,
"loss": 0.8581,
"step": 527500
},
{
"epoch": 15.8,
"learning_rate": 0.00014229935140861255,
"loss": 0.8418,
"step": 528000
},
{
"epoch": 15.81,
"learning_rate": 0.00014214944584911503,
"loss": 0.8296,
"step": 528500
},
{
"epoch": 15.83,
"learning_rate": 0.00014199954028961754,
"loss": 0.8515,
"step": 529000
},
{
"epoch": 15.84,
"learning_rate": 0.00014184963473012002,
"loss": 0.8382,
"step": 529500
},
{
"epoch": 15.86,
"learning_rate": 0.0001416997291706225,
"loss": 0.8395,
"step": 530000
},
{
"epoch": 15.87,
"learning_rate": 0.00014154982361112498,
"loss": 0.823,
"step": 530500
},
{
"epoch": 15.89,
"learning_rate": 0.00014139991805162745,
"loss": 0.8454,
"step": 531000
},
{
"epoch": 15.9,
"learning_rate": 0.00014125001249212996,
"loss": 0.844,
"step": 531500
},
{
"epoch": 15.92,
"learning_rate": 0.00014110010693263244,
"loss": 0.8375,
"step": 532000
},
{
"epoch": 15.93,
"learning_rate": 0.00014095020137313492,
"loss": 0.8549,
"step": 532500
},
{
"epoch": 15.95,
"learning_rate": 0.0001408002958136374,
"loss": 0.8322,
"step": 533000
},
{
"epoch": 15.96,
"learning_rate": 0.00014065039025413988,
"loss": 0.8346,
"step": 533500
},
{
"epoch": 15.98,
"learning_rate": 0.00014050048469464236,
"loss": 0.848,
"step": 534000
},
{
"epoch": 15.99,
"learning_rate": 0.00014035057913514484,
"loss": 0.8605,
"step": 534500
},
{
"epoch": 16.01,
"learning_rate": 0.00014020067357564732,
"loss": 0.8431,
"step": 535000
},
{
"epoch": 16.02,
"learning_rate": 0.0001400507680161498,
"loss": 0.7915,
"step": 535500
},
{
"epoch": 16.04,
"learning_rate": 0.0001399008624566523,
"loss": 0.8093,
"step": 536000
},
{
"epoch": 16.05,
"learning_rate": 0.00013975095689715478,
"loss": 0.8242,
"step": 536500
},
{
"epoch": 16.07,
"learning_rate": 0.00013960105133765726,
"loss": 0.8114,
"step": 537000
},
{
"epoch": 16.08,
"learning_rate": 0.00013945114577815974,
"loss": 0.7946,
"step": 537500
},
{
"epoch": 16.1,
"learning_rate": 0.00013930124021866222,
"loss": 0.8027,
"step": 538000
},
{
"epoch": 16.11,
"learning_rate": 0.00013915133465916473,
"loss": 0.8051,
"step": 538500
},
{
"epoch": 16.13,
"learning_rate": 0.0001390014290996672,
"loss": 0.8243,
"step": 539000
},
{
"epoch": 16.14,
"learning_rate": 0.00013885152354016966,
"loss": 0.821,
"step": 539500
},
{
"epoch": 16.16,
"learning_rate": 0.00013870161798067217,
"loss": 0.8271,
"step": 540000
},
{
"epoch": 16.17,
"learning_rate": 0.00013855171242117465,
"loss": 0.8185,
"step": 540500
},
{
"epoch": 16.19,
"learning_rate": 0.00013840180686167713,
"loss": 0.8149,
"step": 541000
},
{
"epoch": 16.2,
"learning_rate": 0.0001382519013021796,
"loss": 0.7955,
"step": 541500
},
{
"epoch": 16.22,
"learning_rate": 0.00013810199574268209,
"loss": 0.8273,
"step": 542000
},
{
"epoch": 16.23,
"learning_rate": 0.0001379520901831846,
"loss": 0.8367,
"step": 542500
},
{
"epoch": 16.25,
"learning_rate": 0.00013780218462368707,
"loss": 0.8414,
"step": 543000
},
{
"epoch": 16.26,
"learning_rate": 0.00013765227906418955,
"loss": 0.8162,
"step": 543500
},
{
"epoch": 16.28,
"learning_rate": 0.00013750237350469203,
"loss": 0.8197,
"step": 544000
},
{
"epoch": 16.29,
"learning_rate": 0.0001373524679451945,
"loss": 0.8085,
"step": 544500
},
{
"epoch": 16.31,
"learning_rate": 0.00013720256238569702,
"loss": 0.8006,
"step": 545000
},
{
"epoch": 16.32,
"learning_rate": 0.00013705265682619947,
"loss": 0.8421,
"step": 545500
},
{
"epoch": 16.34,
"learning_rate": 0.00013690275126670195,
"loss": 0.8027,
"step": 546000
},
{
"epoch": 16.35,
"learning_rate": 0.00013675284570720446,
"loss": 0.8164,
"step": 546500
},
{
"epoch": 16.37,
"learning_rate": 0.00013660294014770694,
"loss": 0.8271,
"step": 547000
},
{
"epoch": 16.38,
"learning_rate": 0.00013645303458820942,
"loss": 0.844,
"step": 547500
},
{
"epoch": 16.4,
"learning_rate": 0.0001363031290287119,
"loss": 0.8177,
"step": 548000
},
{
"epoch": 16.41,
"learning_rate": 0.00013615322346921437,
"loss": 0.8193,
"step": 548500
},
{
"epoch": 16.43,
"learning_rate": 0.00013600331790971688,
"loss": 0.829,
"step": 549000
},
{
"epoch": 16.44,
"learning_rate": 0.00013585341235021936,
"loss": 0.8306,
"step": 549500
},
{
"epoch": 16.46,
"learning_rate": 0.00013570350679072184,
"loss": 0.8298,
"step": 550000
},
{
"epoch": 16.47,
"learning_rate": 0.00013555360123122432,
"loss": 0.8027,
"step": 550500
},
{
"epoch": 16.49,
"learning_rate": 0.0001354036956717268,
"loss": 0.8251,
"step": 551000
},
{
"epoch": 16.5,
"learning_rate": 0.00013525379011222928,
"loss": 0.8216,
"step": 551500
},
{
"epoch": 16.52,
"learning_rate": 0.00013510388455273176,
"loss": 0.8337,
"step": 552000
},
{
"epoch": 16.53,
"learning_rate": 0.00013495397899323424,
"loss": 0.8322,
"step": 552500
},
{
"epoch": 16.55,
"learning_rate": 0.00013480407343373674,
"loss": 0.8357,
"step": 553000
},
{
"epoch": 16.56,
"learning_rate": 0.00013465416787423922,
"loss": 0.8244,
"step": 553500
},
{
"epoch": 16.58,
"learning_rate": 0.0001345042623147417,
"loss": 0.8214,
"step": 554000
},
{
"epoch": 16.59,
"learning_rate": 0.00013435435675524418,
"loss": 0.84,
"step": 554500
},
{
"epoch": 16.61,
"learning_rate": 0.00013420445119574666,
"loss": 0.8311,
"step": 555000
},
{
"epoch": 16.62,
"learning_rate": 0.00013405454563624914,
"loss": 0.8384,
"step": 555500
},
{
"epoch": 16.64,
"learning_rate": 0.00013390464007675165,
"loss": 0.8237,
"step": 556000
},
{
"epoch": 16.65,
"learning_rate": 0.00013375473451725413,
"loss": 0.8081,
"step": 556500
},
{
"epoch": 16.67,
"learning_rate": 0.00013360482895775658,
"loss": 0.8184,
"step": 557000
},
{
"epoch": 16.68,
"learning_rate": 0.0001334549233982591,
"loss": 0.8093,
"step": 557500
},
{
"epoch": 16.7,
"learning_rate": 0.00013330501783876157,
"loss": 0.8157,
"step": 558000
},
{
"epoch": 16.71,
"learning_rate": 0.00013315511227926405,
"loss": 0.8073,
"step": 558500
},
{
"epoch": 16.73,
"learning_rate": 0.00013300520671976653,
"loss": 0.8238,
"step": 559000
},
{
"epoch": 16.74,
"learning_rate": 0.000132855301160269,
"loss": 0.8137,
"step": 559500
},
{
"epoch": 16.76,
"learning_rate": 0.0001327053956007715,
"loss": 0.8181,
"step": 560000
},
{
"epoch": 16.77,
"learning_rate": 0.000132555490041274,
"loss": 0.8108,
"step": 560500
},
{
"epoch": 16.79,
"learning_rate": 0.00013240558448177647,
"loss": 0.829,
"step": 561000
},
{
"epoch": 16.8,
"learning_rate": 0.00013225567892227895,
"loss": 0.846,
"step": 561500
},
{
"epoch": 16.82,
"learning_rate": 0.00013210577336278143,
"loss": 0.7963,
"step": 562000
},
{
"epoch": 16.83,
"learning_rate": 0.00013195586780328394,
"loss": 0.8091,
"step": 562500
},
{
"epoch": 16.85,
"learning_rate": 0.0001318059622437864,
"loss": 0.8276,
"step": 563000
},
{
"epoch": 16.86,
"learning_rate": 0.00013165605668428887,
"loss": 0.8359,
"step": 563500
},
{
"epoch": 16.88,
"learning_rate": 0.00013150615112479138,
"loss": 0.8228,
"step": 564000
},
{
"epoch": 16.89,
"learning_rate": 0.00013135624556529386,
"loss": 0.83,
"step": 564500
},
{
"epoch": 16.91,
"learning_rate": 0.00013120634000579634,
"loss": 0.8201,
"step": 565000
},
{
"epoch": 16.92,
"learning_rate": 0.00013105643444629881,
"loss": 0.8283,
"step": 565500
},
{
"epoch": 16.94,
"learning_rate": 0.0001309065288868013,
"loss": 0.8175,
"step": 566000
},
{
"epoch": 16.95,
"learning_rate": 0.0001307566233273038,
"loss": 0.8318,
"step": 566500
},
{
"epoch": 16.97,
"learning_rate": 0.00013060671776780628,
"loss": 0.7978,
"step": 567000
},
{
"epoch": 16.98,
"learning_rate": 0.00013045681220830876,
"loss": 0.8185,
"step": 567500
},
{
"epoch": 17.0,
"learning_rate": 0.00013030690664881124,
"loss": 0.8267,
"step": 568000
},
{
"epoch": 17.01,
"learning_rate": 0.00013015700108931372,
"loss": 0.7983,
"step": 568500
},
{
"epoch": 17.03,
"learning_rate": 0.0001300070955298162,
"loss": 0.7953,
"step": 569000
},
{
"epoch": 17.04,
"learning_rate": 0.00012985718997031868,
"loss": 0.7757,
"step": 569500
},
{
"epoch": 17.06,
"learning_rate": 0.00012970728441082116,
"loss": 0.7859,
"step": 570000
},
{
"epoch": 17.07,
"learning_rate": 0.00012955737885132366,
"loss": 0.7836,
"step": 570500
},
{
"epoch": 17.09,
"learning_rate": 0.00012940747329182614,
"loss": 0.7968,
"step": 571000
},
{
"epoch": 17.1,
"learning_rate": 0.00012925756773232862,
"loss": 0.7894,
"step": 571500
},
{
"epoch": 17.11,
"learning_rate": 0.0001291076621728311,
"loss": 0.7958,
"step": 572000
},
{
"epoch": 17.13,
"learning_rate": 0.00012895775661333358,
"loss": 0.7711,
"step": 572500
},
{
"epoch": 17.14,
"learning_rate": 0.00012880785105383606,
"loss": 0.7975,
"step": 573000
},
{
"epoch": 17.16,
"learning_rate": 0.00012865794549433857,
"loss": 0.7934,
"step": 573500
},
{
"epoch": 17.17,
"learning_rate": 0.00012850803993484105,
"loss": 0.81,
"step": 574000
},
{
"epoch": 17.19,
"learning_rate": 0.00012835813437534353,
"loss": 0.7923,
"step": 574500
},
{
"epoch": 17.2,
"learning_rate": 0.000128208228815846,
"loss": 0.7944,
"step": 575000
},
{
"epoch": 17.22,
"learning_rate": 0.0001280583232563485,
"loss": 0.8038,
"step": 575500
},
{
"epoch": 17.23,
"learning_rate": 0.00012790841769685097,
"loss": 0.7916,
"step": 576000
},
{
"epoch": 17.25,
"learning_rate": 0.00012775851213735345,
"loss": 0.8171,
"step": 576500
},
{
"epoch": 17.26,
"learning_rate": 0.00012760860657785593,
"loss": 0.79,
"step": 577000
},
{
"epoch": 17.28,
"learning_rate": 0.00012745870101835843,
"loss": 0.7968,
"step": 577500
},
{
"epoch": 17.29,
"learning_rate": 0.0001273087954588609,
"loss": 0.7882,
"step": 578000
},
{
"epoch": 17.31,
"learning_rate": 0.0001271588898993634,
"loss": 0.7863,
"step": 578500
},
{
"epoch": 17.32,
"learning_rate": 0.00012700898433986587,
"loss": 0.8008,
"step": 579000
},
{
"epoch": 17.34,
"learning_rate": 0.00012685907878036835,
"loss": 0.7904,
"step": 579500
},
{
"epoch": 17.35,
"learning_rate": 0.00012670917322087086,
"loss": 0.8133,
"step": 580000
},
{
"epoch": 17.37,
"learning_rate": 0.00012655926766137334,
"loss": 0.8,
"step": 580500
},
{
"epoch": 17.38,
"learning_rate": 0.0001264093621018758,
"loss": 0.8,
"step": 581000
},
{
"epoch": 17.4,
"learning_rate": 0.0001262594565423783,
"loss": 0.7994,
"step": 581500
},
{
"epoch": 17.41,
"learning_rate": 0.00012610955098288078,
"loss": 0.8093,
"step": 582000
},
{
"epoch": 17.43,
"learning_rate": 0.00012595964542338325,
"loss": 0.8263,
"step": 582500
},
{
"epoch": 17.44,
"learning_rate": 0.00012580973986388573,
"loss": 0.7971,
"step": 583000
},
{
"epoch": 17.46,
"learning_rate": 0.00012565983430438821,
"loss": 0.8205,
"step": 583500
},
{
"epoch": 17.47,
"learning_rate": 0.00012550992874489072,
"loss": 0.8018,
"step": 584000
},
{
"epoch": 17.49,
"learning_rate": 0.0001253600231853932,
"loss": 0.7923,
"step": 584500
},
{
"epoch": 17.5,
"learning_rate": 0.00012521011762589568,
"loss": 0.7821,
"step": 585000
},
{
"epoch": 17.52,
"learning_rate": 0.00012506021206639816,
"loss": 0.7972,
"step": 585500
},
{
"epoch": 17.53,
"learning_rate": 0.00012491030650690064,
"loss": 0.8023,
"step": 586000
},
{
"epoch": 17.55,
"learning_rate": 0.00012476040094740315,
"loss": 0.791,
"step": 586500
},
{
"epoch": 17.56,
"learning_rate": 0.0001246104953879056,
"loss": 0.7786,
"step": 587000
},
{
"epoch": 17.58,
"learning_rate": 0.00012446058982840808,
"loss": 0.8098,
"step": 587500
},
{
"epoch": 17.59,
"learning_rate": 0.00012431068426891058,
"loss": 0.7725,
"step": 588000
},
{
"epoch": 17.61,
"learning_rate": 0.00012416077870941306,
"loss": 0.8116,
"step": 588500
},
{
"epoch": 17.62,
"learning_rate": 0.00012401087314991554,
"loss": 0.7904,
"step": 589000
},
{
"epoch": 17.64,
"learning_rate": 0.00012386096759041802,
"loss": 0.7895,
"step": 589500
},
{
"epoch": 17.65,
"learning_rate": 0.0001237110620309205,
"loss": 0.8055,
"step": 590000
},
{
"epoch": 17.67,
"learning_rate": 0.00012356115647142298,
"loss": 0.8156,
"step": 590500
},
{
"epoch": 17.68,
"learning_rate": 0.0001234112509119255,
"loss": 0.8047,
"step": 591000
},
{
"epoch": 17.7,
"learning_rate": 0.00012326134535242797,
"loss": 0.8095,
"step": 591500
},
{
"epoch": 17.71,
"learning_rate": 0.00012311143979293045,
"loss": 0.796,
"step": 592000
},
{
"epoch": 17.73,
"learning_rate": 0.00012296153423343293,
"loss": 0.8166,
"step": 592500
},
{
"epoch": 17.74,
"learning_rate": 0.0001228116286739354,
"loss": 0.8087,
"step": 593000
},
{
"epoch": 17.76,
"learning_rate": 0.00012266172311443789,
"loss": 0.8061,
"step": 593500
},
{
"epoch": 17.77,
"learning_rate": 0.00012251181755494037,
"loss": 0.8024,
"step": 594000
},
{
"epoch": 17.79,
"learning_rate": 0.00012236191199544285,
"loss": 0.7689,
"step": 594500
},
{
"epoch": 17.8,
"learning_rate": 0.00012221200643594535,
"loss": 0.8206,
"step": 595000
},
{
"epoch": 17.82,
"learning_rate": 0.00012206210087644783,
"loss": 0.8092,
"step": 595500
},
{
"epoch": 17.83,
"learning_rate": 0.00012191219531695031,
"loss": 0.7948,
"step": 596000
},
{
"epoch": 17.85,
"learning_rate": 0.00012176228975745279,
"loss": 0.7896,
"step": 596500
},
{
"epoch": 17.86,
"learning_rate": 0.00012161238419795528,
"loss": 0.7985,
"step": 597000
},
{
"epoch": 17.88,
"learning_rate": 0.00012146247863845776,
"loss": 0.8219,
"step": 597500
},
{
"epoch": 17.89,
"learning_rate": 0.00012131257307896026,
"loss": 0.7926,
"step": 598000
},
{
"epoch": 17.91,
"learning_rate": 0.00012116266751946274,
"loss": 0.8145,
"step": 598500
},
{
"epoch": 17.92,
"learning_rate": 0.0001210127619599652,
"loss": 0.7882,
"step": 599000
},
{
"epoch": 17.94,
"learning_rate": 0.0001208628564004677,
"loss": 0.8075,
"step": 599500
},
{
"epoch": 17.95,
"learning_rate": 0.00012071295084097017,
"loss": 0.8136,
"step": 600000
},
{
"epoch": 17.97,
"learning_rate": 0.00012056304528147265,
"loss": 0.8207,
"step": 600500
},
{
"epoch": 17.98,
"learning_rate": 0.00012041313972197515,
"loss": 0.7927,
"step": 601000
},
{
"epoch": 18.0,
"learning_rate": 0.00012026323416247763,
"loss": 0.7825,
"step": 601500
},
{
"epoch": 18.01,
"learning_rate": 0.00012011332860298012,
"loss": 0.7775,
"step": 602000
},
{
"epoch": 18.03,
"learning_rate": 0.0001199634230434826,
"loss": 0.7673,
"step": 602500
},
{
"epoch": 18.04,
"learning_rate": 0.00011981351748398508,
"loss": 0.772,
"step": 603000
},
{
"epoch": 18.06,
"learning_rate": 0.00011966361192448757,
"loss": 0.7624,
"step": 603500
},
{
"epoch": 18.07,
"learning_rate": 0.00011951370636499005,
"loss": 0.7818,
"step": 604000
},
{
"epoch": 18.09,
"learning_rate": 0.00011936380080549252,
"loss": 0.7454,
"step": 604500
},
{
"epoch": 18.1,
"learning_rate": 0.00011921389524599501,
"loss": 0.7679,
"step": 605000
},
{
"epoch": 18.12,
"learning_rate": 0.00011906398968649749,
"loss": 0.7645,
"step": 605500
},
{
"epoch": 18.13,
"learning_rate": 0.00011891408412699997,
"loss": 0.7758,
"step": 606000
},
{
"epoch": 18.15,
"learning_rate": 0.00011876417856750246,
"loss": 0.7485,
"step": 606500
},
{
"epoch": 18.16,
"learning_rate": 0.00011861427300800494,
"loss": 0.7823,
"step": 607000
},
{
"epoch": 18.18,
"learning_rate": 0.00011846436744850744,
"loss": 0.7966,
"step": 607500
},
{
"epoch": 18.19,
"learning_rate": 0.00011831446188900991,
"loss": 0.789,
"step": 608000
},
{
"epoch": 18.21,
"learning_rate": 0.0001181645563295124,
"loss": 0.7774,
"step": 608500
},
{
"epoch": 18.22,
"learning_rate": 0.00011801465077001489,
"loss": 0.7918,
"step": 609000
},
{
"epoch": 18.24,
"learning_rate": 0.00011786474521051737,
"loss": 0.7693,
"step": 609500
},
{
"epoch": 18.25,
"learning_rate": 0.00011771483965101986,
"loss": 0.7592,
"step": 610000
},
{
"epoch": 18.27,
"learning_rate": 0.00011756493409152233,
"loss": 0.7838,
"step": 610500
},
{
"epoch": 18.28,
"learning_rate": 0.0001174150285320248,
"loss": 0.7846,
"step": 611000
},
{
"epoch": 18.3,
"learning_rate": 0.0001172651229725273,
"loss": 0.7693,
"step": 611500
},
{
"epoch": 18.31,
"learning_rate": 0.00011711521741302978,
"loss": 0.7825,
"step": 612000
},
{
"epoch": 18.33,
"learning_rate": 0.00011696531185353226,
"loss": 0.7787,
"step": 612500
},
{
"epoch": 18.34,
"learning_rate": 0.00011681540629403475,
"loss": 0.7682,
"step": 613000
},
{
"epoch": 18.36,
"learning_rate": 0.00011666550073453723,
"loss": 0.7638,
"step": 613500
},
{
"epoch": 18.37,
"learning_rate": 0.00011651559517503971,
"loss": 0.7683,
"step": 614000
},
{
"epoch": 18.39,
"learning_rate": 0.0001163656896155422,
"loss": 0.7636,
"step": 614500
},
{
"epoch": 18.4,
"learning_rate": 0.00011621578405604468,
"loss": 0.7691,
"step": 615000
},
{
"epoch": 18.42,
"learning_rate": 0.00011606587849654718,
"loss": 0.7897,
"step": 615500
},
{
"epoch": 18.43,
"learning_rate": 0.00011591597293704966,
"loss": 0.7826,
"step": 616000
},
{
"epoch": 18.45,
"learning_rate": 0.00011576606737755212,
"loss": 0.7832,
"step": 616500
},
{
"epoch": 18.46,
"learning_rate": 0.00011561616181805461,
"loss": 0.7815,
"step": 617000
},
{
"epoch": 18.48,
"learning_rate": 0.0001154662562585571,
"loss": 0.7783,
"step": 617500
},
{
"epoch": 18.49,
"learning_rate": 0.00011531635069905957,
"loss": 0.7811,
"step": 618000
},
{
"epoch": 18.51,
"learning_rate": 0.00011516644513956207,
"loss": 0.7856,
"step": 618500
},
{
"epoch": 18.52,
"learning_rate": 0.00011501653958006455,
"loss": 0.7954,
"step": 619000
},
{
"epoch": 18.54,
"learning_rate": 0.00011486663402056704,
"loss": 0.7587,
"step": 619500
},
{
"epoch": 18.55,
"learning_rate": 0.00011471672846106952,
"loss": 0.7876,
"step": 620000
},
{
"epoch": 18.57,
"learning_rate": 0.000114566822901572,
"loss": 0.77,
"step": 620500
},
{
"epoch": 18.58,
"learning_rate": 0.00011441691734207449,
"loss": 0.7741,
"step": 621000
},
{
"epoch": 18.6,
"learning_rate": 0.00011426701178257697,
"loss": 0.7718,
"step": 621500
},
{
"epoch": 18.61,
"learning_rate": 0.00011411710622307946,
"loss": 0.7674,
"step": 622000
},
{
"epoch": 18.63,
"learning_rate": 0.00011396720066358193,
"loss": 0.7605,
"step": 622500
},
{
"epoch": 18.64,
"learning_rate": 0.00011381729510408441,
"loss": 0.7886,
"step": 623000
},
{
"epoch": 18.66,
"learning_rate": 0.00011366738954458689,
"loss": 0.7866,
"step": 623500
},
{
"epoch": 18.67,
"learning_rate": 0.00011351748398508938,
"loss": 0.7777,
"step": 624000
},
{
"epoch": 18.69,
"learning_rate": 0.00011336757842559186,
"loss": 0.7976,
"step": 624500
},
{
"epoch": 18.7,
"learning_rate": 0.00011321767286609436,
"loss": 0.7877,
"step": 625000
},
{
"epoch": 18.72,
"learning_rate": 0.00011306776730659683,
"loss": 0.7687,
"step": 625500
},
{
"epoch": 18.73,
"learning_rate": 0.00011291786174709931,
"loss": 0.767,
"step": 626000
},
{
"epoch": 18.75,
"learning_rate": 0.00011276795618760181,
"loss": 0.7761,
"step": 626500
},
{
"epoch": 18.76,
"learning_rate": 0.00011261805062810429,
"loss": 0.7931,
"step": 627000
},
{
"epoch": 18.78,
"learning_rate": 0.00011246814506860678,
"loss": 0.7701,
"step": 627500
},
{
"epoch": 18.79,
"learning_rate": 0.00011231823950910926,
"loss": 0.7706,
"step": 628000
},
{
"epoch": 18.81,
"learning_rate": 0.00011216833394961173,
"loss": 0.78,
"step": 628500
},
{
"epoch": 18.82,
"learning_rate": 0.00011201842839011422,
"loss": 0.7881,
"step": 629000
},
{
"epoch": 18.84,
"learning_rate": 0.0001118685228306167,
"loss": 0.7588,
"step": 629500
},
{
"epoch": 18.85,
"learning_rate": 0.00011171861727111918,
"loss": 0.7747,
"step": 630000
},
{
"epoch": 18.87,
"learning_rate": 0.00011156871171162167,
"loss": 0.787,
"step": 630500
},
{
"epoch": 18.88,
"learning_rate": 0.00011141880615212415,
"loss": 0.7741,
"step": 631000
},
{
"epoch": 18.9,
"learning_rate": 0.00011126890059262663,
"loss": 0.7881,
"step": 631500
},
{
"epoch": 18.91,
"learning_rate": 0.00011111899503312912,
"loss": 0.7621,
"step": 632000
},
{
"epoch": 18.93,
"learning_rate": 0.0001109690894736316,
"loss": 0.7885,
"step": 632500
},
{
"epoch": 18.94,
"learning_rate": 0.0001108191839141341,
"loss": 0.7976,
"step": 633000
},
{
"epoch": 18.96,
"learning_rate": 0.00011066927835463658,
"loss": 0.7878,
"step": 633500
},
{
"epoch": 18.97,
"learning_rate": 0.00011051937279513905,
"loss": 0.7923,
"step": 634000
},
{
"epoch": 18.99,
"learning_rate": 0.00011036946723564153,
"loss": 0.7846,
"step": 634500
},
{
"epoch": 19.0,
"learning_rate": 0.00011021956167614401,
"loss": 0.7589,
"step": 635000
},
{
"epoch": 19.01,
"learning_rate": 0.0001100696561166465,
"loss": 0.7466,
"step": 635500
},
{
"epoch": 19.03,
"learning_rate": 0.00010991975055714899,
"loss": 0.7361,
"step": 636000
},
{
"epoch": 19.04,
"learning_rate": 0.00010976984499765147,
"loss": 0.7505,
"step": 636500
},
{
"epoch": 19.06,
"learning_rate": 0.00010961993943815396,
"loss": 0.7444,
"step": 637000
},
{
"epoch": 19.07,
"learning_rate": 0.00010947003387865644,
"loss": 0.754,
"step": 637500
},
{
"epoch": 19.09,
"learning_rate": 0.00010932012831915892,
"loss": 0.7534,
"step": 638000
},
{
"epoch": 19.1,
"learning_rate": 0.00010917022275966141,
"loss": 0.7478,
"step": 638500
},
{
"epoch": 19.12,
"learning_rate": 0.00010902031720016389,
"loss": 0.7344,
"step": 639000
},
{
"epoch": 19.13,
"learning_rate": 0.00010887041164066638,
"loss": 0.7494,
"step": 639500
},
{
"epoch": 19.15,
"learning_rate": 0.00010872050608116886,
"loss": 0.7639,
"step": 640000
},
{
"epoch": 19.16,
"learning_rate": 0.00010857060052167133,
"loss": 0.7593,
"step": 640500
},
{
"epoch": 19.18,
"learning_rate": 0.00010842069496217381,
"loss": 0.747,
"step": 641000
},
{
"epoch": 19.19,
"learning_rate": 0.0001082707894026763,
"loss": 0.7525,
"step": 641500
},
{
"epoch": 19.21,
"learning_rate": 0.00010812088384317878,
"loss": 0.7525,
"step": 642000
},
{
"epoch": 19.22,
"learning_rate": 0.00010797097828368127,
"loss": 0.7513,
"step": 642500
},
{
"epoch": 19.24,
"learning_rate": 0.00010782107272418375,
"loss": 0.7505,
"step": 643000
},
{
"epoch": 19.25,
"learning_rate": 0.00010767116716468623,
"loss": 0.7652,
"step": 643500
},
{
"epoch": 19.27,
"learning_rate": 0.00010752126160518873,
"loss": 0.7597,
"step": 644000
},
{
"epoch": 19.28,
"learning_rate": 0.0001073713560456912,
"loss": 0.7486,
"step": 644500
},
{
"epoch": 19.3,
"learning_rate": 0.0001072214504861937,
"loss": 0.7477,
"step": 645000
},
{
"epoch": 19.31,
"learning_rate": 0.00010707154492669618,
"loss": 0.7661,
"step": 645500
},
{
"epoch": 19.33,
"learning_rate": 0.00010692163936719866,
"loss": 0.7683,
"step": 646000
},
{
"epoch": 19.34,
"learning_rate": 0.00010677173380770114,
"loss": 0.7677,
"step": 646500
},
{
"epoch": 19.36,
"learning_rate": 0.00010662182824820362,
"loss": 0.7516,
"step": 647000
},
{
"epoch": 19.37,
"learning_rate": 0.0001064719226887061,
"loss": 0.7573,
"step": 647500
},
{
"epoch": 19.39,
"learning_rate": 0.00010632201712920859,
"loss": 0.7576,
"step": 648000
},
{
"epoch": 19.4,
"learning_rate": 0.00010617211156971107,
"loss": 0.7436,
"step": 648500
},
{
"epoch": 19.42,
"learning_rate": 0.00010602220601021355,
"loss": 0.7434,
"step": 649000
},
{
"epoch": 19.43,
"learning_rate": 0.00010587230045071604,
"loss": 0.7356,
"step": 649500
},
{
"epoch": 19.45,
"learning_rate": 0.00010572239489121852,
"loss": 0.7596,
"step": 650000
},
{
"epoch": 19.46,
"learning_rate": 0.00010557248933172102,
"loss": 0.7531,
"step": 650500
},
{
"epoch": 19.48,
"learning_rate": 0.0001054225837722235,
"loss": 0.7313,
"step": 651000
},
{
"epoch": 19.49,
"learning_rate": 0.00010527267821272597,
"loss": 0.7474,
"step": 651500
},
{
"epoch": 19.51,
"learning_rate": 0.00010512277265322845,
"loss": 0.7477,
"step": 652000
},
{
"epoch": 19.52,
"learning_rate": 0.00010497286709373093,
"loss": 0.7572,
"step": 652500
},
{
"epoch": 19.54,
"learning_rate": 0.00010482296153423341,
"loss": 0.7561,
"step": 653000
},
{
"epoch": 19.55,
"learning_rate": 0.0001046730559747359,
"loss": 0.7451,
"step": 653500
},
{
"epoch": 19.57,
"learning_rate": 0.00010452315041523839,
"loss": 0.7607,
"step": 654000
},
{
"epoch": 19.58,
"learning_rate": 0.00010437324485574088,
"loss": 0.7335,
"step": 654500
},
{
"epoch": 19.6,
"learning_rate": 0.00010422333929624336,
"loss": 0.7619,
"step": 655000
},
{
"epoch": 19.61,
"learning_rate": 0.00010407343373674584,
"loss": 0.763,
"step": 655500
},
{
"epoch": 19.63,
"learning_rate": 0.00010392352817724833,
"loss": 0.7391,
"step": 656000
},
{
"epoch": 19.64,
"learning_rate": 0.00010377362261775081,
"loss": 0.7686,
"step": 656500
},
{
"epoch": 19.66,
"learning_rate": 0.0001036237170582533,
"loss": 0.7568,
"step": 657000
},
{
"epoch": 19.67,
"learning_rate": 0.00010347381149875578,
"loss": 0.7549,
"step": 657500
},
{
"epoch": 19.69,
"learning_rate": 0.00010332390593925825,
"loss": 0.7675,
"step": 658000
},
{
"epoch": 19.7,
"learning_rate": 0.00010317400037976073,
"loss": 0.7607,
"step": 658500
},
{
"epoch": 19.72,
"learning_rate": 0.00010302409482026322,
"loss": 0.7639,
"step": 659000
},
{
"epoch": 19.73,
"learning_rate": 0.0001028741892607657,
"loss": 0.7437,
"step": 659500
},
{
"epoch": 19.75,
"learning_rate": 0.0001027242837012682,
"loss": 0.7487,
"step": 660000
},
{
"epoch": 19.76,
"learning_rate": 0.00010257437814177067,
"loss": 0.7677,
"step": 660500
},
{
"epoch": 19.78,
"learning_rate": 0.00010242447258227315,
"loss": 0.7553,
"step": 661000
},
{
"epoch": 19.79,
"learning_rate": 0.00010227456702277565,
"loss": 0.7717,
"step": 661500
},
{
"epoch": 19.81,
"learning_rate": 0.00010212466146327813,
"loss": 0.7373,
"step": 662000
},
{
"epoch": 19.82,
"learning_rate": 0.00010197475590378062,
"loss": 0.7541,
"step": 662500
},
{
"epoch": 19.84,
"learning_rate": 0.0001018248503442831,
"loss": 0.7634,
"step": 663000
},
{
"epoch": 19.85,
"learning_rate": 0.00010167494478478558,
"loss": 0.7672,
"step": 663500
},
{
"epoch": 19.87,
"learning_rate": 0.00010152503922528806,
"loss": 0.7467,
"step": 664000
},
{
"epoch": 19.88,
"learning_rate": 0.00010137513366579054,
"loss": 0.7622,
"step": 664500
},
{
"epoch": 19.9,
"learning_rate": 0.00010122522810629302,
"loss": 0.7727,
"step": 665000
},
{
"epoch": 19.91,
"learning_rate": 0.00010107532254679551,
"loss": 0.7628,
"step": 665500
},
{
"epoch": 19.93,
"learning_rate": 0.00010092541698729799,
"loss": 0.7881,
"step": 666000
},
{
"epoch": 19.94,
"learning_rate": 0.00010077551142780048,
"loss": 0.7446,
"step": 666500
},
{
"epoch": 19.96,
"learning_rate": 0.00010062560586830296,
"loss": 0.7576,
"step": 667000
},
{
"epoch": 19.97,
"learning_rate": 0.00010047570030880544,
"loss": 0.7571,
"step": 667500
},
{
"epoch": 19.99,
"learning_rate": 0.00010032579474930793,
"loss": 0.7719,
"step": 668000
},
{
"epoch": 20.0,
"learning_rate": 0.00010017588918981041,
"loss": 0.7767,
"step": 668500
},
{
"epoch": 20.02,
"learning_rate": 0.0001000259836303129,
"loss": 0.7249,
"step": 669000
},
{
"epoch": 20.03,
"learning_rate": 9.987607807081539e-05,
"loss": 0.7378,
"step": 669500
},
{
"epoch": 20.05,
"learning_rate": 9.972617251131785e-05,
"loss": 0.73,
"step": 670000
},
{
"epoch": 20.06,
"learning_rate": 9.957626695182033e-05,
"loss": 0.733,
"step": 670500
},
{
"epoch": 20.08,
"learning_rate": 9.942636139232283e-05,
"loss": 0.7267,
"step": 671000
},
{
"epoch": 20.09,
"learning_rate": 9.92764558328253e-05,
"loss": 0.7461,
"step": 671500
},
{
"epoch": 20.11,
"learning_rate": 9.91265502733278e-05,
"loss": 0.7247,
"step": 672000
},
{
"epoch": 20.12,
"learning_rate": 9.897664471383028e-05,
"loss": 0.7317,
"step": 672500
},
{
"epoch": 20.14,
"learning_rate": 9.882673915433276e-05,
"loss": 0.738,
"step": 673000
},
{
"epoch": 20.15,
"learning_rate": 9.867683359483525e-05,
"loss": 0.7435,
"step": 673500
},
{
"epoch": 20.17,
"learning_rate": 9.852692803533773e-05,
"loss": 0.7352,
"step": 674000
},
{
"epoch": 20.18,
"learning_rate": 9.837702247584022e-05,
"loss": 0.7399,
"step": 674500
},
{
"epoch": 20.2,
"learning_rate": 9.82271169163427e-05,
"loss": 0.7315,
"step": 675000
},
{
"epoch": 20.21,
"learning_rate": 9.807721135684518e-05,
"loss": 0.717,
"step": 675500
},
{
"epoch": 20.23,
"learning_rate": 9.792730579734765e-05,
"loss": 0.7247,
"step": 676000
},
{
"epoch": 20.24,
"learning_rate": 9.777740023785014e-05,
"loss": 0.7433,
"step": 676500
},
{
"epoch": 20.26,
"learning_rate": 9.762749467835262e-05,
"loss": 0.7346,
"step": 677000
},
{
"epoch": 20.27,
"learning_rate": 9.747758911885511e-05,
"loss": 0.7363,
"step": 677500
},
{
"epoch": 20.29,
"learning_rate": 9.73276835593576e-05,
"loss": 0.7297,
"step": 678000
},
{
"epoch": 20.3,
"learning_rate": 9.717777799986007e-05,
"loss": 0.7156,
"step": 678500
},
{
"epoch": 20.32,
"learning_rate": 9.702787244036257e-05,
"loss": 0.7128,
"step": 679000
},
{
"epoch": 20.33,
"learning_rate": 9.687796688086505e-05,
"loss": 0.7357,
"step": 679500
},
{
"epoch": 20.35,
"learning_rate": 9.672806132136754e-05,
"loss": 0.7323,
"step": 680000
},
{
"epoch": 20.36,
"learning_rate": 9.657815576187002e-05,
"loss": 0.741,
"step": 680500
},
{
"epoch": 20.38,
"learning_rate": 9.64282502023725e-05,
"loss": 0.7149,
"step": 681000
},
{
"epoch": 20.39,
"learning_rate": 9.627834464287499e-05,
"loss": 0.7354,
"step": 681500
},
{
"epoch": 20.41,
"learning_rate": 9.612843908337746e-05,
"loss": 0.7245,
"step": 682000
},
{
"epoch": 20.42,
"learning_rate": 9.597853352387994e-05,
"loss": 0.7278,
"step": 682500
},
{
"epoch": 20.44,
"learning_rate": 9.582862796438243e-05,
"loss": 0.7288,
"step": 683000
},
{
"epoch": 20.45,
"learning_rate": 9.567872240488491e-05,
"loss": 0.7536,
"step": 683500
},
{
"epoch": 20.47,
"learning_rate": 9.55288168453874e-05,
"loss": 0.751,
"step": 684000
},
{
"epoch": 20.48,
"learning_rate": 9.537891128588988e-05,
"loss": 0.7343,
"step": 684500
},
{
"epoch": 20.5,
"learning_rate": 9.522900572639236e-05,
"loss": 0.7224,
"step": 685000
},
{
"epoch": 20.51,
"learning_rate": 9.507910016689485e-05,
"loss": 0.7421,
"step": 685500
},
{
"epoch": 20.53,
"learning_rate": 9.492919460739733e-05,
"loss": 0.7356,
"step": 686000
},
{
"epoch": 20.54,
"learning_rate": 9.477928904789981e-05,
"loss": 0.7373,
"step": 686500
},
{
"epoch": 20.56,
"learning_rate": 9.462938348840231e-05,
"loss": 0.7455,
"step": 687000
},
{
"epoch": 20.57,
"learning_rate": 9.447947792890479e-05,
"loss": 0.7364,
"step": 687500
},
{
"epoch": 20.59,
"learning_rate": 9.432957236940725e-05,
"loss": 0.7259,
"step": 688000
},
{
"epoch": 20.6,
"learning_rate": 9.417966680990975e-05,
"loss": 0.7167,
"step": 688500
},
{
"epoch": 20.62,
"learning_rate": 9.402976125041223e-05,
"loss": 0.7317,
"step": 689000
},
{
"epoch": 20.63,
"learning_rate": 9.387985569091472e-05,
"loss": 0.7246,
"step": 689500
},
{
"epoch": 20.65,
"learning_rate": 9.37299501314172e-05,
"loss": 0.7543,
"step": 690000
},
{
"epoch": 20.66,
"learning_rate": 9.358004457191968e-05,
"loss": 0.7507,
"step": 690500
},
{
"epoch": 20.68,
"learning_rate": 9.343013901242217e-05,
"loss": 0.7432,
"step": 691000
},
{
"epoch": 20.69,
"learning_rate": 9.328023345292465e-05,
"loss": 0.7396,
"step": 691500
},
{
"epoch": 20.71,
"learning_rate": 9.313032789342714e-05,
"loss": 0.7179,
"step": 692000
},
{
"epoch": 20.72,
"learning_rate": 9.298042233392962e-05,
"loss": 0.7368,
"step": 692500
},
{
"epoch": 20.74,
"learning_rate": 9.28305167744321e-05,
"loss": 0.7348,
"step": 693000
},
{
"epoch": 20.75,
"learning_rate": 9.26806112149346e-05,
"loss": 0.7389,
"step": 693500
},
{
"epoch": 20.77,
"learning_rate": 9.253070565543706e-05,
"loss": 0.7338,
"step": 694000
},
{
"epoch": 20.78,
"learning_rate": 9.238080009593954e-05,
"loss": 0.7407,
"step": 694500
},
{
"epoch": 20.8,
"learning_rate": 9.223089453644203e-05,
"loss": 0.724,
"step": 695000
},
{
"epoch": 20.81,
"learning_rate": 9.208098897694451e-05,
"loss": 0.7441,
"step": 695500
},
{
"epoch": 20.83,
"learning_rate": 9.193108341744699e-05,
"loss": 0.73,
"step": 696000
},
{
"epoch": 20.84,
"learning_rate": 9.178117785794949e-05,
"loss": 0.7175,
"step": 696500
},
{
"epoch": 20.86,
"learning_rate": 9.163127229845197e-05,
"loss": 0.7557,
"step": 697000
},
{
"epoch": 20.87,
"learning_rate": 9.148136673895446e-05,
"loss": 0.7302,
"step": 697500
},
{
"epoch": 20.89,
"learning_rate": 9.133146117945694e-05,
"loss": 0.7429,
"step": 698000
},
{
"epoch": 20.9,
"learning_rate": 9.118155561995942e-05,
"loss": 0.7428,
"step": 698500
},
{
"epoch": 20.91,
"learning_rate": 9.103165006046191e-05,
"loss": 0.7224,
"step": 699000
},
{
"epoch": 20.93,
"learning_rate": 9.088174450096438e-05,
"loss": 0.7218,
"step": 699500
},
{
"epoch": 20.94,
"learning_rate": 9.073183894146686e-05,
"loss": 0.7353,
"step": 700000
},
{
"epoch": 20.96,
"learning_rate": 9.058193338196935e-05,
"loss": 0.7264,
"step": 700500
},
{
"epoch": 20.97,
"learning_rate": 9.043202782247183e-05,
"loss": 0.7373,
"step": 701000
},
{
"epoch": 20.99,
"learning_rate": 9.028212226297432e-05,
"loss": 0.7362,
"step": 701500
},
{
"epoch": 21.0,
"learning_rate": 9.01322167034768e-05,
"loss": 0.7394,
"step": 702000
},
{
"epoch": 21.02,
"learning_rate": 8.998231114397928e-05,
"loss": 0.6919,
"step": 702500
},
{
"epoch": 21.03,
"learning_rate": 8.983240558448177e-05,
"loss": 0.7024,
"step": 703000
},
{
"epoch": 21.05,
"learning_rate": 8.968250002498425e-05,
"loss": 0.7343,
"step": 703500
},
{
"epoch": 21.06,
"learning_rate": 8.953259446548673e-05,
"loss": 0.7225,
"step": 704000
},
{
"epoch": 21.08,
"learning_rate": 8.938268890598923e-05,
"loss": 0.7039,
"step": 704500
},
{
"epoch": 21.09,
"learning_rate": 8.92327833464917e-05,
"loss": 0.7019,
"step": 705000
},
{
"epoch": 21.11,
"learning_rate": 8.908287778699417e-05,
"loss": 0.6986,
"step": 705500
},
{
"epoch": 21.12,
"learning_rate": 8.893297222749667e-05,
"loss": 0.7156,
"step": 706000
},
{
"epoch": 21.14,
"learning_rate": 8.878306666799914e-05,
"loss": 0.7253,
"step": 706500
},
{
"epoch": 21.15,
"learning_rate": 8.863316110850164e-05,
"loss": 0.7047,
"step": 707000
},
{
"epoch": 21.17,
"learning_rate": 8.848325554900412e-05,
"loss": 0.7133,
"step": 707500
},
{
"epoch": 21.18,
"learning_rate": 8.83333499895066e-05,
"loss": 0.7129,
"step": 708000
},
{
"epoch": 21.2,
"learning_rate": 8.818344443000909e-05,
"loss": 0.7113,
"step": 708500
},
{
"epoch": 21.21,
"learning_rate": 8.803353887051157e-05,
"loss": 0.7238,
"step": 709000
},
{
"epoch": 21.23,
"learning_rate": 8.788363331101406e-05,
"loss": 0.7064,
"step": 709500
},
{
"epoch": 21.24,
"learning_rate": 8.773372775151654e-05,
"loss": 0.7324,
"step": 710000
},
{
"epoch": 21.26,
"learning_rate": 8.758382219201902e-05,
"loss": 0.6991,
"step": 710500
},
{
"epoch": 21.27,
"learning_rate": 8.743391663252151e-05,
"loss": 0.701,
"step": 711000
},
{
"epoch": 21.29,
"learning_rate": 8.728401107302398e-05,
"loss": 0.705,
"step": 711500
},
{
"epoch": 21.3,
"learning_rate": 8.713410551352646e-05,
"loss": 0.7189,
"step": 712000
},
{
"epoch": 21.32,
"learning_rate": 8.698419995402895e-05,
"loss": 0.721,
"step": 712500
},
{
"epoch": 21.33,
"learning_rate": 8.683429439453143e-05,
"loss": 0.6843,
"step": 713000
},
{
"epoch": 21.35,
"learning_rate": 8.668438883503391e-05,
"loss": 0.7105,
"step": 713500
},
{
"epoch": 21.36,
"learning_rate": 8.65344832755364e-05,
"loss": 0.7219,
"step": 714000
},
{
"epoch": 21.38,
"learning_rate": 8.638457771603889e-05,
"loss": 0.7165,
"step": 714500
},
{
"epoch": 21.39,
"learning_rate": 8.623467215654138e-05,
"loss": 0.6908,
"step": 715000
},
{
"epoch": 21.41,
"learning_rate": 8.608476659704386e-05,
"loss": 0.7187,
"step": 715500
},
{
"epoch": 21.42,
"learning_rate": 8.593486103754634e-05,
"loss": 0.7191,
"step": 716000
},
{
"epoch": 21.44,
"learning_rate": 8.578495547804883e-05,
"loss": 0.718,
"step": 716500
},
{
"epoch": 21.45,
"learning_rate": 8.563504991855131e-05,
"loss": 0.7116,
"step": 717000
},
{
"epoch": 21.47,
"learning_rate": 8.548514435905378e-05,
"loss": 0.7347,
"step": 717500
},
{
"epoch": 21.48,
"learning_rate": 8.533523879955627e-05,
"loss": 0.715,
"step": 718000
},
{
"epoch": 21.5,
"learning_rate": 8.518533324005875e-05,
"loss": 0.7124,
"step": 718500
},
{
"epoch": 21.51,
"learning_rate": 8.503542768056124e-05,
"loss": 0.7114,
"step": 719000
},
{
"epoch": 21.53,
"learning_rate": 8.488552212106372e-05,
"loss": 0.7477,
"step": 719500
},
{
"epoch": 21.54,
"learning_rate": 8.47356165615662e-05,
"loss": 0.7172,
"step": 720000
},
{
"epoch": 21.56,
"learning_rate": 8.45857110020687e-05,
"loss": 0.7105,
"step": 720500
},
{
"epoch": 21.57,
"learning_rate": 8.443580544257117e-05,
"loss": 0.7283,
"step": 721000
},
{
"epoch": 21.59,
"learning_rate": 8.428589988307365e-05,
"loss": 0.721,
"step": 721500
},
{
"epoch": 21.6,
"learning_rate": 8.413599432357615e-05,
"loss": 0.7017,
"step": 722000
},
{
"epoch": 21.62,
"learning_rate": 8.398608876407863e-05,
"loss": 0.7044,
"step": 722500
},
{
"epoch": 21.63,
"learning_rate": 8.383618320458112e-05,
"loss": 0.7124,
"step": 723000
},
{
"epoch": 21.65,
"learning_rate": 8.368627764508358e-05,
"loss": 0.7166,
"step": 723500
},
{
"epoch": 21.66,
"learning_rate": 8.353637208558606e-05,
"loss": 0.7204,
"step": 724000
},
{
"epoch": 21.68,
"learning_rate": 8.338646652608856e-05,
"loss": 0.7228,
"step": 724500
},
{
"epoch": 21.69,
"learning_rate": 8.323656096659104e-05,
"loss": 0.7058,
"step": 725000
},
{
"epoch": 21.71,
"learning_rate": 8.308665540709352e-05,
"loss": 0.6931,
"step": 725500
},
{
"epoch": 21.72,
"learning_rate": 8.293674984759601e-05,
"loss": 0.7113,
"step": 726000
},
{
"epoch": 21.74,
"learning_rate": 8.278684428809849e-05,
"loss": 0.6993,
"step": 726500
},
{
"epoch": 21.75,
"learning_rate": 8.263693872860098e-05,
"loss": 0.7311,
"step": 727000
},
{
"epoch": 21.77,
"learning_rate": 8.248703316910346e-05,
"loss": 0.7121,
"step": 727500
},
{
"epoch": 21.78,
"learning_rate": 8.233712760960594e-05,
"loss": 0.718,
"step": 728000
},
{
"epoch": 21.8,
"learning_rate": 8.218722205010843e-05,
"loss": 0.7217,
"step": 728500
},
{
"epoch": 21.81,
"learning_rate": 8.203731649061091e-05,
"loss": 0.7172,
"step": 729000
},
{
"epoch": 21.83,
"learning_rate": 8.188741093111338e-05,
"loss": 0.7158,
"step": 729500
},
{
"epoch": 21.84,
"learning_rate": 8.173750537161587e-05,
"loss": 0.724,
"step": 730000
},
{
"epoch": 21.86,
"learning_rate": 8.158759981211835e-05,
"loss": 0.7099,
"step": 730500
},
{
"epoch": 21.87,
"learning_rate": 8.143769425262083e-05,
"loss": 0.6909,
"step": 731000
},
{
"epoch": 21.89,
"learning_rate": 8.128778869312333e-05,
"loss": 0.7204,
"step": 731500
},
{
"epoch": 21.9,
"learning_rate": 8.11378831336258e-05,
"loss": 0.7063,
"step": 732000
},
{
"epoch": 21.92,
"learning_rate": 8.09879775741283e-05,
"loss": 0.7079,
"step": 732500
},
{
"epoch": 21.93,
"learning_rate": 8.083807201463078e-05,
"loss": 0.7238,
"step": 733000
},
{
"epoch": 21.95,
"learning_rate": 8.068816645513326e-05,
"loss": 0.7334,
"step": 733500
},
{
"epoch": 21.96,
"learning_rate": 8.053826089563575e-05,
"loss": 0.7228,
"step": 734000
},
{
"epoch": 21.98,
"learning_rate": 8.038835533613823e-05,
"loss": 0.7316,
"step": 734500
},
{
"epoch": 21.99,
"learning_rate": 8.023844977664072e-05,
"loss": 0.7232,
"step": 735000
},
{
"epoch": 22.01,
"learning_rate": 8.008854421714319e-05,
"loss": 0.7005,
"step": 735500
},
{
"epoch": 22.02,
"learning_rate": 7.993863865764567e-05,
"loss": 0.6997,
"step": 736000
},
{
"epoch": 22.04,
"learning_rate": 7.978873309814816e-05,
"loss": 0.7029,
"step": 736500
},
{
"epoch": 22.05,
"learning_rate": 7.963882753865064e-05,
"loss": 0.7038,
"step": 737000
},
{
"epoch": 22.07,
"learning_rate": 7.948892197915312e-05,
"loss": 0.6937,
"step": 737500
},
{
"epoch": 22.08,
"learning_rate": 7.933901641965561e-05,
"loss": 0.6759,
"step": 738000
},
{
"epoch": 22.1,
"learning_rate": 7.91891108601581e-05,
"loss": 0.6865,
"step": 738500
},
{
"epoch": 22.11,
"learning_rate": 7.903920530066059e-05,
"loss": 0.6777,
"step": 739000
},
{
"epoch": 22.13,
"learning_rate": 7.888929974116307e-05,
"loss": 0.7043,
"step": 739500
},
{
"epoch": 22.14,
"learning_rate": 7.873939418166555e-05,
"loss": 0.699,
"step": 740000
},
{
"epoch": 22.16,
"learning_rate": 7.858948862216804e-05,
"loss": 0.6759,
"step": 740500
},
{
"epoch": 22.17,
"learning_rate": 7.84395830626705e-05,
"loss": 0.7029,
"step": 741000
},
{
"epoch": 22.19,
"learning_rate": 7.828967750317298e-05,
"loss": 0.696,
"step": 741500
},
{
"epoch": 22.2,
"learning_rate": 7.813977194367548e-05,
"loss": 0.7077,
"step": 742000
},
{
"epoch": 22.22,
"learning_rate": 7.798986638417796e-05,
"loss": 0.7031,
"step": 742500
},
{
"epoch": 22.23,
"learning_rate": 7.783996082468044e-05,
"loss": 0.6837,
"step": 743000
},
{
"epoch": 22.25,
"learning_rate": 7.769005526518293e-05,
"loss": 0.6885,
"step": 743500
},
{
"epoch": 22.26,
"learning_rate": 7.754014970568541e-05,
"loss": 0.6833,
"step": 744000
},
{
"epoch": 22.28,
"learning_rate": 7.73902441461879e-05,
"loss": 0.6837,
"step": 744500
},
{
"epoch": 22.29,
"learning_rate": 7.724033858669038e-05,
"loss": 0.6739,
"step": 745000
},
{
"epoch": 22.31,
"learning_rate": 7.709043302719286e-05,
"loss": 0.6823,
"step": 745500
},
{
"epoch": 22.32,
"learning_rate": 7.694052746769535e-05,
"loss": 0.6946,
"step": 746000
},
{
"epoch": 22.34,
"learning_rate": 7.679062190819783e-05,
"loss": 0.6924,
"step": 746500
},
{
"epoch": 22.35,
"learning_rate": 7.66407163487003e-05,
"loss": 0.6822,
"step": 747000
},
{
"epoch": 22.37,
"learning_rate": 7.649081078920279e-05,
"loss": 0.7175,
"step": 747500
},
{
"epoch": 22.38,
"learning_rate": 7.634090522970527e-05,
"loss": 0.699,
"step": 748000
},
{
"epoch": 22.4,
"learning_rate": 7.619099967020775e-05,
"loss": 0.7055,
"step": 748500
},
{
"epoch": 22.41,
"learning_rate": 7.604109411071025e-05,
"loss": 0.6743,
"step": 749000
},
{
"epoch": 22.43,
"learning_rate": 7.589118855121272e-05,
"loss": 0.6927,
"step": 749500
},
{
"epoch": 22.44,
"learning_rate": 7.574128299171522e-05,
"loss": 0.6966,
"step": 750000
},
{
"epoch": 22.46,
"learning_rate": 7.55913774322177e-05,
"loss": 0.6933,
"step": 750500
},
{
"epoch": 22.47,
"learning_rate": 7.544147187272018e-05,
"loss": 0.6986,
"step": 751000
},
{
"epoch": 22.49,
"learning_rate": 7.529156631322267e-05,
"loss": 0.7019,
"step": 751500
},
{
"epoch": 22.5,
"learning_rate": 7.514166075372515e-05,
"loss": 0.6708,
"step": 752000
},
{
"epoch": 22.52,
"learning_rate": 7.499175519422763e-05,
"loss": 0.6831,
"step": 752500
},
{
"epoch": 22.53,
"learning_rate": 7.484184963473011e-05,
"loss": 0.7063,
"step": 753000
},
{
"epoch": 22.55,
"learning_rate": 7.46919440752326e-05,
"loss": 0.7093,
"step": 753500
},
{
"epoch": 22.56,
"learning_rate": 7.454203851573508e-05,
"loss": 0.7024,
"step": 754000
},
{
"epoch": 22.58,
"learning_rate": 7.439213295623756e-05,
"loss": 0.6713,
"step": 754500
},
{
"epoch": 22.59,
"learning_rate": 7.424222739674004e-05,
"loss": 0.6837,
"step": 755000
},
{
"epoch": 22.61,
"learning_rate": 7.409232183724253e-05,
"loss": 0.6941,
"step": 755500
},
{
"epoch": 22.62,
"learning_rate": 7.394241627774501e-05,
"loss": 0.6916,
"step": 756000
},
{
"epoch": 22.64,
"learning_rate": 7.37925107182475e-05,
"loss": 0.6994,
"step": 756500
},
{
"epoch": 22.65,
"learning_rate": 7.364260515874999e-05,
"loss": 0.7029,
"step": 757000
},
{
"epoch": 22.67,
"learning_rate": 7.349269959925247e-05,
"loss": 0.6912,
"step": 757500
},
{
"epoch": 22.68,
"learning_rate": 7.334279403975494e-05,
"loss": 0.701,
"step": 758000
},
{
"epoch": 22.7,
"learning_rate": 7.319288848025742e-05,
"loss": 0.6997,
"step": 758500
},
{
"epoch": 22.71,
"learning_rate": 7.304298292075992e-05,
"loss": 0.7171,
"step": 759000
},
{
"epoch": 22.73,
"learning_rate": 7.28930773612624e-05,
"loss": 0.6932,
"step": 759500
},
{
"epoch": 22.74,
"learning_rate": 7.274317180176489e-05,
"loss": 0.6957,
"step": 760000
},
{
"epoch": 22.76,
"learning_rate": 7.259326624226736e-05,
"loss": 0.6995,
"step": 760500
},
{
"epoch": 22.77,
"learning_rate": 7.244336068276985e-05,
"loss": 0.7083,
"step": 761000
},
{
"epoch": 22.79,
"learning_rate": 7.229345512327233e-05,
"loss": 0.6979,
"step": 761500
},
{
"epoch": 22.8,
"learning_rate": 7.214354956377482e-05,
"loss": 0.6856,
"step": 762000
},
{
"epoch": 22.81,
"learning_rate": 7.19936440042773e-05,
"loss": 0.6783,
"step": 762500
},
{
"epoch": 22.83,
"learning_rate": 7.184373844477978e-05,
"loss": 0.7028,
"step": 763000
},
{
"epoch": 22.84,
"learning_rate": 7.169383288528226e-05,
"loss": 0.6838,
"step": 763500
},
{
"epoch": 22.86,
"learning_rate": 7.154392732578475e-05,
"loss": 0.7014,
"step": 764000
},
{
"epoch": 22.87,
"learning_rate": 7.139402176628723e-05,
"loss": 0.7034,
"step": 764500
},
{
"epoch": 22.89,
"learning_rate": 7.124411620678971e-05,
"loss": 0.6974,
"step": 765000
},
{
"epoch": 22.9,
"learning_rate": 7.10942106472922e-05,
"loss": 0.7062,
"step": 765500
},
{
"epoch": 22.92,
"learning_rate": 7.094430508779469e-05,
"loss": 0.6995,
"step": 766000
},
{
"epoch": 22.93,
"learning_rate": 7.079439952829716e-05,
"loss": 0.6957,
"step": 766500
},
{
"epoch": 22.95,
"learning_rate": 7.064449396879964e-05,
"loss": 0.7066,
"step": 767000
},
{
"epoch": 22.96,
"learning_rate": 7.049458840930214e-05,
"loss": 0.6975,
"step": 767500
},
{
"epoch": 22.98,
"learning_rate": 7.034468284980462e-05,
"loss": 0.6834,
"step": 768000
},
{
"epoch": 22.99,
"learning_rate": 7.01947772903071e-05,
"loss": 0.7014,
"step": 768500
},
{
"epoch": 23.01,
"learning_rate": 7.004487173080959e-05,
"loss": 0.674,
"step": 769000
},
{
"epoch": 23.02,
"learning_rate": 6.989496617131207e-05,
"loss": 0.6863,
"step": 769500
},
{
"epoch": 23.04,
"learning_rate": 6.974506061181455e-05,
"loss": 0.6731,
"step": 770000
},
{
"epoch": 23.05,
"learning_rate": 6.959515505231703e-05,
"loss": 0.6623,
"step": 770500
},
{
"epoch": 23.07,
"learning_rate": 6.944524949281952e-05,
"loss": 0.6691,
"step": 771000
},
{
"epoch": 23.08,
"learning_rate": 6.9295343933322e-05,
"loss": 0.6734,
"step": 771500
},
{
"epoch": 23.1,
"learning_rate": 6.91454383738245e-05,
"loss": 0.6675,
"step": 772000
},
{
"epoch": 23.11,
"learning_rate": 6.899553281432696e-05,
"loss": 0.6671,
"step": 772500
},
{
"epoch": 23.13,
"learning_rate": 6.884562725482945e-05,
"loss": 0.6767,
"step": 773000
},
{
"epoch": 23.14,
"learning_rate": 6.869572169533193e-05,
"loss": 0.6718,
"step": 773500
},
{
"epoch": 23.16,
"learning_rate": 6.854581613583443e-05,
"loss": 0.6682,
"step": 774000
},
{
"epoch": 23.17,
"learning_rate": 6.83959105763369e-05,
"loss": 0.6724,
"step": 774500
},
{
"epoch": 23.19,
"learning_rate": 6.824600501683938e-05,
"loss": 0.6714,
"step": 775000
},
{
"epoch": 23.2,
"learning_rate": 6.809609945734186e-05,
"loss": 0.6848,
"step": 775500
},
{
"epoch": 23.22,
"learning_rate": 6.794619389784434e-05,
"loss": 0.6646,
"step": 776000
},
{
"epoch": 23.23,
"learning_rate": 6.779628833834684e-05,
"loss": 0.6828,
"step": 776500
},
{
"epoch": 23.25,
"learning_rate": 6.764638277884932e-05,
"loss": 0.6647,
"step": 777000
},
{
"epoch": 23.26,
"learning_rate": 6.749647721935181e-05,
"loss": 0.6773,
"step": 777500
},
{
"epoch": 23.28,
"learning_rate": 6.734657165985429e-05,
"loss": 0.6844,
"step": 778000
},
{
"epoch": 23.29,
"learning_rate": 6.719666610035677e-05,
"loss": 0.6624,
"step": 778500
},
{
"epoch": 23.31,
"learning_rate": 6.704676054085925e-05,
"loss": 0.6539,
"step": 779000
},
{
"epoch": 23.32,
"learning_rate": 6.689685498136174e-05,
"loss": 0.6695,
"step": 779500
},
{
"epoch": 23.34,
"learning_rate": 6.674694942186422e-05,
"loss": 0.6765,
"step": 780000
},
{
"epoch": 23.35,
"learning_rate": 6.65970438623667e-05,
"loss": 0.6733,
"step": 780500
},
{
"epoch": 23.37,
"learning_rate": 6.64471383028692e-05,
"loss": 0.6826,
"step": 781000
},
{
"epoch": 23.38,
"learning_rate": 6.629723274337167e-05,
"loss": 0.6798,
"step": 781500
},
{
"epoch": 23.4,
"learning_rate": 6.614732718387415e-05,
"loss": 0.6649,
"step": 782000
},
{
"epoch": 23.41,
"learning_rate": 6.599742162437663e-05,
"loss": 0.6744,
"step": 782500
},
{
"epoch": 23.43,
"learning_rate": 6.584751606487913e-05,
"loss": 0.6759,
"step": 783000
},
{
"epoch": 23.44,
"learning_rate": 6.56976105053816e-05,
"loss": 0.6707,
"step": 783500
},
{
"epoch": 23.46,
"learning_rate": 6.55477049458841e-05,
"loss": 0.676,
"step": 784000
},
{
"epoch": 23.47,
"learning_rate": 6.539779938638656e-05,
"loss": 0.6724,
"step": 784500
},
{
"epoch": 23.49,
"learning_rate": 6.524789382688906e-05,
"loss": 0.6804,
"step": 785000
},
{
"epoch": 23.5,
"learning_rate": 6.509798826739154e-05,
"loss": 0.6862,
"step": 785500
},
{
"epoch": 23.52,
"learning_rate": 6.494808270789402e-05,
"loss": 0.6835,
"step": 786000
},
{
"epoch": 23.53,
"learning_rate": 6.479817714839651e-05,
"loss": 0.6967,
"step": 786500
},
{
"epoch": 23.55,
"learning_rate": 6.464827158889899e-05,
"loss": 0.6882,
"step": 787000
},
{
"epoch": 23.56,
"learning_rate": 6.449836602940147e-05,
"loss": 0.685,
"step": 787500
},
{
"epoch": 23.58,
"learning_rate": 6.434846046990395e-05,
"loss": 0.6631,
"step": 788000
},
{
"epoch": 23.59,
"learning_rate": 6.419855491040644e-05,
"loss": 0.682,
"step": 788500
},
{
"epoch": 23.61,
"learning_rate": 6.404864935090892e-05,
"loss": 0.6891,
"step": 789000
},
{
"epoch": 23.62,
"learning_rate": 6.389874379141141e-05,
"loss": 0.6896,
"step": 789500
},
{
"epoch": 23.64,
"learning_rate": 6.374883823191388e-05,
"loss": 0.6729,
"step": 790000
},
{
"epoch": 23.65,
"learning_rate": 6.359893267241637e-05,
"loss": 0.6832,
"step": 790500
},
{
"epoch": 23.67,
"learning_rate": 6.344902711291885e-05,
"loss": 0.6603,
"step": 791000
},
{
"epoch": 23.68,
"learning_rate": 6.329912155342135e-05,
"loss": 0.6817,
"step": 791500
},
{
"epoch": 23.7,
"learning_rate": 6.314921599392382e-05,
"loss": 0.6767,
"step": 792000
},
{
"epoch": 23.71,
"learning_rate": 6.29993104344263e-05,
"loss": 0.6723,
"step": 792500
},
{
"epoch": 23.73,
"learning_rate": 6.284940487492878e-05,
"loss": 0.6728,
"step": 793000
},
{
"epoch": 23.74,
"learning_rate": 6.269949931543126e-05,
"loss": 0.6846,
"step": 793500
},
{
"epoch": 23.76,
"learning_rate": 6.254959375593376e-05,
"loss": 0.678,
"step": 794000
},
{
"epoch": 23.77,
"learning_rate": 6.239968819643624e-05,
"loss": 0.6821,
"step": 794500
},
{
"epoch": 23.79,
"learning_rate": 6.224978263693873e-05,
"loss": 0.6712,
"step": 795000
},
{
"epoch": 23.8,
"learning_rate": 6.209987707744121e-05,
"loss": 0.6722,
"step": 795500
},
{
"epoch": 23.82,
"learning_rate": 6.194997151794369e-05,
"loss": 0.673,
"step": 796000
},
{
"epoch": 23.83,
"learning_rate": 6.180006595844617e-05,
"loss": 0.6891,
"step": 796500
},
{
"epoch": 23.85,
"learning_rate": 6.165016039894866e-05,
"loss": 0.6592,
"step": 797000
},
{
"epoch": 23.86,
"learning_rate": 6.150025483945114e-05,
"loss": 0.6606,
"step": 797500
},
{
"epoch": 23.88,
"learning_rate": 6.135034927995362e-05,
"loss": 0.673,
"step": 798000
},
{
"epoch": 23.89,
"learning_rate": 6.120044372045611e-05,
"loss": 0.6724,
"step": 798500
},
{
"epoch": 23.91,
"learning_rate": 6.105053816095859e-05,
"loss": 0.6778,
"step": 799000
},
{
"epoch": 23.92,
"learning_rate": 6.090063260146107e-05,
"loss": 0.6786,
"step": 799500
},
{
"epoch": 23.94,
"learning_rate": 6.075072704196356e-05,
"loss": 0.6836,
"step": 800000
},
{
"epoch": 23.95,
"learning_rate": 6.060082148246604e-05,
"loss": 0.6673,
"step": 800500
},
{
"epoch": 23.97,
"learning_rate": 6.0450915922968525e-05,
"loss": 0.6786,
"step": 801000
},
{
"epoch": 23.98,
"learning_rate": 6.030101036347101e-05,
"loss": 0.6797,
"step": 801500
},
{
"epoch": 24.0,
"learning_rate": 6.015110480397349e-05,
"loss": 0.6699,
"step": 802000
},
{
"epoch": 24.01,
"learning_rate": 6.000119924447597e-05,
"loss": 0.6681,
"step": 802500
},
{
"epoch": 24.03,
"learning_rate": 5.9851293684978456e-05,
"loss": 0.6418,
"step": 803000
},
{
"epoch": 24.04,
"learning_rate": 5.970138812548094e-05,
"loss": 0.658,
"step": 803500
},
{
"epoch": 24.06,
"learning_rate": 5.955148256598343e-05,
"loss": 0.6584,
"step": 804000
},
{
"epoch": 24.07,
"learning_rate": 5.9401577006485915e-05,
"loss": 0.6565,
"step": 804500
},
{
"epoch": 24.09,
"learning_rate": 5.925167144698839e-05,
"loss": 0.6668,
"step": 805000
},
{
"epoch": 24.1,
"learning_rate": 5.9101765887490874e-05,
"loss": 0.6486,
"step": 805500
},
{
"epoch": 24.12,
"learning_rate": 5.895186032799336e-05,
"loss": 0.6773,
"step": 806000
},
{
"epoch": 24.13,
"learning_rate": 5.880195476849584e-05,
"loss": 0.6601,
"step": 806500
},
{
"epoch": 24.15,
"learning_rate": 5.8652049208998327e-05,
"loss": 0.6572,
"step": 807000
},
{
"epoch": 24.16,
"learning_rate": 5.850214364950081e-05,
"loss": 0.6722,
"step": 807500
},
{
"epoch": 24.18,
"learning_rate": 5.835223809000329e-05,
"loss": 0.6493,
"step": 808000
},
{
"epoch": 24.19,
"learning_rate": 5.820233253050577e-05,
"loss": 0.6439,
"step": 808500
},
{
"epoch": 24.21,
"learning_rate": 5.805242697100826e-05,
"loss": 0.6486,
"step": 809000
},
{
"epoch": 24.22,
"learning_rate": 5.7902521411510745e-05,
"loss": 0.6545,
"step": 809500
},
{
"epoch": 24.24,
"learning_rate": 5.775261585201323e-05,
"loss": 0.6602,
"step": 810000
},
{
"epoch": 24.25,
"learning_rate": 5.760271029251571e-05,
"loss": 0.6505,
"step": 810500
},
{
"epoch": 24.27,
"learning_rate": 5.745280473301819e-05,
"loss": 0.6624,
"step": 811000
},
{
"epoch": 24.28,
"learning_rate": 5.7302899173520676e-05,
"loss": 0.6454,
"step": 811500
},
{
"epoch": 24.3,
"learning_rate": 5.715299361402316e-05,
"loss": 0.6359,
"step": 812000
},
{
"epoch": 24.31,
"learning_rate": 5.700308805452564e-05,
"loss": 0.6581,
"step": 812500
},
{
"epoch": 24.33,
"learning_rate": 5.685318249502813e-05,
"loss": 0.6429,
"step": 813000
},
{
"epoch": 24.34,
"learning_rate": 5.6703276935530615e-05,
"loss": 0.6763,
"step": 813500
},
{
"epoch": 24.36,
"learning_rate": 5.655337137603309e-05,
"loss": 0.6614,
"step": 814000
},
{
"epoch": 24.37,
"learning_rate": 5.6403465816535574e-05,
"loss": 0.6569,
"step": 814500
},
{
"epoch": 24.39,
"learning_rate": 5.625356025703806e-05,
"loss": 0.6869,
"step": 815000
},
{
"epoch": 24.4,
"learning_rate": 5.610365469754055e-05,
"loss": 0.6609,
"step": 815500
},
{
"epoch": 24.42,
"learning_rate": 5.595374913804303e-05,
"loss": 0.6502,
"step": 816000
},
{
"epoch": 24.43,
"learning_rate": 5.580384357854551e-05,
"loss": 0.6613,
"step": 816500
},
{
"epoch": 24.45,
"learning_rate": 5.565393801904799e-05,
"loss": 0.6481,
"step": 817000
},
{
"epoch": 24.46,
"learning_rate": 5.550403245955048e-05,
"loss": 0.6377,
"step": 817500
},
{
"epoch": 24.48,
"learning_rate": 5.5354126900052965e-05,
"loss": 0.6694,
"step": 818000
},
{
"epoch": 24.49,
"learning_rate": 5.5204221340555444e-05,
"loss": 0.6472,
"step": 818500
},
{
"epoch": 24.51,
"learning_rate": 5.505431578105793e-05,
"loss": 0.6602,
"step": 819000
},
{
"epoch": 24.52,
"learning_rate": 5.490441022156042e-05,
"loss": 0.6638,
"step": 819500
},
{
"epoch": 24.54,
"learning_rate": 5.475450466206289e-05,
"loss": 0.6743,
"step": 820000
},
{
"epoch": 24.55,
"learning_rate": 5.4604599102565376e-05,
"loss": 0.6461,
"step": 820500
},
{
"epoch": 24.57,
"learning_rate": 5.445469354306786e-05,
"loss": 0.6516,
"step": 821000
},
{
"epoch": 24.58,
"learning_rate": 5.430478798357035e-05,
"loss": 0.6662,
"step": 821500
},
{
"epoch": 24.6,
"learning_rate": 5.4154882424072835e-05,
"loss": 0.655,
"step": 822000
},
{
"epoch": 24.61,
"learning_rate": 5.4004976864575314e-05,
"loss": 0.6693,
"step": 822500
},
{
"epoch": 24.63,
"learning_rate": 5.3855071305077794e-05,
"loss": 0.661,
"step": 823000
},
{
"epoch": 24.64,
"learning_rate": 5.370516574558028e-05,
"loss": 0.66,
"step": 823500
},
{
"epoch": 24.66,
"learning_rate": 5.355526018608276e-05,
"loss": 0.6533,
"step": 824000
},
{
"epoch": 24.67,
"learning_rate": 5.3405354626585246e-05,
"loss": 0.6472,
"step": 824500
},
{
"epoch": 24.69,
"learning_rate": 5.325544906708773e-05,
"loss": 0.6688,
"step": 825000
},
{
"epoch": 24.7,
"learning_rate": 5.310554350759022e-05,
"loss": 0.6508,
"step": 825500
},
{
"epoch": 24.71,
"learning_rate": 5.295563794809269e-05,
"loss": 0.6564,
"step": 826000
},
{
"epoch": 24.73,
"learning_rate": 5.280573238859518e-05,
"loss": 0.6636,
"step": 826500
},
{
"epoch": 24.74,
"learning_rate": 5.2655826829097664e-05,
"loss": 0.6558,
"step": 827000
},
{
"epoch": 24.76,
"learning_rate": 5.250592126960015e-05,
"loss": 0.642,
"step": 827500
},
{
"epoch": 24.77,
"learning_rate": 5.235601571010263e-05,
"loss": 0.6522,
"step": 828000
},
{
"epoch": 24.79,
"learning_rate": 5.2206110150605117e-05,
"loss": 0.649,
"step": 828500
},
{
"epoch": 24.8,
"learning_rate": 5.2056204591107596e-05,
"loss": 0.6446,
"step": 829000
},
{
"epoch": 24.82,
"learning_rate": 5.190629903161008e-05,
"loss": 0.6692,
"step": 829500
},
{
"epoch": 24.83,
"learning_rate": 5.175639347211256e-05,
"loss": 0.6634,
"step": 830000
},
{
"epoch": 24.85,
"learning_rate": 5.160648791261505e-05,
"loss": 0.6762,
"step": 830500
},
{
"epoch": 24.86,
"learning_rate": 5.1456582353117535e-05,
"loss": 0.6498,
"step": 831000
},
{
"epoch": 24.88,
"learning_rate": 5.130667679362002e-05,
"loss": 0.6635,
"step": 831500
},
{
"epoch": 24.89,
"learning_rate": 5.1156771234122494e-05,
"loss": 0.6404,
"step": 832000
},
{
"epoch": 24.91,
"learning_rate": 5.100686567462498e-05,
"loss": 0.6523,
"step": 832500
},
{
"epoch": 24.92,
"learning_rate": 5.0856960115127466e-05,
"loss": 0.6744,
"step": 833000
},
{
"epoch": 24.94,
"learning_rate": 5.070705455562995e-05,
"loss": 0.6463,
"step": 833500
},
{
"epoch": 24.95,
"learning_rate": 5.055714899613243e-05,
"loss": 0.6713,
"step": 834000
},
{
"epoch": 24.97,
"learning_rate": 5.040724343663491e-05,
"loss": 0.6611,
"step": 834500
},
{
"epoch": 24.98,
"learning_rate": 5.02573378771374e-05,
"loss": 0.6516,
"step": 835000
},
{
"epoch": 25.0,
"learning_rate": 5.0107432317639884e-05,
"loss": 0.6663,
"step": 835500
},
{
"epoch": 25.01,
"learning_rate": 4.9957526758142364e-05,
"loss": 0.635,
"step": 836000
},
{
"epoch": 25.03,
"learning_rate": 4.980762119864485e-05,
"loss": 0.6347,
"step": 836500
},
{
"epoch": 25.04,
"learning_rate": 4.9657715639147337e-05,
"loss": 0.6458,
"step": 837000
},
{
"epoch": 25.06,
"learning_rate": 4.950781007964981e-05,
"loss": 0.6342,
"step": 837500
},
{
"epoch": 25.07,
"learning_rate": 4.9357904520152296e-05,
"loss": 0.6544,
"step": 838000
},
{
"epoch": 25.09,
"learning_rate": 4.920799896065478e-05,
"loss": 0.6318,
"step": 838500
},
{
"epoch": 25.1,
"learning_rate": 4.905809340115727e-05,
"loss": 0.6476,
"step": 839000
},
{
"epoch": 25.12,
"learning_rate": 4.8908187841659755e-05,
"loss": 0.6437,
"step": 839500
},
{
"epoch": 25.13,
"learning_rate": 4.8758282282162234e-05,
"loss": 0.6314,
"step": 840000
},
{
"epoch": 25.15,
"learning_rate": 4.8608376722664714e-05,
"loss": 0.6457,
"step": 840500
},
{
"epoch": 25.16,
"learning_rate": 4.84584711631672e-05,
"loss": 0.6558,
"step": 841000
},
{
"epoch": 25.18,
"learning_rate": 4.830856560366968e-05,
"loss": 0.6464,
"step": 841500
},
{
"epoch": 25.19,
"learning_rate": 4.8158660044172166e-05,
"loss": 0.6371,
"step": 842000
},
{
"epoch": 25.21,
"learning_rate": 4.800875448467465e-05,
"loss": 0.6244,
"step": 842500
},
{
"epoch": 25.22,
"learning_rate": 4.785884892517714e-05,
"loss": 0.6327,
"step": 843000
},
{
"epoch": 25.24,
"learning_rate": 4.770894336567961e-05,
"loss": 0.6447,
"step": 843500
},
{
"epoch": 25.25,
"learning_rate": 4.75590378061821e-05,
"loss": 0.6261,
"step": 844000
},
{
"epoch": 25.27,
"learning_rate": 4.7409132246684584e-05,
"loss": 0.6267,
"step": 844500
},
{
"epoch": 25.28,
"learning_rate": 4.725922668718707e-05,
"loss": 0.6354,
"step": 845000
},
{
"epoch": 25.3,
"learning_rate": 4.710932112768955e-05,
"loss": 0.651,
"step": 845500
},
{
"epoch": 25.31,
"learning_rate": 4.6959415568192036e-05,
"loss": 0.654,
"step": 846000
},
{
"epoch": 25.33,
"learning_rate": 4.6809510008694516e-05,
"loss": 0.6394,
"step": 846500
},
{
"epoch": 25.34,
"learning_rate": 4.6659604449197e-05,
"loss": 0.6439,
"step": 847000
},
{
"epoch": 25.36,
"learning_rate": 4.650969888969948e-05,
"loss": 0.6441,
"step": 847500
},
{
"epoch": 25.37,
"learning_rate": 4.635979333020197e-05,
"loss": 0.6197,
"step": 848000
},
{
"epoch": 25.39,
"learning_rate": 4.6209887770704454e-05,
"loss": 0.6318,
"step": 848500
},
{
"epoch": 25.4,
"learning_rate": 4.605998221120694e-05,
"loss": 0.6511,
"step": 849000
},
{
"epoch": 25.42,
"learning_rate": 4.591007665170941e-05,
"loss": 0.6532,
"step": 849500
},
{
"epoch": 25.43,
"learning_rate": 4.57601710922119e-05,
"loss": 0.6495,
"step": 850000
},
{
"epoch": 25.45,
"learning_rate": 4.5610265532714386e-05,
"loss": 0.6375,
"step": 850500
},
{
"epoch": 25.46,
"learning_rate": 4.546035997321687e-05,
"loss": 0.6347,
"step": 851000
},
{
"epoch": 25.48,
"learning_rate": 4.531045441371935e-05,
"loss": 0.6443,
"step": 851500
},
{
"epoch": 25.49,
"learning_rate": 4.516054885422184e-05,
"loss": 0.6396,
"step": 852000
},
{
"epoch": 25.51,
"learning_rate": 4.501064329472432e-05,
"loss": 0.6271,
"step": 852500
},
{
"epoch": 25.52,
"learning_rate": 4.4860737735226804e-05,
"loss": 0.6305,
"step": 853000
},
{
"epoch": 25.54,
"learning_rate": 4.4710832175729284e-05,
"loss": 0.642,
"step": 853500
},
{
"epoch": 25.55,
"learning_rate": 4.456092661623177e-05,
"loss": 0.6298,
"step": 854000
},
{
"epoch": 25.57,
"learning_rate": 4.4411021056734256e-05,
"loss": 0.6554,
"step": 854500
},
{
"epoch": 25.58,
"learning_rate": 4.426111549723674e-05,
"loss": 0.6406,
"step": 855000
},
{
"epoch": 25.6,
"learning_rate": 4.4111209937739215e-05,
"loss": 0.6407,
"step": 855500
},
{
"epoch": 25.61,
"learning_rate": 4.39613043782417e-05,
"loss": 0.6408,
"step": 856000
},
{
"epoch": 25.63,
"learning_rate": 4.381139881874419e-05,
"loss": 0.6378,
"step": 856500
},
{
"epoch": 25.64,
"learning_rate": 4.3661493259246674e-05,
"loss": 0.658,
"step": 857000
},
{
"epoch": 25.66,
"learning_rate": 4.3511587699749154e-05,
"loss": 0.6379,
"step": 857500
},
{
"epoch": 25.67,
"learning_rate": 4.336168214025164e-05,
"loss": 0.6436,
"step": 858000
},
{
"epoch": 25.69,
"learning_rate": 4.321177658075412e-05,
"loss": 0.6418,
"step": 858500
},
{
"epoch": 25.7,
"learning_rate": 4.30618710212566e-05,
"loss": 0.6297,
"step": 859000
},
{
"epoch": 25.72,
"learning_rate": 4.2911965461759086e-05,
"loss": 0.6468,
"step": 859500
},
{
"epoch": 25.73,
"learning_rate": 4.276205990226157e-05,
"loss": 0.643,
"step": 860000
},
{
"epoch": 25.75,
"learning_rate": 4.261215434276406e-05,
"loss": 0.6405,
"step": 860500
},
{
"epoch": 25.76,
"learning_rate": 4.2462248783266545e-05,
"loss": 0.6549,
"step": 861000
},
{
"epoch": 25.78,
"learning_rate": 4.231234322376902e-05,
"loss": 0.6577,
"step": 861500
},
{
"epoch": 25.79,
"learning_rate": 4.2162437664271504e-05,
"loss": 0.6362,
"step": 862000
},
{
"epoch": 25.81,
"learning_rate": 4.201253210477399e-05,
"loss": 0.6412,
"step": 862500
},
{
"epoch": 25.82,
"learning_rate": 4.1862626545276476e-05,
"loss": 0.6265,
"step": 863000
},
{
"epoch": 25.84,
"learning_rate": 4.1712720985778956e-05,
"loss": 0.6356,
"step": 863500
},
{
"epoch": 25.85,
"learning_rate": 4.156281542628144e-05,
"loss": 0.6583,
"step": 864000
},
{
"epoch": 25.87,
"learning_rate": 4.141290986678392e-05,
"loss": 0.647,
"step": 864500
},
{
"epoch": 25.88,
"learning_rate": 4.12630043072864e-05,
"loss": 0.6412,
"step": 865000
},
{
"epoch": 25.9,
"learning_rate": 4.111309874778889e-05,
"loss": 0.6503,
"step": 865500
},
{
"epoch": 25.91,
"learning_rate": 4.0963193188291374e-05,
"loss": 0.6381,
"step": 866000
},
{
"epoch": 25.93,
"learning_rate": 4.081328762879386e-05,
"loss": 0.6299,
"step": 866500
},
{
"epoch": 25.94,
"learning_rate": 4.0663382069296347e-05,
"loss": 0.6442,
"step": 867000
},
{
"epoch": 25.96,
"learning_rate": 4.051347650979882e-05,
"loss": 0.6178,
"step": 867500
},
{
"epoch": 25.97,
"learning_rate": 4.0363570950301306e-05,
"loss": 0.6469,
"step": 868000
},
{
"epoch": 25.99,
"learning_rate": 4.021366539080379e-05,
"loss": 0.6421,
"step": 868500
},
{
"epoch": 26.0,
"learning_rate": 4.006375983130627e-05,
"loss": 0.6367,
"step": 869000
},
{
"epoch": 26.02,
"learning_rate": 3.991385427180876e-05,
"loss": 0.6152,
"step": 869500
},
{
"epoch": 26.03,
"learning_rate": 3.9763948712311244e-05,
"loss": 0.6387,
"step": 870000
},
{
"epoch": 26.05,
"learning_rate": 3.9614043152813724e-05,
"loss": 0.6327,
"step": 870500
},
{
"epoch": 26.06,
"learning_rate": 3.94641375933162e-05,
"loss": 0.6142,
"step": 871000
},
{
"epoch": 26.08,
"learning_rate": 3.931423203381869e-05,
"loss": 0.6437,
"step": 871500
},
{
"epoch": 26.09,
"learning_rate": 3.9164326474321176e-05,
"loss": 0.6211,
"step": 872000
},
{
"epoch": 26.11,
"learning_rate": 3.901442091482366e-05,
"loss": 0.6181,
"step": 872500
},
{
"epoch": 26.12,
"learning_rate": 3.886451535532614e-05,
"loss": 0.6129,
"step": 873000
},
{
"epoch": 26.14,
"learning_rate": 3.871460979582862e-05,
"loss": 0.637,
"step": 873500
},
{
"epoch": 26.15,
"learning_rate": 3.856470423633111e-05,
"loss": 0.6191,
"step": 874000
},
{
"epoch": 26.17,
"learning_rate": 3.8414798676833594e-05,
"loss": 0.6342,
"step": 874500
},
{
"epoch": 26.18,
"learning_rate": 3.8264893117336074e-05,
"loss": 0.6314,
"step": 875000
},
{
"epoch": 26.2,
"learning_rate": 3.811498755783856e-05,
"loss": 0.6259,
"step": 875500
},
{
"epoch": 26.21,
"learning_rate": 3.7965081998341046e-05,
"loss": 0.6446,
"step": 876000
},
{
"epoch": 26.23,
"learning_rate": 3.7815176438843526e-05,
"loss": 0.6377,
"step": 876500
},
{
"epoch": 26.24,
"learning_rate": 3.7665270879346005e-05,
"loss": 0.6086,
"step": 877000
},
{
"epoch": 26.26,
"learning_rate": 3.751536531984849e-05,
"loss": 0.6378,
"step": 877500
},
{
"epoch": 26.27,
"learning_rate": 3.736545976035098e-05,
"loss": 0.6313,
"step": 878000
},
{
"epoch": 26.29,
"learning_rate": 3.721555420085346e-05,
"loss": 0.6377,
"step": 878500
},
{
"epoch": 26.3,
"learning_rate": 3.7065648641355944e-05,
"loss": 0.6311,
"step": 879000
},
{
"epoch": 26.32,
"learning_rate": 3.691574308185843e-05,
"loss": 0.6219,
"step": 879500
},
{
"epoch": 26.33,
"learning_rate": 3.676583752236091e-05,
"loss": 0.6131,
"step": 880000
},
{
"epoch": 26.35,
"learning_rate": 3.6615931962863396e-05,
"loss": 0.6142,
"step": 880500
},
{
"epoch": 26.36,
"learning_rate": 3.6466026403365876e-05,
"loss": 0.6062,
"step": 881000
},
{
"epoch": 26.38,
"learning_rate": 3.6316120843868355e-05,
"loss": 0.6393,
"step": 881500
},
{
"epoch": 26.39,
"learning_rate": 3.616621528437084e-05,
"loss": 0.618,
"step": 882000
},
{
"epoch": 26.41,
"learning_rate": 3.601630972487333e-05,
"loss": 0.6238,
"step": 882500
},
{
"epoch": 26.42,
"learning_rate": 3.586640416537581e-05,
"loss": 0.6266,
"step": 883000
},
{
"epoch": 26.44,
"learning_rate": 3.5716498605878294e-05,
"loss": 0.6211,
"step": 883500
},
{
"epoch": 26.45,
"learning_rate": 3.556659304638078e-05,
"loss": 0.6272,
"step": 884000
},
{
"epoch": 26.47,
"learning_rate": 3.541668748688326e-05,
"loss": 0.625,
"step": 884500
},
{
"epoch": 26.48,
"learning_rate": 3.5266781927385746e-05,
"loss": 0.6209,
"step": 885000
},
{
"epoch": 26.5,
"learning_rate": 3.511687636788823e-05,
"loss": 0.6407,
"step": 885500
},
{
"epoch": 26.51,
"learning_rate": 3.496697080839071e-05,
"loss": 0.6077,
"step": 886000
},
{
"epoch": 26.53,
"learning_rate": 3.481706524889319e-05,
"loss": 0.6272,
"step": 886500
},
{
"epoch": 26.54,
"learning_rate": 3.466715968939568e-05,
"loss": 0.6299,
"step": 887000
},
{
"epoch": 26.56,
"learning_rate": 3.451725412989816e-05,
"loss": 0.6205,
"step": 887500
},
{
"epoch": 26.57,
"learning_rate": 3.4367348570400643e-05,
"loss": 0.6117,
"step": 888000
},
{
"epoch": 26.59,
"learning_rate": 3.421744301090313e-05,
"loss": 0.641,
"step": 888500
},
{
"epoch": 26.6,
"learning_rate": 3.406753745140561e-05,
"loss": 0.6289,
"step": 889000
},
{
"epoch": 26.62,
"learning_rate": 3.3917631891908096e-05,
"loss": 0.6102,
"step": 889500
},
{
"epoch": 26.63,
"learning_rate": 3.376772633241058e-05,
"loss": 0.6362,
"step": 890000
},
{
"epoch": 26.64,
"learning_rate": 3.361782077291306e-05,
"loss": 0.621,
"step": 890500
},
{
"epoch": 26.66,
"learning_rate": 3.346791521341555e-05,
"loss": 0.6283,
"step": 891000
},
{
"epoch": 26.67,
"learning_rate": 3.331800965391803e-05,
"loss": 0.6255,
"step": 891500
},
{
"epoch": 26.69,
"learning_rate": 3.3168104094420514e-05,
"loss": 0.6345,
"step": 892000
},
{
"epoch": 26.7,
"learning_rate": 3.301819853492299e-05,
"loss": 0.6232,
"step": 892500
},
{
"epoch": 26.72,
"learning_rate": 3.286829297542548e-05,
"loss": 0.634,
"step": 893000
},
{
"epoch": 26.73,
"learning_rate": 3.271838741592796e-05,
"loss": 0.6113,
"step": 893500
},
{
"epoch": 26.75,
"learning_rate": 3.2568481856430445e-05,
"loss": 0.6278,
"step": 894000
},
{
"epoch": 26.76,
"learning_rate": 3.241857629693293e-05,
"loss": 0.6257,
"step": 894500
},
{
"epoch": 26.78,
"learning_rate": 3.226867073743541e-05,
"loss": 0.63,
"step": 895000
},
{
"epoch": 26.79,
"learning_rate": 3.21187651779379e-05,
"loss": 0.6132,
"step": 895500
},
{
"epoch": 26.81,
"learning_rate": 3.1968859618440384e-05,
"loss": 0.6146,
"step": 896000
},
{
"epoch": 26.82,
"learning_rate": 3.1818954058942863e-05,
"loss": 0.6183,
"step": 896500
},
{
"epoch": 26.84,
"learning_rate": 3.166904849944535e-05,
"loss": 0.6271,
"step": 897000
},
{
"epoch": 26.85,
"learning_rate": 3.151914293994783e-05,
"loss": 0.6213,
"step": 897500
},
{
"epoch": 26.87,
"learning_rate": 3.1369237380450316e-05,
"loss": 0.6187,
"step": 898000
},
{
"epoch": 26.88,
"learning_rate": 3.1219331820952795e-05,
"loss": 0.6358,
"step": 898500
},
{
"epoch": 26.9,
"learning_rate": 3.106942626145528e-05,
"loss": 0.6257,
"step": 899000
},
{
"epoch": 26.91,
"learning_rate": 3.091952070195776e-05,
"loss": 0.6058,
"step": 899500
},
{
"epoch": 26.93,
"learning_rate": 3.076961514246025e-05,
"loss": 0.6168,
"step": 900000
},
{
"epoch": 26.94,
"learning_rate": 3.0619709582962734e-05,
"loss": 0.6181,
"step": 900500
},
{
"epoch": 26.96,
"learning_rate": 3.0469804023465213e-05,
"loss": 0.6164,
"step": 901000
},
{
"epoch": 26.97,
"learning_rate": 3.03198984639677e-05,
"loss": 0.6293,
"step": 901500
},
{
"epoch": 26.99,
"learning_rate": 3.016999290447018e-05,
"loss": 0.631,
"step": 902000
},
{
"epoch": 27.0,
"learning_rate": 3.0020087344972665e-05,
"loss": 0.6212,
"step": 902500
},
{
"epoch": 27.02,
"learning_rate": 2.987018178547515e-05,
"loss": 0.5994,
"step": 903000
},
{
"epoch": 27.03,
"learning_rate": 2.9720276225977628e-05,
"loss": 0.6311,
"step": 903500
},
{
"epoch": 27.05,
"learning_rate": 2.9570370666480114e-05,
"loss": 0.6059,
"step": 904000
},
{
"epoch": 27.06,
"learning_rate": 2.94204651069826e-05,
"loss": 0.6046,
"step": 904500
},
{
"epoch": 27.08,
"learning_rate": 2.927055954748508e-05,
"loss": 0.6107,
"step": 905000
},
{
"epoch": 27.09,
"learning_rate": 2.9120653987987563e-05,
"loss": 0.6181,
"step": 905500
},
{
"epoch": 27.11,
"learning_rate": 2.897074842849005e-05,
"loss": 0.615,
"step": 906000
},
{
"epoch": 27.12,
"learning_rate": 2.882084286899253e-05,
"loss": 0.6037,
"step": 906500
},
{
"epoch": 27.14,
"learning_rate": 2.8670937309495015e-05,
"loss": 0.6293,
"step": 907000
},
{
"epoch": 27.15,
"learning_rate": 2.85210317499975e-05,
"loss": 0.6253,
"step": 907500
},
{
"epoch": 27.17,
"learning_rate": 2.837112619049998e-05,
"loss": 0.6161,
"step": 908000
},
{
"epoch": 27.18,
"learning_rate": 2.8221220631002464e-05,
"loss": 0.6192,
"step": 908500
},
{
"epoch": 27.2,
"learning_rate": 2.807131507150495e-05,
"loss": 0.6139,
"step": 909000
},
{
"epoch": 27.21,
"learning_rate": 2.792140951200743e-05,
"loss": 0.6053,
"step": 909500
},
{
"epoch": 27.23,
"learning_rate": 2.7771503952509916e-05,
"loss": 0.5955,
"step": 910000
},
{
"epoch": 27.24,
"learning_rate": 2.76215983930124e-05,
"loss": 0.6129,
"step": 910500
},
{
"epoch": 27.26,
"learning_rate": 2.7471692833514882e-05,
"loss": 0.6196,
"step": 911000
},
{
"epoch": 27.27,
"learning_rate": 2.7321787274017365e-05,
"loss": 0.6237,
"step": 911500
},
{
"epoch": 27.29,
"learning_rate": 2.717188171451985e-05,
"loss": 0.5945,
"step": 912000
},
{
"epoch": 27.3,
"learning_rate": 2.702197615502233e-05,
"loss": 0.6236,
"step": 912500
},
{
"epoch": 27.32,
"learning_rate": 2.6872070595524817e-05,
"loss": 0.614,
"step": 913000
},
{
"epoch": 27.33,
"learning_rate": 2.67221650360273e-05,
"loss": 0.6084,
"step": 913500
},
{
"epoch": 27.35,
"learning_rate": 2.6572259476529783e-05,
"loss": 0.593,
"step": 914000
},
{
"epoch": 27.36,
"learning_rate": 2.6422353917032266e-05,
"loss": 0.6267,
"step": 914500
},
{
"epoch": 27.38,
"learning_rate": 2.6272448357534752e-05,
"loss": 0.6058,
"step": 915000
},
{
"epoch": 27.39,
"learning_rate": 2.6122542798037232e-05,
"loss": 0.6168,
"step": 915500
},
{
"epoch": 27.41,
"learning_rate": 2.5972637238539718e-05,
"loss": 0.5989,
"step": 916000
},
{
"epoch": 27.42,
"learning_rate": 2.58227316790422e-05,
"loss": 0.6066,
"step": 916500
},
{
"epoch": 27.44,
"learning_rate": 2.5672826119544684e-05,
"loss": 0.6248,
"step": 917000
},
{
"epoch": 27.45,
"learning_rate": 2.5522920560047167e-05,
"loss": 0.6318,
"step": 917500
},
{
"epoch": 27.47,
"learning_rate": 2.5373015000549653e-05,
"loss": 0.6234,
"step": 918000
},
{
"epoch": 27.48,
"learning_rate": 2.5223109441052133e-05,
"loss": 0.6226,
"step": 918500
},
{
"epoch": 27.5,
"learning_rate": 2.507320388155462e-05,
"loss": 0.6342,
"step": 919000
},
{
"epoch": 27.51,
"learning_rate": 2.4923298322057102e-05,
"loss": 0.5974,
"step": 919500
},
{
"epoch": 27.53,
"learning_rate": 2.4773392762559585e-05,
"loss": 0.6084,
"step": 920000
},
{
"epoch": 27.54,
"learning_rate": 2.4623487203062068e-05,
"loss": 0.6086,
"step": 920500
},
{
"epoch": 27.56,
"learning_rate": 2.4473581643564554e-05,
"loss": 0.6073,
"step": 921000
},
{
"epoch": 27.57,
"learning_rate": 2.4323676084067034e-05,
"loss": 0.6144,
"step": 921500
},
{
"epoch": 27.59,
"learning_rate": 2.417377052456952e-05,
"loss": 0.6047,
"step": 922000
},
{
"epoch": 27.6,
"learning_rate": 2.4023864965072003e-05,
"loss": 0.6012,
"step": 922500
},
{
"epoch": 27.62,
"learning_rate": 2.3873959405574486e-05,
"loss": 0.6015,
"step": 923000
},
{
"epoch": 27.63,
"learning_rate": 2.372405384607697e-05,
"loss": 0.6039,
"step": 923500
},
{
"epoch": 27.65,
"learning_rate": 2.3574148286579455e-05,
"loss": 0.6067,
"step": 924000
},
{
"epoch": 27.66,
"learning_rate": 2.3424242727081935e-05,
"loss": 0.6227,
"step": 924500
},
{
"epoch": 27.68,
"learning_rate": 2.327433716758442e-05,
"loss": 0.6116,
"step": 925000
},
{
"epoch": 27.69,
"learning_rate": 2.3124431608086904e-05,
"loss": 0.606,
"step": 925500
},
{
"epoch": 27.71,
"learning_rate": 2.2974526048589384e-05,
"loss": 0.6118,
"step": 926000
},
{
"epoch": 27.72,
"learning_rate": 2.282462048909187e-05,
"loss": 0.6126,
"step": 926500
},
{
"epoch": 27.74,
"learning_rate": 2.2674714929594356e-05,
"loss": 0.6072,
"step": 927000
},
{
"epoch": 27.75,
"learning_rate": 2.2524809370096836e-05,
"loss": 0.6037,
"step": 927500
},
{
"epoch": 27.77,
"learning_rate": 2.237490381059932e-05,
"loss": 0.6106,
"step": 928000
},
{
"epoch": 27.78,
"learning_rate": 2.2224998251101805e-05,
"loss": 0.6031,
"step": 928500
},
{
"epoch": 27.8,
"learning_rate": 2.2075092691604285e-05,
"loss": 0.6117,
"step": 929000
},
{
"epoch": 27.81,
"learning_rate": 2.192518713210677e-05,
"loss": 0.6292,
"step": 929500
},
{
"epoch": 27.83,
"learning_rate": 2.1775281572609257e-05,
"loss": 0.6186,
"step": 930000
},
{
"epoch": 27.84,
"learning_rate": 2.1625376013111737e-05,
"loss": 0.5992,
"step": 930500
},
{
"epoch": 27.86,
"learning_rate": 2.147547045361422e-05,
"loss": 0.6177,
"step": 931000
},
{
"epoch": 27.87,
"learning_rate": 2.1325564894116706e-05,
"loss": 0.6232,
"step": 931500
},
{
"epoch": 27.89,
"learning_rate": 2.1175659334619186e-05,
"loss": 0.6028,
"step": 932000
},
{
"epoch": 27.9,
"learning_rate": 2.1025753775121672e-05,
"loss": 0.5986,
"step": 932500
},
{
"epoch": 27.92,
"learning_rate": 2.0875848215624155e-05,
"loss": 0.6152,
"step": 933000
},
{
"epoch": 27.93,
"learning_rate": 2.0725942656126638e-05,
"loss": 0.5985,
"step": 933500
},
{
"epoch": 27.95,
"learning_rate": 2.057603709662912e-05,
"loss": 0.6113,
"step": 934000
},
{
"epoch": 27.96,
"learning_rate": 2.0426131537131607e-05,
"loss": 0.6099,
"step": 934500
},
{
"epoch": 27.98,
"learning_rate": 2.0276225977634087e-05,
"loss": 0.5843,
"step": 935000
},
{
"epoch": 27.99,
"learning_rate": 2.0126320418136573e-05,
"loss": 0.6036,
"step": 935500
},
{
"epoch": 28.01,
"learning_rate": 1.9976414858639056e-05,
"loss": 0.6014,
"step": 936000
},
{
"epoch": 28.02,
"learning_rate": 1.982650929914154e-05,
"loss": 0.5958,
"step": 936500
},
{
"epoch": 28.04,
"learning_rate": 1.9676603739644022e-05,
"loss": 0.6049,
"step": 937000
},
{
"epoch": 28.05,
"learning_rate": 1.9526698180146508e-05,
"loss": 0.6062,
"step": 937500
},
{
"epoch": 28.07,
"learning_rate": 1.9376792620648988e-05,
"loss": 0.5972,
"step": 938000
},
{
"epoch": 28.08,
"learning_rate": 1.9226887061151474e-05,
"loss": 0.5964,
"step": 938500
},
{
"epoch": 28.1,
"learning_rate": 1.9076981501653957e-05,
"loss": 0.6094,
"step": 939000
},
{
"epoch": 28.11,
"learning_rate": 1.892707594215644e-05,
"loss": 0.5902,
"step": 939500
},
{
"epoch": 28.13,
"learning_rate": 1.8777170382658923e-05,
"loss": 0.6009,
"step": 940000
},
{
"epoch": 28.14,
"learning_rate": 1.8627264823161406e-05,
"loss": 0.5857,
"step": 940500
},
{
"epoch": 28.16,
"learning_rate": 1.847735926366389e-05,
"loss": 0.6009,
"step": 941000
},
{
"epoch": 28.17,
"learning_rate": 1.8327453704166375e-05,
"loss": 0.6036,
"step": 941500
},
{
"epoch": 28.19,
"learning_rate": 1.8177548144668858e-05,
"loss": 0.5965,
"step": 942000
},
{
"epoch": 28.2,
"learning_rate": 1.802764258517134e-05,
"loss": 0.6016,
"step": 942500
},
{
"epoch": 28.22,
"learning_rate": 1.7877737025673824e-05,
"loss": 0.603,
"step": 943000
},
{
"epoch": 28.23,
"learning_rate": 1.7727831466176307e-05,
"loss": 0.6095,
"step": 943500
},
{
"epoch": 28.25,
"learning_rate": 1.757792590667879e-05,
"loss": 0.5984,
"step": 944000
},
{
"epoch": 28.26,
"learning_rate": 1.7428020347181276e-05,
"loss": 0.5945,
"step": 944500
},
{
"epoch": 28.28,
"learning_rate": 1.727811478768376e-05,
"loss": 0.6038,
"step": 945000
},
{
"epoch": 28.29,
"learning_rate": 1.7128209228186242e-05,
"loss": 0.5933,
"step": 945500
},
{
"epoch": 28.31,
"learning_rate": 1.6978303668688725e-05,
"loss": 0.6037,
"step": 946000
},
{
"epoch": 28.32,
"learning_rate": 1.6828398109191208e-05,
"loss": 0.6082,
"step": 946500
},
{
"epoch": 28.34,
"learning_rate": 1.667849254969369e-05,
"loss": 0.5862,
"step": 947000
},
{
"epoch": 28.35,
"learning_rate": 1.6528586990196177e-05,
"loss": 0.598,
"step": 947500
},
{
"epoch": 28.37,
"learning_rate": 1.637868143069866e-05,
"loss": 0.591,
"step": 948000
},
{
"epoch": 28.38,
"learning_rate": 1.622877587120114e-05,
"loss": 0.5798,
"step": 948500
},
{
"epoch": 28.4,
"learning_rate": 1.6078870311703626e-05,
"loss": 0.5916,
"step": 949000
},
{
"epoch": 28.41,
"learning_rate": 1.592896475220611e-05,
"loss": 0.6024,
"step": 949500
},
{
"epoch": 28.43,
"learning_rate": 1.5779059192708592e-05,
"loss": 0.5936,
"step": 950000
},
{
"epoch": 28.44,
"learning_rate": 1.5629153633211078e-05,
"loss": 0.6059,
"step": 950500
},
{
"epoch": 28.46,
"learning_rate": 1.5479248073713558e-05,
"loss": 0.5938,
"step": 951000
},
{
"epoch": 28.47,
"learning_rate": 1.532934251421604e-05,
"loss": 0.5886,
"step": 951500
},
{
"epoch": 28.49,
"learning_rate": 1.5179436954718527e-05,
"loss": 0.5988,
"step": 952000
},
{
"epoch": 28.5,
"learning_rate": 1.502953139522101e-05,
"loss": 0.6229,
"step": 952500
},
{
"epoch": 28.52,
"learning_rate": 1.4879625835723493e-05,
"loss": 0.5935,
"step": 953000
},
{
"epoch": 28.53,
"learning_rate": 1.4729720276225977e-05,
"loss": 0.6019,
"step": 953500
},
{
"epoch": 28.54,
"learning_rate": 1.457981471672846e-05,
"loss": 0.6029,
"step": 954000
},
{
"epoch": 28.56,
"learning_rate": 1.4429909157230943e-05,
"loss": 0.5877,
"step": 954500
},
{
"epoch": 28.57,
"learning_rate": 1.4280003597733428e-05,
"loss": 0.6127,
"step": 955000
},
{
"epoch": 28.59,
"learning_rate": 1.4130098038235911e-05,
"loss": 0.6104,
"step": 955500
},
{
"epoch": 28.6,
"learning_rate": 1.3980192478738394e-05,
"loss": 0.5947,
"step": 956000
},
{
"epoch": 28.62,
"learning_rate": 1.3830286919240878e-05,
"loss": 0.6072,
"step": 956500
},
{
"epoch": 28.63,
"learning_rate": 1.3680381359743361e-05,
"loss": 0.6168,
"step": 957000
},
{
"epoch": 28.65,
"learning_rate": 1.3530475800245844e-05,
"loss": 0.5884,
"step": 957500
},
{
"epoch": 28.66,
"learning_rate": 1.3380570240748329e-05,
"loss": 0.6068,
"step": 958000
},
{
"epoch": 28.68,
"learning_rate": 1.3230664681250812e-05,
"loss": 0.5842,
"step": 958500
},
{
"epoch": 28.69,
"learning_rate": 1.3080759121753293e-05,
"loss": 0.592,
"step": 959000
},
{
"epoch": 28.71,
"learning_rate": 1.2930853562255776e-05,
"loss": 0.592,
"step": 959500
},
{
"epoch": 28.72,
"learning_rate": 1.2780948002758262e-05,
"loss": 0.6008,
"step": 960000
},
{
"epoch": 28.74,
"learning_rate": 1.2631042443260744e-05,
"loss": 0.6014,
"step": 960500
},
{
"epoch": 28.75,
"learning_rate": 1.2481136883763227e-05,
"loss": 0.6173,
"step": 961000
},
{
"epoch": 28.77,
"learning_rate": 1.2331231324265711e-05,
"loss": 0.6105,
"step": 961500
},
{
"epoch": 28.78,
"learning_rate": 1.2181325764768194e-05,
"loss": 0.5961,
"step": 962000
},
{
"epoch": 28.8,
"learning_rate": 1.2031420205270677e-05,
"loss": 0.6072,
"step": 962500
},
{
"epoch": 28.81,
"learning_rate": 1.1881514645773162e-05,
"loss": 0.6136,
"step": 963000
},
{
"epoch": 28.83,
"learning_rate": 1.1731609086275645e-05,
"loss": 0.6051,
"step": 963500
},
{
"epoch": 28.84,
"learning_rate": 1.1581703526778128e-05,
"loss": 0.5956,
"step": 964000
},
{
"epoch": 28.86,
"learning_rate": 1.1431797967280612e-05,
"loss": 0.5941,
"step": 964500
},
{
"epoch": 28.87,
"learning_rate": 1.1281892407783095e-05,
"loss": 0.6001,
"step": 965000
},
{
"epoch": 28.89,
"learning_rate": 1.1131986848285578e-05,
"loss": 0.5945,
"step": 965500
},
{
"epoch": 28.9,
"learning_rate": 1.0982081288788063e-05,
"loss": 0.6144,
"step": 966000
},
{
"epoch": 28.92,
"learning_rate": 1.0832175729290546e-05,
"loss": 0.606,
"step": 966500
},
{
"epoch": 28.93,
"learning_rate": 1.0682270169793029e-05,
"loss": 0.5952,
"step": 967000
},
{
"epoch": 28.95,
"learning_rate": 1.0532364610295513e-05,
"loss": 0.6121,
"step": 967500
},
{
"epoch": 28.96,
"learning_rate": 1.0382459050797996e-05,
"loss": 0.5926,
"step": 968000
},
{
"epoch": 28.98,
"learning_rate": 1.0232553491300479e-05,
"loss": 0.5864,
"step": 968500
},
{
"epoch": 28.99,
"learning_rate": 1.0082647931802964e-05,
"loss": 0.5794,
"step": 969000
}
],
"logging_steps": 500,
"max_steps": 1002630,
"num_input_tokens_seen": 0,
"num_train_epochs": 30,
"save_steps": 500,
"total_flos": 2.4453843078595006e+21,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}