yesj1234's picture
Upload folder using huggingface_hub
c93022c
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 28.0,
"eval_steps": 500,
"global_step": 1124480,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.07,
"learning_rate": 7.47011952191235e-05,
"loss": 11.8613,
"step": 3000
},
{
"epoch": 0.15,
"learning_rate": 0.000149402390438247,
"loss": 1.9866,
"step": 6000
},
{
"epoch": 0.22,
"learning_rate": 0.0002241035856573705,
"loss": 1.4383,
"step": 9000
},
{
"epoch": 0.3,
"learning_rate": 0.000298804780876494,
"loss": 1.3467,
"step": 12000
},
{
"epoch": 0.37,
"learning_rate": 0.0002992575153929735,
"loss": 1.3175,
"step": 15000
},
{
"epoch": 0.45,
"learning_rate": 0.00029850295786550764,
"loss": 1.2551,
"step": 18000
},
{
"epoch": 0.52,
"learning_rate": 0.00029774840033804177,
"loss": 1.2058,
"step": 21000
},
{
"epoch": 0.6,
"learning_rate": 0.00029699384281057584,
"loss": 1.1759,
"step": 24000
},
{
"epoch": 0.67,
"learning_rate": 0.00029623928528310997,
"loss": 1.148,
"step": 27000
},
{
"epoch": 0.75,
"learning_rate": 0.0002954847277556441,
"loss": 1.116,
"step": 30000
},
{
"epoch": 0.82,
"learning_rate": 0.00029473017022817817,
"loss": 1.0933,
"step": 33000
},
{
"epoch": 0.9,
"learning_rate": 0.00029397561270071225,
"loss": 1.0788,
"step": 36000
},
{
"epoch": 0.97,
"learning_rate": 0.0002932210551732464,
"loss": 1.0578,
"step": 39000
},
{
"epoch": 1.05,
"learning_rate": 0.0002924664976457805,
"loss": 1.0363,
"step": 42000
},
{
"epoch": 1.12,
"learning_rate": 0.0002917119401183146,
"loss": 1.0198,
"step": 45000
},
{
"epoch": 1.2,
"learning_rate": 0.0002909573825908487,
"loss": 1.0132,
"step": 48000
},
{
"epoch": 1.27,
"learning_rate": 0.00029020282506338283,
"loss": 0.9995,
"step": 51000
},
{
"epoch": 1.34,
"learning_rate": 0.0002894482675359169,
"loss": 0.992,
"step": 54000
},
{
"epoch": 1.42,
"learning_rate": 0.000288693710008451,
"loss": 0.9821,
"step": 57000
},
{
"epoch": 1.49,
"learning_rate": 0.0002879391524809851,
"loss": 0.9878,
"step": 60000
},
{
"epoch": 1.57,
"learning_rate": 0.00028718459495351924,
"loss": 0.9952,
"step": 63000
},
{
"epoch": 1.64,
"learning_rate": 0.0002864300374260533,
"loss": 0.9804,
"step": 66000
},
{
"epoch": 1.72,
"learning_rate": 0.00028567547989858744,
"loss": 0.9736,
"step": 69000
},
{
"epoch": 1.79,
"learning_rate": 0.00028492092237112157,
"loss": 0.9681,
"step": 72000
},
{
"epoch": 1.87,
"learning_rate": 0.00028416636484365564,
"loss": 0.965,
"step": 75000
},
{
"epoch": 1.94,
"learning_rate": 0.00028341180731618977,
"loss": 0.9519,
"step": 78000
},
{
"epoch": 2.02,
"learning_rate": 0.00028265724978872385,
"loss": 0.9525,
"step": 81000
},
{
"epoch": 2.09,
"learning_rate": 0.000281902692261258,
"loss": 0.9175,
"step": 84000
},
{
"epoch": 2.17,
"learning_rate": 0.00028114813473379205,
"loss": 0.9129,
"step": 87000
},
{
"epoch": 2.24,
"learning_rate": 0.0002803935772063262,
"loss": 0.9167,
"step": 90000
},
{
"epoch": 2.32,
"learning_rate": 0.0002796390196788603,
"loss": 0.9037,
"step": 93000
},
{
"epoch": 2.39,
"learning_rate": 0.0002788844621513944,
"loss": 0.8967,
"step": 96000
},
{
"epoch": 2.47,
"learning_rate": 0.0002781299046239285,
"loss": 0.8896,
"step": 99000
},
{
"epoch": 2.54,
"learning_rate": 0.0002773753470964626,
"loss": 0.8848,
"step": 102000
},
{
"epoch": 2.61,
"learning_rate": 0.0002766207895689967,
"loss": 0.8864,
"step": 105000
},
{
"epoch": 2.69,
"learning_rate": 0.00027586623204153084,
"loss": 0.8793,
"step": 108000
},
{
"epoch": 2.76,
"learning_rate": 0.0002751116745140649,
"loss": 0.8727,
"step": 111000
},
{
"epoch": 2.84,
"learning_rate": 0.00027435711698659904,
"loss": 0.8605,
"step": 114000
},
{
"epoch": 2.91,
"learning_rate": 0.00027360255945913317,
"loss": 0.8614,
"step": 117000
},
{
"epoch": 2.99,
"learning_rate": 0.00027284800193166724,
"loss": 0.8584,
"step": 120000
},
{
"epoch": 3.06,
"learning_rate": 0.00027209344440420137,
"loss": 0.8322,
"step": 123000
},
{
"epoch": 3.14,
"learning_rate": 0.0002713388868767355,
"loss": 0.8182,
"step": 126000
},
{
"epoch": 3.21,
"learning_rate": 0.0002705843293492696,
"loss": 0.8203,
"step": 129000
},
{
"epoch": 3.29,
"learning_rate": 0.00026982977182180365,
"loss": 0.824,
"step": 132000
},
{
"epoch": 3.36,
"learning_rate": 0.0002690752142943378,
"loss": 0.819,
"step": 135000
},
{
"epoch": 3.44,
"learning_rate": 0.0002683206567668719,
"loss": 0.8144,
"step": 138000
},
{
"epoch": 3.51,
"learning_rate": 0.000267566099239406,
"loss": 0.8158,
"step": 141000
},
{
"epoch": 3.59,
"learning_rate": 0.0002668115417119401,
"loss": 0.8148,
"step": 144000
},
{
"epoch": 3.66,
"learning_rate": 0.00026605698418447424,
"loss": 0.8188,
"step": 147000
},
{
"epoch": 3.74,
"learning_rate": 0.0002653024266570083,
"loss": 0.8064,
"step": 150000
},
{
"epoch": 3.81,
"learning_rate": 0.0002645478691295424,
"loss": 0.8047,
"step": 153000
},
{
"epoch": 3.88,
"learning_rate": 0.0002637933116020765,
"loss": 0.8008,
"step": 156000
},
{
"epoch": 3.96,
"learning_rate": 0.00026303875407461064,
"loss": 0.799,
"step": 159000
},
{
"epoch": 4.03,
"learning_rate": 0.0002622841965471447,
"loss": 0.7822,
"step": 162000
},
{
"epoch": 4.11,
"learning_rate": 0.00026152963901967884,
"loss": 0.7657,
"step": 165000
},
{
"epoch": 4.18,
"learning_rate": 0.00026077508149221297,
"loss": 0.7627,
"step": 168000
},
{
"epoch": 4.26,
"learning_rate": 0.00026002052396474705,
"loss": 0.7603,
"step": 171000
},
{
"epoch": 4.33,
"learning_rate": 0.0002592659664372812,
"loss": 0.7608,
"step": 174000
},
{
"epoch": 4.41,
"learning_rate": 0.00025851140890981525,
"loss": 0.7642,
"step": 177000
},
{
"epoch": 4.48,
"learning_rate": 0.0002577568513823494,
"loss": 0.7607,
"step": 180000
},
{
"epoch": 4.56,
"learning_rate": 0.0002570022938548835,
"loss": 0.7546,
"step": 183000
},
{
"epoch": 4.63,
"learning_rate": 0.0002562477363274176,
"loss": 0.7531,
"step": 186000
},
{
"epoch": 4.71,
"learning_rate": 0.0002554931787999517,
"loss": 0.7572,
"step": 189000
},
{
"epoch": 4.78,
"learning_rate": 0.0002547386212724858,
"loss": 0.7578,
"step": 192000
},
{
"epoch": 4.86,
"learning_rate": 0.0002539840637450199,
"loss": 0.7558,
"step": 195000
},
{
"epoch": 4.93,
"learning_rate": 0.000253229506217554,
"loss": 0.7556,
"step": 198000
},
{
"epoch": 5.0,
"learning_rate": 0.0002524749486900881,
"loss": 0.7476,
"step": 201000
},
{
"epoch": 5.08,
"learning_rate": 0.00025172039116262224,
"loss": 0.721,
"step": 204000
},
{
"epoch": 5.15,
"learning_rate": 0.0002509658336351563,
"loss": 0.7241,
"step": 207000
},
{
"epoch": 5.23,
"learning_rate": 0.00025021127610769044,
"loss": 0.7183,
"step": 210000
},
{
"epoch": 5.3,
"learning_rate": 0.00024945671858022457,
"loss": 0.7163,
"step": 213000
},
{
"epoch": 5.38,
"learning_rate": 0.00024870216105275865,
"loss": 0.716,
"step": 216000
},
{
"epoch": 5.45,
"learning_rate": 0.0002479476035252927,
"loss": 0.7208,
"step": 219000
},
{
"epoch": 5.53,
"learning_rate": 0.00024719304599782685,
"loss": 0.7149,
"step": 222000
},
{
"epoch": 5.6,
"learning_rate": 0.000246438488470361,
"loss": 0.7168,
"step": 225000
},
{
"epoch": 5.68,
"learning_rate": 0.00024568393094289505,
"loss": 0.7131,
"step": 228000
},
{
"epoch": 5.75,
"learning_rate": 0.0002449293734154292,
"loss": 0.7134,
"step": 231000
},
{
"epoch": 5.83,
"learning_rate": 0.0002441748158879633,
"loss": 0.7088,
"step": 234000
},
{
"epoch": 5.9,
"learning_rate": 0.00024342025836049738,
"loss": 0.7101,
"step": 237000
},
{
"epoch": 5.98,
"learning_rate": 0.00024266570083303148,
"loss": 0.7106,
"step": 240000
},
{
"epoch": 6.05,
"learning_rate": 0.0002419111433055656,
"loss": 0.6846,
"step": 243000
},
{
"epoch": 6.13,
"learning_rate": 0.00024115658577809968,
"loss": 0.6761,
"step": 246000
},
{
"epoch": 6.2,
"learning_rate": 0.00024040202825063379,
"loss": 0.6783,
"step": 249000
},
{
"epoch": 6.27,
"learning_rate": 0.00023964747072316791,
"loss": 0.6815,
"step": 252000
},
{
"epoch": 6.35,
"learning_rate": 0.00023889291319570202,
"loss": 0.6864,
"step": 255000
},
{
"epoch": 6.42,
"learning_rate": 0.00023813835566823612,
"loss": 0.688,
"step": 258000
},
{
"epoch": 6.5,
"learning_rate": 0.00023738379814077025,
"loss": 0.6843,
"step": 261000
},
{
"epoch": 6.57,
"learning_rate": 0.00023662924061330435,
"loss": 0.6842,
"step": 264000
},
{
"epoch": 6.65,
"learning_rate": 0.00023587468308583842,
"loss": 0.6874,
"step": 267000
},
{
"epoch": 6.72,
"learning_rate": 0.00023512012555837258,
"loss": 0.6777,
"step": 270000
},
{
"epoch": 6.8,
"learning_rate": 0.00023436556803090665,
"loss": 0.6813,
"step": 273000
},
{
"epoch": 6.87,
"learning_rate": 0.00023361101050344075,
"loss": 0.6859,
"step": 276000
},
{
"epoch": 6.95,
"learning_rate": 0.00023285645297597488,
"loss": 0.6756,
"step": 279000
},
{
"epoch": 7.02,
"learning_rate": 0.00023210189544850898,
"loss": 0.6682,
"step": 282000
},
{
"epoch": 7.1,
"learning_rate": 0.00023134733792104308,
"loss": 0.6456,
"step": 285000
},
{
"epoch": 7.17,
"learning_rate": 0.00023059278039357716,
"loss": 0.6463,
"step": 288000
},
{
"epoch": 7.25,
"learning_rate": 0.0002298382228661113,
"loss": 0.6508,
"step": 291000
},
{
"epoch": 7.32,
"learning_rate": 0.00022908366533864539,
"loss": 0.6513,
"step": 294000
},
{
"epoch": 7.4,
"learning_rate": 0.0002283291078111795,
"loss": 0.6479,
"step": 297000
},
{
"epoch": 7.47,
"learning_rate": 0.00022757455028371362,
"loss": 0.6514,
"step": 300000
},
{
"epoch": 7.54,
"learning_rate": 0.00022681999275624772,
"loss": 0.648,
"step": 303000
},
{
"epoch": 7.62,
"learning_rate": 0.00022606543522878182,
"loss": 0.6467,
"step": 306000
},
{
"epoch": 7.69,
"learning_rate": 0.00022531087770131595,
"loss": 0.6476,
"step": 309000
},
{
"epoch": 7.77,
"learning_rate": 0.00022455632017385005,
"loss": 0.6485,
"step": 312000
},
{
"epoch": 7.84,
"learning_rate": 0.00022380176264638412,
"loss": 0.6464,
"step": 315000
},
{
"epoch": 7.92,
"learning_rate": 0.00022304720511891825,
"loss": 0.644,
"step": 318000
},
{
"epoch": 7.99,
"learning_rate": 0.00022229264759145235,
"loss": 0.6423,
"step": 321000
},
{
"epoch": 8.07,
"learning_rate": 0.00022153809006398645,
"loss": 0.6169,
"step": 324000
},
{
"epoch": 8.14,
"learning_rate": 0.00022078353253652058,
"loss": 0.6166,
"step": 327000
},
{
"epoch": 8.22,
"learning_rate": 0.00022002897500905468,
"loss": 0.6174,
"step": 330000
},
{
"epoch": 8.29,
"learning_rate": 0.00021927441748158878,
"loss": 0.62,
"step": 333000
},
{
"epoch": 8.37,
"learning_rate": 0.00021851985995412286,
"loss": 0.6217,
"step": 336000
},
{
"epoch": 8.44,
"learning_rate": 0.00021776530242665699,
"loss": 0.6187,
"step": 339000
},
{
"epoch": 8.52,
"learning_rate": 0.0002170107448991911,
"loss": 0.6221,
"step": 342000
},
{
"epoch": 8.59,
"learning_rate": 0.0002162561873717252,
"loss": 0.6202,
"step": 345000
},
{
"epoch": 8.67,
"learning_rate": 0.00021550162984425932,
"loss": 0.6198,
"step": 348000
},
{
"epoch": 8.74,
"learning_rate": 0.00021474707231679342,
"loss": 0.6159,
"step": 351000
},
{
"epoch": 8.81,
"learning_rate": 0.00021399251478932752,
"loss": 0.615,
"step": 354000
},
{
"epoch": 8.89,
"learning_rate": 0.00021323795726186165,
"loss": 0.6188,
"step": 357000
},
{
"epoch": 8.96,
"learning_rate": 0.00021248339973439572,
"loss": 0.6167,
"step": 360000
},
{
"epoch": 9.04,
"learning_rate": 0.00021172884220692982,
"loss": 0.6022,
"step": 363000
},
{
"epoch": 9.11,
"learning_rate": 0.00021097428467946395,
"loss": 0.5912,
"step": 366000
},
{
"epoch": 9.19,
"learning_rate": 0.00021021972715199805,
"loss": 0.5906,
"step": 369000
},
{
"epoch": 9.26,
"learning_rate": 0.00020946516962453215,
"loss": 0.5921,
"step": 372000
},
{
"epoch": 9.34,
"learning_rate": 0.00020871061209706628,
"loss": 0.584,
"step": 375000
},
{
"epoch": 9.41,
"learning_rate": 0.00020795605456960038,
"loss": 0.5884,
"step": 378000
},
{
"epoch": 9.49,
"learning_rate": 0.00020720149704213446,
"loss": 0.585,
"step": 381000
},
{
"epoch": 9.56,
"learning_rate": 0.00020644693951466859,
"loss": 0.5878,
"step": 384000
},
{
"epoch": 9.64,
"learning_rate": 0.0002056923819872027,
"loss": 0.5894,
"step": 387000
},
{
"epoch": 9.71,
"learning_rate": 0.0002049378244597368,
"loss": 0.5875,
"step": 390000
},
{
"epoch": 9.79,
"learning_rate": 0.0002041832669322709,
"loss": 0.5877,
"step": 393000
},
{
"epoch": 9.86,
"learning_rate": 0.00020342870940480502,
"loss": 0.5876,
"step": 396000
},
{
"epoch": 9.94,
"learning_rate": 0.00020267415187733912,
"loss": 0.5876,
"step": 399000
},
{
"epoch": 10.01,
"learning_rate": 0.0002019195943498732,
"loss": 0.5885,
"step": 402000
},
{
"epoch": 10.08,
"learning_rate": 0.00020116503682240732,
"loss": 0.5606,
"step": 405000
},
{
"epoch": 10.16,
"learning_rate": 0.00020041047929494142,
"loss": 0.5618,
"step": 408000
},
{
"epoch": 10.23,
"learning_rate": 0.00019965592176747552,
"loss": 0.5661,
"step": 411000
},
{
"epoch": 10.31,
"learning_rate": 0.00019890136424000965,
"loss": 0.5723,
"step": 414000
},
{
"epoch": 10.38,
"learning_rate": 0.00019814680671254375,
"loss": 0.5665,
"step": 417000
},
{
"epoch": 10.46,
"learning_rate": 0.00019739224918507786,
"loss": 0.5635,
"step": 420000
},
{
"epoch": 10.53,
"learning_rate": 0.00019663769165761198,
"loss": 0.5677,
"step": 423000
},
{
"epoch": 10.61,
"learning_rate": 0.00019588313413014606,
"loss": 0.5657,
"step": 426000
},
{
"epoch": 10.68,
"learning_rate": 0.00019512857660268016,
"loss": 0.5675,
"step": 429000
},
{
"epoch": 10.76,
"learning_rate": 0.0001943740190752143,
"loss": 0.5646,
"step": 432000
},
{
"epoch": 10.83,
"learning_rate": 0.0001936194615477484,
"loss": 0.5663,
"step": 435000
},
{
"epoch": 10.91,
"learning_rate": 0.0001928649040202825,
"loss": 0.5692,
"step": 438000
},
{
"epoch": 10.98,
"learning_rate": 0.0001921103464928166,
"loss": 0.5664,
"step": 441000
},
{
"epoch": 11.06,
"learning_rate": 0.00019135578896535072,
"loss": 0.5487,
"step": 444000
},
{
"epoch": 11.13,
"learning_rate": 0.0001906012314378848,
"loss": 0.5436,
"step": 447000
},
{
"epoch": 11.21,
"learning_rate": 0.0001898466739104189,
"loss": 0.5438,
"step": 450000
},
{
"epoch": 11.28,
"learning_rate": 0.00018909211638295302,
"loss": 0.5444,
"step": 453000
},
{
"epoch": 11.35,
"learning_rate": 0.00018833755885548712,
"loss": 0.5422,
"step": 456000
},
{
"epoch": 11.43,
"learning_rate": 0.00018758300132802123,
"loss": 0.5452,
"step": 459000
},
{
"epoch": 11.5,
"learning_rate": 0.00018682844380055535,
"loss": 0.545,
"step": 462000
},
{
"epoch": 11.58,
"learning_rate": 0.00018607388627308946,
"loss": 0.5418,
"step": 465000
},
{
"epoch": 11.65,
"learning_rate": 0.00018531932874562353,
"loss": 0.5457,
"step": 468000
},
{
"epoch": 11.73,
"learning_rate": 0.00018456477121815766,
"loss": 0.5425,
"step": 471000
},
{
"epoch": 11.8,
"learning_rate": 0.00018381021369069176,
"loss": 0.5435,
"step": 474000
},
{
"epoch": 11.88,
"learning_rate": 0.00018305565616322586,
"loss": 0.5489,
"step": 477000
},
{
"epoch": 11.95,
"learning_rate": 0.00018230109863576,
"loss": 0.5457,
"step": 480000
},
{
"epoch": 12.03,
"learning_rate": 0.0001815465411082941,
"loss": 0.5353,
"step": 483000
},
{
"epoch": 12.1,
"learning_rate": 0.0001807919835808282,
"loss": 0.5185,
"step": 486000
},
{
"epoch": 12.18,
"learning_rate": 0.00018003742605336227,
"loss": 0.5223,
"step": 489000
},
{
"epoch": 12.25,
"learning_rate": 0.0001792828685258964,
"loss": 0.5172,
"step": 492000
},
{
"epoch": 12.33,
"learning_rate": 0.0001785283109984305,
"loss": 0.5191,
"step": 495000
},
{
"epoch": 12.4,
"learning_rate": 0.0001777737534709646,
"loss": 0.5221,
"step": 498000
},
{
"epoch": 12.48,
"learning_rate": 0.00017701919594349872,
"loss": 0.522,
"step": 501000
},
{
"epoch": 12.55,
"learning_rate": 0.00017626463841603283,
"loss": 0.525,
"step": 504000
},
{
"epoch": 12.62,
"learning_rate": 0.00017551008088856693,
"loss": 0.5265,
"step": 507000
},
{
"epoch": 12.7,
"learning_rate": 0.00017475552336110106,
"loss": 0.526,
"step": 510000
},
{
"epoch": 12.77,
"learning_rate": 0.00017400096583363513,
"loss": 0.527,
"step": 513000
},
{
"epoch": 12.85,
"learning_rate": 0.00017324640830616923,
"loss": 0.5259,
"step": 516000
},
{
"epoch": 12.92,
"learning_rate": 0.00017249185077870336,
"loss": 0.5234,
"step": 519000
},
{
"epoch": 13.0,
"learning_rate": 0.00017173729325123746,
"loss": 0.5259,
"step": 522000
},
{
"epoch": 13.07,
"learning_rate": 0.00017098273572377156,
"loss": 0.5027,
"step": 525000
},
{
"epoch": 13.15,
"learning_rate": 0.0001702281781963057,
"loss": 0.5043,
"step": 528000
},
{
"epoch": 13.22,
"learning_rate": 0.0001694736206688398,
"loss": 0.5051,
"step": 531000
},
{
"epoch": 13.3,
"learning_rate": 0.00016871906314137387,
"loss": 0.5062,
"step": 534000
},
{
"epoch": 13.37,
"learning_rate": 0.00016796450561390797,
"loss": 0.5062,
"step": 537000
},
{
"epoch": 13.45,
"learning_rate": 0.0001672099480864421,
"loss": 0.508,
"step": 540000
},
{
"epoch": 13.52,
"learning_rate": 0.0001664553905589762,
"loss": 0.5086,
"step": 543000
},
{
"epoch": 13.6,
"learning_rate": 0.0001657008330315103,
"loss": 0.5072,
"step": 546000
},
{
"epoch": 13.67,
"learning_rate": 0.00016494627550404443,
"loss": 0.5,
"step": 549000
},
{
"epoch": 13.75,
"learning_rate": 0.00016419171797657853,
"loss": 0.5067,
"step": 552000
},
{
"epoch": 13.82,
"learning_rate": 0.0001634371604491126,
"loss": 0.5055,
"step": 555000
},
{
"epoch": 13.89,
"learning_rate": 0.00016268260292164673,
"loss": 0.501,
"step": 558000
},
{
"epoch": 13.97,
"learning_rate": 0.00016192804539418083,
"loss": 0.5068,
"step": 561000
},
{
"epoch": 14.04,
"learning_rate": 0.00016117348786671493,
"loss": 0.4915,
"step": 564000
},
{
"epoch": 14.12,
"learning_rate": 0.00016041893033924906,
"loss": 0.4865,
"step": 567000
},
{
"epoch": 14.19,
"learning_rate": 0.00015966437281178316,
"loss": 0.4881,
"step": 570000
},
{
"epoch": 14.27,
"learning_rate": 0.00015890981528431726,
"loss": 0.4842,
"step": 573000
},
{
"epoch": 14.34,
"learning_rate": 0.0001581552577568514,
"loss": 0.4873,
"step": 576000
},
{
"epoch": 14.42,
"learning_rate": 0.00015740070022938547,
"loss": 0.4861,
"step": 579000
},
{
"epoch": 14.49,
"learning_rate": 0.00015664614270191957,
"loss": 0.4867,
"step": 582000
},
{
"epoch": 14.57,
"learning_rate": 0.00015589158517445367,
"loss": 0.4902,
"step": 585000
},
{
"epoch": 14.64,
"learning_rate": 0.0001551370276469878,
"loss": 0.4927,
"step": 588000
},
{
"epoch": 14.72,
"learning_rate": 0.0001543824701195219,
"loss": 0.4904,
"step": 591000
},
{
"epoch": 14.79,
"learning_rate": 0.000153627912592056,
"loss": 0.4909,
"step": 594000
},
{
"epoch": 14.87,
"learning_rate": 0.00015287335506459013,
"loss": 0.4914,
"step": 597000
},
{
"epoch": 14.94,
"learning_rate": 0.00015211879753712423,
"loss": 0.4864,
"step": 600000
},
{
"epoch": 15.01,
"learning_rate": 0.0001513642400096583,
"loss": 0.4834,
"step": 603000
},
{
"epoch": 15.09,
"learning_rate": 0.00015060968248219243,
"loss": 0.4687,
"step": 606000
},
{
"epoch": 15.16,
"learning_rate": 0.00014985512495472653,
"loss": 0.4683,
"step": 609000
},
{
"epoch": 15.24,
"learning_rate": 0.00014910056742726066,
"loss": 0.4657,
"step": 612000
},
{
"epoch": 15.31,
"learning_rate": 0.00014834600989979473,
"loss": 0.4727,
"step": 615000
},
{
"epoch": 15.39,
"learning_rate": 0.00014759145237232886,
"loss": 0.4709,
"step": 618000
},
{
"epoch": 15.46,
"learning_rate": 0.00014683689484486296,
"loss": 0.4703,
"step": 621000
},
{
"epoch": 15.54,
"learning_rate": 0.00014608233731739707,
"loss": 0.4709,
"step": 624000
},
{
"epoch": 15.61,
"learning_rate": 0.00014532777978993117,
"loss": 0.4726,
"step": 627000
},
{
"epoch": 15.69,
"learning_rate": 0.00014457322226246527,
"loss": 0.4684,
"step": 630000
},
{
"epoch": 15.76,
"learning_rate": 0.0001438186647349994,
"loss": 0.4725,
"step": 633000
},
{
"epoch": 15.84,
"learning_rate": 0.0001430641072075335,
"loss": 0.4689,
"step": 636000
},
{
"epoch": 15.91,
"learning_rate": 0.0001423095496800676,
"loss": 0.468,
"step": 639000
},
{
"epoch": 15.99,
"learning_rate": 0.0001415549921526017,
"loss": 0.4749,
"step": 642000
},
{
"epoch": 16.06,
"learning_rate": 0.00014080043462513583,
"loss": 0.4551,
"step": 645000
},
{
"epoch": 16.14,
"learning_rate": 0.0001400458770976699,
"loss": 0.4536,
"step": 648000
},
{
"epoch": 16.21,
"learning_rate": 0.00013929131957020403,
"loss": 0.4548,
"step": 651000
},
{
"epoch": 16.28,
"learning_rate": 0.00013853676204273813,
"loss": 0.4541,
"step": 654000
},
{
"epoch": 16.36,
"learning_rate": 0.00013778220451527223,
"loss": 0.4573,
"step": 657000
},
{
"epoch": 16.43,
"learning_rate": 0.00013702764698780633,
"loss": 0.4521,
"step": 660000
},
{
"epoch": 16.51,
"learning_rate": 0.00013627308946034044,
"loss": 0.4564,
"step": 663000
},
{
"epoch": 16.58,
"learning_rate": 0.00013551853193287456,
"loss": 0.4563,
"step": 666000
},
{
"epoch": 16.66,
"learning_rate": 0.00013476397440540867,
"loss": 0.4582,
"step": 669000
},
{
"epoch": 16.73,
"learning_rate": 0.00013400941687794277,
"loss": 0.4556,
"step": 672000
},
{
"epoch": 16.81,
"learning_rate": 0.00013325485935047687,
"loss": 0.4527,
"step": 675000
},
{
"epoch": 16.88,
"learning_rate": 0.00013250030182301097,
"loss": 0.4535,
"step": 678000
},
{
"epoch": 16.96,
"learning_rate": 0.00013174574429554507,
"loss": 0.4578,
"step": 681000
},
{
"epoch": 17.03,
"learning_rate": 0.0001309911867680792,
"loss": 0.449,
"step": 684000
},
{
"epoch": 17.11,
"learning_rate": 0.0001302366292406133,
"loss": 0.4389,
"step": 687000
},
{
"epoch": 17.18,
"learning_rate": 0.0001294820717131474,
"loss": 0.438,
"step": 690000
},
{
"epoch": 17.26,
"learning_rate": 0.0001287275141856815,
"loss": 0.4396,
"step": 693000
},
{
"epoch": 17.33,
"learning_rate": 0.0001279729566582156,
"loss": 0.437,
"step": 696000
},
{
"epoch": 17.41,
"learning_rate": 0.00012721839913074973,
"loss": 0.443,
"step": 699000
},
{
"epoch": 17.48,
"learning_rate": 0.0001264638416032838,
"loss": 0.4428,
"step": 702000
},
{
"epoch": 17.55,
"learning_rate": 0.00012570928407581793,
"loss": 0.4384,
"step": 705000
},
{
"epoch": 17.63,
"learning_rate": 0.00012495472654835204,
"loss": 0.4387,
"step": 708000
},
{
"epoch": 17.7,
"learning_rate": 0.00012420016902088614,
"loss": 0.4416,
"step": 711000
},
{
"epoch": 17.78,
"learning_rate": 0.00012344561149342024,
"loss": 0.4398,
"step": 714000
},
{
"epoch": 17.85,
"learning_rate": 0.00012269105396595437,
"loss": 0.437,
"step": 717000
},
{
"epoch": 17.93,
"learning_rate": 0.00012193649643848845,
"loss": 0.4393,
"step": 720000
},
{
"epoch": 18.0,
"learning_rate": 0.00012118193891102257,
"loss": 0.4416,
"step": 723000
},
{
"epoch": 18.08,
"learning_rate": 0.00012042738138355667,
"loss": 0.4216,
"step": 726000
},
{
"epoch": 18.15,
"learning_rate": 0.00011967282385609077,
"loss": 0.4206,
"step": 729000
},
{
"epoch": 18.23,
"learning_rate": 0.00011891826632862489,
"loss": 0.4223,
"step": 732000
},
{
"epoch": 18.3,
"learning_rate": 0.00011816370880115899,
"loss": 0.4261,
"step": 735000
},
{
"epoch": 18.38,
"learning_rate": 0.0001174091512736931,
"loss": 0.4238,
"step": 738000
},
{
"epoch": 18.45,
"learning_rate": 0.0001166545937462272,
"loss": 0.4224,
"step": 741000
},
{
"epoch": 18.53,
"learning_rate": 0.0001159000362187613,
"loss": 0.4261,
"step": 744000
},
{
"epoch": 18.6,
"learning_rate": 0.00011514547869129542,
"loss": 0.4287,
"step": 747000
},
{
"epoch": 18.68,
"learning_rate": 0.00011439092116382951,
"loss": 0.4261,
"step": 750000
},
{
"epoch": 18.75,
"learning_rate": 0.00011363636363636362,
"loss": 0.4259,
"step": 753000
},
{
"epoch": 18.82,
"learning_rate": 0.00011288180610889774,
"loss": 0.4233,
"step": 756000
},
{
"epoch": 18.9,
"learning_rate": 0.00011212724858143184,
"loss": 0.427,
"step": 759000
},
{
"epoch": 18.97,
"learning_rate": 0.00011137269105396594,
"loss": 0.4302,
"step": 762000
},
{
"epoch": 19.05,
"learning_rate": 0.00011061813352650005,
"loss": 0.4109,
"step": 765000
},
{
"epoch": 19.12,
"learning_rate": 0.00010986357599903416,
"loss": 0.4088,
"step": 768000
},
{
"epoch": 19.2,
"learning_rate": 0.00010910901847156827,
"loss": 0.4078,
"step": 771000
},
{
"epoch": 19.27,
"learning_rate": 0.00010835446094410236,
"loss": 0.4094,
"step": 774000
},
{
"epoch": 19.35,
"learning_rate": 0.00010759990341663647,
"loss": 0.409,
"step": 777000
},
{
"epoch": 19.42,
"learning_rate": 0.00010684534588917059,
"loss": 0.4086,
"step": 780000
},
{
"epoch": 19.5,
"learning_rate": 0.00010609078836170468,
"loss": 0.406,
"step": 783000
},
{
"epoch": 19.57,
"learning_rate": 0.00010533623083423879,
"loss": 0.4102,
"step": 786000
},
{
"epoch": 19.65,
"learning_rate": 0.0001045816733067729,
"loss": 0.4089,
"step": 789000
},
{
"epoch": 19.72,
"learning_rate": 0.000103827115779307,
"loss": 0.4096,
"step": 792000
},
{
"epoch": 19.8,
"learning_rate": 0.00010307255825184111,
"loss": 0.4119,
"step": 795000
},
{
"epoch": 19.87,
"learning_rate": 0.00010231800072437521,
"loss": 0.4101,
"step": 798000
},
{
"epoch": 19.95,
"learning_rate": 0.00010156344319690932,
"loss": 0.4125,
"step": 801000
},
{
"epoch": 20.02,
"learning_rate": 0.00010080888566944344,
"loss": 0.4091,
"step": 804000
},
{
"epoch": 20.09,
"learning_rate": 0.00010005432814197753,
"loss": 0.3946,
"step": 807000
},
{
"epoch": 20.17,
"learning_rate": 9.929977061451164e-05,
"loss": 0.3959,
"step": 810000
},
{
"epoch": 20.24,
"learning_rate": 9.854521308704576e-05,
"loss": 0.3954,
"step": 813000
},
{
"epoch": 20.32,
"learning_rate": 9.779065555957984e-05,
"loss": 0.3974,
"step": 816000
},
{
"epoch": 20.39,
"learning_rate": 9.703609803211396e-05,
"loss": 0.3943,
"step": 819000
},
{
"epoch": 20.47,
"learning_rate": 9.628154050464806e-05,
"loss": 0.3984,
"step": 822000
},
{
"epoch": 20.54,
"learning_rate": 9.552698297718217e-05,
"loss": 0.3963,
"step": 825000
},
{
"epoch": 20.62,
"learning_rate": 9.477242544971629e-05,
"loss": 0.3927,
"step": 828000
},
{
"epoch": 20.69,
"learning_rate": 9.401786792225038e-05,
"loss": 0.3955,
"step": 831000
},
{
"epoch": 20.77,
"learning_rate": 9.326331039478449e-05,
"loss": 0.3982,
"step": 834000
},
{
"epoch": 20.84,
"learning_rate": 9.25087528673186e-05,
"loss": 0.3976,
"step": 837000
},
{
"epoch": 20.92,
"learning_rate": 9.17541953398527e-05,
"loss": 0.3957,
"step": 840000
},
{
"epoch": 20.99,
"learning_rate": 9.099963781238681e-05,
"loss": 0.3982,
"step": 843000
},
{
"epoch": 21.07,
"learning_rate": 9.024508028492091e-05,
"loss": 0.3806,
"step": 846000
},
{
"epoch": 21.14,
"learning_rate": 8.949052275745503e-05,
"loss": 0.3819,
"step": 849000
},
{
"epoch": 21.22,
"learning_rate": 8.873596522998913e-05,
"loss": 0.3847,
"step": 852000
},
{
"epoch": 21.29,
"learning_rate": 8.798140770252323e-05,
"loss": 0.3843,
"step": 855000
},
{
"epoch": 21.36,
"learning_rate": 8.722685017505734e-05,
"loss": 0.3859,
"step": 858000
},
{
"epoch": 21.44,
"learning_rate": 8.647229264759146e-05,
"loss": 0.3821,
"step": 861000
},
{
"epoch": 21.51,
"learning_rate": 8.571773512012555e-05,
"loss": 0.386,
"step": 864000
},
{
"epoch": 21.59,
"learning_rate": 8.496317759265966e-05,
"loss": 0.3853,
"step": 867000
},
{
"epoch": 21.66,
"learning_rate": 8.420862006519376e-05,
"loss": 0.3856,
"step": 870000
},
{
"epoch": 21.74,
"learning_rate": 8.345406253772786e-05,
"loss": 0.3843,
"step": 873000
},
{
"epoch": 21.81,
"learning_rate": 8.269950501026198e-05,
"loss": 0.3845,
"step": 876000
},
{
"epoch": 21.89,
"learning_rate": 8.194494748279608e-05,
"loss": 0.3809,
"step": 879000
},
{
"epoch": 21.96,
"learning_rate": 8.11903899553302e-05,
"loss": 0.3824,
"step": 882000
},
{
"epoch": 22.04,
"learning_rate": 8.04358324278643e-05,
"loss": 0.3801,
"step": 885000
},
{
"epoch": 22.11,
"learning_rate": 7.96812749003984e-05,
"loss": 0.3702,
"step": 888000
},
{
"epoch": 22.19,
"learning_rate": 7.892671737293251e-05,
"loss": 0.3713,
"step": 891000
},
{
"epoch": 22.26,
"learning_rate": 7.817215984546663e-05,
"loss": 0.3715,
"step": 894000
},
{
"epoch": 22.34,
"learning_rate": 7.741760231800071e-05,
"loss": 0.3681,
"step": 897000
},
{
"epoch": 22.41,
"learning_rate": 7.666304479053483e-05,
"loss": 0.3714,
"step": 900000
},
{
"epoch": 22.49,
"learning_rate": 7.590848726306893e-05,
"loss": 0.3725,
"step": 903000
},
{
"epoch": 22.56,
"learning_rate": 7.515392973560303e-05,
"loss": 0.3692,
"step": 906000
},
{
"epoch": 22.63,
"learning_rate": 7.439937220813715e-05,
"loss": 0.375,
"step": 909000
},
{
"epoch": 22.71,
"learning_rate": 7.364481468067125e-05,
"loss": 0.3721,
"step": 912000
},
{
"epoch": 22.78,
"learning_rate": 7.289025715320536e-05,
"loss": 0.3699,
"step": 915000
},
{
"epoch": 22.86,
"learning_rate": 7.213569962573946e-05,
"loss": 0.37,
"step": 918000
},
{
"epoch": 22.93,
"learning_rate": 7.138114209827356e-05,
"loss": 0.3702,
"step": 921000
},
{
"epoch": 23.01,
"learning_rate": 7.062658457080768e-05,
"loss": 0.3668,
"step": 924000
},
{
"epoch": 23.08,
"learning_rate": 6.987202704334178e-05,
"loss": 0.3602,
"step": 927000
},
{
"epoch": 23.16,
"learning_rate": 6.911746951587588e-05,
"loss": 0.3557,
"step": 930000
},
{
"epoch": 23.23,
"learning_rate": 6.836291198840998e-05,
"loss": 0.3607,
"step": 933000
},
{
"epoch": 23.31,
"learning_rate": 6.76083544609441e-05,
"loss": 0.3571,
"step": 936000
},
{
"epoch": 23.38,
"learning_rate": 6.68537969334782e-05,
"loss": 0.3589,
"step": 939000
},
{
"epoch": 23.46,
"learning_rate": 6.609923940601231e-05,
"loss": 0.361,
"step": 942000
},
{
"epoch": 23.53,
"learning_rate": 6.534468187854641e-05,
"loss": 0.3595,
"step": 945000
},
{
"epoch": 23.61,
"learning_rate": 6.459012435108053e-05,
"loss": 0.3554,
"step": 948000
},
{
"epoch": 23.68,
"learning_rate": 6.383556682361463e-05,
"loss": 0.3572,
"step": 951000
},
{
"epoch": 23.75,
"learning_rate": 6.308100929614873e-05,
"loss": 0.3604,
"step": 954000
},
{
"epoch": 23.83,
"learning_rate": 6.232645176868283e-05,
"loss": 0.3591,
"step": 957000
},
{
"epoch": 23.9,
"learning_rate": 6.157189424121695e-05,
"loss": 0.3582,
"step": 960000
},
{
"epoch": 23.98,
"learning_rate": 6.0817336713751056e-05,
"loss": 0.3568,
"step": 963000
},
{
"epoch": 24.05,
"learning_rate": 6.006277918628516e-05,
"loss": 0.3495,
"step": 966000
},
{
"epoch": 24.13,
"learning_rate": 5.930822165881926e-05,
"loss": 0.3484,
"step": 969000
},
{
"epoch": 24.2,
"learning_rate": 5.855366413135337e-05,
"loss": 0.3449,
"step": 972000
},
{
"epoch": 24.28,
"learning_rate": 5.7799106603887474e-05,
"loss": 0.3471,
"step": 975000
},
{
"epoch": 24.35,
"learning_rate": 5.704454907642158e-05,
"loss": 0.3495,
"step": 978000
},
{
"epoch": 24.43,
"learning_rate": 5.6289991548955684e-05,
"loss": 0.3489,
"step": 981000
},
{
"epoch": 24.5,
"learning_rate": 5.55354340214898e-05,
"loss": 0.3464,
"step": 984000
},
{
"epoch": 24.58,
"learning_rate": 5.47808764940239e-05,
"loss": 0.3485,
"step": 987000
},
{
"epoch": 24.65,
"learning_rate": 5.402631896655801e-05,
"loss": 0.3486,
"step": 990000
},
{
"epoch": 24.73,
"learning_rate": 5.327176143909211e-05,
"loss": 0.3476,
"step": 993000
},
{
"epoch": 24.8,
"learning_rate": 5.2517203911626224e-05,
"loss": 0.3502,
"step": 996000
},
{
"epoch": 24.88,
"learning_rate": 5.1762646384160325e-05,
"loss": 0.3492,
"step": 999000
},
{
"epoch": 24.95,
"learning_rate": 5.1008088856694426e-05,
"loss": 0.347,
"step": 1002000
},
{
"epoch": 25.02,
"learning_rate": 5.0253531329228534e-05,
"loss": 0.343,
"step": 1005000
},
{
"epoch": 25.1,
"learning_rate": 4.949897380176264e-05,
"loss": 0.3366,
"step": 1008000
},
{
"epoch": 25.17,
"learning_rate": 4.874441627429675e-05,
"loss": 0.3348,
"step": 1011000
},
{
"epoch": 25.25,
"learning_rate": 4.798985874683085e-05,
"loss": 0.3344,
"step": 1014000
},
{
"epoch": 25.32,
"learning_rate": 4.723530121936496e-05,
"loss": 0.3406,
"step": 1017000
},
{
"epoch": 25.4,
"learning_rate": 4.648074369189907e-05,
"loss": 0.3366,
"step": 1020000
},
{
"epoch": 25.47,
"learning_rate": 4.5726186164433176e-05,
"loss": 0.3392,
"step": 1023000
},
{
"epoch": 25.55,
"learning_rate": 4.497162863696728e-05,
"loss": 0.3379,
"step": 1026000
},
{
"epoch": 25.62,
"learning_rate": 4.421707110950138e-05,
"loss": 0.3388,
"step": 1029000
},
{
"epoch": 25.7,
"learning_rate": 4.346251358203549e-05,
"loss": 0.3383,
"step": 1032000
},
{
"epoch": 25.77,
"learning_rate": 4.27079560545696e-05,
"loss": 0.3372,
"step": 1035000
},
{
"epoch": 25.85,
"learning_rate": 4.19533985271037e-05,
"loss": 0.3358,
"step": 1038000
},
{
"epoch": 25.92,
"learning_rate": 4.1198840999637803e-05,
"loss": 0.3353,
"step": 1041000
},
{
"epoch": 26.0,
"learning_rate": 4.044428347217192e-05,
"loss": 0.34,
"step": 1044000
},
{
"epoch": 26.07,
"learning_rate": 3.968972594470602e-05,
"loss": 0.3282,
"step": 1047000
},
{
"epoch": 26.15,
"learning_rate": 3.893516841724013e-05,
"loss": 0.3267,
"step": 1050000
},
{
"epoch": 26.22,
"learning_rate": 3.818061088977423e-05,
"loss": 0.3268,
"step": 1053000
},
{
"epoch": 26.29,
"learning_rate": 3.742605336230834e-05,
"loss": 0.3248,
"step": 1056000
},
{
"epoch": 26.37,
"learning_rate": 3.6671495834842445e-05,
"loss": 0.3268,
"step": 1059000
},
{
"epoch": 26.44,
"learning_rate": 3.591693830737655e-05,
"loss": 0.324,
"step": 1062000
},
{
"epoch": 26.52,
"learning_rate": 3.516238077991066e-05,
"loss": 0.3298,
"step": 1065000
},
{
"epoch": 26.59,
"learning_rate": 3.440782325244476e-05,
"loss": 0.3296,
"step": 1068000
},
{
"epoch": 26.67,
"learning_rate": 3.365326572497887e-05,
"loss": 0.3261,
"step": 1071000
},
{
"epoch": 26.74,
"learning_rate": 3.289870819751297e-05,
"loss": 0.3284,
"step": 1074000
},
{
"epoch": 26.82,
"learning_rate": 3.214415067004708e-05,
"loss": 0.3279,
"step": 1077000
},
{
"epoch": 26.89,
"learning_rate": 3.138959314258119e-05,
"loss": 0.3273,
"step": 1080000
},
{
"epoch": 26.97,
"learning_rate": 3.0635035615115295e-05,
"loss": 0.3275,
"step": 1083000
},
{
"epoch": 27.04,
"learning_rate": 2.9880478087649397e-05,
"loss": 0.323,
"step": 1086000
},
{
"epoch": 27.12,
"learning_rate": 2.9125920560183505e-05,
"loss": 0.3182,
"step": 1089000
},
{
"epoch": 27.19,
"learning_rate": 2.837136303271761e-05,
"loss": 0.3209,
"step": 1092000
},
{
"epoch": 27.27,
"learning_rate": 2.7616805505251717e-05,
"loss": 0.3185,
"step": 1095000
},
{
"epoch": 27.34,
"learning_rate": 2.6862247977785822e-05,
"loss": 0.3214,
"step": 1098000
},
{
"epoch": 27.42,
"learning_rate": 2.610769045031993e-05,
"loss": 0.3164,
"step": 1101000
},
{
"epoch": 27.49,
"learning_rate": 2.5353132922854035e-05,
"loss": 0.3206,
"step": 1104000
},
{
"epoch": 27.56,
"learning_rate": 2.4598575395388143e-05,
"loss": 0.3175,
"step": 1107000
},
{
"epoch": 27.64,
"learning_rate": 2.3844017867922247e-05,
"loss": 0.3185,
"step": 1110000
},
{
"epoch": 27.71,
"learning_rate": 2.3089460340456355e-05,
"loss": 0.3199,
"step": 1113000
},
{
"epoch": 27.79,
"learning_rate": 2.233490281299046e-05,
"loss": 0.318,
"step": 1116000
},
{
"epoch": 27.86,
"learning_rate": 2.1580345285524568e-05,
"loss": 0.319,
"step": 1119000
},
{
"epoch": 27.94,
"learning_rate": 2.082578775805867e-05,
"loss": 0.3152,
"step": 1122000
}
],
"logging_steps": 3000,
"max_steps": 1204800,
"num_train_epochs": 30,
"save_steps": 500,
"total_flos": 2.2337303175142268e+21,
"trial_name": null,
"trial_params": null
}