{ "best_metric": null, "best_model_checkpoint": null, "epoch": 28.0, "eval_steps": 500, "global_step": 1124480, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07, "learning_rate": 7.47011952191235e-05, "loss": 11.8613, "step": 3000 }, { "epoch": 0.15, "learning_rate": 0.000149402390438247, "loss": 1.9866, "step": 6000 }, { "epoch": 0.22, "learning_rate": 0.0002241035856573705, "loss": 1.4383, "step": 9000 }, { "epoch": 0.3, "learning_rate": 0.000298804780876494, "loss": 1.3467, "step": 12000 }, { "epoch": 0.37, "learning_rate": 0.0002992575153929735, "loss": 1.3175, "step": 15000 }, { "epoch": 0.45, "learning_rate": 0.00029850295786550764, "loss": 1.2551, "step": 18000 }, { "epoch": 0.52, "learning_rate": 0.00029774840033804177, "loss": 1.2058, "step": 21000 }, { "epoch": 0.6, "learning_rate": 0.00029699384281057584, "loss": 1.1759, "step": 24000 }, { "epoch": 0.67, "learning_rate": 0.00029623928528310997, "loss": 1.148, "step": 27000 }, { "epoch": 0.75, "learning_rate": 0.0002954847277556441, "loss": 1.116, "step": 30000 }, { "epoch": 0.82, "learning_rate": 0.00029473017022817817, "loss": 1.0933, "step": 33000 }, { "epoch": 0.9, "learning_rate": 0.00029397561270071225, "loss": 1.0788, "step": 36000 }, { "epoch": 0.97, "learning_rate": 0.0002932210551732464, "loss": 1.0578, "step": 39000 }, { "epoch": 1.05, "learning_rate": 0.0002924664976457805, "loss": 1.0363, "step": 42000 }, { "epoch": 1.12, "learning_rate": 0.0002917119401183146, "loss": 1.0198, "step": 45000 }, { "epoch": 1.2, "learning_rate": 0.0002909573825908487, "loss": 1.0132, "step": 48000 }, { "epoch": 1.27, "learning_rate": 0.00029020282506338283, "loss": 0.9995, "step": 51000 }, { "epoch": 1.34, "learning_rate": 0.0002894482675359169, "loss": 0.992, "step": 54000 }, { "epoch": 1.42, "learning_rate": 0.000288693710008451, "loss": 0.9821, "step": 57000 }, { "epoch": 1.49, "learning_rate": 0.0002879391524809851, "loss": 0.9878, "step": 60000 }, { "epoch": 1.57, "learning_rate": 0.00028718459495351924, "loss": 0.9952, "step": 63000 }, { "epoch": 1.64, "learning_rate": 0.0002864300374260533, "loss": 0.9804, "step": 66000 }, { "epoch": 1.72, "learning_rate": 0.00028567547989858744, "loss": 0.9736, "step": 69000 }, { "epoch": 1.79, "learning_rate": 0.00028492092237112157, "loss": 0.9681, "step": 72000 }, { "epoch": 1.87, "learning_rate": 0.00028416636484365564, "loss": 0.965, "step": 75000 }, { "epoch": 1.94, "learning_rate": 0.00028341180731618977, "loss": 0.9519, "step": 78000 }, { "epoch": 2.02, "learning_rate": 0.00028265724978872385, "loss": 0.9525, "step": 81000 }, { "epoch": 2.09, "learning_rate": 0.000281902692261258, "loss": 0.9175, "step": 84000 }, { "epoch": 2.17, "learning_rate": 0.00028114813473379205, "loss": 0.9129, "step": 87000 }, { "epoch": 2.24, "learning_rate": 0.0002803935772063262, "loss": 0.9167, "step": 90000 }, { "epoch": 2.32, "learning_rate": 0.0002796390196788603, "loss": 0.9037, "step": 93000 }, { "epoch": 2.39, "learning_rate": 0.0002788844621513944, "loss": 0.8967, "step": 96000 }, { "epoch": 2.47, "learning_rate": 0.0002781299046239285, "loss": 0.8896, "step": 99000 }, { "epoch": 2.54, "learning_rate": 0.0002773753470964626, "loss": 0.8848, "step": 102000 }, { "epoch": 2.61, "learning_rate": 0.0002766207895689967, "loss": 0.8864, "step": 105000 }, { "epoch": 2.69, "learning_rate": 0.00027586623204153084, "loss": 0.8793, "step": 108000 }, { "epoch": 2.76, "learning_rate": 0.0002751116745140649, "loss": 0.8727, "step": 111000 }, { "epoch": 2.84, "learning_rate": 0.00027435711698659904, "loss": 0.8605, "step": 114000 }, { "epoch": 2.91, "learning_rate": 0.00027360255945913317, "loss": 0.8614, "step": 117000 }, { "epoch": 2.99, "learning_rate": 0.00027284800193166724, "loss": 0.8584, "step": 120000 }, { "epoch": 3.06, "learning_rate": 0.00027209344440420137, "loss": 0.8322, "step": 123000 }, { "epoch": 3.14, "learning_rate": 0.0002713388868767355, "loss": 0.8182, "step": 126000 }, { "epoch": 3.21, "learning_rate": 0.0002705843293492696, "loss": 0.8203, "step": 129000 }, { "epoch": 3.29, "learning_rate": 0.00026982977182180365, "loss": 0.824, "step": 132000 }, { "epoch": 3.36, "learning_rate": 0.0002690752142943378, "loss": 0.819, "step": 135000 }, { "epoch": 3.44, "learning_rate": 0.0002683206567668719, "loss": 0.8144, "step": 138000 }, { "epoch": 3.51, "learning_rate": 0.000267566099239406, "loss": 0.8158, "step": 141000 }, { "epoch": 3.59, "learning_rate": 0.0002668115417119401, "loss": 0.8148, "step": 144000 }, { "epoch": 3.66, "learning_rate": 0.00026605698418447424, "loss": 0.8188, "step": 147000 }, { "epoch": 3.74, "learning_rate": 0.0002653024266570083, "loss": 0.8064, "step": 150000 }, { "epoch": 3.81, "learning_rate": 0.0002645478691295424, "loss": 0.8047, "step": 153000 }, { "epoch": 3.88, "learning_rate": 0.0002637933116020765, "loss": 0.8008, "step": 156000 }, { "epoch": 3.96, "learning_rate": 0.00026303875407461064, "loss": 0.799, "step": 159000 }, { "epoch": 4.03, "learning_rate": 0.0002622841965471447, "loss": 0.7822, "step": 162000 }, { "epoch": 4.11, "learning_rate": 0.00026152963901967884, "loss": 0.7657, "step": 165000 }, { "epoch": 4.18, "learning_rate": 0.00026077508149221297, "loss": 0.7627, "step": 168000 }, { "epoch": 4.26, "learning_rate": 0.00026002052396474705, "loss": 0.7603, "step": 171000 }, { "epoch": 4.33, "learning_rate": 0.0002592659664372812, "loss": 0.7608, "step": 174000 }, { "epoch": 4.41, "learning_rate": 0.00025851140890981525, "loss": 0.7642, "step": 177000 }, { "epoch": 4.48, "learning_rate": 0.0002577568513823494, "loss": 0.7607, "step": 180000 }, { "epoch": 4.56, "learning_rate": 0.0002570022938548835, "loss": 0.7546, "step": 183000 }, { "epoch": 4.63, "learning_rate": 0.0002562477363274176, "loss": 0.7531, "step": 186000 }, { "epoch": 4.71, "learning_rate": 0.0002554931787999517, "loss": 0.7572, "step": 189000 }, { "epoch": 4.78, "learning_rate": 0.0002547386212724858, "loss": 0.7578, "step": 192000 }, { "epoch": 4.86, "learning_rate": 0.0002539840637450199, "loss": 0.7558, "step": 195000 }, { "epoch": 4.93, "learning_rate": 0.000253229506217554, "loss": 0.7556, "step": 198000 }, { "epoch": 5.0, "learning_rate": 0.0002524749486900881, "loss": 0.7476, "step": 201000 }, { "epoch": 5.08, "learning_rate": 0.00025172039116262224, "loss": 0.721, "step": 204000 }, { "epoch": 5.15, "learning_rate": 0.0002509658336351563, "loss": 0.7241, "step": 207000 }, { "epoch": 5.23, "learning_rate": 0.00025021127610769044, "loss": 0.7183, "step": 210000 }, { "epoch": 5.3, "learning_rate": 0.00024945671858022457, "loss": 0.7163, "step": 213000 }, { "epoch": 5.38, "learning_rate": 0.00024870216105275865, "loss": 0.716, "step": 216000 }, { "epoch": 5.45, "learning_rate": 0.0002479476035252927, "loss": 0.7208, "step": 219000 }, { "epoch": 5.53, "learning_rate": 0.00024719304599782685, "loss": 0.7149, "step": 222000 }, { "epoch": 5.6, "learning_rate": 0.000246438488470361, "loss": 0.7168, "step": 225000 }, { "epoch": 5.68, "learning_rate": 0.00024568393094289505, "loss": 0.7131, "step": 228000 }, { "epoch": 5.75, "learning_rate": 0.0002449293734154292, "loss": 0.7134, "step": 231000 }, { "epoch": 5.83, "learning_rate": 0.0002441748158879633, "loss": 0.7088, "step": 234000 }, { "epoch": 5.9, "learning_rate": 0.00024342025836049738, "loss": 0.7101, "step": 237000 }, { "epoch": 5.98, "learning_rate": 0.00024266570083303148, "loss": 0.7106, "step": 240000 }, { "epoch": 6.05, "learning_rate": 0.0002419111433055656, "loss": 0.6846, "step": 243000 }, { "epoch": 6.13, "learning_rate": 0.00024115658577809968, "loss": 0.6761, "step": 246000 }, { "epoch": 6.2, "learning_rate": 0.00024040202825063379, "loss": 0.6783, "step": 249000 }, { "epoch": 6.27, "learning_rate": 0.00023964747072316791, "loss": 0.6815, "step": 252000 }, { "epoch": 6.35, "learning_rate": 0.00023889291319570202, "loss": 0.6864, "step": 255000 }, { "epoch": 6.42, "learning_rate": 0.00023813835566823612, "loss": 0.688, "step": 258000 }, { "epoch": 6.5, "learning_rate": 0.00023738379814077025, "loss": 0.6843, "step": 261000 }, { "epoch": 6.57, "learning_rate": 0.00023662924061330435, "loss": 0.6842, "step": 264000 }, { "epoch": 6.65, "learning_rate": 0.00023587468308583842, "loss": 0.6874, "step": 267000 }, { "epoch": 6.72, "learning_rate": 0.00023512012555837258, "loss": 0.6777, "step": 270000 }, { "epoch": 6.8, "learning_rate": 0.00023436556803090665, "loss": 0.6813, "step": 273000 }, { "epoch": 6.87, "learning_rate": 0.00023361101050344075, "loss": 0.6859, "step": 276000 }, { "epoch": 6.95, "learning_rate": 0.00023285645297597488, "loss": 0.6756, "step": 279000 }, { "epoch": 7.02, "learning_rate": 0.00023210189544850898, "loss": 0.6682, "step": 282000 }, { "epoch": 7.1, "learning_rate": 0.00023134733792104308, "loss": 0.6456, "step": 285000 }, { "epoch": 7.17, "learning_rate": 0.00023059278039357716, "loss": 0.6463, "step": 288000 }, { "epoch": 7.25, "learning_rate": 0.0002298382228661113, "loss": 0.6508, "step": 291000 }, { "epoch": 7.32, "learning_rate": 0.00022908366533864539, "loss": 0.6513, "step": 294000 }, { "epoch": 7.4, "learning_rate": 0.0002283291078111795, "loss": 0.6479, "step": 297000 }, { "epoch": 7.47, "learning_rate": 0.00022757455028371362, "loss": 0.6514, "step": 300000 }, { "epoch": 7.54, "learning_rate": 0.00022681999275624772, "loss": 0.648, "step": 303000 }, { "epoch": 7.62, "learning_rate": 0.00022606543522878182, "loss": 0.6467, "step": 306000 }, { "epoch": 7.69, "learning_rate": 0.00022531087770131595, "loss": 0.6476, "step": 309000 }, { "epoch": 7.77, "learning_rate": 0.00022455632017385005, "loss": 0.6485, "step": 312000 }, { "epoch": 7.84, "learning_rate": 0.00022380176264638412, "loss": 0.6464, "step": 315000 }, { "epoch": 7.92, "learning_rate": 0.00022304720511891825, "loss": 0.644, "step": 318000 }, { "epoch": 7.99, "learning_rate": 0.00022229264759145235, "loss": 0.6423, "step": 321000 }, { "epoch": 8.07, "learning_rate": 0.00022153809006398645, "loss": 0.6169, "step": 324000 }, { "epoch": 8.14, "learning_rate": 0.00022078353253652058, "loss": 0.6166, "step": 327000 }, { "epoch": 8.22, "learning_rate": 0.00022002897500905468, "loss": 0.6174, "step": 330000 }, { "epoch": 8.29, "learning_rate": 0.00021927441748158878, "loss": 0.62, "step": 333000 }, { "epoch": 8.37, "learning_rate": 0.00021851985995412286, "loss": 0.6217, "step": 336000 }, { "epoch": 8.44, "learning_rate": 0.00021776530242665699, "loss": 0.6187, "step": 339000 }, { "epoch": 8.52, "learning_rate": 0.0002170107448991911, "loss": 0.6221, "step": 342000 }, { "epoch": 8.59, "learning_rate": 0.0002162561873717252, "loss": 0.6202, "step": 345000 }, { "epoch": 8.67, "learning_rate": 0.00021550162984425932, "loss": 0.6198, "step": 348000 }, { "epoch": 8.74, "learning_rate": 0.00021474707231679342, "loss": 0.6159, "step": 351000 }, { "epoch": 8.81, "learning_rate": 0.00021399251478932752, "loss": 0.615, "step": 354000 }, { "epoch": 8.89, "learning_rate": 0.00021323795726186165, "loss": 0.6188, "step": 357000 }, { "epoch": 8.96, "learning_rate": 0.00021248339973439572, "loss": 0.6167, "step": 360000 }, { "epoch": 9.04, "learning_rate": 0.00021172884220692982, "loss": 0.6022, "step": 363000 }, { "epoch": 9.11, "learning_rate": 0.00021097428467946395, "loss": 0.5912, "step": 366000 }, { "epoch": 9.19, "learning_rate": 0.00021021972715199805, "loss": 0.5906, "step": 369000 }, { "epoch": 9.26, "learning_rate": 0.00020946516962453215, "loss": 0.5921, "step": 372000 }, { "epoch": 9.34, "learning_rate": 0.00020871061209706628, "loss": 0.584, "step": 375000 }, { "epoch": 9.41, "learning_rate": 0.00020795605456960038, "loss": 0.5884, "step": 378000 }, { "epoch": 9.49, "learning_rate": 0.00020720149704213446, "loss": 0.585, "step": 381000 }, { "epoch": 9.56, "learning_rate": 0.00020644693951466859, "loss": 0.5878, "step": 384000 }, { "epoch": 9.64, "learning_rate": 0.0002056923819872027, "loss": 0.5894, "step": 387000 }, { "epoch": 9.71, "learning_rate": 0.0002049378244597368, "loss": 0.5875, "step": 390000 }, { "epoch": 9.79, "learning_rate": 0.0002041832669322709, "loss": 0.5877, "step": 393000 }, { "epoch": 9.86, "learning_rate": 0.00020342870940480502, "loss": 0.5876, "step": 396000 }, { "epoch": 9.94, "learning_rate": 0.00020267415187733912, "loss": 0.5876, "step": 399000 }, { "epoch": 10.01, "learning_rate": 0.0002019195943498732, "loss": 0.5885, "step": 402000 }, { "epoch": 10.08, "learning_rate": 0.00020116503682240732, "loss": 0.5606, "step": 405000 }, { "epoch": 10.16, "learning_rate": 0.00020041047929494142, "loss": 0.5618, "step": 408000 }, { "epoch": 10.23, "learning_rate": 0.00019965592176747552, "loss": 0.5661, "step": 411000 }, { "epoch": 10.31, "learning_rate": 0.00019890136424000965, "loss": 0.5723, "step": 414000 }, { "epoch": 10.38, "learning_rate": 0.00019814680671254375, "loss": 0.5665, "step": 417000 }, { "epoch": 10.46, "learning_rate": 0.00019739224918507786, "loss": 0.5635, "step": 420000 }, { "epoch": 10.53, "learning_rate": 0.00019663769165761198, "loss": 0.5677, "step": 423000 }, { "epoch": 10.61, "learning_rate": 0.00019588313413014606, "loss": 0.5657, "step": 426000 }, { "epoch": 10.68, "learning_rate": 0.00019512857660268016, "loss": 0.5675, "step": 429000 }, { "epoch": 10.76, "learning_rate": 0.0001943740190752143, "loss": 0.5646, "step": 432000 }, { "epoch": 10.83, "learning_rate": 0.0001936194615477484, "loss": 0.5663, "step": 435000 }, { "epoch": 10.91, "learning_rate": 0.0001928649040202825, "loss": 0.5692, "step": 438000 }, { "epoch": 10.98, "learning_rate": 0.0001921103464928166, "loss": 0.5664, "step": 441000 }, { "epoch": 11.06, "learning_rate": 0.00019135578896535072, "loss": 0.5487, "step": 444000 }, { "epoch": 11.13, "learning_rate": 0.0001906012314378848, "loss": 0.5436, "step": 447000 }, { "epoch": 11.21, "learning_rate": 0.0001898466739104189, "loss": 0.5438, "step": 450000 }, { "epoch": 11.28, "learning_rate": 0.00018909211638295302, "loss": 0.5444, "step": 453000 }, { "epoch": 11.35, "learning_rate": 0.00018833755885548712, "loss": 0.5422, "step": 456000 }, { "epoch": 11.43, "learning_rate": 0.00018758300132802123, "loss": 0.5452, "step": 459000 }, { "epoch": 11.5, "learning_rate": 0.00018682844380055535, "loss": 0.545, "step": 462000 }, { "epoch": 11.58, "learning_rate": 0.00018607388627308946, "loss": 0.5418, "step": 465000 }, { "epoch": 11.65, "learning_rate": 0.00018531932874562353, "loss": 0.5457, "step": 468000 }, { "epoch": 11.73, "learning_rate": 0.00018456477121815766, "loss": 0.5425, "step": 471000 }, { "epoch": 11.8, "learning_rate": 0.00018381021369069176, "loss": 0.5435, "step": 474000 }, { "epoch": 11.88, "learning_rate": 0.00018305565616322586, "loss": 0.5489, "step": 477000 }, { "epoch": 11.95, "learning_rate": 0.00018230109863576, "loss": 0.5457, "step": 480000 }, { "epoch": 12.03, "learning_rate": 0.0001815465411082941, "loss": 0.5353, "step": 483000 }, { "epoch": 12.1, "learning_rate": 0.0001807919835808282, "loss": 0.5185, "step": 486000 }, { "epoch": 12.18, "learning_rate": 0.00018003742605336227, "loss": 0.5223, "step": 489000 }, { "epoch": 12.25, "learning_rate": 0.0001792828685258964, "loss": 0.5172, "step": 492000 }, { "epoch": 12.33, "learning_rate": 0.0001785283109984305, "loss": 0.5191, "step": 495000 }, { "epoch": 12.4, "learning_rate": 0.0001777737534709646, "loss": 0.5221, "step": 498000 }, { "epoch": 12.48, "learning_rate": 0.00017701919594349872, "loss": 0.522, "step": 501000 }, { "epoch": 12.55, "learning_rate": 0.00017626463841603283, "loss": 0.525, "step": 504000 }, { "epoch": 12.62, "learning_rate": 0.00017551008088856693, "loss": 0.5265, "step": 507000 }, { "epoch": 12.7, "learning_rate": 0.00017475552336110106, "loss": 0.526, "step": 510000 }, { "epoch": 12.77, "learning_rate": 0.00017400096583363513, "loss": 0.527, "step": 513000 }, { "epoch": 12.85, "learning_rate": 0.00017324640830616923, "loss": 0.5259, "step": 516000 }, { "epoch": 12.92, "learning_rate": 0.00017249185077870336, "loss": 0.5234, "step": 519000 }, { "epoch": 13.0, "learning_rate": 0.00017173729325123746, "loss": 0.5259, "step": 522000 }, { "epoch": 13.07, "learning_rate": 0.00017098273572377156, "loss": 0.5027, "step": 525000 }, { "epoch": 13.15, "learning_rate": 0.0001702281781963057, "loss": 0.5043, "step": 528000 }, { "epoch": 13.22, "learning_rate": 0.0001694736206688398, "loss": 0.5051, "step": 531000 }, { "epoch": 13.3, "learning_rate": 0.00016871906314137387, "loss": 0.5062, "step": 534000 }, { "epoch": 13.37, "learning_rate": 0.00016796450561390797, "loss": 0.5062, "step": 537000 }, { "epoch": 13.45, "learning_rate": 0.0001672099480864421, "loss": 0.508, "step": 540000 }, { "epoch": 13.52, "learning_rate": 0.0001664553905589762, "loss": 0.5086, "step": 543000 }, { "epoch": 13.6, "learning_rate": 0.0001657008330315103, "loss": 0.5072, "step": 546000 }, { "epoch": 13.67, "learning_rate": 0.00016494627550404443, "loss": 0.5, "step": 549000 }, { "epoch": 13.75, "learning_rate": 0.00016419171797657853, "loss": 0.5067, "step": 552000 }, { "epoch": 13.82, "learning_rate": 0.0001634371604491126, "loss": 0.5055, "step": 555000 }, { "epoch": 13.89, "learning_rate": 0.00016268260292164673, "loss": 0.501, "step": 558000 }, { "epoch": 13.97, "learning_rate": 0.00016192804539418083, "loss": 0.5068, "step": 561000 }, { "epoch": 14.04, "learning_rate": 0.00016117348786671493, "loss": 0.4915, "step": 564000 }, { "epoch": 14.12, "learning_rate": 0.00016041893033924906, "loss": 0.4865, "step": 567000 }, { "epoch": 14.19, "learning_rate": 0.00015966437281178316, "loss": 0.4881, "step": 570000 }, { "epoch": 14.27, "learning_rate": 0.00015890981528431726, "loss": 0.4842, "step": 573000 }, { "epoch": 14.34, "learning_rate": 0.0001581552577568514, "loss": 0.4873, "step": 576000 }, { "epoch": 14.42, "learning_rate": 0.00015740070022938547, "loss": 0.4861, "step": 579000 }, { "epoch": 14.49, "learning_rate": 0.00015664614270191957, "loss": 0.4867, "step": 582000 }, { "epoch": 14.57, "learning_rate": 0.00015589158517445367, "loss": 0.4902, "step": 585000 }, { "epoch": 14.64, "learning_rate": 0.0001551370276469878, "loss": 0.4927, "step": 588000 }, { "epoch": 14.72, "learning_rate": 0.0001543824701195219, "loss": 0.4904, "step": 591000 }, { "epoch": 14.79, "learning_rate": 0.000153627912592056, "loss": 0.4909, "step": 594000 }, { "epoch": 14.87, "learning_rate": 0.00015287335506459013, "loss": 0.4914, "step": 597000 }, { "epoch": 14.94, "learning_rate": 0.00015211879753712423, "loss": 0.4864, "step": 600000 }, { "epoch": 15.01, "learning_rate": 0.0001513642400096583, "loss": 0.4834, "step": 603000 }, { "epoch": 15.09, "learning_rate": 0.00015060968248219243, "loss": 0.4687, "step": 606000 }, { "epoch": 15.16, "learning_rate": 0.00014985512495472653, "loss": 0.4683, "step": 609000 }, { "epoch": 15.24, "learning_rate": 0.00014910056742726066, "loss": 0.4657, "step": 612000 }, { "epoch": 15.31, "learning_rate": 0.00014834600989979473, "loss": 0.4727, "step": 615000 }, { "epoch": 15.39, "learning_rate": 0.00014759145237232886, "loss": 0.4709, "step": 618000 }, { "epoch": 15.46, "learning_rate": 0.00014683689484486296, "loss": 0.4703, "step": 621000 }, { "epoch": 15.54, "learning_rate": 0.00014608233731739707, "loss": 0.4709, "step": 624000 }, { "epoch": 15.61, "learning_rate": 0.00014532777978993117, "loss": 0.4726, "step": 627000 }, { "epoch": 15.69, "learning_rate": 0.00014457322226246527, "loss": 0.4684, "step": 630000 }, { "epoch": 15.76, "learning_rate": 0.0001438186647349994, "loss": 0.4725, "step": 633000 }, { "epoch": 15.84, "learning_rate": 0.0001430641072075335, "loss": 0.4689, "step": 636000 }, { "epoch": 15.91, "learning_rate": 0.0001423095496800676, "loss": 0.468, "step": 639000 }, { "epoch": 15.99, "learning_rate": 0.0001415549921526017, "loss": 0.4749, "step": 642000 }, { "epoch": 16.06, "learning_rate": 0.00014080043462513583, "loss": 0.4551, "step": 645000 }, { "epoch": 16.14, "learning_rate": 0.0001400458770976699, "loss": 0.4536, "step": 648000 }, { "epoch": 16.21, "learning_rate": 0.00013929131957020403, "loss": 0.4548, "step": 651000 }, { "epoch": 16.28, "learning_rate": 0.00013853676204273813, "loss": 0.4541, "step": 654000 }, { "epoch": 16.36, "learning_rate": 0.00013778220451527223, "loss": 0.4573, "step": 657000 }, { "epoch": 16.43, "learning_rate": 0.00013702764698780633, "loss": 0.4521, "step": 660000 }, { "epoch": 16.51, "learning_rate": 0.00013627308946034044, "loss": 0.4564, "step": 663000 }, { "epoch": 16.58, "learning_rate": 0.00013551853193287456, "loss": 0.4563, "step": 666000 }, { "epoch": 16.66, "learning_rate": 0.00013476397440540867, "loss": 0.4582, "step": 669000 }, { "epoch": 16.73, "learning_rate": 0.00013400941687794277, "loss": 0.4556, "step": 672000 }, { "epoch": 16.81, "learning_rate": 0.00013325485935047687, "loss": 0.4527, "step": 675000 }, { "epoch": 16.88, "learning_rate": 0.00013250030182301097, "loss": 0.4535, "step": 678000 }, { "epoch": 16.96, "learning_rate": 0.00013174574429554507, "loss": 0.4578, "step": 681000 }, { "epoch": 17.03, "learning_rate": 0.0001309911867680792, "loss": 0.449, "step": 684000 }, { "epoch": 17.11, "learning_rate": 0.0001302366292406133, "loss": 0.4389, "step": 687000 }, { "epoch": 17.18, "learning_rate": 0.0001294820717131474, "loss": 0.438, "step": 690000 }, { "epoch": 17.26, "learning_rate": 0.0001287275141856815, "loss": 0.4396, "step": 693000 }, { "epoch": 17.33, "learning_rate": 0.0001279729566582156, "loss": 0.437, "step": 696000 }, { "epoch": 17.41, "learning_rate": 0.00012721839913074973, "loss": 0.443, "step": 699000 }, { "epoch": 17.48, "learning_rate": 0.0001264638416032838, "loss": 0.4428, "step": 702000 }, { "epoch": 17.55, "learning_rate": 0.00012570928407581793, "loss": 0.4384, "step": 705000 }, { "epoch": 17.63, "learning_rate": 0.00012495472654835204, "loss": 0.4387, "step": 708000 }, { "epoch": 17.7, "learning_rate": 0.00012420016902088614, "loss": 0.4416, "step": 711000 }, { "epoch": 17.78, "learning_rate": 0.00012344561149342024, "loss": 0.4398, "step": 714000 }, { "epoch": 17.85, "learning_rate": 0.00012269105396595437, "loss": 0.437, "step": 717000 }, { "epoch": 17.93, "learning_rate": 0.00012193649643848845, "loss": 0.4393, "step": 720000 }, { "epoch": 18.0, "learning_rate": 0.00012118193891102257, "loss": 0.4416, "step": 723000 }, { "epoch": 18.08, "learning_rate": 0.00012042738138355667, "loss": 0.4216, "step": 726000 }, { "epoch": 18.15, "learning_rate": 0.00011967282385609077, "loss": 0.4206, "step": 729000 }, { "epoch": 18.23, "learning_rate": 0.00011891826632862489, "loss": 0.4223, "step": 732000 }, { "epoch": 18.3, "learning_rate": 0.00011816370880115899, "loss": 0.4261, "step": 735000 }, { "epoch": 18.38, "learning_rate": 0.0001174091512736931, "loss": 0.4238, "step": 738000 }, { "epoch": 18.45, "learning_rate": 0.0001166545937462272, "loss": 0.4224, "step": 741000 }, { "epoch": 18.53, "learning_rate": 0.0001159000362187613, "loss": 0.4261, "step": 744000 }, { "epoch": 18.6, "learning_rate": 0.00011514547869129542, "loss": 0.4287, "step": 747000 }, { "epoch": 18.68, "learning_rate": 0.00011439092116382951, "loss": 0.4261, "step": 750000 }, { "epoch": 18.75, "learning_rate": 0.00011363636363636362, "loss": 0.4259, "step": 753000 }, { "epoch": 18.82, "learning_rate": 0.00011288180610889774, "loss": 0.4233, "step": 756000 }, { "epoch": 18.9, "learning_rate": 0.00011212724858143184, "loss": 0.427, "step": 759000 }, { "epoch": 18.97, "learning_rate": 0.00011137269105396594, "loss": 0.4302, "step": 762000 }, { "epoch": 19.05, "learning_rate": 0.00011061813352650005, "loss": 0.4109, "step": 765000 }, { "epoch": 19.12, "learning_rate": 0.00010986357599903416, "loss": 0.4088, "step": 768000 }, { "epoch": 19.2, "learning_rate": 0.00010910901847156827, "loss": 0.4078, "step": 771000 }, { "epoch": 19.27, "learning_rate": 0.00010835446094410236, "loss": 0.4094, "step": 774000 }, { "epoch": 19.35, "learning_rate": 0.00010759990341663647, "loss": 0.409, "step": 777000 }, { "epoch": 19.42, "learning_rate": 0.00010684534588917059, "loss": 0.4086, "step": 780000 }, { "epoch": 19.5, "learning_rate": 0.00010609078836170468, "loss": 0.406, "step": 783000 }, { "epoch": 19.57, "learning_rate": 0.00010533623083423879, "loss": 0.4102, "step": 786000 }, { "epoch": 19.65, "learning_rate": 0.0001045816733067729, "loss": 0.4089, "step": 789000 }, { "epoch": 19.72, "learning_rate": 0.000103827115779307, "loss": 0.4096, "step": 792000 }, { "epoch": 19.8, "learning_rate": 0.00010307255825184111, "loss": 0.4119, "step": 795000 }, { "epoch": 19.87, "learning_rate": 0.00010231800072437521, "loss": 0.4101, "step": 798000 }, { "epoch": 19.95, "learning_rate": 0.00010156344319690932, "loss": 0.4125, "step": 801000 }, { "epoch": 20.02, "learning_rate": 0.00010080888566944344, "loss": 0.4091, "step": 804000 }, { "epoch": 20.09, "learning_rate": 0.00010005432814197753, "loss": 0.3946, "step": 807000 }, { "epoch": 20.17, "learning_rate": 9.929977061451164e-05, "loss": 0.3959, "step": 810000 }, { "epoch": 20.24, "learning_rate": 9.854521308704576e-05, "loss": 0.3954, "step": 813000 }, { "epoch": 20.32, "learning_rate": 9.779065555957984e-05, "loss": 0.3974, "step": 816000 }, { "epoch": 20.39, "learning_rate": 9.703609803211396e-05, "loss": 0.3943, "step": 819000 }, { "epoch": 20.47, "learning_rate": 9.628154050464806e-05, "loss": 0.3984, "step": 822000 }, { "epoch": 20.54, "learning_rate": 9.552698297718217e-05, "loss": 0.3963, "step": 825000 }, { "epoch": 20.62, "learning_rate": 9.477242544971629e-05, "loss": 0.3927, "step": 828000 }, { "epoch": 20.69, "learning_rate": 9.401786792225038e-05, "loss": 0.3955, "step": 831000 }, { "epoch": 20.77, "learning_rate": 9.326331039478449e-05, "loss": 0.3982, "step": 834000 }, { "epoch": 20.84, "learning_rate": 9.25087528673186e-05, "loss": 0.3976, "step": 837000 }, { "epoch": 20.92, "learning_rate": 9.17541953398527e-05, "loss": 0.3957, "step": 840000 }, { "epoch": 20.99, "learning_rate": 9.099963781238681e-05, "loss": 0.3982, "step": 843000 }, { "epoch": 21.07, "learning_rate": 9.024508028492091e-05, "loss": 0.3806, "step": 846000 }, { "epoch": 21.14, "learning_rate": 8.949052275745503e-05, "loss": 0.3819, "step": 849000 }, { "epoch": 21.22, "learning_rate": 8.873596522998913e-05, "loss": 0.3847, "step": 852000 }, { "epoch": 21.29, "learning_rate": 8.798140770252323e-05, "loss": 0.3843, "step": 855000 }, { "epoch": 21.36, "learning_rate": 8.722685017505734e-05, "loss": 0.3859, "step": 858000 }, { "epoch": 21.44, "learning_rate": 8.647229264759146e-05, "loss": 0.3821, "step": 861000 }, { "epoch": 21.51, "learning_rate": 8.571773512012555e-05, "loss": 0.386, "step": 864000 }, { "epoch": 21.59, "learning_rate": 8.496317759265966e-05, "loss": 0.3853, "step": 867000 }, { "epoch": 21.66, "learning_rate": 8.420862006519376e-05, "loss": 0.3856, "step": 870000 }, { "epoch": 21.74, "learning_rate": 8.345406253772786e-05, "loss": 0.3843, "step": 873000 }, { "epoch": 21.81, "learning_rate": 8.269950501026198e-05, "loss": 0.3845, "step": 876000 }, { "epoch": 21.89, "learning_rate": 8.194494748279608e-05, "loss": 0.3809, "step": 879000 }, { "epoch": 21.96, "learning_rate": 8.11903899553302e-05, "loss": 0.3824, "step": 882000 }, { "epoch": 22.04, "learning_rate": 8.04358324278643e-05, "loss": 0.3801, "step": 885000 }, { "epoch": 22.11, "learning_rate": 7.96812749003984e-05, "loss": 0.3702, "step": 888000 }, { "epoch": 22.19, "learning_rate": 7.892671737293251e-05, "loss": 0.3713, "step": 891000 }, { "epoch": 22.26, "learning_rate": 7.817215984546663e-05, "loss": 0.3715, "step": 894000 }, { "epoch": 22.34, "learning_rate": 7.741760231800071e-05, "loss": 0.3681, "step": 897000 }, { "epoch": 22.41, "learning_rate": 7.666304479053483e-05, "loss": 0.3714, "step": 900000 }, { "epoch": 22.49, "learning_rate": 7.590848726306893e-05, "loss": 0.3725, "step": 903000 }, { "epoch": 22.56, "learning_rate": 7.515392973560303e-05, "loss": 0.3692, "step": 906000 }, { "epoch": 22.63, "learning_rate": 7.439937220813715e-05, "loss": 0.375, "step": 909000 }, { "epoch": 22.71, "learning_rate": 7.364481468067125e-05, "loss": 0.3721, "step": 912000 }, { "epoch": 22.78, "learning_rate": 7.289025715320536e-05, "loss": 0.3699, "step": 915000 }, { "epoch": 22.86, "learning_rate": 7.213569962573946e-05, "loss": 0.37, "step": 918000 }, { "epoch": 22.93, "learning_rate": 7.138114209827356e-05, "loss": 0.3702, "step": 921000 }, { "epoch": 23.01, "learning_rate": 7.062658457080768e-05, "loss": 0.3668, "step": 924000 }, { "epoch": 23.08, "learning_rate": 6.987202704334178e-05, "loss": 0.3602, "step": 927000 }, { "epoch": 23.16, "learning_rate": 6.911746951587588e-05, "loss": 0.3557, "step": 930000 }, { "epoch": 23.23, "learning_rate": 6.836291198840998e-05, "loss": 0.3607, "step": 933000 }, { "epoch": 23.31, "learning_rate": 6.76083544609441e-05, "loss": 0.3571, "step": 936000 }, { "epoch": 23.38, "learning_rate": 6.68537969334782e-05, "loss": 0.3589, "step": 939000 }, { "epoch": 23.46, "learning_rate": 6.609923940601231e-05, "loss": 0.361, "step": 942000 }, { "epoch": 23.53, "learning_rate": 6.534468187854641e-05, "loss": 0.3595, "step": 945000 }, { "epoch": 23.61, "learning_rate": 6.459012435108053e-05, "loss": 0.3554, "step": 948000 }, { "epoch": 23.68, "learning_rate": 6.383556682361463e-05, "loss": 0.3572, "step": 951000 }, { "epoch": 23.75, "learning_rate": 6.308100929614873e-05, "loss": 0.3604, "step": 954000 }, { "epoch": 23.83, "learning_rate": 6.232645176868283e-05, "loss": 0.3591, "step": 957000 }, { "epoch": 23.9, "learning_rate": 6.157189424121695e-05, "loss": 0.3582, "step": 960000 }, { "epoch": 23.98, "learning_rate": 6.0817336713751056e-05, "loss": 0.3568, "step": 963000 }, { "epoch": 24.05, "learning_rate": 6.006277918628516e-05, "loss": 0.3495, "step": 966000 }, { "epoch": 24.13, "learning_rate": 5.930822165881926e-05, "loss": 0.3484, "step": 969000 }, { "epoch": 24.2, "learning_rate": 5.855366413135337e-05, "loss": 0.3449, "step": 972000 }, { "epoch": 24.28, "learning_rate": 5.7799106603887474e-05, "loss": 0.3471, "step": 975000 }, { "epoch": 24.35, "learning_rate": 5.704454907642158e-05, "loss": 0.3495, "step": 978000 }, { "epoch": 24.43, "learning_rate": 5.6289991548955684e-05, "loss": 0.3489, "step": 981000 }, { "epoch": 24.5, "learning_rate": 5.55354340214898e-05, "loss": 0.3464, "step": 984000 }, { "epoch": 24.58, "learning_rate": 5.47808764940239e-05, "loss": 0.3485, "step": 987000 }, { "epoch": 24.65, "learning_rate": 5.402631896655801e-05, "loss": 0.3486, "step": 990000 }, { "epoch": 24.73, "learning_rate": 5.327176143909211e-05, "loss": 0.3476, "step": 993000 }, { "epoch": 24.8, "learning_rate": 5.2517203911626224e-05, "loss": 0.3502, "step": 996000 }, { "epoch": 24.88, "learning_rate": 5.1762646384160325e-05, "loss": 0.3492, "step": 999000 }, { "epoch": 24.95, "learning_rate": 5.1008088856694426e-05, "loss": 0.347, "step": 1002000 }, { "epoch": 25.02, "learning_rate": 5.0253531329228534e-05, "loss": 0.343, "step": 1005000 }, { "epoch": 25.1, "learning_rate": 4.949897380176264e-05, "loss": 0.3366, "step": 1008000 }, { "epoch": 25.17, "learning_rate": 4.874441627429675e-05, "loss": 0.3348, "step": 1011000 }, { "epoch": 25.25, "learning_rate": 4.798985874683085e-05, "loss": 0.3344, "step": 1014000 }, { "epoch": 25.32, "learning_rate": 4.723530121936496e-05, "loss": 0.3406, "step": 1017000 }, { "epoch": 25.4, "learning_rate": 4.648074369189907e-05, "loss": 0.3366, "step": 1020000 }, { "epoch": 25.47, "learning_rate": 4.5726186164433176e-05, "loss": 0.3392, "step": 1023000 }, { "epoch": 25.55, "learning_rate": 4.497162863696728e-05, "loss": 0.3379, "step": 1026000 }, { "epoch": 25.62, "learning_rate": 4.421707110950138e-05, "loss": 0.3388, "step": 1029000 }, { "epoch": 25.7, "learning_rate": 4.346251358203549e-05, "loss": 0.3383, "step": 1032000 }, { "epoch": 25.77, "learning_rate": 4.27079560545696e-05, "loss": 0.3372, "step": 1035000 }, { "epoch": 25.85, "learning_rate": 4.19533985271037e-05, "loss": 0.3358, "step": 1038000 }, { "epoch": 25.92, "learning_rate": 4.1198840999637803e-05, "loss": 0.3353, "step": 1041000 }, { "epoch": 26.0, "learning_rate": 4.044428347217192e-05, "loss": 0.34, "step": 1044000 }, { "epoch": 26.07, "learning_rate": 3.968972594470602e-05, "loss": 0.3282, "step": 1047000 }, { "epoch": 26.15, "learning_rate": 3.893516841724013e-05, "loss": 0.3267, "step": 1050000 }, { "epoch": 26.22, "learning_rate": 3.818061088977423e-05, "loss": 0.3268, "step": 1053000 }, { "epoch": 26.29, "learning_rate": 3.742605336230834e-05, "loss": 0.3248, "step": 1056000 }, { "epoch": 26.37, "learning_rate": 3.6671495834842445e-05, "loss": 0.3268, "step": 1059000 }, { "epoch": 26.44, "learning_rate": 3.591693830737655e-05, "loss": 0.324, "step": 1062000 }, { "epoch": 26.52, "learning_rate": 3.516238077991066e-05, "loss": 0.3298, "step": 1065000 }, { "epoch": 26.59, "learning_rate": 3.440782325244476e-05, "loss": 0.3296, "step": 1068000 }, { "epoch": 26.67, "learning_rate": 3.365326572497887e-05, "loss": 0.3261, "step": 1071000 }, { "epoch": 26.74, "learning_rate": 3.289870819751297e-05, "loss": 0.3284, "step": 1074000 }, { "epoch": 26.82, "learning_rate": 3.214415067004708e-05, "loss": 0.3279, "step": 1077000 }, { "epoch": 26.89, "learning_rate": 3.138959314258119e-05, "loss": 0.3273, "step": 1080000 }, { "epoch": 26.97, "learning_rate": 3.0635035615115295e-05, "loss": 0.3275, "step": 1083000 }, { "epoch": 27.04, "learning_rate": 2.9880478087649397e-05, "loss": 0.323, "step": 1086000 }, { "epoch": 27.12, "learning_rate": 2.9125920560183505e-05, "loss": 0.3182, "step": 1089000 }, { "epoch": 27.19, "learning_rate": 2.837136303271761e-05, "loss": 0.3209, "step": 1092000 }, { "epoch": 27.27, "learning_rate": 2.7616805505251717e-05, "loss": 0.3185, "step": 1095000 }, { "epoch": 27.34, "learning_rate": 2.6862247977785822e-05, "loss": 0.3214, "step": 1098000 }, { "epoch": 27.42, "learning_rate": 2.610769045031993e-05, "loss": 0.3164, "step": 1101000 }, { "epoch": 27.49, "learning_rate": 2.5353132922854035e-05, "loss": 0.3206, "step": 1104000 }, { "epoch": 27.56, "learning_rate": 2.4598575395388143e-05, "loss": 0.3175, "step": 1107000 }, { "epoch": 27.64, "learning_rate": 2.3844017867922247e-05, "loss": 0.3185, "step": 1110000 }, { "epoch": 27.71, "learning_rate": 2.3089460340456355e-05, "loss": 0.3199, "step": 1113000 }, { "epoch": 27.79, "learning_rate": 2.233490281299046e-05, "loss": 0.318, "step": 1116000 }, { "epoch": 27.86, "learning_rate": 2.1580345285524568e-05, "loss": 0.319, "step": 1119000 }, { "epoch": 27.94, "learning_rate": 2.082578775805867e-05, "loss": 0.3152, "step": 1122000 } ], "logging_steps": 3000, "max_steps": 1204800, "num_train_epochs": 30, "save_steps": 500, "total_flos": 2.2337303175142268e+21, "trial_name": null, "trial_params": null }