KoSOLAR-10.7B-v0.1 / trainer_state.json
dev7halo's picture
Upload folder using huggingface_hub
5e85985 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.6425043183126764,
"eval_steps": 500,
"global_step": 43600,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 0.00029999755171150014,
"loss": 1.6385,
"step": 100
},
{
"epoch": 0.01,
"learning_rate": 0.00029998908859460167,
"loss": 0.8837,
"step": 200
},
{
"epoch": 0.02,
"learning_rate": 0.00029997458076451173,
"loss": 0.8445,
"step": 300
},
{
"epoch": 0.02,
"learning_rate": 0.0002999540288059106,
"loss": 0.8257,
"step": 400
},
{
"epoch": 0.03,
"learning_rate": 0.0002999274335470631,
"loss": 0.8054,
"step": 500
},
{
"epoch": 0.04,
"learning_rate": 0.00029989479605978546,
"loss": 0.7917,
"step": 600
},
{
"epoch": 0.04,
"learning_rate": 0.0002998561176594015,
"loss": 0.7802,
"step": 700
},
{
"epoch": 0.05,
"learning_rate": 0.00029981139990469034,
"loss": 0.7826,
"step": 800
},
{
"epoch": 0.05,
"learning_rate": 0.00029976064459782297,
"loss": 0.7833,
"step": 900
},
{
"epoch": 0.06,
"learning_rate": 0.0002997038537842901,
"loss": 0.7782,
"step": 1000
},
{
"epoch": 0.07,
"learning_rate": 0.0002996410297528193,
"loss": 0.7649,
"step": 1100
},
{
"epoch": 0.07,
"learning_rate": 0.00029957217503528326,
"loss": 0.7606,
"step": 1200
},
{
"epoch": 0.08,
"learning_rate": 0.0002994972924065971,
"loss": 0.7673,
"step": 1300
},
{
"epoch": 0.08,
"learning_rate": 0.00029941638488460725,
"loss": 0.7534,
"step": 1400
},
{
"epoch": 0.09,
"learning_rate": 0.0002993294557299693,
"loss": 0.7711,
"step": 1500
},
{
"epoch": 0.1,
"learning_rate": 0.00029923650844601677,
"loss": 0.7499,
"step": 1600
},
{
"epoch": 0.1,
"learning_rate": 0.00029913754677862004,
"loss": 0.7241,
"step": 1700
},
{
"epoch": 0.11,
"learning_rate": 0.0002990325747160351,
"loss": 0.7471,
"step": 1800
},
{
"epoch": 0.12,
"learning_rate": 0.00029892159648874317,
"loss": 0.7424,
"step": 1900
},
{
"epoch": 0.12,
"learning_rate": 0.00029880461656927996,
"loss": 0.7413,
"step": 2000
},
{
"epoch": 0.13,
"learning_rate": 0.0002986816396720555,
"loss": 0.7299,
"step": 2100
},
{
"epoch": 0.13,
"learning_rate": 0.000298552670753164,
"loss": 0.7102,
"step": 2200
},
{
"epoch": 0.14,
"learning_rate": 0.00029841771501018456,
"loss": 0.736,
"step": 2300
},
{
"epoch": 0.15,
"learning_rate": 0.0002982767778819711,
"loss": 0.716,
"step": 2400
},
{
"epoch": 0.15,
"learning_rate": 0.00029812986504843366,
"loss": 0.7419,
"step": 2500
},
{
"epoch": 0.16,
"learning_rate": 0.0002979769824303091,
"loss": 0.714,
"step": 2600
},
{
"epoch": 0.16,
"learning_rate": 0.00029781813618892303,
"loss": 0.7306,
"step": 2700
},
{
"epoch": 0.17,
"learning_rate": 0.00029765333272594065,
"loss": 0.7121,
"step": 2800
},
{
"epoch": 0.18,
"learning_rate": 0.0002974825786831097,
"loss": 0.7107,
"step": 2900
},
{
"epoch": 0.18,
"learning_rate": 0.00029730588094199214,
"loss": 0.7332,
"step": 3000
},
{
"epoch": 0.19,
"learning_rate": 0.0002971232466236871,
"loss": 0.7102,
"step": 3100
},
{
"epoch": 0.19,
"learning_rate": 0.0002969346830885439,
"loss": 0.7205,
"step": 3200
},
{
"epoch": 0.2,
"learning_rate": 0.00029674019793586516,
"loss": 0.7113,
"step": 3300
},
{
"epoch": 0.21,
"learning_rate": 0.0002965397990036008,
"loss": 0.7141,
"step": 3400
},
{
"epoch": 0.21,
"learning_rate": 0.0002963334943680322,
"loss": 0.7039,
"step": 3500
},
{
"epoch": 0.22,
"learning_rate": 0.0002961212923434465,
"loss": 0.7067,
"step": 3600
},
{
"epoch": 0.22,
"learning_rate": 0.0002959032014818015,
"loss": 0.7073,
"step": 3700
},
{
"epoch": 0.23,
"learning_rate": 0.0002956792305723814,
"loss": 0.7113,
"step": 3800
},
{
"epoch": 0.24,
"learning_rate": 0.00029544938864144225,
"loss": 0.7035,
"step": 3900
},
{
"epoch": 0.24,
"learning_rate": 0.00029521368495184807,
"loss": 0.6961,
"step": 4000
},
{
"epoch": 0.25,
"learning_rate": 0.0002949721290026979,
"loss": 0.6884,
"step": 4100
},
{
"epoch": 0.25,
"learning_rate": 0.0002947247305289429,
"loss": 0.686,
"step": 4200
},
{
"epoch": 0.26,
"learning_rate": 0.0002944714995009936,
"loss": 0.6998,
"step": 4300
},
{
"epoch": 0.27,
"learning_rate": 0.00029421244612431877,
"loss": 0.7003,
"step": 4400
},
{
"epoch": 0.27,
"learning_rate": 0.00029394758083903347,
"loss": 0.6928,
"step": 4500
},
{
"epoch": 0.28,
"learning_rate": 0.00029367691431947884,
"loss": 0.7097,
"step": 4600
},
{
"epoch": 0.28,
"learning_rate": 0.0002934004574737915,
"loss": 0.7065,
"step": 4700
},
{
"epoch": 0.29,
"learning_rate": 0.0002931182214434643,
"loss": 0.6929,
"step": 4800
},
{
"epoch": 0.3,
"learning_rate": 0.00029283021760289686,
"loss": 0.6902,
"step": 4900
},
{
"epoch": 0.3,
"learning_rate": 0.00029253645755893777,
"loss": 0.6813,
"step": 5000
},
{
"epoch": 0.31,
"learning_rate": 0.00029223695315041615,
"loss": 0.682,
"step": 5100
},
{
"epoch": 0.32,
"learning_rate": 0.0002919317164476651,
"loss": 0.6885,
"step": 5200
},
{
"epoch": 0.32,
"learning_rate": 0.0002916207597520349,
"loss": 0.6629,
"step": 5300
},
{
"epoch": 0.33,
"learning_rate": 0.00029130409559539747,
"loss": 0.7018,
"step": 5400
},
{
"epoch": 0.33,
"learning_rate": 0.0002909817367396412,
"loss": 0.6746,
"step": 5500
},
{
"epoch": 0.34,
"learning_rate": 0.00029065369617615653,
"loss": 0.699,
"step": 5600
},
{
"epoch": 0.35,
"learning_rate": 0.00029031998712531273,
"loss": 0.6768,
"step": 5700
},
{
"epoch": 0.35,
"learning_rate": 0.00028998062303592473,
"loss": 0.682,
"step": 5800
},
{
"epoch": 0.36,
"learning_rate": 0.00028963561758471135,
"loss": 0.674,
"step": 5900
},
{
"epoch": 0.36,
"learning_rate": 0.00028928498467574394,
"loss": 0.6898,
"step": 6000
},
{
"epoch": 0.37,
"learning_rate": 0.00028892873843988637,
"loss": 0.6947,
"step": 6100
},
{
"epoch": 0.38,
"learning_rate": 0.000288566893234225,
"loss": 0.6751,
"step": 6200
},
{
"epoch": 0.38,
"learning_rate": 0.00028819946364149065,
"loss": 0.6815,
"step": 6300
},
{
"epoch": 0.39,
"learning_rate": 0.0002878264644694705,
"loss": 0.6678,
"step": 6400
},
{
"epoch": 0.39,
"learning_rate": 0.0002874479107504114,
"loss": 0.7046,
"step": 6500
},
{
"epoch": 0.4,
"learning_rate": 0.0002870638177404143,
"loss": 0.6793,
"step": 6600
},
{
"epoch": 0.41,
"learning_rate": 0.00028667420091881896,
"loss": 0.6718,
"step": 6700
},
{
"epoch": 0.41,
"learning_rate": 0.0002862790759875807,
"loss": 0.6744,
"step": 6800
},
{
"epoch": 0.42,
"learning_rate": 0.00028587845887063695,
"loss": 0.6779,
"step": 6900
},
{
"epoch": 0.42,
"learning_rate": 0.00028547236571326603,
"loss": 0.6737,
"step": 7000
},
{
"epoch": 0.43,
"learning_rate": 0.00028506081288143617,
"loss": 0.6643,
"step": 7100
},
{
"epoch": 0.44,
"learning_rate": 0.0002846438169611462,
"loss": 0.678,
"step": 7200
},
{
"epoch": 0.44,
"learning_rate": 0.00028422139475775673,
"loss": 0.6726,
"step": 7300
},
{
"epoch": 0.45,
"learning_rate": 0.0002837935632953133,
"loss": 0.6779,
"step": 7400
},
{
"epoch": 0.45,
"learning_rate": 0.00028336033981586005,
"loss": 0.6765,
"step": 7500
},
{
"epoch": 0.46,
"learning_rate": 0.00028292174177874487,
"loss": 0.6765,
"step": 7600
},
{
"epoch": 0.47,
"learning_rate": 0.0002824777868599158,
"loss": 0.6804,
"step": 7700
},
{
"epoch": 0.47,
"learning_rate": 0.0002820284929512088,
"loss": 0.6838,
"step": 7800
},
{
"epoch": 0.48,
"learning_rate": 0.00028157387815962637,
"loss": 0.6774,
"step": 7900
},
{
"epoch": 0.48,
"learning_rate": 0.00028111396080660815,
"loss": 0.6759,
"step": 8000
},
{
"epoch": 0.49,
"learning_rate": 0.00028064875942729236,
"loss": 0.6586,
"step": 8100
},
{
"epoch": 0.5,
"learning_rate": 0.0002801782927697689,
"loss": 0.6711,
"step": 8200
},
{
"epoch": 0.5,
"learning_rate": 0.0002797025797943237,
"loss": 0.6582,
"step": 8300
},
{
"epoch": 0.51,
"learning_rate": 0.0002792216396726747,
"loss": 0.6516,
"step": 8400
},
{
"epoch": 0.52,
"learning_rate": 0.0002787354917871992,
"loss": 0.6722,
"step": 8500
},
{
"epoch": 0.52,
"learning_rate": 0.0002782441557301526,
"loss": 0.6697,
"step": 8600
},
{
"epoch": 0.53,
"learning_rate": 0.0002777476513028789,
"loss": 0.6678,
"step": 8700
},
{
"epoch": 0.53,
"learning_rate": 0.0002772459985150127,
"loss": 0.6529,
"step": 8800
},
{
"epoch": 0.54,
"learning_rate": 0.00027673921758367294,
"loss": 0.669,
"step": 8900
},
{
"epoch": 0.55,
"learning_rate": 0.00027622732893264776,
"loss": 0.67,
"step": 9000
},
{
"epoch": 0.55,
"learning_rate": 0.00027571035319157167,
"loss": 0.6703,
"step": 9100
},
{
"epoch": 0.56,
"learning_rate": 0.0002751883111950942,
"loss": 0.6603,
"step": 9200
},
{
"epoch": 0.56,
"learning_rate": 0.00027466122398203994,
"loss": 0.6509,
"step": 9300
},
{
"epoch": 0.57,
"learning_rate": 0.00027412911279456104,
"loss": 0.6677,
"step": 9400
},
{
"epoch": 0.58,
"learning_rate": 0.0002735919990772809,
"loss": 0.6593,
"step": 9500
},
{
"epoch": 0.58,
"learning_rate": 0.0002730499044764299,
"loss": 0.652,
"step": 9600
},
{
"epoch": 0.59,
"learning_rate": 0.0002725028508389731,
"loss": 0.658,
"step": 9700
},
{
"epoch": 0.59,
"learning_rate": 0.00027195086021172994,
"loss": 0.6633,
"step": 9800
},
{
"epoch": 0.6,
"learning_rate": 0.0002713939548404853,
"loss": 0.6597,
"step": 9900
},
{
"epoch": 0.61,
"learning_rate": 0.0002708321571690937,
"loss": 0.6578,
"step": 10000
},
{
"epoch": 0.61,
"learning_rate": 0.00027026548983857384,
"loss": 0.6624,
"step": 10100
},
{
"epoch": 0.62,
"learning_rate": 0.000269693975686197,
"loss": 0.6546,
"step": 10200
},
{
"epoch": 0.62,
"learning_rate": 0.0002691176377445662,
"loss": 0.664,
"step": 10300
},
{
"epoch": 0.63,
"learning_rate": 0.000268536499240688,
"loss": 0.6626,
"step": 10400
},
{
"epoch": 0.64,
"learning_rate": 0.00026795058359503675,
"loss": 0.6549,
"step": 10500
},
{
"epoch": 0.64,
"learning_rate": 0.0002673599144206103,
"loss": 0.6506,
"step": 10600
},
{
"epoch": 0.65,
"learning_rate": 0.0002667645155219785,
"loss": 0.6523,
"step": 10700
},
{
"epoch": 0.65,
"learning_rate": 0.0002661644108943241,
"loss": 0.6721,
"step": 10800
},
{
"epoch": 0.66,
"learning_rate": 0.00026555962472247537,
"loss": 0.653,
"step": 10900
},
{
"epoch": 0.67,
"learning_rate": 0.0002649501813799317,
"loss": 0.6623,
"step": 11000
},
{
"epoch": 0.67,
"learning_rate": 0.00026433610542788116,
"loss": 0.6517,
"step": 11100
},
{
"epoch": 0.68,
"learning_rate": 0.0002637174216142106,
"loss": 0.6662,
"step": 11200
},
{
"epoch": 0.68,
"learning_rate": 0.0002630941548725086,
"loss": 0.6713,
"step": 11300
},
{
"epoch": 0.69,
"learning_rate": 0.0002624663303210602,
"loss": 0.646,
"step": 11400
},
{
"epoch": 0.7,
"learning_rate": 0.000261833973261835,
"loss": 0.6539,
"step": 11500
},
{
"epoch": 0.7,
"learning_rate": 0.0002611971091794672,
"loss": 0.6602,
"step": 11600
},
{
"epoch": 0.71,
"learning_rate": 0.00026055576374022855,
"loss": 0.6422,
"step": 11700
},
{
"epoch": 0.72,
"learning_rate": 0.00025990996279099424,
"loss": 0.6511,
"step": 11800
},
{
"epoch": 0.72,
"learning_rate": 0.00025925973235820096,
"loss": 0.6547,
"step": 11900
},
{
"epoch": 0.73,
"learning_rate": 0.00025860509864679795,
"loss": 0.6464,
"step": 12000
},
{
"epoch": 0.73,
"learning_rate": 0.00025794608803919133,
"loss": 0.6591,
"step": 12100
},
{
"epoch": 0.74,
"learning_rate": 0.00025728272709418044,
"loss": 0.6517,
"step": 12200
},
{
"epoch": 0.75,
"learning_rate": 0.00025661504254588773,
"loss": 0.6471,
"step": 12300
},
{
"epoch": 0.75,
"learning_rate": 0.0002559430613026812,
"loss": 0.6601,
"step": 12400
},
{
"epoch": 0.76,
"learning_rate": 0.00025526681044609004,
"loss": 0.6608,
"step": 12500
},
{
"epoch": 0.76,
"learning_rate": 0.0002545863172297133,
"loss": 0.6448,
"step": 12600
},
{
"epoch": 0.77,
"learning_rate": 0.0002539016090781214,
"loss": 0.6466,
"step": 12700
},
{
"epoch": 0.78,
"learning_rate": 0.0002532127135857509,
"loss": 0.6367,
"step": 12800
},
{
"epoch": 0.78,
"learning_rate": 0.00025251965851579245,
"loss": 0.6464,
"step": 12900
},
{
"epoch": 0.79,
"learning_rate": 0.0002518224717990721,
"loss": 0.6489,
"step": 13000
},
{
"epoch": 0.79,
"learning_rate": 0.0002511211815329253,
"loss": 0.6455,
"step": 13100
},
{
"epoch": 0.8,
"learning_rate": 0.00025041581598006475,
"loss": 0.6486,
"step": 13200
},
{
"epoch": 0.81,
"learning_rate": 0.00024970640356744144,
"loss": 0.6414,
"step": 13300
},
{
"epoch": 0.81,
"learning_rate": 0.0002489929728850988,
"loss": 0.6455,
"step": 13400
},
{
"epoch": 0.82,
"learning_rate": 0.00024827555268502075,
"loss": 0.6534,
"step": 13500
},
{
"epoch": 0.82,
"learning_rate": 0.00024755417187997275,
"loss": 0.6609,
"step": 13600
},
{
"epoch": 0.83,
"learning_rate": 0.0002468288595423368,
"loss": 0.6415,
"step": 13700
},
{
"epoch": 0.84,
"learning_rate": 0.00024609964490293954,
"loss": 0.6583,
"step": 13800
},
{
"epoch": 0.84,
"learning_rate": 0.0002453665573498745,
"loss": 0.6467,
"step": 13900
},
{
"epoch": 0.85,
"learning_rate": 0.0002446296264273174,
"loss": 0.6433,
"step": 14000
},
{
"epoch": 0.85,
"learning_rate": 0.00024388888183433577,
"loss": 0.6383,
"step": 14100
},
{
"epoch": 0.86,
"learning_rate": 0.0002431443534236919,
"loss": 0.636,
"step": 14200
},
{
"epoch": 0.87,
"learning_rate": 0.00024239607120063995,
"loss": 0.6385,
"step": 14300
},
{
"epoch": 0.87,
"learning_rate": 0.00024164406532171628,
"loss": 0.6407,
"step": 14400
},
{
"epoch": 0.88,
"learning_rate": 0.00024088836609352458,
"loss": 0.6344,
"step": 14500
},
{
"epoch": 0.88,
"learning_rate": 0.00024012900397151418,
"loss": 0.6262,
"step": 14600
},
{
"epoch": 0.89,
"learning_rate": 0.0002393660095587529,
"loss": 0.6405,
"step": 14700
},
{
"epoch": 0.9,
"learning_rate": 0.0002385994136046933,
"loss": 0.6487,
"step": 14800
},
{
"epoch": 0.9,
"learning_rate": 0.0002378292470039341,
"loss": 0.6302,
"step": 14900
},
{
"epoch": 0.91,
"learning_rate": 0.00023705554079497446,
"loss": 0.6342,
"step": 15000
},
{
"epoch": 0.92,
"learning_rate": 0.0002362783261589634,
"loss": 0.6386,
"step": 15100
},
{
"epoch": 0.92,
"learning_rate": 0.00023549763441844322,
"loss": 0.6415,
"step": 15200
},
{
"epoch": 0.93,
"learning_rate": 0.00023471349703608696,
"loss": 0.6387,
"step": 15300
},
{
"epoch": 0.93,
"learning_rate": 0.0002339259456134306,
"loss": 0.6381,
"step": 15400
},
{
"epoch": 0.94,
"learning_rate": 0.00023313501188959948,
"loss": 0.6511,
"step": 15500
},
{
"epoch": 0.95,
"learning_rate": 0.000232340727740029,
"loss": 0.6413,
"step": 15600
},
{
"epoch": 0.95,
"learning_rate": 0.00023154312517518024,
"loss": 0.6497,
"step": 15700
},
{
"epoch": 0.96,
"learning_rate": 0.00023074223633924977,
"loss": 0.6515,
"step": 15800
},
{
"epoch": 0.96,
"learning_rate": 0.00022993809350887413,
"loss": 0.6363,
"step": 15900
},
{
"epoch": 0.97,
"learning_rate": 0.00022913072909182936,
"loss": 0.6316,
"step": 16000
},
{
"epoch": 0.98,
"learning_rate": 0.0002283201756257245,
"loss": 0.633,
"step": 16100
},
{
"epoch": 0.98,
"learning_rate": 0.00022750646577669083,
"loss": 0.6478,
"step": 16200
},
{
"epoch": 0.99,
"learning_rate": 0.00022668963233806464,
"loss": 0.6363,
"step": 16300
},
{
"epoch": 0.99,
"learning_rate": 0.00022586970822906647,
"loss": 0.6303,
"step": 16400
},
{
"epoch": 1.0,
"learning_rate": 0.0002250467264934738,
"loss": 0.6237,
"step": 16500
},
{
"epoch": 1.01,
"learning_rate": 0.00022422072029828965,
"loss": 0.6181,
"step": 16600
},
{
"epoch": 1.01,
"learning_rate": 0.00022339172293240586,
"loss": 0.6164,
"step": 16700
},
{
"epoch": 1.02,
"learning_rate": 0.00022255976780526145,
"loss": 0.613,
"step": 16800
},
{
"epoch": 1.02,
"learning_rate": 0.0002217248884454963,
"loss": 0.6179,
"step": 16900
},
{
"epoch": 1.03,
"learning_rate": 0.00022088711849959982,
"loss": 0.6066,
"step": 17000
},
{
"epoch": 1.04,
"learning_rate": 0.0002200464917305549,
"loss": 0.6081,
"step": 17100
},
{
"epoch": 1.04,
"learning_rate": 0.00021920304201647744,
"loss": 0.6057,
"step": 17200
},
{
"epoch": 1.05,
"learning_rate": 0.00021835680334925087,
"loss": 0.6165,
"step": 17300
},
{
"epoch": 1.05,
"learning_rate": 0.0002175078098331562,
"loss": 0.6157,
"step": 17400
},
{
"epoch": 1.06,
"learning_rate": 0.0002166560956834978,
"loss": 0.6085,
"step": 17500
},
{
"epoch": 1.07,
"learning_rate": 0.00021580169522522424,
"loss": 0.615,
"step": 17600
},
{
"epoch": 1.07,
"learning_rate": 0.00021494464289154505,
"loss": 0.6119,
"step": 17700
},
{
"epoch": 1.08,
"learning_rate": 0.0002140849732225431,
"loss": 0.6097,
"step": 17800
},
{
"epoch": 1.08,
"learning_rate": 0.0002132227208637826,
"loss": 0.6155,
"step": 17900
},
{
"epoch": 1.09,
"learning_rate": 0.0002123579205649126,
"loss": 0.6066,
"step": 18000
},
{
"epoch": 1.1,
"learning_rate": 0.00021149060717826694,
"loss": 0.6001,
"step": 18100
},
{
"epoch": 1.1,
"learning_rate": 0.00021062081565745928,
"loss": 0.6068,
"step": 18200
},
{
"epoch": 1.11,
"learning_rate": 0.0002097485810559748,
"loss": 0.6131,
"step": 18300
},
{
"epoch": 1.12,
"learning_rate": 0.00020887393852575716,
"loss": 0.612,
"step": 18400
},
{
"epoch": 1.12,
"learning_rate": 0.00020799692331579213,
"loss": 0.6051,
"step": 18500
},
{
"epoch": 1.13,
"learning_rate": 0.00020711757077068675,
"loss": 0.6145,
"step": 18600
},
{
"epoch": 1.13,
"learning_rate": 0.00020623591632924515,
"loss": 0.6077,
"step": 18700
},
{
"epoch": 1.14,
"learning_rate": 0.00020535199552304033,
"loss": 0.6005,
"step": 18800
},
{
"epoch": 1.15,
"learning_rate": 0.00020446584397498178,
"loss": 0.6178,
"step": 18900
},
{
"epoch": 1.15,
"learning_rate": 0.00020357749739788054,
"loss": 0.6038,
"step": 19000
},
{
"epoch": 1.16,
"learning_rate": 0.00020268699159300927,
"loss": 0.5974,
"step": 19100
},
{
"epoch": 1.16,
"learning_rate": 0.00020179436244865986,
"loss": 0.6136,
"step": 19200
},
{
"epoch": 1.17,
"learning_rate": 0.00020089964593869694,
"loss": 0.6098,
"step": 19300
},
{
"epoch": 1.18,
"learning_rate": 0.00020000287812110793,
"loss": 0.6127,
"step": 19400
},
{
"epoch": 1.18,
"learning_rate": 0.00019910409513655038,
"loss": 0.6073,
"step": 19500
},
{
"epoch": 1.19,
"learning_rate": 0.00019820333320689473,
"loss": 0.6008,
"step": 19600
},
{
"epoch": 1.19,
"learning_rate": 0.00019730062863376524,
"loss": 0.6124,
"step": 19700
},
{
"epoch": 1.2,
"learning_rate": 0.00019639601779707655,
"loss": 0.6144,
"step": 19800
},
{
"epoch": 1.21,
"learning_rate": 0.00019548953715356758,
"loss": 0.6123,
"step": 19900
},
{
"epoch": 1.21,
"learning_rate": 0.0001945812232353326,
"loss": 0.6138,
"step": 20000
},
{
"epoch": 1.22,
"learning_rate": 0.00019367111264834846,
"loss": 0.6173,
"step": 20100
},
{
"epoch": 1.22,
"learning_rate": 0.0001927592420709998,
"loss": 0.5983,
"step": 20200
},
{
"epoch": 1.23,
"learning_rate": 0.00019184564825260053,
"loss": 0.6037,
"step": 20300
},
{
"epoch": 1.24,
"learning_rate": 0.000190930368011913,
"loss": 0.602,
"step": 20400
},
{
"epoch": 1.24,
"learning_rate": 0.00019001343823566412,
"loss": 0.6095,
"step": 20500
},
{
"epoch": 1.25,
"learning_rate": 0.0001890948958770587,
"loss": 0.6122,
"step": 20600
},
{
"epoch": 1.25,
"learning_rate": 0.00018817477795429028,
"loss": 0.61,
"step": 20700
},
{
"epoch": 1.26,
"learning_rate": 0.00018725312154904925,
"loss": 0.6034,
"step": 20800
},
{
"epoch": 1.27,
"learning_rate": 0.00018632996380502846,
"loss": 0.6085,
"step": 20900
},
{
"epoch": 1.27,
"learning_rate": 0.00018540534192642614,
"loss": 0.5977,
"step": 21000
},
{
"epoch": 1.28,
"learning_rate": 0.00018447929317644672,
"loss": 0.6071,
"step": 21100
},
{
"epoch": 1.28,
"learning_rate": 0.00018355185487579898,
"loss": 0.6118,
"step": 21200
},
{
"epoch": 1.29,
"learning_rate": 0.00018262306440119198,
"loss": 0.6078,
"step": 21300
},
{
"epoch": 1.3,
"learning_rate": 0.00018169295918382883,
"loss": 0.6029,
"step": 21400
},
{
"epoch": 1.3,
"learning_rate": 0.00018076157670789803,
"loss": 0.5974,
"step": 21500
},
{
"epoch": 1.31,
"learning_rate": 0.00017982895450906303,
"loss": 0.6077,
"step": 21600
},
{
"epoch": 1.32,
"learning_rate": 0.00017889513017294923,
"loss": 0.5953,
"step": 21700
},
{
"epoch": 1.32,
"learning_rate": 0.00017796014133362946,
"loss": 0.5985,
"step": 21800
},
{
"epoch": 1.33,
"learning_rate": 0.00017702402567210723,
"loss": 0.5987,
"step": 21900
},
{
"epoch": 1.33,
"learning_rate": 0.00017608682091479813,
"loss": 0.6017,
"step": 22000
},
{
"epoch": 1.34,
"learning_rate": 0.00017514856483200937,
"loss": 0.5985,
"step": 22100
},
{
"epoch": 1.35,
"learning_rate": 0.00017420929523641766,
"loss": 0.5999,
"step": 22200
},
{
"epoch": 1.35,
"learning_rate": 0.0001732690499815454,
"loss": 0.6052,
"step": 22300
},
{
"epoch": 1.36,
"learning_rate": 0.00017232786696023492,
"loss": 0.6015,
"step": 22400
},
{
"epoch": 1.36,
"learning_rate": 0.00017138578410312162,
"loss": 0.6002,
"step": 22500
},
{
"epoch": 1.37,
"learning_rate": 0.0001704428393771051,
"loss": 0.6011,
"step": 22600
},
{
"epoch": 1.38,
"learning_rate": 0.00016949907078381927,
"loss": 0.6045,
"step": 22700
},
{
"epoch": 1.38,
"learning_rate": 0.00016855451635810058,
"loss": 0.607,
"step": 22800
},
{
"epoch": 1.39,
"learning_rate": 0.00016760921416645544,
"loss": 0.6062,
"step": 22900
},
{
"epoch": 1.39,
"learning_rate": 0.00016666320230552593,
"loss": 0.5966,
"step": 23000
},
{
"epoch": 1.4,
"learning_rate": 0.00016571651890055452,
"loss": 0.6026,
"step": 23100
},
{
"epoch": 1.41,
"learning_rate": 0.0001647692021038477,
"loss": 0.5941,
"step": 23200
},
{
"epoch": 1.41,
"learning_rate": 0.00016382129009323817,
"loss": 0.5919,
"step": 23300
},
{
"epoch": 1.42,
"learning_rate": 0.00016287282107054643,
"loss": 0.6035,
"step": 23400
},
{
"epoch": 1.42,
"learning_rate": 0.00016192383326004106,
"loss": 0.6033,
"step": 23500
},
{
"epoch": 1.43,
"learning_rate": 0.00016097436490689838,
"loss": 0.5961,
"step": 23600
},
{
"epoch": 1.44,
"learning_rate": 0.00016002445427566107,
"loss": 0.589,
"step": 23700
},
{
"epoch": 1.44,
"learning_rate": 0.000159074139648696,
"loss": 0.6,
"step": 23800
},
{
"epoch": 1.45,
"learning_rate": 0.0001581234593246516,
"loss": 0.5951,
"step": 23900
},
{
"epoch": 1.45,
"learning_rate": 0.0001571724516169141,
"loss": 0.6027,
"step": 24000
},
{
"epoch": 1.46,
"learning_rate": 0.00015622115485206385,
"loss": 0.5993,
"step": 24100
},
{
"epoch": 1.47,
"learning_rate": 0.00015526960736833025,
"loss": 0.5915,
"step": 24200
},
{
"epoch": 1.47,
"learning_rate": 0.00015431784751404707,
"loss": 0.5977,
"step": 24300
},
{
"epoch": 1.48,
"learning_rate": 0.00015336591364610686,
"loss": 0.6022,
"step": 24400
},
{
"epoch": 1.48,
"learning_rate": 0.00015241384412841493,
"loss": 0.6019,
"step": 24500
},
{
"epoch": 1.49,
"learning_rate": 0.00015146167733034367,
"loss": 0.5921,
"step": 24600
},
{
"epoch": 1.5,
"learning_rate": 0.00015050945162518574,
"loss": 0.6011,
"step": 24700
},
{
"epoch": 1.5,
"learning_rate": 0.0001495572053886079,
"loss": 0.5934,
"step": 24800
},
{
"epoch": 1.51,
"learning_rate": 0.00014860497699710433,
"loss": 0.5893,
"step": 24900
},
{
"epoch": 1.52,
"learning_rate": 0.00014765280482645005,
"loss": 0.5951,
"step": 25000
},
{
"epoch": 1.52,
"learning_rate": 0.00014670072725015437,
"loss": 0.59,
"step": 25100
},
{
"epoch": 1.53,
"learning_rate": 0.00014574878263791426,
"loss": 0.6008,
"step": 25200
},
{
"epoch": 1.53,
"learning_rate": 0.00014479700935406817,
"loss": 0.5907,
"step": 25300
},
{
"epoch": 1.54,
"learning_rate": 0.0001438454457560498,
"loss": 0.5977,
"step": 25400
},
{
"epoch": 1.55,
"learning_rate": 0.00014289413019284236,
"loss": 0.5999,
"step": 25500
},
{
"epoch": 1.55,
"learning_rate": 0.00014194310100343292,
"loss": 0.5979,
"step": 25600
},
{
"epoch": 1.56,
"learning_rate": 0.00014099239651526742,
"loss": 0.5889,
"step": 25700
},
{
"epoch": 1.56,
"learning_rate": 0.0001400420550427061,
"loss": 0.5994,
"step": 25800
},
{
"epoch": 1.57,
"learning_rate": 0.0001390921148854791,
"loss": 0.5881,
"step": 25900
},
{
"epoch": 1.58,
"learning_rate": 0.00013814261432714336,
"loss": 0.5947,
"step": 26000
},
{
"epoch": 1.58,
"learning_rate": 0.00013719359163353944,
"loss": 0.5939,
"step": 26100
},
{
"epoch": 1.59,
"learning_rate": 0.0001362450850512494,
"loss": 0.5905,
"step": 26200
},
{
"epoch": 1.59,
"learning_rate": 0.00013529713280605567,
"loss": 0.5866,
"step": 26300
},
{
"epoch": 1.6,
"learning_rate": 0.00013434977310140012,
"loss": 0.5942,
"step": 26400
},
{
"epoch": 1.61,
"learning_rate": 0.0001334030441168447,
"loss": 0.584,
"step": 26500
},
{
"epoch": 1.61,
"learning_rate": 0.0001324569840065328,
"loss": 0.5932,
"step": 26600
},
{
"epoch": 1.62,
"learning_rate": 0.0001315116308976514,
"loss": 0.5896,
"step": 26700
},
{
"epoch": 1.62,
"learning_rate": 0.00013056702288889458,
"loss": 0.5899,
"step": 26800
},
{
"epoch": 1.63,
"learning_rate": 0.00012962319804892827,
"loss": 0.583,
"step": 26900
},
{
"epoch": 1.64,
"learning_rate": 0.00012868019441485568,
"loss": 0.5895,
"step": 27000
},
{
"epoch": 1.64,
"learning_rate": 0.00012773804999068473,
"loss": 0.5857,
"step": 27100
},
{
"epoch": 1.65,
"learning_rate": 0.00012679680274579636,
"loss": 0.5929,
"step": 27200
},
{
"epoch": 1.65,
"learning_rate": 0.00012585649061341405,
"loss": 0.5899,
"step": 27300
},
{
"epoch": 1.66,
"learning_rate": 0.00012491715148907554,
"loss": 0.5856,
"step": 27400
},
{
"epoch": 1.67,
"learning_rate": 0.0001239788232291052,
"loss": 0.5907,
"step": 27500
},
{
"epoch": 1.67,
"learning_rate": 0.00012304154364908856,
"loss": 0.591,
"step": 27600
},
{
"epoch": 1.68,
"learning_rate": 0.00012210535052234835,
"loss": 0.593,
"step": 27700
},
{
"epoch": 1.68,
"learning_rate": 0.00012117028157842202,
"loss": 0.5879,
"step": 27800
},
{
"epoch": 1.69,
"learning_rate": 0.00012023637450154138,
"loss": 0.5859,
"step": 27900
},
{
"epoch": 1.7,
"learning_rate": 0.00011930366692911378,
"loss": 0.5866,
"step": 28000
},
{
"epoch": 1.7,
"learning_rate": 0.00011837219645020536,
"loss": 0.587,
"step": 28100
},
{
"epoch": 1.71,
"learning_rate": 0.00011744200060402608,
"loss": 0.5844,
"step": 28200
},
{
"epoch": 1.72,
"learning_rate": 0.00011651311687841697,
"loss": 0.5848,
"step": 28300
},
{
"epoch": 1.72,
"learning_rate": 0.00011558558270833906,
"loss": 0.5925,
"step": 28400
},
{
"epoch": 1.73,
"learning_rate": 0.00011465943547436524,
"loss": 0.5885,
"step": 28500
},
{
"epoch": 1.73,
"learning_rate": 0.00011373471250117322,
"loss": 0.5904,
"step": 28600
},
{
"epoch": 1.74,
"learning_rate": 0.0001128114510560416,
"loss": 0.581,
"step": 28700
},
{
"epoch": 1.75,
"learning_rate": 0.00011188968834734798,
"loss": 0.5822,
"step": 28800
},
{
"epoch": 1.75,
"learning_rate": 0.00011096946152306923,
"loss": 0.5816,
"step": 28900
},
{
"epoch": 1.76,
"learning_rate": 0.00011005080766928467,
"loss": 0.5848,
"step": 29000
},
{
"epoch": 1.76,
"learning_rate": 0.00010913376380868118,
"loss": 0.5783,
"step": 29100
},
{
"epoch": 1.77,
"learning_rate": 0.00010821836689906128,
"loss": 0.5801,
"step": 29200
},
{
"epoch": 1.78,
"learning_rate": 0.00010730465383185379,
"loss": 0.5799,
"step": 29300
},
{
"epoch": 1.78,
"learning_rate": 0.00010639266143062683,
"loss": 0.5884,
"step": 29400
},
{
"epoch": 1.79,
"learning_rate": 0.00010548242644960404,
"loss": 0.5763,
"step": 29500
},
{
"epoch": 1.79,
"learning_rate": 0.00010457398557218315,
"loss": 0.5836,
"step": 29600
},
{
"epoch": 1.8,
"learning_rate": 0.00010366737540945772,
"loss": 0.5814,
"step": 29700
},
{
"epoch": 1.81,
"learning_rate": 0.00010276263249874166,
"loss": 0.5764,
"step": 29800
},
{
"epoch": 1.81,
"learning_rate": 0.00010185979330209668,
"loss": 0.5776,
"step": 29900
},
{
"epoch": 1.82,
"learning_rate": 0.00010095889420486292,
"loss": 0.5788,
"step": 30000
},
{
"epoch": 1.82,
"learning_rate": 0.0001000599715141925,
"loss": 0.5809,
"step": 30100
},
{
"epoch": 1.83,
"learning_rate": 9.916306145758637e-05,
"loss": 0.5828,
"step": 30200
},
{
"epoch": 1.84,
"learning_rate": 9.826820018143417e-05,
"loss": 0.5809,
"step": 30300
},
{
"epoch": 1.84,
"learning_rate": 9.737542374955779e-05,
"loss": 0.5855,
"step": 30400
},
{
"epoch": 1.85,
"learning_rate": 9.648476814175755e-05,
"loss": 0.5814,
"step": 30500
},
{
"epoch": 1.85,
"learning_rate": 9.559626925236263e-05,
"loss": 0.5905,
"step": 30600
},
{
"epoch": 1.86,
"learning_rate": 9.470996288878409e-05,
"loss": 0.5781,
"step": 30700
},
{
"epoch": 1.87,
"learning_rate": 9.382588477007196e-05,
"loss": 0.5846,
"step": 30800
},
{
"epoch": 1.87,
"learning_rate": 9.294407052547586e-05,
"loss": 0.5803,
"step": 30900
},
{
"epoch": 1.88,
"learning_rate": 9.206455569300888e-05,
"loss": 0.5849,
"step": 31000
},
{
"epoch": 1.88,
"learning_rate": 9.118737571801549e-05,
"loss": 0.5753,
"step": 31100
},
{
"epoch": 1.89,
"learning_rate": 9.031256595174299e-05,
"loss": 0.5806,
"step": 31200
},
{
"epoch": 1.9,
"learning_rate": 8.944016164991682e-05,
"loss": 0.5783,
"step": 31300
},
{
"epoch": 1.9,
"learning_rate": 8.857019797131991e-05,
"loss": 0.5853,
"step": 31400
},
{
"epoch": 1.91,
"learning_rate": 8.77027099763754e-05,
"loss": 0.5874,
"step": 31500
},
{
"epoch": 1.92,
"learning_rate": 8.683773262573396e-05,
"loss": 0.5801,
"step": 31600
},
{
"epoch": 1.92,
"learning_rate": 8.597530077886474e-05,
"loss": 0.5749,
"step": 31700
},
{
"epoch": 1.93,
"learning_rate": 8.511544919265039e-05,
"loss": 0.5751,
"step": 31800
},
{
"epoch": 1.93,
"learning_rate": 8.425821251998646e-05,
"loss": 0.585,
"step": 31900
},
{
"epoch": 1.94,
"learning_rate": 8.340362530838499e-05,
"loss": 0.5758,
"step": 32000
},
{
"epoch": 1.95,
"learning_rate": 8.255172199858192e-05,
"loss": 0.5839,
"step": 32100
},
{
"epoch": 1.95,
"learning_rate": 8.1702536923149e-05,
"loss": 0.5723,
"step": 32200
},
{
"epoch": 1.96,
"learning_rate": 8.085610430511064e-05,
"loss": 0.5811,
"step": 32300
},
{
"epoch": 1.96,
"learning_rate": 8.001245825656439e-05,
"loss": 0.5797,
"step": 32400
},
{
"epoch": 1.97,
"learning_rate": 7.917163277730609e-05,
"loss": 0.5683,
"step": 32500
},
{
"epoch": 1.98,
"learning_rate": 7.833366175345985e-05,
"loss": 0.5804,
"step": 32600
},
{
"epoch": 1.98,
"learning_rate": 7.749857895611223e-05,
"loss": 0.5676,
"step": 32700
},
{
"epoch": 1.99,
"learning_rate": 7.666641803995134e-05,
"loss": 0.5692,
"step": 32800
},
{
"epoch": 1.99,
"learning_rate": 7.583721254191065e-05,
"loss": 0.5822,
"step": 32900
},
{
"epoch": 2.0,
"learning_rate": 7.50109958798171e-05,
"loss": 0.5727,
"step": 33000
},
{
"epoch": 2.01,
"learning_rate": 7.418780135104454e-05,
"loss": 0.5524,
"step": 33100
},
{
"epoch": 2.01,
"learning_rate": 7.336766213117173e-05,
"loss": 0.5589,
"step": 33200
},
{
"epoch": 2.02,
"learning_rate": 7.255061127264536e-05,
"loss": 0.5523,
"step": 33300
},
{
"epoch": 2.02,
"learning_rate": 7.173668170344819e-05,
"loss": 0.5576,
"step": 33400
},
{
"epoch": 2.03,
"learning_rate": 7.092590622577162e-05,
"loss": 0.5579,
"step": 33500
},
{
"epoch": 2.04,
"learning_rate": 7.011831751469404e-05,
"loss": 0.5557,
"step": 33600
},
{
"epoch": 2.04,
"learning_rate": 6.931394811686386e-05,
"loss": 0.5579,
"step": 33700
},
{
"epoch": 2.05,
"learning_rate": 6.851283044918787e-05,
"loss": 0.5561,
"step": 33800
},
{
"epoch": 2.05,
"learning_rate": 6.771499679752496e-05,
"loss": 0.5587,
"step": 33900
},
{
"epoch": 2.06,
"learning_rate": 6.692047931538474e-05,
"loss": 0.557,
"step": 34000
},
{
"epoch": 2.07,
"learning_rate": 6.612931002263158e-05,
"loss": 0.5597,
"step": 34100
},
{
"epoch": 2.07,
"learning_rate": 6.534152080419484e-05,
"loss": 0.5546,
"step": 34200
},
{
"epoch": 2.08,
"learning_rate": 6.455714340878308e-05,
"loss": 0.5592,
"step": 34300
},
{
"epoch": 2.08,
"learning_rate": 6.377620944760513e-05,
"loss": 0.5576,
"step": 34400
},
{
"epoch": 2.09,
"learning_rate": 6.299875039309576e-05,
"loss": 0.562,
"step": 34500
},
{
"epoch": 2.1,
"learning_rate": 6.22247975776475e-05,
"loss": 0.5596,
"step": 34600
},
{
"epoch": 2.1,
"learning_rate": 6.1454382192348e-05,
"loss": 0.5537,
"step": 34700
},
{
"epoch": 2.11,
"learning_rate": 6.068753528572271e-05,
"loss": 0.5592,
"step": 34800
},
{
"epoch": 2.12,
"learning_rate": 5.992428776248381e-05,
"loss": 0.555,
"step": 34900
},
{
"epoch": 2.12,
"learning_rate": 5.9164670382284635e-05,
"loss": 0.5572,
"step": 35000
},
{
"epoch": 2.13,
"learning_rate": 5.840871375848003e-05,
"loss": 0.5531,
"step": 35100
},
{
"epoch": 2.13,
"learning_rate": 5.7656448356892776e-05,
"loss": 0.5583,
"step": 35200
},
{
"epoch": 2.14,
"learning_rate": 5.690790449458548e-05,
"loss": 0.5529,
"step": 35300
},
{
"epoch": 2.15,
"learning_rate": 5.6163112338638965e-05,
"loss": 0.5566,
"step": 35400
},
{
"epoch": 2.15,
"learning_rate": 5.5422101904936474e-05,
"loss": 0.5539,
"step": 35500
},
{
"epoch": 2.16,
"learning_rate": 5.468490305695393e-05,
"loss": 0.5507,
"step": 35600
},
{
"epoch": 2.16,
"learning_rate": 5.39515455045566e-05,
"loss": 0.557,
"step": 35700
},
{
"epoch": 2.17,
"learning_rate": 5.32220588028015e-05,
"loss": 0.5498,
"step": 35800
},
{
"epoch": 2.18,
"learning_rate": 5.249647235074647e-05,
"loss": 0.5547,
"step": 35900
},
{
"epoch": 2.18,
"learning_rate": 5.177481539026529e-05,
"loss": 0.5542,
"step": 36000
},
{
"epoch": 2.19,
"learning_rate": 5.105711700486922e-05,
"loss": 0.5597,
"step": 36100
},
{
"epoch": 2.19,
"learning_rate": 5.0343406118534935e-05,
"loss": 0.5583,
"step": 36200
},
{
"epoch": 2.2,
"learning_rate": 4.963371149453881e-05,
"loss": 0.5565,
"step": 36300
},
{
"epoch": 2.21,
"learning_rate": 4.8928061734297765e-05,
"loss": 0.5534,
"step": 36400
},
{
"epoch": 2.21,
"learning_rate": 4.822648527621653e-05,
"loss": 0.5549,
"step": 36500
},
{
"epoch": 2.22,
"learning_rate": 4.7529010394541746e-05,
"loss": 0.5552,
"step": 36600
},
{
"epoch": 2.22,
"learning_rate": 4.6835665198222224e-05,
"loss": 0.5515,
"step": 36700
},
{
"epoch": 2.23,
"learning_rate": 4.614647762977626e-05,
"loss": 0.5538,
"step": 36800
},
{
"epoch": 2.24,
"learning_rate": 4.5461475464165534e-05,
"loss": 0.5562,
"step": 36900
},
{
"epoch": 2.24,
"learning_rate": 4.478068630767565e-05,
"loss": 0.5553,
"step": 37000
},
{
"epoch": 2.25,
"learning_rate": 4.410413759680383e-05,
"loss": 0.5539,
"step": 37100
},
{
"epoch": 2.25,
"learning_rate": 4.343185659715283e-05,
"loss": 0.557,
"step": 37200
},
{
"epoch": 2.26,
"learning_rate": 4.276387040233236e-05,
"loss": 0.5542,
"step": 37300
},
{
"epoch": 2.27,
"learning_rate": 4.210020593286711e-05,
"loss": 0.5518,
"step": 37400
},
{
"epoch": 2.27,
"learning_rate": 4.144088993511181e-05,
"loss": 0.5555,
"step": 37500
},
{
"epoch": 2.28,
"learning_rate": 4.078594898017346e-05,
"loss": 0.5565,
"step": 37600
},
{
"epoch": 2.28,
"learning_rate": 4.013540946284024e-05,
"loss": 0.5544,
"step": 37700
},
{
"epoch": 2.29,
"learning_rate": 3.948929760051797e-05,
"loss": 0.5555,
"step": 37800
},
{
"epoch": 2.3,
"learning_rate": 3.8847639432173405e-05,
"loss": 0.5563,
"step": 37900
},
{
"epoch": 2.3,
"learning_rate": 3.821046081728497e-05,
"loss": 0.5525,
"step": 38000
},
{
"epoch": 2.31,
"learning_rate": 3.757778743480045e-05,
"loss": 0.5541,
"step": 38100
},
{
"epoch": 2.32,
"learning_rate": 3.694964478210221e-05,
"loss": 0.5564,
"step": 38200
},
{
"epoch": 2.32,
"learning_rate": 3.63260581739796e-05,
"loss": 0.5551,
"step": 38300
},
{
"epoch": 2.33,
"learning_rate": 3.5707052741608636e-05,
"loss": 0.5506,
"step": 38400
},
{
"epoch": 2.33,
"learning_rate": 3.5092653431539436e-05,
"loss": 0.5483,
"step": 38500
},
{
"epoch": 2.34,
"learning_rate": 3.448288500469058e-05,
"loss": 0.556,
"step": 38600
},
{
"epoch": 2.35,
"learning_rate": 3.3877772035351326e-05,
"loss": 0.5509,
"step": 38700
},
{
"epoch": 2.35,
"learning_rate": 3.327733891019132e-05,
"loss": 0.5574,
"step": 38800
},
{
"epoch": 2.36,
"learning_rate": 3.268160982727759e-05,
"loss": 0.5525,
"step": 38900
},
{
"epoch": 2.36,
"learning_rate": 3.209060879509968e-05,
"loss": 0.5502,
"step": 39000
},
{
"epoch": 2.37,
"learning_rate": 3.150435963160168e-05,
"loss": 0.5523,
"step": 39100
},
{
"epoch": 2.38,
"learning_rate": 3.0922885963222585e-05,
"loss": 0.5483,
"step": 39200
},
{
"epoch": 2.38,
"learning_rate": 3.0346211223944077e-05,
"loss": 0.547,
"step": 39300
},
{
"epoch": 2.39,
"learning_rate": 2.9774358654346046e-05,
"loss": 0.55,
"step": 39400
},
{
"epoch": 2.39,
"learning_rate": 2.9207351300670178e-05,
"loss": 0.549,
"step": 39500
},
{
"epoch": 2.4,
"learning_rate": 2.864521201389085e-05,
"loss": 0.5536,
"step": 39600
},
{
"epoch": 2.41,
"learning_rate": 2.8087963448794476e-05,
"loss": 0.546,
"step": 39700
},
{
"epoch": 2.41,
"learning_rate": 2.7535628063066368e-05,
"loss": 0.551,
"step": 39800
},
{
"epoch": 2.42,
"learning_rate": 2.698822811638569e-05,
"loss": 0.5547,
"step": 39900
},
{
"epoch": 2.42,
"learning_rate": 2.6445785669528386e-05,
"loss": 0.547,
"step": 40000
},
{
"epoch": 2.43,
"learning_rate": 2.590832258347814e-05,
"loss": 0.5556,
"step": 40100
},
{
"epoch": 2.44,
"learning_rate": 2.537586051854522e-05,
"loss": 0.5505,
"step": 40200
},
{
"epoch": 2.44,
"learning_rate": 2.4848420933493824e-05,
"loss": 0.547,
"step": 40300
},
{
"epoch": 2.45,
"learning_rate": 2.432602508467691e-05,
"loss": 0.5483,
"step": 40400
},
{
"epoch": 2.45,
"learning_rate": 2.3808694025179804e-05,
"loss": 0.5534,
"step": 40500
},
{
"epoch": 2.46,
"learning_rate": 2.3296448603971657e-05,
"loss": 0.5479,
"step": 40600
},
{
"epoch": 2.47,
"learning_rate": 2.2789309465065154e-05,
"loss": 0.5593,
"step": 40700
},
{
"epoch": 2.47,
"learning_rate": 2.2287297046684737e-05,
"loss": 0.5478,
"step": 40800
},
{
"epoch": 2.48,
"learning_rate": 2.179043158044263e-05,
"loss": 0.5562,
"step": 40900
},
{
"epoch": 2.48,
"learning_rate": 2.1298733090523722e-05,
"loss": 0.5513,
"step": 41000
},
{
"epoch": 2.49,
"learning_rate": 2.0812221392878463e-05,
"loss": 0.5507,
"step": 41100
},
{
"epoch": 2.5,
"learning_rate": 2.0330916094424244e-05,
"loss": 0.5465,
"step": 41200
},
{
"epoch": 2.5,
"learning_rate": 1.985483659225539e-05,
"loss": 0.5504,
"step": 41300
},
{
"epoch": 2.51,
"learning_rate": 1.9384002072861186e-05,
"loss": 0.5444,
"step": 41400
},
{
"epoch": 2.52,
"learning_rate": 1.89184315113528e-05,
"loss": 0.5514,
"step": 41500
},
{
"epoch": 2.52,
"learning_rate": 1.8458143670698522e-05,
"loss": 0.5448,
"step": 41600
},
{
"epoch": 2.53,
"learning_rate": 1.80031571009676e-05,
"loss": 0.5419,
"step": 41700
},
{
"epoch": 2.53,
"learning_rate": 1.7553490138582786e-05,
"loss": 0.5537,
"step": 41800
},
{
"epoch": 2.54,
"learning_rate": 1.7109160905580982e-05,
"loss": 0.5493,
"step": 41900
},
{
"epoch": 2.55,
"learning_rate": 1.6670187308883364e-05,
"loss": 0.5496,
"step": 42000
},
{
"epoch": 2.55,
"learning_rate": 1.6236587039573383e-05,
"loss": 0.5491,
"step": 42100
},
{
"epoch": 2.56,
"learning_rate": 1.5808377572184044e-05,
"loss": 0.5502,
"step": 42200
},
{
"epoch": 2.56,
"learning_rate": 1.5385576163993417e-05,
"loss": 0.5539,
"step": 42300
},
{
"epoch": 2.57,
"learning_rate": 1.4968199854329322e-05,
"loss": 0.5473,
"step": 42400
},
{
"epoch": 2.58,
"learning_rate": 1.4556265463882594e-05,
"loss": 0.5523,
"step": 42500
},
{
"epoch": 2.58,
"learning_rate": 1.4149789594029093e-05,
"loss": 0.554,
"step": 42600
},
{
"epoch": 2.59,
"learning_rate": 1.3748788626160878e-05,
"loss": 0.5487,
"step": 42700
},
{
"epoch": 2.59,
"learning_rate": 1.3353278721025756e-05,
"loss": 0.5558,
"step": 42800
},
{
"epoch": 2.6,
"learning_rate": 1.2963275818076152e-05,
"loss": 0.5501,
"step": 42900
},
{
"epoch": 2.61,
"learning_rate": 1.2578795634826671e-05,
"loss": 0.5561,
"step": 43000
},
{
"epoch": 2.61,
"learning_rate": 1.2199853666220678e-05,
"loss": 0.5486,
"step": 43100
},
{
"epoch": 2.62,
"learning_rate": 1.18264651840059e-05,
"loss": 0.5514,
"step": 43200
},
{
"epoch": 2.62,
"learning_rate": 1.145864523611884e-05,
"loss": 0.5537,
"step": 43300
},
{
"epoch": 2.63,
"learning_rate": 1.1096408646078409e-05,
"loss": 0.548,
"step": 43400
},
{
"epoch": 2.64,
"learning_rate": 1.073977001238851e-05,
"loss": 0.5509,
"step": 43500
},
{
"epoch": 2.64,
"learning_rate": 1.0388743707949648e-05,
"loss": 0.549,
"step": 43600
}
],
"logging_steps": 100,
"max_steps": 49497,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 100,
"total_flos": 3.4286253848387387e+19,
"train_batch_size": 3,
"trial_name": null,
"trial_params": null
}