|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.6425043183126764, |
|
"eval_steps": 500, |
|
"global_step": 43600, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00029999755171150014, |
|
"loss": 1.6385, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00029998908859460167, |
|
"loss": 0.8837, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00029997458076451173, |
|
"loss": 0.8445, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0002999540288059106, |
|
"loss": 0.8257, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0002999274335470631, |
|
"loss": 0.8054, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00029989479605978546, |
|
"loss": 0.7917, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0002998561176594015, |
|
"loss": 0.7802, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00029981139990469034, |
|
"loss": 0.7826, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00029976064459782297, |
|
"loss": 0.7833, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0002997038537842901, |
|
"loss": 0.7782, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0002996410297528193, |
|
"loss": 0.7649, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00029957217503528326, |
|
"loss": 0.7606, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0002994972924065971, |
|
"loss": 0.7673, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00029941638488460725, |
|
"loss": 0.7534, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0002993294557299693, |
|
"loss": 0.7711, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00029923650844601677, |
|
"loss": 0.7499, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00029913754677862004, |
|
"loss": 0.7241, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0002990325747160351, |
|
"loss": 0.7471, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00029892159648874317, |
|
"loss": 0.7424, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00029880461656927996, |
|
"loss": 0.7413, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0002986816396720555, |
|
"loss": 0.7299, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.000298552670753164, |
|
"loss": 0.7102, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00029841771501018456, |
|
"loss": 0.736, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0002982767778819711, |
|
"loss": 0.716, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00029812986504843366, |
|
"loss": 0.7419, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0002979769824303091, |
|
"loss": 0.714, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00029781813618892303, |
|
"loss": 0.7306, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00029765333272594065, |
|
"loss": 0.7121, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0002974825786831097, |
|
"loss": 0.7107, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00029730588094199214, |
|
"loss": 0.7332, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0002971232466236871, |
|
"loss": 0.7102, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0002969346830885439, |
|
"loss": 0.7205, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00029674019793586516, |
|
"loss": 0.7113, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0002965397990036008, |
|
"loss": 0.7141, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0002963334943680322, |
|
"loss": 0.7039, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0002961212923434465, |
|
"loss": 0.7067, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0002959032014818015, |
|
"loss": 0.7073, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0002956792305723814, |
|
"loss": 0.7113, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00029544938864144225, |
|
"loss": 0.7035, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00029521368495184807, |
|
"loss": 0.6961, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0002949721290026979, |
|
"loss": 0.6884, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0002947247305289429, |
|
"loss": 0.686, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0002944714995009936, |
|
"loss": 0.6998, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00029421244612431877, |
|
"loss": 0.7003, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00029394758083903347, |
|
"loss": 0.6928, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00029367691431947884, |
|
"loss": 0.7097, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0002934004574737915, |
|
"loss": 0.7065, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0002931182214434643, |
|
"loss": 0.6929, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00029283021760289686, |
|
"loss": 0.6902, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00029253645755893777, |
|
"loss": 0.6813, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00029223695315041615, |
|
"loss": 0.682, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0002919317164476651, |
|
"loss": 0.6885, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0002916207597520349, |
|
"loss": 0.6629, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00029130409559539747, |
|
"loss": 0.7018, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0002909817367396412, |
|
"loss": 0.6746, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00029065369617615653, |
|
"loss": 0.699, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00029031998712531273, |
|
"loss": 0.6768, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00028998062303592473, |
|
"loss": 0.682, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00028963561758471135, |
|
"loss": 0.674, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00028928498467574394, |
|
"loss": 0.6898, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00028892873843988637, |
|
"loss": 0.6947, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.000288566893234225, |
|
"loss": 0.6751, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00028819946364149065, |
|
"loss": 0.6815, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0002878264644694705, |
|
"loss": 0.6678, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0002874479107504114, |
|
"loss": 0.7046, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0002870638177404143, |
|
"loss": 0.6793, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00028667420091881896, |
|
"loss": 0.6718, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0002862790759875807, |
|
"loss": 0.6744, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00028587845887063695, |
|
"loss": 0.6779, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00028547236571326603, |
|
"loss": 0.6737, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00028506081288143617, |
|
"loss": 0.6643, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0002846438169611462, |
|
"loss": 0.678, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00028422139475775673, |
|
"loss": 0.6726, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0002837935632953133, |
|
"loss": 0.6779, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00028336033981586005, |
|
"loss": 0.6765, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00028292174177874487, |
|
"loss": 0.6765, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0002824777868599158, |
|
"loss": 0.6804, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0002820284929512088, |
|
"loss": 0.6838, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00028157387815962637, |
|
"loss": 0.6774, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00028111396080660815, |
|
"loss": 0.6759, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00028064875942729236, |
|
"loss": 0.6586, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0002801782927697689, |
|
"loss": 0.6711, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0002797025797943237, |
|
"loss": 0.6582, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0002792216396726747, |
|
"loss": 0.6516, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0002787354917871992, |
|
"loss": 0.6722, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0002782441557301526, |
|
"loss": 0.6697, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0002777476513028789, |
|
"loss": 0.6678, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0002772459985150127, |
|
"loss": 0.6529, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00027673921758367294, |
|
"loss": 0.669, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00027622732893264776, |
|
"loss": 0.67, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00027571035319157167, |
|
"loss": 0.6703, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0002751883111950942, |
|
"loss": 0.6603, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00027466122398203994, |
|
"loss": 0.6509, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00027412911279456104, |
|
"loss": 0.6677, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0002735919990772809, |
|
"loss": 0.6593, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0002730499044764299, |
|
"loss": 0.652, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0002725028508389731, |
|
"loss": 0.658, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00027195086021172994, |
|
"loss": 0.6633, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.0002713939548404853, |
|
"loss": 0.6597, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.0002708321571690937, |
|
"loss": 0.6578, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00027026548983857384, |
|
"loss": 0.6624, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.000269693975686197, |
|
"loss": 0.6546, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.0002691176377445662, |
|
"loss": 0.664, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.000268536499240688, |
|
"loss": 0.6626, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00026795058359503675, |
|
"loss": 0.6549, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.0002673599144206103, |
|
"loss": 0.6506, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.0002667645155219785, |
|
"loss": 0.6523, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.0002661644108943241, |
|
"loss": 0.6721, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00026555962472247537, |
|
"loss": 0.653, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.0002649501813799317, |
|
"loss": 0.6623, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00026433610542788116, |
|
"loss": 0.6517, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.0002637174216142106, |
|
"loss": 0.6662, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.0002630941548725086, |
|
"loss": 0.6713, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.0002624663303210602, |
|
"loss": 0.646, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.000261833973261835, |
|
"loss": 0.6539, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.0002611971091794672, |
|
"loss": 0.6602, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00026055576374022855, |
|
"loss": 0.6422, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00025990996279099424, |
|
"loss": 0.6511, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00025925973235820096, |
|
"loss": 0.6547, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00025860509864679795, |
|
"loss": 0.6464, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00025794608803919133, |
|
"loss": 0.6591, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00025728272709418044, |
|
"loss": 0.6517, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.00025661504254588773, |
|
"loss": 0.6471, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.0002559430613026812, |
|
"loss": 0.6601, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00025526681044609004, |
|
"loss": 0.6608, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.0002545863172297133, |
|
"loss": 0.6448, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.0002539016090781214, |
|
"loss": 0.6466, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.0002532127135857509, |
|
"loss": 0.6367, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00025251965851579245, |
|
"loss": 0.6464, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.0002518224717990721, |
|
"loss": 0.6489, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.0002511211815329253, |
|
"loss": 0.6455, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00025041581598006475, |
|
"loss": 0.6486, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.00024970640356744144, |
|
"loss": 0.6414, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.0002489929728850988, |
|
"loss": 0.6455, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00024827555268502075, |
|
"loss": 0.6534, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00024755417187997275, |
|
"loss": 0.6609, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.0002468288595423368, |
|
"loss": 0.6415, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.00024609964490293954, |
|
"loss": 0.6583, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.0002453665573498745, |
|
"loss": 0.6467, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.0002446296264273174, |
|
"loss": 0.6433, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.00024388888183433577, |
|
"loss": 0.6383, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.0002431443534236919, |
|
"loss": 0.636, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.00024239607120063995, |
|
"loss": 0.6385, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.00024164406532171628, |
|
"loss": 0.6407, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.00024088836609352458, |
|
"loss": 0.6344, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.00024012900397151418, |
|
"loss": 0.6262, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.0002393660095587529, |
|
"loss": 0.6405, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.0002385994136046933, |
|
"loss": 0.6487, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.0002378292470039341, |
|
"loss": 0.6302, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.00023705554079497446, |
|
"loss": 0.6342, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.0002362783261589634, |
|
"loss": 0.6386, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.00023549763441844322, |
|
"loss": 0.6415, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.00023471349703608696, |
|
"loss": 0.6387, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.0002339259456134306, |
|
"loss": 0.6381, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.00023313501188959948, |
|
"loss": 0.6511, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.000232340727740029, |
|
"loss": 0.6413, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.00023154312517518024, |
|
"loss": 0.6497, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.00023074223633924977, |
|
"loss": 0.6515, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.00022993809350887413, |
|
"loss": 0.6363, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.00022913072909182936, |
|
"loss": 0.6316, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.0002283201756257245, |
|
"loss": 0.633, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.00022750646577669083, |
|
"loss": 0.6478, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.00022668963233806464, |
|
"loss": 0.6363, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.00022586970822906647, |
|
"loss": 0.6303, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0002250467264934738, |
|
"loss": 0.6237, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.00022422072029828965, |
|
"loss": 0.6181, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.00022339172293240586, |
|
"loss": 0.6164, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.00022255976780526145, |
|
"loss": 0.613, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.0002217248884454963, |
|
"loss": 0.6179, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.00022088711849959982, |
|
"loss": 0.6066, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.0002200464917305549, |
|
"loss": 0.6081, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.00021920304201647744, |
|
"loss": 0.6057, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.00021835680334925087, |
|
"loss": 0.6165, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.0002175078098331562, |
|
"loss": 0.6157, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.0002166560956834978, |
|
"loss": 0.6085, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.00021580169522522424, |
|
"loss": 0.615, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.00021494464289154505, |
|
"loss": 0.6119, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.0002140849732225431, |
|
"loss": 0.6097, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.0002132227208637826, |
|
"loss": 0.6155, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.0002123579205649126, |
|
"loss": 0.6066, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.00021149060717826694, |
|
"loss": 0.6001, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.00021062081565745928, |
|
"loss": 0.6068, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.0002097485810559748, |
|
"loss": 0.6131, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.00020887393852575716, |
|
"loss": 0.612, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.00020799692331579213, |
|
"loss": 0.6051, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.00020711757077068675, |
|
"loss": 0.6145, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.00020623591632924515, |
|
"loss": 0.6077, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.00020535199552304033, |
|
"loss": 0.6005, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.00020446584397498178, |
|
"loss": 0.6178, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.00020357749739788054, |
|
"loss": 0.6038, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.00020268699159300927, |
|
"loss": 0.5974, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.00020179436244865986, |
|
"loss": 0.6136, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.00020089964593869694, |
|
"loss": 0.6098, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.00020000287812110793, |
|
"loss": 0.6127, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.00019910409513655038, |
|
"loss": 0.6073, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.00019820333320689473, |
|
"loss": 0.6008, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.00019730062863376524, |
|
"loss": 0.6124, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.00019639601779707655, |
|
"loss": 0.6144, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.00019548953715356758, |
|
"loss": 0.6123, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.0001945812232353326, |
|
"loss": 0.6138, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.00019367111264834846, |
|
"loss": 0.6173, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.0001927592420709998, |
|
"loss": 0.5983, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.00019184564825260053, |
|
"loss": 0.6037, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.000190930368011913, |
|
"loss": 0.602, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.00019001343823566412, |
|
"loss": 0.6095, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.0001890948958770587, |
|
"loss": 0.6122, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.00018817477795429028, |
|
"loss": 0.61, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.00018725312154904925, |
|
"loss": 0.6034, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 0.00018632996380502846, |
|
"loss": 0.6085, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 0.00018540534192642614, |
|
"loss": 0.5977, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.00018447929317644672, |
|
"loss": 0.6071, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.00018355185487579898, |
|
"loss": 0.6118, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.00018262306440119198, |
|
"loss": 0.6078, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 0.00018169295918382883, |
|
"loss": 0.6029, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 0.00018076157670789803, |
|
"loss": 0.5974, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.00017982895450906303, |
|
"loss": 0.6077, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.00017889513017294923, |
|
"loss": 0.5953, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.00017796014133362946, |
|
"loss": 0.5985, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.00017702402567210723, |
|
"loss": 0.5987, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.00017608682091479813, |
|
"loss": 0.6017, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.00017514856483200937, |
|
"loss": 0.5985, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.00017420929523641766, |
|
"loss": 0.5999, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.0001732690499815454, |
|
"loss": 0.6052, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.00017232786696023492, |
|
"loss": 0.6015, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.00017138578410312162, |
|
"loss": 0.6002, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.0001704428393771051, |
|
"loss": 0.6011, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.00016949907078381927, |
|
"loss": 0.6045, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.00016855451635810058, |
|
"loss": 0.607, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.00016760921416645544, |
|
"loss": 0.6062, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.00016666320230552593, |
|
"loss": 0.5966, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.00016571651890055452, |
|
"loss": 0.6026, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 0.0001647692021038477, |
|
"loss": 0.5941, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 0.00016382129009323817, |
|
"loss": 0.5919, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.00016287282107054643, |
|
"loss": 0.6035, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.00016192383326004106, |
|
"loss": 0.6033, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.00016097436490689838, |
|
"loss": 0.5961, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.00016002445427566107, |
|
"loss": 0.589, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.000159074139648696, |
|
"loss": 0.6, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.0001581234593246516, |
|
"loss": 0.5951, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.0001571724516169141, |
|
"loss": 0.6027, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.00015622115485206385, |
|
"loss": 0.5993, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.00015526960736833025, |
|
"loss": 0.5915, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.00015431784751404707, |
|
"loss": 0.5977, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.00015336591364610686, |
|
"loss": 0.6022, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.00015241384412841493, |
|
"loss": 0.6019, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 0.00015146167733034367, |
|
"loss": 0.5921, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.00015050945162518574, |
|
"loss": 0.6011, |
|
"step": 24700 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.0001495572053886079, |
|
"loss": 0.5934, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.00014860497699710433, |
|
"loss": 0.5893, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 0.00014765280482645005, |
|
"loss": 0.5951, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 0.00014670072725015437, |
|
"loss": 0.59, |
|
"step": 25100 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.00014574878263791426, |
|
"loss": 0.6008, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.00014479700935406817, |
|
"loss": 0.5907, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.0001438454457560498, |
|
"loss": 0.5977, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 0.00014289413019284236, |
|
"loss": 0.5999, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 0.00014194310100343292, |
|
"loss": 0.5979, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.00014099239651526742, |
|
"loss": 0.5889, |
|
"step": 25700 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.0001400420550427061, |
|
"loss": 0.5994, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.0001390921148854791, |
|
"loss": 0.5881, |
|
"step": 25900 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.00013814261432714336, |
|
"loss": 0.5947, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.00013719359163353944, |
|
"loss": 0.5939, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.0001362450850512494, |
|
"loss": 0.5905, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.00013529713280605567, |
|
"loss": 0.5866, |
|
"step": 26300 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.00013434977310140012, |
|
"loss": 0.5942, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 0.0001334030441168447, |
|
"loss": 0.584, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 0.0001324569840065328, |
|
"loss": 0.5932, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 0.0001315116308976514, |
|
"loss": 0.5896, |
|
"step": 26700 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 0.00013056702288889458, |
|
"loss": 0.5899, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 0.00012962319804892827, |
|
"loss": 0.583, |
|
"step": 26900 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 0.00012868019441485568, |
|
"loss": 0.5895, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 0.00012773804999068473, |
|
"loss": 0.5857, |
|
"step": 27100 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.00012679680274579636, |
|
"loss": 0.5929, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.00012585649061341405, |
|
"loss": 0.5899, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 0.00012491715148907554, |
|
"loss": 0.5856, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.0001239788232291052, |
|
"loss": 0.5907, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.00012304154364908856, |
|
"loss": 0.591, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.00012210535052234835, |
|
"loss": 0.593, |
|
"step": 27700 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.00012117028157842202, |
|
"loss": 0.5879, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.00012023637450154138, |
|
"loss": 0.5859, |
|
"step": 27900 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.00011930366692911378, |
|
"loss": 0.5866, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.00011837219645020536, |
|
"loss": 0.587, |
|
"step": 28100 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 0.00011744200060402608, |
|
"loss": 0.5844, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 0.00011651311687841697, |
|
"loss": 0.5848, |
|
"step": 28300 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 0.00011558558270833906, |
|
"loss": 0.5925, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 0.00011465943547436524, |
|
"loss": 0.5885, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 0.00011373471250117322, |
|
"loss": 0.5904, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.0001128114510560416, |
|
"loss": 0.581, |
|
"step": 28700 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.00011188968834734798, |
|
"loss": 0.5822, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.00011096946152306923, |
|
"loss": 0.5816, |
|
"step": 28900 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.00011005080766928467, |
|
"loss": 0.5848, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.00010913376380868118, |
|
"loss": 0.5783, |
|
"step": 29100 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 0.00010821836689906128, |
|
"loss": 0.5801, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 0.00010730465383185379, |
|
"loss": 0.5799, |
|
"step": 29300 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 0.00010639266143062683, |
|
"loss": 0.5884, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 0.00010548242644960404, |
|
"loss": 0.5763, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 0.00010457398557218315, |
|
"loss": 0.5836, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 0.00010366737540945772, |
|
"loss": 0.5814, |
|
"step": 29700 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 0.00010276263249874166, |
|
"loss": 0.5764, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 0.00010185979330209668, |
|
"loss": 0.5776, |
|
"step": 29900 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.00010095889420486292, |
|
"loss": 0.5788, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.0001000599715141925, |
|
"loss": 0.5809, |
|
"step": 30100 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 9.916306145758637e-05, |
|
"loss": 0.5828, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 9.826820018143417e-05, |
|
"loss": 0.5809, |
|
"step": 30300 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 9.737542374955779e-05, |
|
"loss": 0.5855, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 9.648476814175755e-05, |
|
"loss": 0.5814, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 9.559626925236263e-05, |
|
"loss": 0.5905, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 9.470996288878409e-05, |
|
"loss": 0.5781, |
|
"step": 30700 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 9.382588477007196e-05, |
|
"loss": 0.5846, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 9.294407052547586e-05, |
|
"loss": 0.5803, |
|
"step": 30900 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 9.206455569300888e-05, |
|
"loss": 0.5849, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 9.118737571801549e-05, |
|
"loss": 0.5753, |
|
"step": 31100 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 9.031256595174299e-05, |
|
"loss": 0.5806, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 8.944016164991682e-05, |
|
"loss": 0.5783, |
|
"step": 31300 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 8.857019797131991e-05, |
|
"loss": 0.5853, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 8.77027099763754e-05, |
|
"loss": 0.5874, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 8.683773262573396e-05, |
|
"loss": 0.5801, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 8.597530077886474e-05, |
|
"loss": 0.5749, |
|
"step": 31700 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 8.511544919265039e-05, |
|
"loss": 0.5751, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 8.425821251998646e-05, |
|
"loss": 0.585, |
|
"step": 31900 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 8.340362530838499e-05, |
|
"loss": 0.5758, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 8.255172199858192e-05, |
|
"loss": 0.5839, |
|
"step": 32100 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 8.1702536923149e-05, |
|
"loss": 0.5723, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 8.085610430511064e-05, |
|
"loss": 0.5811, |
|
"step": 32300 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 8.001245825656439e-05, |
|
"loss": 0.5797, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 7.917163277730609e-05, |
|
"loss": 0.5683, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 7.833366175345985e-05, |
|
"loss": 0.5804, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 7.749857895611223e-05, |
|
"loss": 0.5676, |
|
"step": 32700 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 7.666641803995134e-05, |
|
"loss": 0.5692, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 7.583721254191065e-05, |
|
"loss": 0.5822, |
|
"step": 32900 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 7.50109958798171e-05, |
|
"loss": 0.5727, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 7.418780135104454e-05, |
|
"loss": 0.5524, |
|
"step": 33100 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 7.336766213117173e-05, |
|
"loss": 0.5589, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 7.255061127264536e-05, |
|
"loss": 0.5523, |
|
"step": 33300 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 7.173668170344819e-05, |
|
"loss": 0.5576, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 7.092590622577162e-05, |
|
"loss": 0.5579, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 7.011831751469404e-05, |
|
"loss": 0.5557, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 6.931394811686386e-05, |
|
"loss": 0.5579, |
|
"step": 33700 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 6.851283044918787e-05, |
|
"loss": 0.5561, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 6.771499679752496e-05, |
|
"loss": 0.5587, |
|
"step": 33900 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 6.692047931538474e-05, |
|
"loss": 0.557, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 6.612931002263158e-05, |
|
"loss": 0.5597, |
|
"step": 34100 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 6.534152080419484e-05, |
|
"loss": 0.5546, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 6.455714340878308e-05, |
|
"loss": 0.5592, |
|
"step": 34300 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 6.377620944760513e-05, |
|
"loss": 0.5576, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 6.299875039309576e-05, |
|
"loss": 0.562, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 6.22247975776475e-05, |
|
"loss": 0.5596, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 6.1454382192348e-05, |
|
"loss": 0.5537, |
|
"step": 34700 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 6.068753528572271e-05, |
|
"loss": 0.5592, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 5.992428776248381e-05, |
|
"loss": 0.555, |
|
"step": 34900 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 5.9164670382284635e-05, |
|
"loss": 0.5572, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 5.840871375848003e-05, |
|
"loss": 0.5531, |
|
"step": 35100 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 5.7656448356892776e-05, |
|
"loss": 0.5583, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 5.690790449458548e-05, |
|
"loss": 0.5529, |
|
"step": 35300 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 5.6163112338638965e-05, |
|
"loss": 0.5566, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 5.5422101904936474e-05, |
|
"loss": 0.5539, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 5.468490305695393e-05, |
|
"loss": 0.5507, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 5.39515455045566e-05, |
|
"loss": 0.557, |
|
"step": 35700 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 5.32220588028015e-05, |
|
"loss": 0.5498, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 5.249647235074647e-05, |
|
"loss": 0.5547, |
|
"step": 35900 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 5.177481539026529e-05, |
|
"loss": 0.5542, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 5.105711700486922e-05, |
|
"loss": 0.5597, |
|
"step": 36100 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 5.0343406118534935e-05, |
|
"loss": 0.5583, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 4.963371149453881e-05, |
|
"loss": 0.5565, |
|
"step": 36300 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 4.8928061734297765e-05, |
|
"loss": 0.5534, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 4.822648527621653e-05, |
|
"loss": 0.5549, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 4.7529010394541746e-05, |
|
"loss": 0.5552, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 4.6835665198222224e-05, |
|
"loss": 0.5515, |
|
"step": 36700 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 4.614647762977626e-05, |
|
"loss": 0.5538, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 4.5461475464165534e-05, |
|
"loss": 0.5562, |
|
"step": 36900 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 4.478068630767565e-05, |
|
"loss": 0.5553, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 4.410413759680383e-05, |
|
"loss": 0.5539, |
|
"step": 37100 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 4.343185659715283e-05, |
|
"loss": 0.557, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 4.276387040233236e-05, |
|
"loss": 0.5542, |
|
"step": 37300 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 4.210020593286711e-05, |
|
"loss": 0.5518, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 4.144088993511181e-05, |
|
"loss": 0.5555, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 4.078594898017346e-05, |
|
"loss": 0.5565, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 4.013540946284024e-05, |
|
"loss": 0.5544, |
|
"step": 37700 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 3.948929760051797e-05, |
|
"loss": 0.5555, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 3.8847639432173405e-05, |
|
"loss": 0.5563, |
|
"step": 37900 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 3.821046081728497e-05, |
|
"loss": 0.5525, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 3.757778743480045e-05, |
|
"loss": 0.5541, |
|
"step": 38100 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 3.694964478210221e-05, |
|
"loss": 0.5564, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 3.63260581739796e-05, |
|
"loss": 0.5551, |
|
"step": 38300 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 3.5707052741608636e-05, |
|
"loss": 0.5506, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 3.5092653431539436e-05, |
|
"loss": 0.5483, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 3.448288500469058e-05, |
|
"loss": 0.556, |
|
"step": 38600 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 3.3877772035351326e-05, |
|
"loss": 0.5509, |
|
"step": 38700 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 3.327733891019132e-05, |
|
"loss": 0.5574, |
|
"step": 38800 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 3.268160982727759e-05, |
|
"loss": 0.5525, |
|
"step": 38900 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 3.209060879509968e-05, |
|
"loss": 0.5502, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 3.150435963160168e-05, |
|
"loss": 0.5523, |
|
"step": 39100 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 3.0922885963222585e-05, |
|
"loss": 0.5483, |
|
"step": 39200 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 3.0346211223944077e-05, |
|
"loss": 0.547, |
|
"step": 39300 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 2.9774358654346046e-05, |
|
"loss": 0.55, |
|
"step": 39400 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 2.9207351300670178e-05, |
|
"loss": 0.549, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 2.864521201389085e-05, |
|
"loss": 0.5536, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 2.8087963448794476e-05, |
|
"loss": 0.546, |
|
"step": 39700 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 2.7535628063066368e-05, |
|
"loss": 0.551, |
|
"step": 39800 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 2.698822811638569e-05, |
|
"loss": 0.5547, |
|
"step": 39900 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 2.6445785669528386e-05, |
|
"loss": 0.547, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 2.590832258347814e-05, |
|
"loss": 0.5556, |
|
"step": 40100 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 2.537586051854522e-05, |
|
"loss": 0.5505, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 2.4848420933493824e-05, |
|
"loss": 0.547, |
|
"step": 40300 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 2.432602508467691e-05, |
|
"loss": 0.5483, |
|
"step": 40400 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 2.3808694025179804e-05, |
|
"loss": 0.5534, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 2.3296448603971657e-05, |
|
"loss": 0.5479, |
|
"step": 40600 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 2.2789309465065154e-05, |
|
"loss": 0.5593, |
|
"step": 40700 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 2.2287297046684737e-05, |
|
"loss": 0.5478, |
|
"step": 40800 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 2.179043158044263e-05, |
|
"loss": 0.5562, |
|
"step": 40900 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 2.1298733090523722e-05, |
|
"loss": 0.5513, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 2.0812221392878463e-05, |
|
"loss": 0.5507, |
|
"step": 41100 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 2.0330916094424244e-05, |
|
"loss": 0.5465, |
|
"step": 41200 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 1.985483659225539e-05, |
|
"loss": 0.5504, |
|
"step": 41300 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 1.9384002072861186e-05, |
|
"loss": 0.5444, |
|
"step": 41400 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 1.89184315113528e-05, |
|
"loss": 0.5514, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 1.8458143670698522e-05, |
|
"loss": 0.5448, |
|
"step": 41600 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 1.80031571009676e-05, |
|
"loss": 0.5419, |
|
"step": 41700 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 1.7553490138582786e-05, |
|
"loss": 0.5537, |
|
"step": 41800 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 1.7109160905580982e-05, |
|
"loss": 0.5493, |
|
"step": 41900 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 1.6670187308883364e-05, |
|
"loss": 0.5496, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 1.6236587039573383e-05, |
|
"loss": 0.5491, |
|
"step": 42100 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 1.5808377572184044e-05, |
|
"loss": 0.5502, |
|
"step": 42200 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 1.5385576163993417e-05, |
|
"loss": 0.5539, |
|
"step": 42300 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 1.4968199854329322e-05, |
|
"loss": 0.5473, |
|
"step": 42400 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 1.4556265463882594e-05, |
|
"loss": 0.5523, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 1.4149789594029093e-05, |
|
"loss": 0.554, |
|
"step": 42600 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 1.3748788626160878e-05, |
|
"loss": 0.5487, |
|
"step": 42700 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 1.3353278721025756e-05, |
|
"loss": 0.5558, |
|
"step": 42800 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 1.2963275818076152e-05, |
|
"loss": 0.5501, |
|
"step": 42900 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 1.2578795634826671e-05, |
|
"loss": 0.5561, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 1.2199853666220678e-05, |
|
"loss": 0.5486, |
|
"step": 43100 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 1.18264651840059e-05, |
|
"loss": 0.5514, |
|
"step": 43200 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 1.145864523611884e-05, |
|
"loss": 0.5537, |
|
"step": 43300 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 1.1096408646078409e-05, |
|
"loss": 0.548, |
|
"step": 43400 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 1.073977001238851e-05, |
|
"loss": 0.5509, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 1.0388743707949648e-05, |
|
"loss": 0.549, |
|
"step": 43600 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 49497, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"total_flos": 3.4286253848387387e+19, |
|
"train_batch_size": 3, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|