diff --git "a/last-checkpoint/trainer_state.json" "b/last-checkpoint/trainer_state.json" --- "a/last-checkpoint/trainer_state.json" +++ "b/last-checkpoint/trainer_state.json" @@ -1,8 +1,8 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 0.646752091838797, - "global_step": 8000, + "epoch": 0.9701281377581955, + "global_step": 12000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, @@ -9727,11 +9727,4822 @@ "eval_samples_per_second": 0.103, "eval_steps_per_second": 0.052, "step": 8000 + }, + { + "epoch": 0.65, + "learning_rate": 3.923114237205918e-05, + "loss": 3.0919, + "step": 8005 + }, + { + "epoch": 0.65, + "learning_rate": 3.922440509876843e-05, + "loss": 3.1249, + "step": 8010 + }, + { + "epoch": 0.65, + "learning_rate": 3.921766782547767e-05, + "loss": 2.8888, + "step": 8015 + }, + { + "epoch": 0.65, + "learning_rate": 3.921093055218692e-05, + "loss": 3.0429, + "step": 8020 + }, + { + "epoch": 0.65, + "learning_rate": 3.920419327889617e-05, + "loss": 3.0775, + "step": 8025 + }, + { + "epoch": 0.65, + "learning_rate": 3.919745600560542e-05, + "loss": 2.9262, + "step": 8030 + }, + { + "epoch": 0.65, + "learning_rate": 3.919071873231466e-05, + "loss": 3.1079, + "step": 8035 + }, + { + "epoch": 0.65, + "learning_rate": 3.91839814590239e-05, + "loss": 2.9723, + "step": 8040 + }, + { + "epoch": 0.65, + "learning_rate": 3.917724418573315e-05, + "loss": 3.3847, + "step": 8045 + }, + { + "epoch": 0.65, + "learning_rate": 3.91705069124424e-05, + "loss": 3.084, + "step": 8050 + }, + { + "epoch": 0.65, + "learning_rate": 3.916376963915164e-05, + "loss": 2.9241, + "step": 8055 + }, + { + "epoch": 0.65, + "learning_rate": 3.915703236586089e-05, + "loss": 2.8274, + "step": 8060 + }, + { + "epoch": 0.65, + "learning_rate": 3.915029509257014e-05, + "loss": 3.0998, + "step": 8065 + }, + { + "epoch": 0.65, + "learning_rate": 3.914355781927938e-05, + "loss": 3.2271, + "step": 8070 + }, + { + "epoch": 0.65, + "learning_rate": 3.913682054598863e-05, + "loss": 3.0446, + "step": 8075 + }, + { + "epoch": 0.65, + "learning_rate": 3.913008327269787e-05, + "loss": 3.2226, + "step": 8080 + }, + { + "epoch": 0.65, + "learning_rate": 3.912334599940712e-05, + "loss": 2.9655, + "step": 8085 + }, + { + "epoch": 0.65, + "learning_rate": 3.9116608726116364e-05, + "loss": 3.2028, + "step": 8090 + }, + { + "epoch": 0.65, + "learning_rate": 3.910987145282561e-05, + "loss": 3.1591, + "step": 8095 + }, + { + "epoch": 0.65, + "learning_rate": 3.910313417953486e-05, + "loss": 2.9486, + "step": 8100 + }, + { + "epoch": 0.66, + "learning_rate": 3.909639690624411e-05, + "loss": 3.0678, + "step": 8105 + }, + { + "epoch": 0.66, + "learning_rate": 3.908965963295335e-05, + "loss": 2.7782, + "step": 8110 + }, + { + "epoch": 0.66, + "learning_rate": 3.90829223596626e-05, + "loss": 3.3781, + "step": 8115 + }, + { + "epoch": 0.66, + "learning_rate": 3.9076185086371844e-05, + "loss": 3.2015, + "step": 8120 + }, + { + "epoch": 0.66, + "learning_rate": 3.9069447813081086e-05, + "loss": 3.2439, + "step": 8125 + }, + { + "epoch": 0.66, + "learning_rate": 3.9062710539790335e-05, + "loss": 2.8638, + "step": 8130 + }, + { + "epoch": 0.66, + "learning_rate": 3.9055973266499584e-05, + "loss": 2.9865, + "step": 8135 + }, + { + "epoch": 0.66, + "learning_rate": 3.904923599320883e-05, + "loss": 3.0172, + "step": 8140 + }, + { + "epoch": 0.66, + "learning_rate": 3.9042498719918075e-05, + "loss": 2.7773, + "step": 8145 + }, + { + "epoch": 0.66, + "learning_rate": 3.9035761446627324e-05, + "loss": 3.3144, + "step": 8150 + }, + { + "epoch": 0.66, + "learning_rate": 3.902902417333657e-05, + "loss": 3.3635, + "step": 8155 + }, + { + "epoch": 0.66, + "learning_rate": 3.902228690004582e-05, + "loss": 2.9169, + "step": 8160 + }, + { + "epoch": 0.66, + "learning_rate": 3.9015549626755063e-05, + "loss": 2.8164, + "step": 8165 + }, + { + "epoch": 0.66, + "learning_rate": 3.9008812353464306e-05, + "loss": 2.8467, + "step": 8170 + }, + { + "epoch": 0.66, + "learning_rate": 3.9002075080173554e-05, + "loss": 3.0674, + "step": 8175 + }, + { + "epoch": 0.66, + "learning_rate": 3.8995337806882797e-05, + "loss": 2.9448, + "step": 8180 + }, + { + "epoch": 0.66, + "learning_rate": 3.8988600533592045e-05, + "loss": 3.3653, + "step": 8185 + }, + { + "epoch": 0.66, + "learning_rate": 3.8981863260301294e-05, + "loss": 2.9465, + "step": 8190 + }, + { + "epoch": 0.66, + "learning_rate": 3.897512598701054e-05, + "loss": 2.9197, + "step": 8195 + }, + { + "epoch": 0.66, + "learning_rate": 3.8968388713719785e-05, + "loss": 3.0495, + "step": 8200 + }, + { + "epoch": 0.66, + "learning_rate": 3.8961651440429034e-05, + "loss": 3.104, + "step": 8205 + }, + { + "epoch": 0.66, + "learning_rate": 3.8954914167138276e-05, + "loss": 3.0136, + "step": 8210 + }, + { + "epoch": 0.66, + "learning_rate": 3.8948176893847525e-05, + "loss": 2.9737, + "step": 8215 + }, + { + "epoch": 0.66, + "learning_rate": 3.894143962055677e-05, + "loss": 3.0539, + "step": 8220 + }, + { + "epoch": 0.66, + "learning_rate": 3.8934702347266016e-05, + "loss": 3.0518, + "step": 8225 + }, + { + "epoch": 0.67, + "learning_rate": 3.8927965073975265e-05, + "loss": 3.2109, + "step": 8230 + }, + { + "epoch": 0.67, + "learning_rate": 3.892122780068451e-05, + "loss": 3.0303, + "step": 8235 + }, + { + "epoch": 0.67, + "learning_rate": 3.8914490527393756e-05, + "loss": 3.2467, + "step": 8240 + }, + { + "epoch": 0.67, + "learning_rate": 3.8907753254103005e-05, + "loss": 3.0201, + "step": 8245 + }, + { + "epoch": 0.67, + "learning_rate": 3.890101598081225e-05, + "loss": 3.052, + "step": 8250 + }, + { + "epoch": 0.67, + "learning_rate": 3.889427870752149e-05, + "loss": 3.2192, + "step": 8255 + }, + { + "epoch": 0.67, + "learning_rate": 3.888754143423074e-05, + "loss": 2.8097, + "step": 8260 + }, + { + "epoch": 0.67, + "learning_rate": 3.888080416093999e-05, + "loss": 2.9116, + "step": 8265 + }, + { + "epoch": 0.67, + "learning_rate": 3.887406688764923e-05, + "loss": 3.1297, + "step": 8270 + }, + { + "epoch": 0.67, + "learning_rate": 3.886732961435848e-05, + "loss": 3.2419, + "step": 8275 + }, + { + "epoch": 0.67, + "learning_rate": 3.8860592341067727e-05, + "loss": 2.9156, + "step": 8280 + }, + { + "epoch": 0.67, + "learning_rate": 3.8853855067776975e-05, + "loss": 3.1024, + "step": 8285 + }, + { + "epoch": 0.67, + "learning_rate": 3.884711779448622e-05, + "loss": 2.9807, + "step": 8290 + }, + { + "epoch": 0.67, + "learning_rate": 3.8840380521195466e-05, + "loss": 3.2677, + "step": 8295 + }, + { + "epoch": 0.67, + "learning_rate": 3.883364324790471e-05, + "loss": 3.0497, + "step": 8300 + }, + { + "epoch": 0.67, + "learning_rate": 3.882690597461396e-05, + "loss": 3.1933, + "step": 8305 + }, + { + "epoch": 0.67, + "learning_rate": 3.88201687013232e-05, + "loss": 2.9817, + "step": 8310 + }, + { + "epoch": 0.67, + "learning_rate": 3.881343142803245e-05, + "loss": 2.9614, + "step": 8315 + }, + { + "epoch": 0.67, + "learning_rate": 3.88066941547417e-05, + "loss": 3.2148, + "step": 8320 + }, + { + "epoch": 0.67, + "learning_rate": 3.879995688145094e-05, + "loss": 3.4115, + "step": 8325 + }, + { + "epoch": 0.67, + "learning_rate": 3.879321960816019e-05, + "loss": 3.109, + "step": 8330 + }, + { + "epoch": 0.67, + "learning_rate": 3.878648233486944e-05, + "loss": 3.3976, + "step": 8335 + }, + { + "epoch": 0.67, + "learning_rate": 3.877974506157868e-05, + "loss": 2.9871, + "step": 8340 + }, + { + "epoch": 0.67, + "learning_rate": 3.877300778828792e-05, + "loss": 3.0941, + "step": 8345 + }, + { + "epoch": 0.68, + "learning_rate": 3.876627051499717e-05, + "loss": 3.0284, + "step": 8350 + }, + { + "epoch": 0.68, + "learning_rate": 3.875953324170642e-05, + "loss": 2.8833, + "step": 8355 + }, + { + "epoch": 0.68, + "learning_rate": 3.875279596841567e-05, + "loss": 3.0523, + "step": 8360 + }, + { + "epoch": 0.68, + "learning_rate": 3.874605869512491e-05, + "loss": 3.09, + "step": 8365 + }, + { + "epoch": 0.68, + "learning_rate": 3.873932142183416e-05, + "loss": 3.1666, + "step": 8370 + }, + { + "epoch": 0.68, + "learning_rate": 3.873258414854341e-05, + "loss": 3.1402, + "step": 8375 + }, + { + "epoch": 0.68, + "learning_rate": 3.872584687525265e-05, + "loss": 3.0485, + "step": 8380 + }, + { + "epoch": 0.68, + "learning_rate": 3.871910960196189e-05, + "loss": 3.1616, + "step": 8385 + }, + { + "epoch": 0.68, + "learning_rate": 3.871237232867114e-05, + "loss": 2.9211, + "step": 8390 + }, + { + "epoch": 0.68, + "learning_rate": 3.870563505538039e-05, + "loss": 2.9857, + "step": 8395 + }, + { + "epoch": 0.68, + "learning_rate": 3.869889778208963e-05, + "loss": 3.1664, + "step": 8400 + }, + { + "epoch": 0.68, + "learning_rate": 3.869216050879888e-05, + "loss": 3.0174, + "step": 8405 + }, + { + "epoch": 0.68, + "learning_rate": 3.868542323550813e-05, + "loss": 3.0245, + "step": 8410 + }, + { + "epoch": 0.68, + "learning_rate": 3.867868596221738e-05, + "loss": 3.1736, + "step": 8415 + }, + { + "epoch": 0.68, + "learning_rate": 3.867194868892662e-05, + "loss": 3.1161, + "step": 8420 + }, + { + "epoch": 0.68, + "learning_rate": 3.866521141563586e-05, + "loss": 3.0927, + "step": 8425 + }, + { + "epoch": 0.68, + "learning_rate": 3.865847414234511e-05, + "loss": 3.0474, + "step": 8430 + }, + { + "epoch": 0.68, + "learning_rate": 3.8651736869054354e-05, + "loss": 2.7999, + "step": 8435 + }, + { + "epoch": 0.68, + "learning_rate": 3.86449995957636e-05, + "loss": 2.9191, + "step": 8440 + }, + { + "epoch": 0.68, + "learning_rate": 3.863826232247285e-05, + "loss": 2.9211, + "step": 8445 + }, + { + "epoch": 0.68, + "learning_rate": 3.86315250491821e-05, + "loss": 3.1388, + "step": 8450 + }, + { + "epoch": 0.68, + "learning_rate": 3.862478777589134e-05, + "loss": 3.2235, + "step": 8455 + }, + { + "epoch": 0.68, + "learning_rate": 3.861805050260059e-05, + "loss": 3.0092, + "step": 8460 + }, + { + "epoch": 0.68, + "learning_rate": 3.861131322930984e-05, + "loss": 3.0099, + "step": 8465 + }, + { + "epoch": 0.68, + "learning_rate": 3.860457595601908e-05, + "loss": 3.1131, + "step": 8470 + }, + { + "epoch": 0.69, + "learning_rate": 3.8597838682728324e-05, + "loss": 3.0014, + "step": 8475 + }, + { + "epoch": 0.69, + "learning_rate": 3.859110140943757e-05, + "loss": 3.2398, + "step": 8480 + }, + { + "epoch": 0.69, + "learning_rate": 3.858436413614682e-05, + "loss": 3.0938, + "step": 8485 + }, + { + "epoch": 0.69, + "learning_rate": 3.8577626862856064e-05, + "loss": 2.9129, + "step": 8490 + }, + { + "epoch": 0.69, + "learning_rate": 3.857088958956531e-05, + "loss": 3.3385, + "step": 8495 + }, + { + "epoch": 0.69, + "learning_rate": 3.856415231627456e-05, + "loss": 2.9213, + "step": 8500 + }, + { + "epoch": 0.69, + "learning_rate": 3.855741504298381e-05, + "loss": 3.254, + "step": 8505 + }, + { + "epoch": 0.69, + "learning_rate": 3.855067776969305e-05, + "loss": 3.041, + "step": 8510 + }, + { + "epoch": 0.69, + "learning_rate": 3.8543940496402295e-05, + "loss": 3.0149, + "step": 8515 + }, + { + "epoch": 0.69, + "learning_rate": 3.8537203223111544e-05, + "loss": 2.9648, + "step": 8520 + }, + { + "epoch": 0.69, + "learning_rate": 3.8530465949820786e-05, + "loss": 2.8249, + "step": 8525 + }, + { + "epoch": 0.69, + "learning_rate": 3.8523728676530035e-05, + "loss": 3.4272, + "step": 8530 + }, + { + "epoch": 0.69, + "learning_rate": 3.8516991403239284e-05, + "loss": 3.2086, + "step": 8535 + }, + { + "epoch": 0.69, + "learning_rate": 3.851025412994853e-05, + "loss": 3.3207, + "step": 8540 + }, + { + "epoch": 0.69, + "learning_rate": 3.8503516856657775e-05, + "loss": 3.0051, + "step": 8545 + }, + { + "epoch": 0.69, + "learning_rate": 3.849677958336702e-05, + "loss": 3.0208, + "step": 8550 + }, + { + "epoch": 0.69, + "learning_rate": 3.8490042310076265e-05, + "loss": 3.1673, + "step": 8555 + }, + { + "epoch": 0.69, + "learning_rate": 3.8483305036785514e-05, + "loss": 3.1774, + "step": 8560 + }, + { + "epoch": 0.69, + "learning_rate": 3.8476567763494756e-05, + "loss": 2.9722, + "step": 8565 + }, + { + "epoch": 0.69, + "learning_rate": 3.8469830490204005e-05, + "loss": 2.7325, + "step": 8570 + }, + { + "epoch": 0.69, + "learning_rate": 3.8463093216913254e-05, + "loss": 3.0779, + "step": 8575 + }, + { + "epoch": 0.69, + "learning_rate": 3.8456355943622496e-05, + "loss": 3.1809, + "step": 8580 + }, + { + "epoch": 0.69, + "learning_rate": 3.8449618670331745e-05, + "loss": 2.9728, + "step": 8585 + }, + { + "epoch": 0.69, + "learning_rate": 3.8442881397040994e-05, + "loss": 2.8935, + "step": 8590 + }, + { + "epoch": 0.69, + "learning_rate": 3.843614412375024e-05, + "loss": 3.0019, + "step": 8595 + }, + { + "epoch": 0.7, + "learning_rate": 3.8429406850459485e-05, + "loss": 3.1546, + "step": 8600 + }, + { + "epoch": 0.7, + "learning_rate": 3.842266957716873e-05, + "loss": 2.9821, + "step": 8605 + }, + { + "epoch": 0.7, + "learning_rate": 3.8415932303877976e-05, + "loss": 2.9484, + "step": 8610 + }, + { + "epoch": 0.7, + "learning_rate": 3.8409195030587225e-05, + "loss": 3.1934, + "step": 8615 + }, + { + "epoch": 0.7, + "learning_rate": 3.840245775729647e-05, + "loss": 3.2961, + "step": 8620 + }, + { + "epoch": 0.7, + "learning_rate": 3.8395720484005716e-05, + "loss": 3.0485, + "step": 8625 + }, + { + "epoch": 0.7, + "learning_rate": 3.8388983210714965e-05, + "loss": 3.0174, + "step": 8630 + }, + { + "epoch": 0.7, + "learning_rate": 3.838224593742421e-05, + "loss": 3.1566, + "step": 8635 + }, + { + "epoch": 0.7, + "learning_rate": 3.8375508664133456e-05, + "loss": 2.893, + "step": 8640 + }, + { + "epoch": 0.7, + "learning_rate": 3.83687713908427e-05, + "loss": 3.1131, + "step": 8645 + }, + { + "epoch": 0.7, + "learning_rate": 3.836203411755195e-05, + "loss": 2.8845, + "step": 8650 + }, + { + "epoch": 0.7, + "learning_rate": 3.835529684426119e-05, + "loss": 2.8261, + "step": 8655 + }, + { + "epoch": 0.7, + "learning_rate": 3.834855957097044e-05, + "loss": 3.0335, + "step": 8660 + }, + { + "epoch": 0.7, + "learning_rate": 3.8341822297679686e-05, + "loss": 3.0395, + "step": 8665 + }, + { + "epoch": 0.7, + "learning_rate": 3.8335085024388935e-05, + "loss": 3.2093, + "step": 8670 + }, + { + "epoch": 0.7, + "learning_rate": 3.832834775109818e-05, + "loss": 3.0218, + "step": 8675 + }, + { + "epoch": 0.7, + "learning_rate": 3.8321610477807426e-05, + "loss": 3.0037, + "step": 8680 + }, + { + "epoch": 0.7, + "learning_rate": 3.831487320451667e-05, + "loss": 3.0408, + "step": 8685 + }, + { + "epoch": 0.7, + "learning_rate": 3.830813593122591e-05, + "loss": 2.9611, + "step": 8690 + }, + { + "epoch": 0.7, + "learning_rate": 3.830139865793516e-05, + "loss": 3.2521, + "step": 8695 + }, + { + "epoch": 0.7, + "learning_rate": 3.829466138464441e-05, + "loss": 2.9996, + "step": 8700 + }, + { + "epoch": 0.7, + "learning_rate": 3.828792411135366e-05, + "loss": 3.1156, + "step": 8705 + }, + { + "epoch": 0.7, + "learning_rate": 3.82811868380629e-05, + "loss": 2.7829, + "step": 8710 + }, + { + "epoch": 0.7, + "learning_rate": 3.827444956477215e-05, + "loss": 2.9055, + "step": 8715 + }, + { + "epoch": 0.7, + "learning_rate": 3.82677122914814e-05, + "loss": 2.969, + "step": 8720 + }, + { + "epoch": 0.71, + "learning_rate": 3.826097501819064e-05, + "loss": 2.7114, + "step": 8725 + }, + { + "epoch": 0.71, + "learning_rate": 3.825423774489988e-05, + "loss": 2.9136, + "step": 8730 + }, + { + "epoch": 0.71, + "learning_rate": 3.824750047160913e-05, + "loss": 2.8986, + "step": 8735 + }, + { + "epoch": 0.71, + "learning_rate": 3.824076319831838e-05, + "loss": 3.0897, + "step": 8740 + }, + { + "epoch": 0.71, + "learning_rate": 3.823402592502762e-05, + "loss": 3.1301, + "step": 8745 + }, + { + "epoch": 0.71, + "learning_rate": 3.822728865173687e-05, + "loss": 2.9386, + "step": 8750 + }, + { + "epoch": 0.71, + "learning_rate": 3.822055137844612e-05, + "loss": 3.1028, + "step": 8755 + }, + { + "epoch": 0.71, + "learning_rate": 3.821381410515537e-05, + "loss": 2.7394, + "step": 8760 + }, + { + "epoch": 0.71, + "learning_rate": 3.820707683186461e-05, + "loss": 3.0343, + "step": 8765 + }, + { + "epoch": 0.71, + "learning_rate": 3.820033955857386e-05, + "loss": 3.0186, + "step": 8770 + }, + { + "epoch": 0.71, + "learning_rate": 3.81936022852831e-05, + "loss": 3.1434, + "step": 8775 + }, + { + "epoch": 0.71, + "learning_rate": 3.818686501199235e-05, + "loss": 3.1984, + "step": 8780 + }, + { + "epoch": 0.71, + "learning_rate": 3.818012773870159e-05, + "loss": 2.9959, + "step": 8785 + }, + { + "epoch": 0.71, + "learning_rate": 3.817339046541084e-05, + "loss": 2.9381, + "step": 8790 + }, + { + "epoch": 0.71, + "learning_rate": 3.816665319212009e-05, + "loss": 3.2456, + "step": 8795 + }, + { + "epoch": 0.71, + "learning_rate": 3.815991591882933e-05, + "loss": 3.0803, + "step": 8800 + }, + { + "epoch": 0.71, + "learning_rate": 3.815317864553858e-05, + "loss": 2.755, + "step": 8805 + }, + { + "epoch": 0.71, + "learning_rate": 3.814644137224783e-05, + "loss": 3.3534, + "step": 8810 + }, + { + "epoch": 0.71, + "learning_rate": 3.813970409895707e-05, + "loss": 3.0963, + "step": 8815 + }, + { + "epoch": 0.71, + "learning_rate": 3.8132966825666313e-05, + "loss": 2.9195, + "step": 8820 + }, + { + "epoch": 0.71, + "learning_rate": 3.812622955237556e-05, + "loss": 2.8593, + "step": 8825 + }, + { + "epoch": 0.71, + "learning_rate": 3.811949227908481e-05, + "loss": 2.8676, + "step": 8830 + }, + { + "epoch": 0.71, + "learning_rate": 3.811275500579405e-05, + "loss": 2.7496, + "step": 8835 + }, + { + "epoch": 0.71, + "learning_rate": 3.81060177325033e-05, + "loss": 3.2668, + "step": 8840 + }, + { + "epoch": 0.72, + "learning_rate": 3.809928045921255e-05, + "loss": 3.1165, + "step": 8845 + }, + { + "epoch": 0.72, + "learning_rate": 3.80925431859218e-05, + "loss": 3.0659, + "step": 8850 + }, + { + "epoch": 0.72, + "learning_rate": 3.808580591263104e-05, + "loss": 2.8842, + "step": 8855 + }, + { + "epoch": 0.72, + "learning_rate": 3.8079068639340284e-05, + "loss": 3.003, + "step": 8860 + }, + { + "epoch": 0.72, + "learning_rate": 3.807233136604953e-05, + "loss": 3.0949, + "step": 8865 + }, + { + "epoch": 0.72, + "learning_rate": 3.806559409275878e-05, + "loss": 2.9894, + "step": 8870 + }, + { + "epoch": 0.72, + "learning_rate": 3.8058856819468024e-05, + "loss": 2.8607, + "step": 8875 + }, + { + "epoch": 0.72, + "learning_rate": 3.805211954617727e-05, + "loss": 3.3431, + "step": 8880 + }, + { + "epoch": 0.72, + "learning_rate": 3.804538227288652e-05, + "loss": 3.0648, + "step": 8885 + }, + { + "epoch": 0.72, + "learning_rate": 3.8038644999595764e-05, + "loss": 3.2921, + "step": 8890 + }, + { + "epoch": 0.72, + "learning_rate": 3.803190772630501e-05, + "loss": 2.9446, + "step": 8895 + }, + { + "epoch": 0.72, + "learning_rate": 3.802517045301426e-05, + "loss": 3.1472, + "step": 8900 + }, + { + "epoch": 0.72, + "learning_rate": 3.8018433179723504e-05, + "loss": 3.2051, + "step": 8905 + }, + { + "epoch": 0.72, + "learning_rate": 3.8011695906432746e-05, + "loss": 3.4834, + "step": 8910 + }, + { + "epoch": 0.72, + "learning_rate": 3.8004958633141995e-05, + "loss": 3.0165, + "step": 8915 + }, + { + "epoch": 0.72, + "learning_rate": 3.7998221359851243e-05, + "loss": 3.028, + "step": 8920 + }, + { + "epoch": 0.72, + "learning_rate": 3.799148408656049e-05, + "loss": 3.0388, + "step": 8925 + }, + { + "epoch": 0.72, + "learning_rate": 3.7984746813269734e-05, + "loss": 2.9662, + "step": 8930 + }, + { + "epoch": 0.72, + "learning_rate": 3.797800953997898e-05, + "loss": 3.0036, + "step": 8935 + }, + { + "epoch": 0.72, + "learning_rate": 3.797127226668823e-05, + "loss": 2.9179, + "step": 8940 + }, + { + "epoch": 0.72, + "learning_rate": 3.7964534993397474e-05, + "loss": 2.934, + "step": 8945 + }, + { + "epoch": 0.72, + "learning_rate": 3.7957797720106716e-05, + "loss": 3.2011, + "step": 8950 + }, + { + "epoch": 0.72, + "learning_rate": 3.7951060446815965e-05, + "loss": 3.1329, + "step": 8955 + }, + { + "epoch": 0.72, + "learning_rate": 3.7944323173525214e-05, + "loss": 3.2528, + "step": 8960 + }, + { + "epoch": 0.72, + "learning_rate": 3.7937585900234456e-05, + "loss": 3.3252, + "step": 8965 + }, + { + "epoch": 0.73, + "learning_rate": 3.7930848626943705e-05, + "loss": 3.2545, + "step": 8970 + }, + { + "epoch": 0.73, + "learning_rate": 3.7924111353652954e-05, + "loss": 3.2199, + "step": 8975 + }, + { + "epoch": 0.73, + "learning_rate": 3.79173740803622e-05, + "loss": 2.9862, + "step": 8980 + }, + { + "epoch": 0.73, + "learning_rate": 3.7910636807071445e-05, + "loss": 3.0251, + "step": 8985 + }, + { + "epoch": 0.73, + "learning_rate": 3.790389953378069e-05, + "loss": 2.9251, + "step": 8990 + }, + { + "epoch": 0.73, + "learning_rate": 3.7897162260489936e-05, + "loss": 3.0674, + "step": 8995 + }, + { + "epoch": 0.73, + "learning_rate": 3.789042498719918e-05, + "loss": 3.0134, + "step": 9000 + }, + { + "epoch": 0.73, + "learning_rate": 3.788368771390843e-05, + "loss": 3.4247, + "step": 9005 + }, + { + "epoch": 0.73, + "learning_rate": 3.787829789527583e-05, + "loss": 3.4266, + "step": 9010 + }, + { + "epoch": 0.73, + "learning_rate": 3.787156062198507e-05, + "loss": 3.334, + "step": 9015 + }, + { + "epoch": 0.73, + "learning_rate": 3.786482334869432e-05, + "loss": 2.6583, + "step": 9020 + }, + { + "epoch": 0.73, + "learning_rate": 3.785808607540356e-05, + "loss": 2.9451, + "step": 9025 + }, + { + "epoch": 0.73, + "learning_rate": 3.785134880211281e-05, + "loss": 2.9675, + "step": 9030 + }, + { + "epoch": 0.73, + "learning_rate": 3.784461152882205e-05, + "loss": 3.0311, + "step": 9035 + }, + { + "epoch": 0.73, + "learning_rate": 3.78378742555313e-05, + "loss": 3.0225, + "step": 9040 + }, + { + "epoch": 0.73, + "learning_rate": 3.783113698224055e-05, + "loss": 3.4978, + "step": 9045 + }, + { + "epoch": 0.73, + "learning_rate": 3.78243997089498e-05, + "loss": 2.8633, + "step": 9050 + }, + { + "epoch": 0.73, + "learning_rate": 3.781766243565904e-05, + "loss": 3.1776, + "step": 9055 + }, + { + "epoch": 0.73, + "learning_rate": 3.781092516236829e-05, + "loss": 2.9358, + "step": 9060 + }, + { + "epoch": 0.73, + "learning_rate": 3.780418788907754e-05, + "loss": 2.9577, + "step": 9065 + }, + { + "epoch": 0.73, + "learning_rate": 3.779745061578678e-05, + "loss": 2.8513, + "step": 9070 + }, + { + "epoch": 0.73, + "learning_rate": 3.779071334249602e-05, + "loss": 2.9536, + "step": 9075 + }, + { + "epoch": 0.73, + "learning_rate": 3.778397606920527e-05, + "loss": 3.3119, + "step": 9080 + }, + { + "epoch": 0.73, + "learning_rate": 3.777723879591452e-05, + "loss": 3.1788, + "step": 9085 + }, + { + "epoch": 0.73, + "learning_rate": 3.777050152262376e-05, + "loss": 3.2256, + "step": 9090 + }, + { + "epoch": 0.74, + "learning_rate": 3.776376424933301e-05, + "loss": 3.4169, + "step": 9095 + }, + { + "epoch": 0.74, + "learning_rate": 3.775702697604226e-05, + "loss": 3.2875, + "step": 9100 + }, + { + "epoch": 0.74, + "learning_rate": 3.775028970275151e-05, + "loss": 2.7868, + "step": 9105 + }, + { + "epoch": 0.74, + "learning_rate": 3.774355242946075e-05, + "loss": 3.1224, + "step": 9110 + }, + { + "epoch": 0.74, + "learning_rate": 3.7736815156169994e-05, + "loss": 2.9381, + "step": 9115 + }, + { + "epoch": 0.74, + "learning_rate": 3.773007788287924e-05, + "loss": 2.9636, + "step": 9120 + }, + { + "epoch": 0.74, + "learning_rate": 3.7723340609588485e-05, + "loss": 3.1651, + "step": 9125 + }, + { + "epoch": 0.74, + "learning_rate": 3.7716603336297734e-05, + "loss": 3.1566, + "step": 9130 + }, + { + "epoch": 0.74, + "learning_rate": 3.770986606300698e-05, + "loss": 3.0716, + "step": 9135 + }, + { + "epoch": 0.74, + "learning_rate": 3.770312878971623e-05, + "loss": 2.9066, + "step": 9140 + }, + { + "epoch": 0.74, + "learning_rate": 3.7696391516425474e-05, + "loss": 3.0514, + "step": 9145 + }, + { + "epoch": 0.74, + "learning_rate": 3.768965424313472e-05, + "loss": 2.9148, + "step": 9150 + }, + { + "epoch": 0.74, + "learning_rate": 3.7682916969843965e-05, + "loss": 2.9026, + "step": 9155 + }, + { + "epoch": 0.74, + "learning_rate": 3.7676179696553214e-05, + "loss": 3.373, + "step": 9160 + }, + { + "epoch": 0.74, + "learning_rate": 3.7669442423262456e-05, + "loss": 3.0228, + "step": 9165 + }, + { + "epoch": 0.74, + "learning_rate": 3.7662705149971705e-05, + "loss": 2.9815, + "step": 9170 + }, + { + "epoch": 0.74, + "learning_rate": 3.7655967876680953e-05, + "loss": 2.9908, + "step": 9175 + }, + { + "epoch": 0.74, + "learning_rate": 3.7649230603390196e-05, + "loss": 3.0224, + "step": 9180 + }, + { + "epoch": 0.74, + "learning_rate": 3.7642493330099444e-05, + "loss": 3.5886, + "step": 9185 + }, + { + "epoch": 0.74, + "learning_rate": 3.763575605680869e-05, + "loss": 2.9071, + "step": 9190 + }, + { + "epoch": 0.74, + "learning_rate": 3.7629018783517935e-05, + "loss": 2.9337, + "step": 9195 + }, + { + "epoch": 0.74, + "learning_rate": 3.7622281510227184e-05, + "loss": 3.4955, + "step": 9200 + }, + { + "epoch": 0.74, + "learning_rate": 3.7615544236936426e-05, + "loss": 2.9738, + "step": 9205 + }, + { + "epoch": 0.74, + "learning_rate": 3.7608806963645675e-05, + "loss": 2.9454, + "step": 9210 + }, + { + "epoch": 0.74, + "learning_rate": 3.7602069690354924e-05, + "loss": 3.086, + "step": 9215 + }, + { + "epoch": 0.75, + "learning_rate": 3.7595332417064166e-05, + "loss": 3.4329, + "step": 9220 + }, + { + "epoch": 0.75, + "learning_rate": 3.7588595143773415e-05, + "loss": 3.1704, + "step": 9225 + }, + { + "epoch": 0.75, + "learning_rate": 3.7581857870482664e-05, + "loss": 3.0783, + "step": 9230 + }, + { + "epoch": 0.75, + "learning_rate": 3.7575120597191906e-05, + "loss": 2.9674, + "step": 9235 + }, + { + "epoch": 0.75, + "learning_rate": 3.7568383323901155e-05, + "loss": 3.1008, + "step": 9240 + }, + { + "epoch": 0.75, + "learning_rate": 3.75616460506104e-05, + "loss": 3.07, + "step": 9245 + }, + { + "epoch": 0.75, + "learning_rate": 3.7554908777319646e-05, + "loss": 3.3072, + "step": 9250 + }, + { + "epoch": 0.75, + "learning_rate": 3.754817150402889e-05, + "loss": 3.1095, + "step": 9255 + }, + { + "epoch": 0.75, + "learning_rate": 3.754143423073814e-05, + "loss": 3.0179, + "step": 9260 + }, + { + "epoch": 0.75, + "learning_rate": 3.7534696957447386e-05, + "loss": 2.901, + "step": 9265 + }, + { + "epoch": 0.75, + "learning_rate": 3.752795968415663e-05, + "loss": 2.9, + "step": 9270 + }, + { + "epoch": 0.75, + "learning_rate": 3.752122241086588e-05, + "loss": 3.0123, + "step": 9275 + }, + { + "epoch": 0.75, + "learning_rate": 3.7514485137575126e-05, + "loss": 3.2678, + "step": 9280 + }, + { + "epoch": 0.75, + "learning_rate": 3.750774786428437e-05, + "loss": 2.8679, + "step": 9285 + }, + { + "epoch": 0.75, + "learning_rate": 3.750101059099361e-05, + "loss": 2.9353, + "step": 9290 + }, + { + "epoch": 0.75, + "learning_rate": 3.749427331770286e-05, + "loss": 2.8873, + "step": 9295 + }, + { + "epoch": 0.75, + "learning_rate": 3.748753604441211e-05, + "loss": 3.2057, + "step": 9300 + }, + { + "epoch": 0.75, + "learning_rate": 3.7480798771121356e-05, + "loss": 3.1522, + "step": 9305 + }, + { + "epoch": 0.75, + "learning_rate": 3.74740614978306e-05, + "loss": 3.1599, + "step": 9310 + }, + { + "epoch": 0.75, + "learning_rate": 3.746732422453985e-05, + "loss": 2.9306, + "step": 9315 + }, + { + "epoch": 0.75, + "learning_rate": 3.7460586951249096e-05, + "loss": 3.013, + "step": 9320 + }, + { + "epoch": 0.75, + "learning_rate": 3.745384967795834e-05, + "loss": 3.0665, + "step": 9325 + }, + { + "epoch": 0.75, + "learning_rate": 3.744711240466758e-05, + "loss": 3.2568, + "step": 9330 + }, + { + "epoch": 0.75, + "learning_rate": 3.744037513137683e-05, + "loss": 2.9778, + "step": 9335 + }, + { + "epoch": 0.76, + "learning_rate": 3.743363785808608e-05, + "loss": 3.2069, + "step": 9340 + }, + { + "epoch": 0.76, + "learning_rate": 3.742690058479532e-05, + "loss": 3.0228, + "step": 9345 + }, + { + "epoch": 0.76, + "learning_rate": 3.742016331150457e-05, + "loss": 2.8045, + "step": 9350 + }, + { + "epoch": 0.76, + "learning_rate": 3.741342603821382e-05, + "loss": 3.0413, + "step": 9355 + }, + { + "epoch": 0.76, + "learning_rate": 3.740668876492307e-05, + "loss": 2.8799, + "step": 9360 + }, + { + "epoch": 0.76, + "learning_rate": 3.739995149163231e-05, + "loss": 3.2395, + "step": 9365 + }, + { + "epoch": 0.76, + "learning_rate": 3.739321421834156e-05, + "loss": 3.055, + "step": 9370 + }, + { + "epoch": 0.76, + "learning_rate": 3.73864769450508e-05, + "loss": 3.046, + "step": 9375 + }, + { + "epoch": 0.76, + "learning_rate": 3.737973967176004e-05, + "loss": 2.7768, + "step": 9380 + }, + { + "epoch": 0.76, + "learning_rate": 3.737300239846929e-05, + "loss": 3.4218, + "step": 9385 + }, + { + "epoch": 0.76, + "learning_rate": 3.736626512517854e-05, + "loss": 2.961, + "step": 9390 + }, + { + "epoch": 0.76, + "learning_rate": 3.735952785188779e-05, + "loss": 3.1829, + "step": 9395 + }, + { + "epoch": 0.76, + "learning_rate": 3.735279057859703e-05, + "loss": 3.1464, + "step": 9400 + }, + { + "epoch": 0.76, + "learning_rate": 3.734605330530628e-05, + "loss": 3.0201, + "step": 9405 + }, + { + "epoch": 0.76, + "learning_rate": 3.733931603201553e-05, + "loss": 2.8433, + "step": 9410 + }, + { + "epoch": 0.76, + "learning_rate": 3.733257875872477e-05, + "loss": 2.9373, + "step": 9415 + }, + { + "epoch": 0.76, + "learning_rate": 3.732584148543401e-05, + "loss": 3.1014, + "step": 9420 + }, + { + "epoch": 0.76, + "learning_rate": 3.731910421214326e-05, + "loss": 2.9788, + "step": 9425 + }, + { + "epoch": 0.76, + "learning_rate": 3.731236693885251e-05, + "loss": 3.1346, + "step": 9430 + }, + { + "epoch": 0.76, + "learning_rate": 3.730562966556175e-05, + "loss": 3.0174, + "step": 9435 + }, + { + "epoch": 0.76, + "learning_rate": 3.7298892392271e-05, + "loss": 3.1674, + "step": 9440 + }, + { + "epoch": 0.76, + "learning_rate": 3.729215511898025e-05, + "loss": 2.9165, + "step": 9445 + }, + { + "epoch": 0.76, + "learning_rate": 3.72854178456895e-05, + "loss": 2.9635, + "step": 9450 + }, + { + "epoch": 0.76, + "learning_rate": 3.727868057239874e-05, + "loss": 3.3654, + "step": 9455 + }, + { + "epoch": 0.76, + "learning_rate": 3.727194329910798e-05, + "loss": 2.9462, + "step": 9460 + }, + { + "epoch": 0.77, + "learning_rate": 3.726520602581723e-05, + "loss": 3.0271, + "step": 9465 + }, + { + "epoch": 0.77, + "learning_rate": 3.725846875252648e-05, + "loss": 2.9914, + "step": 9470 + }, + { + "epoch": 0.77, + "learning_rate": 3.725173147923572e-05, + "loss": 3.0899, + "step": 9475 + }, + { + "epoch": 0.77, + "learning_rate": 3.724499420594497e-05, + "loss": 3.2328, + "step": 9480 + }, + { + "epoch": 0.77, + "learning_rate": 3.723825693265422e-05, + "loss": 2.9155, + "step": 9485 + }, + { + "epoch": 0.77, + "learning_rate": 3.723151965936346e-05, + "loss": 3.0138, + "step": 9490 + }, + { + "epoch": 0.77, + "learning_rate": 3.722478238607271e-05, + "loss": 3.0283, + "step": 9495 + }, + { + "epoch": 0.77, + "learning_rate": 3.721804511278196e-05, + "loss": 3.021, + "step": 9500 + }, + { + "epoch": 0.77, + "learning_rate": 3.72113078394912e-05, + "loss": 3.3555, + "step": 9505 + }, + { + "epoch": 0.77, + "learning_rate": 3.7204570566200445e-05, + "loss": 2.9653, + "step": 9510 + }, + { + "epoch": 0.77, + "learning_rate": 3.7197833292909694e-05, + "loss": 2.8215, + "step": 9515 + }, + { + "epoch": 0.77, + "learning_rate": 3.719109601961894e-05, + "loss": 2.9035, + "step": 9520 + }, + { + "epoch": 0.77, + "learning_rate": 3.718435874632819e-05, + "loss": 3.1791, + "step": 9525 + }, + { + "epoch": 0.77, + "learning_rate": 3.7177621473037434e-05, + "loss": 3.0717, + "step": 9530 + }, + { + "epoch": 0.77, + "learning_rate": 3.717088419974668e-05, + "loss": 3.1412, + "step": 9535 + }, + { + "epoch": 0.77, + "learning_rate": 3.716414692645593e-05, + "loss": 2.8527, + "step": 9540 + }, + { + "epoch": 0.77, + "learning_rate": 3.7157409653165173e-05, + "loss": 3.1082, + "step": 9545 + }, + { + "epoch": 0.77, + "learning_rate": 3.7150672379874416e-05, + "loss": 3.0959, + "step": 9550 + }, + { + "epoch": 0.77, + "learning_rate": 3.7143935106583664e-05, + "loss": 2.8961, + "step": 9555 + }, + { + "epoch": 0.77, + "learning_rate": 3.713719783329291e-05, + "loss": 3.0879, + "step": 9560 + }, + { + "epoch": 0.77, + "learning_rate": 3.7130460560002155e-05, + "loss": 3.0639, + "step": 9565 + }, + { + "epoch": 0.77, + "learning_rate": 3.7123723286711404e-05, + "loss": 2.9382, + "step": 9570 + }, + { + "epoch": 0.77, + "learning_rate": 3.711698601342065e-05, + "loss": 2.9074, + "step": 9575 + }, + { + "epoch": 0.77, + "learning_rate": 3.7110248740129895e-05, + "loss": 3.1172, + "step": 9580 + }, + { + "epoch": 0.77, + "learning_rate": 3.7103511466839144e-05, + "loss": 2.6656, + "step": 9585 + }, + { + "epoch": 0.78, + "learning_rate": 3.7096774193548386e-05, + "loss": 2.9794, + "step": 9590 + }, + { + "epoch": 0.78, + "learning_rate": 3.7090036920257635e-05, + "loss": 3.0121, + "step": 9595 + }, + { + "epoch": 0.78, + "learning_rate": 3.708329964696688e-05, + "loss": 2.9782, + "step": 9600 + }, + { + "epoch": 0.78, + "learning_rate": 3.7076562373676126e-05, + "loss": 3.3433, + "step": 9605 + }, + { + "epoch": 0.78, + "learning_rate": 3.7069825100385375e-05, + "loss": 2.7182, + "step": 9610 + }, + { + "epoch": 0.78, + "learning_rate": 3.7063087827094624e-05, + "loss": 2.9858, + "step": 9615 + }, + { + "epoch": 0.78, + "learning_rate": 3.7056350553803866e-05, + "loss": 3.4233, + "step": 9620 + }, + { + "epoch": 0.78, + "learning_rate": 3.7049613280513115e-05, + "loss": 3.2665, + "step": 9625 + }, + { + "epoch": 0.78, + "learning_rate": 3.704287600722236e-05, + "loss": 2.9364, + "step": 9630 + }, + { + "epoch": 0.78, + "learning_rate": 3.70361387339316e-05, + "loss": 3.0708, + "step": 9635 + }, + { + "epoch": 0.78, + "learning_rate": 3.702940146064085e-05, + "loss": 3.078, + "step": 9640 + }, + { + "epoch": 0.78, + "learning_rate": 3.70226641873501e-05, + "loss": 2.9575, + "step": 9645 + }, + { + "epoch": 0.78, + "learning_rate": 3.7015926914059346e-05, + "loss": 3.0181, + "step": 9650 + }, + { + "epoch": 0.78, + "learning_rate": 3.700918964076859e-05, + "loss": 2.7718, + "step": 9655 + }, + { + "epoch": 0.78, + "learning_rate": 3.7002452367477837e-05, + "loss": 3.2755, + "step": 9660 + }, + { + "epoch": 0.78, + "learning_rate": 3.6995715094187085e-05, + "loss": 3.126, + "step": 9665 + }, + { + "epoch": 0.78, + "learning_rate": 3.6988977820896334e-05, + "loss": 3.04, + "step": 9670 + }, + { + "epoch": 0.78, + "learning_rate": 3.6982240547605576e-05, + "loss": 3.1031, + "step": 9675 + }, + { + "epoch": 0.78, + "learning_rate": 3.697550327431482e-05, + "loss": 3.0017, + "step": 9680 + }, + { + "epoch": 0.78, + "learning_rate": 3.697011345568222e-05, + "loss": 3.3596, + "step": 9685 + }, + { + "epoch": 0.78, + "learning_rate": 3.696337618239146e-05, + "loss": 3.1731, + "step": 9690 + }, + { + "epoch": 0.78, + "learning_rate": 3.695663890910071e-05, + "loss": 2.9105, + "step": 9695 + }, + { + "epoch": 0.78, + "learning_rate": 3.694990163580996e-05, + "loss": 3.0292, + "step": 9700 + }, + { + "epoch": 0.78, + "learning_rate": 3.69431643625192e-05, + "loss": 3.1361, + "step": 9705 + }, + { + "epoch": 0.78, + "learning_rate": 3.693642708922845e-05, + "loss": 3.1294, + "step": 9710 + }, + { + "epoch": 0.79, + "learning_rate": 3.692968981593769e-05, + "loss": 3.1243, + "step": 9715 + }, + { + "epoch": 0.79, + "learning_rate": 3.692295254264694e-05, + "loss": 3.0418, + "step": 9720 + }, + { + "epoch": 0.79, + "learning_rate": 3.6916215269356184e-05, + "loss": 3.1548, + "step": 9725 + }, + { + "epoch": 0.79, + "learning_rate": 3.690947799606543e-05, + "loss": 2.8778, + "step": 9730 + }, + { + "epoch": 0.79, + "learning_rate": 3.690274072277468e-05, + "loss": 3.1158, + "step": 9735 + }, + { + "epoch": 0.79, + "learning_rate": 3.689600344948393e-05, + "loss": 2.9114, + "step": 9740 + }, + { + "epoch": 0.79, + "learning_rate": 3.688926617619317e-05, + "loss": 3.0476, + "step": 9745 + }, + { + "epoch": 0.79, + "learning_rate": 3.688252890290242e-05, + "loss": 3.1116, + "step": 9750 + }, + { + "epoch": 0.79, + "learning_rate": 3.6875791629611664e-05, + "loss": 3.1288, + "step": 9755 + }, + { + "epoch": 0.79, + "learning_rate": 3.6869054356320906e-05, + "loss": 3.2127, + "step": 9760 + }, + { + "epoch": 0.79, + "learning_rate": 3.6862317083030155e-05, + "loss": 3.1348, + "step": 9765 + }, + { + "epoch": 0.79, + "learning_rate": 3.6855579809739404e-05, + "loss": 3.189, + "step": 9770 + }, + { + "epoch": 0.79, + "learning_rate": 3.684884253644865e-05, + "loss": 2.9306, + "step": 9775 + }, + { + "epoch": 0.79, + "learning_rate": 3.6842105263157895e-05, + "loss": 3.0042, + "step": 9780 + }, + { + "epoch": 0.79, + "learning_rate": 3.6835367989867144e-05, + "loss": 2.8858, + "step": 9785 + }, + { + "epoch": 0.79, + "learning_rate": 3.682863071657639e-05, + "loss": 3.2717, + "step": 9790 + }, + { + "epoch": 0.79, + "learning_rate": 3.6821893443285635e-05, + "loss": 3.0997, + "step": 9795 + }, + { + "epoch": 0.79, + "learning_rate": 3.681515616999488e-05, + "loss": 3.1203, + "step": 9800 + }, + { + "epoch": 0.79, + "learning_rate": 3.6808418896704126e-05, + "loss": 3.0215, + "step": 9805 + }, + { + "epoch": 0.79, + "learning_rate": 3.6801681623413374e-05, + "loss": 3.1261, + "step": 9810 + }, + { + "epoch": 0.79, + "learning_rate": 3.6794944350122617e-05, + "loss": 3.4258, + "step": 9815 + }, + { + "epoch": 0.79, + "learning_rate": 3.6788207076831865e-05, + "loss": 2.8897, + "step": 9820 + }, + { + "epoch": 0.79, + "learning_rate": 3.6781469803541114e-05, + "loss": 2.9942, + "step": 9825 + }, + { + "epoch": 0.79, + "learning_rate": 3.677473253025036e-05, + "loss": 2.9707, + "step": 9830 + }, + { + "epoch": 0.8, + "learning_rate": 3.6767995256959605e-05, + "loss": 3.0694, + "step": 9835 + }, + { + "epoch": 0.8, + "learning_rate": 3.6761257983668854e-05, + "loss": 2.8014, + "step": 9840 + }, + { + "epoch": 0.8, + "learning_rate": 3.6754520710378096e-05, + "loss": 3.0555, + "step": 9845 + }, + { + "epoch": 0.8, + "learning_rate": 3.6747783437087345e-05, + "loss": 2.8841, + "step": 9850 + }, + { + "epoch": 0.8, + "learning_rate": 3.674104616379659e-05, + "loss": 3.1467, + "step": 9855 + }, + { + "epoch": 0.8, + "learning_rate": 3.6734308890505836e-05, + "loss": 2.8715, + "step": 9860 + }, + { + "epoch": 0.8, + "learning_rate": 3.6727571617215085e-05, + "loss": 3.08, + "step": 9865 + }, + { + "epoch": 0.8, + "learning_rate": 3.672083434392433e-05, + "loss": 3.231, + "step": 9870 + }, + { + "epoch": 0.8, + "learning_rate": 3.6714097070633576e-05, + "loss": 3.0236, + "step": 9875 + }, + { + "epoch": 0.8, + "learning_rate": 3.6707359797342825e-05, + "loss": 3.2113, + "step": 9880 + }, + { + "epoch": 0.8, + "learning_rate": 3.670062252405207e-05, + "loss": 3.0357, + "step": 9885 + }, + { + "epoch": 0.8, + "learning_rate": 3.669388525076131e-05, + "loss": 3.2755, + "step": 9890 + }, + { + "epoch": 0.8, + "learning_rate": 3.668714797747056e-05, + "loss": 2.7913, + "step": 9895 + }, + { + "epoch": 0.8, + "learning_rate": 3.668041070417981e-05, + "loss": 2.9929, + "step": 9900 + }, + { + "epoch": 0.8, + "learning_rate": 3.6673673430889056e-05, + "loss": 3.2487, + "step": 9905 + }, + { + "epoch": 0.8, + "learning_rate": 3.66669361575983e-05, + "loss": 3.0298, + "step": 9910 + }, + { + "epoch": 0.8, + "learning_rate": 3.6660198884307547e-05, + "loss": 2.9926, + "step": 9915 + }, + { + "epoch": 0.8, + "learning_rate": 3.6653461611016795e-05, + "loss": 3.0167, + "step": 9920 + }, + { + "epoch": 0.8, + "learning_rate": 3.664672433772604e-05, + "loss": 2.9078, + "step": 9925 + }, + { + "epoch": 0.8, + "learning_rate": 3.663998706443528e-05, + "loss": 2.8785, + "step": 9930 + }, + { + "epoch": 0.8, + "learning_rate": 3.663324979114453e-05, + "loss": 3.0155, + "step": 9935 + }, + { + "epoch": 0.8, + "learning_rate": 3.662651251785378e-05, + "loss": 2.9471, + "step": 9940 + }, + { + "epoch": 0.8, + "learning_rate": 3.661977524456302e-05, + "loss": 2.8238, + "step": 9945 + }, + { + "epoch": 0.8, + "learning_rate": 3.661303797127227e-05, + "loss": 2.7744, + "step": 9950 + }, + { + "epoch": 0.8, + "learning_rate": 3.660630069798152e-05, + "loss": 3.2174, + "step": 9955 + }, + { + "epoch": 0.81, + "learning_rate": 3.659956342469076e-05, + "loss": 3.0096, + "step": 9960 + }, + { + "epoch": 0.81, + "learning_rate": 3.659282615140001e-05, + "loss": 2.9241, + "step": 9965 + }, + { + "epoch": 0.81, + "learning_rate": 3.658608887810926e-05, + "loss": 2.8246, + "step": 9970 + }, + { + "epoch": 0.81, + "learning_rate": 3.65793516048185e-05, + "loss": 3.0677, + "step": 9975 + }, + { + "epoch": 0.81, + "learning_rate": 3.657261433152774e-05, + "loss": 3.3918, + "step": 9980 + }, + { + "epoch": 0.81, + "learning_rate": 3.656587705823699e-05, + "loss": 3.0951, + "step": 9985 + }, + { + "epoch": 0.81, + "learning_rate": 3.655913978494624e-05, + "loss": 3.2887, + "step": 9990 + }, + { + "epoch": 0.81, + "learning_rate": 3.6553749966313634e-05, + "loss": 3.2198, + "step": 9995 + }, + { + "epoch": 0.81, + "learning_rate": 3.654701269302288e-05, + "loss": 3.1275, + "step": 10000 + }, + { + "epoch": 0.81, + "learning_rate": 3.654027541973213e-05, + "loss": 2.8183, + "step": 10005 + }, + { + "epoch": 0.81, + "learning_rate": 3.6533538146441374e-05, + "loss": 3.2344, + "step": 10010 + }, + { + "epoch": 0.81, + "learning_rate": 3.6526800873150616e-05, + "loss": 3.0728, + "step": 10015 + }, + { + "epoch": 0.81, + "learning_rate": 3.6520063599859865e-05, + "loss": 3.1439, + "step": 10020 + }, + { + "epoch": 0.81, + "learning_rate": 3.6513326326569114e-05, + "loss": 2.9468, + "step": 10025 + }, + { + "epoch": 0.81, + "learning_rate": 3.650658905327836e-05, + "loss": 3.0711, + "step": 10030 + }, + { + "epoch": 0.81, + "learning_rate": 3.6499851779987605e-05, + "loss": 2.9164, + "step": 10035 + }, + { + "epoch": 0.81, + "learning_rate": 3.6493114506696854e-05, + "loss": 3.1862, + "step": 10040 + }, + { + "epoch": 0.81, + "learning_rate": 3.64863772334061e-05, + "loss": 2.7662, + "step": 10045 + }, + { + "epoch": 0.81, + "learning_rate": 3.6479639960115344e-05, + "loss": 2.7516, + "step": 10050 + }, + { + "epoch": 0.81, + "learning_rate": 3.6472902686824587e-05, + "loss": 3.0381, + "step": 10055 + }, + { + "epoch": 0.81, + "learning_rate": 3.6466165413533835e-05, + "loss": 2.9114, + "step": 10060 + }, + { + "epoch": 0.81, + "learning_rate": 3.6459428140243084e-05, + "loss": 3.1425, + "step": 10065 + }, + { + "epoch": 0.81, + "learning_rate": 3.6452690866952326e-05, + "loss": 3.1069, + "step": 10070 + }, + { + "epoch": 0.81, + "learning_rate": 3.6445953593661575e-05, + "loss": 3.1094, + "step": 10075 + }, + { + "epoch": 0.81, + "learning_rate": 3.6439216320370824e-05, + "loss": 3.1806, + "step": 10080 + }, + { + "epoch": 0.82, + "learning_rate": 3.6432479047080066e-05, + "loss": 3.2412, + "step": 10085 + }, + { + "epoch": 0.82, + "learning_rate": 3.6425741773789315e-05, + "loss": 2.6815, + "step": 10090 + }, + { + "epoch": 0.82, + "learning_rate": 3.641900450049856e-05, + "loss": 2.9824, + "step": 10095 + }, + { + "epoch": 0.82, + "learning_rate": 3.6412267227207806e-05, + "loss": 3.0131, + "step": 10100 + }, + { + "epoch": 0.82, + "learning_rate": 3.640552995391705e-05, + "loss": 3.2256, + "step": 10105 + }, + { + "epoch": 0.82, + "learning_rate": 3.63987926806263e-05, + "loss": 3.0638, + "step": 10110 + }, + { + "epoch": 0.82, + "learning_rate": 3.6392055407335546e-05, + "loss": 2.9276, + "step": 10115 + }, + { + "epoch": 0.82, + "learning_rate": 3.6385318134044795e-05, + "loss": 3.1751, + "step": 10120 + }, + { + "epoch": 0.82, + "learning_rate": 3.637858086075404e-05, + "loss": 3.1735, + "step": 10125 + }, + { + "epoch": 0.82, + "learning_rate": 3.6371843587463286e-05, + "loss": 2.7972, + "step": 10130 + }, + { + "epoch": 0.82, + "learning_rate": 3.636510631417253e-05, + "loss": 2.9994, + "step": 10135 + }, + { + "epoch": 0.82, + "learning_rate": 3.635836904088178e-05, + "loss": 2.9632, + "step": 10140 + }, + { + "epoch": 0.82, + "learning_rate": 3.635163176759102e-05, + "loss": 2.9621, + "step": 10145 + }, + { + "epoch": 0.82, + "learning_rate": 3.634489449430027e-05, + "loss": 2.826, + "step": 10150 + }, + { + "epoch": 0.82, + "learning_rate": 3.633815722100952e-05, + "loss": 3.0373, + "step": 10155 + }, + { + "epoch": 0.82, + "learning_rate": 3.633141994771876e-05, + "loss": 2.8925, + "step": 10160 + }, + { + "epoch": 0.82, + "learning_rate": 3.632468267442801e-05, + "loss": 3.0813, + "step": 10165 + }, + { + "epoch": 0.82, + "learning_rate": 3.6317945401137256e-05, + "loss": 2.8686, + "step": 10170 + }, + { + "epoch": 0.82, + "learning_rate": 3.6311208127846505e-05, + "loss": 3.0871, + "step": 10175 + }, + { + "epoch": 0.82, + "learning_rate": 3.630447085455575e-05, + "loss": 2.9655, + "step": 10180 + }, + { + "epoch": 0.82, + "learning_rate": 3.629773358126499e-05, + "loss": 2.9345, + "step": 10185 + }, + { + "epoch": 0.82, + "learning_rate": 3.629099630797424e-05, + "loss": 3.0776, + "step": 10190 + }, + { + "epoch": 0.82, + "learning_rate": 3.628425903468348e-05, + "loss": 3.0574, + "step": 10195 + }, + { + "epoch": 0.82, + "learning_rate": 3.627752176139273e-05, + "loss": 2.9316, + "step": 10200 + }, + { + "epoch": 0.83, + "learning_rate": 3.627078448810198e-05, + "loss": 2.8688, + "step": 10205 + }, + { + "epoch": 0.83, + "learning_rate": 3.626404721481123e-05, + "loss": 2.9406, + "step": 10210 + }, + { + "epoch": 0.83, + "learning_rate": 3.625730994152047e-05, + "loss": 3.0147, + "step": 10215 + }, + { + "epoch": 0.83, + "learning_rate": 3.625057266822972e-05, + "loss": 3.0162, + "step": 10220 + }, + { + "epoch": 0.83, + "learning_rate": 3.624383539493896e-05, + "loss": 2.9309, + "step": 10225 + }, + { + "epoch": 0.83, + "learning_rate": 3.623709812164821e-05, + "loss": 3.2362, + "step": 10230 + }, + { + "epoch": 0.83, + "learning_rate": 3.623036084835745e-05, + "loss": 3.1789, + "step": 10235 + }, + { + "epoch": 0.83, + "learning_rate": 3.62236235750667e-05, + "loss": 2.7308, + "step": 10240 + }, + { + "epoch": 0.83, + "learning_rate": 3.621688630177595e-05, + "loss": 2.8587, + "step": 10245 + }, + { + "epoch": 0.83, + "learning_rate": 3.621014902848519e-05, + "loss": 2.9517, + "step": 10250 + }, + { + "epoch": 0.83, + "learning_rate": 3.620341175519444e-05, + "loss": 2.9824, + "step": 10255 + }, + { + "epoch": 0.83, + "learning_rate": 3.619667448190369e-05, + "loss": 3.0932, + "step": 10260 + }, + { + "epoch": 0.83, + "learning_rate": 3.618993720861293e-05, + "loss": 3.3439, + "step": 10265 + }, + { + "epoch": 0.83, + "learning_rate": 3.618319993532217e-05, + "loss": 3.1795, + "step": 10270 + }, + { + "epoch": 0.83, + "learning_rate": 3.617646266203142e-05, + "loss": 3.0621, + "step": 10275 + }, + { + "epoch": 0.83, + "learning_rate": 3.616972538874067e-05, + "loss": 2.8596, + "step": 10280 + }, + { + "epoch": 0.83, + "learning_rate": 3.616298811544992e-05, + "loss": 3.0604, + "step": 10285 + }, + { + "epoch": 0.83, + "learning_rate": 3.615625084215916e-05, + "loss": 3.0342, + "step": 10290 + }, + { + "epoch": 0.83, + "learning_rate": 3.614951356886841e-05, + "loss": 2.9337, + "step": 10295 + }, + { + "epoch": 0.83, + "learning_rate": 3.614277629557766e-05, + "loss": 3.0446, + "step": 10300 + }, + { + "epoch": 0.83, + "learning_rate": 3.61360390222869e-05, + "loss": 2.8938, + "step": 10305 + }, + { + "epoch": 0.83, + "learning_rate": 3.612930174899615e-05, + "loss": 2.8491, + "step": 10310 + }, + { + "epoch": 0.83, + "learning_rate": 3.612256447570539e-05, + "loss": 2.9921, + "step": 10315 + }, + { + "epoch": 0.83, + "learning_rate": 3.611582720241464e-05, + "loss": 3.0225, + "step": 10320 + }, + { + "epoch": 0.83, + "learning_rate": 3.6109089929123883e-05, + "loss": 2.9918, + "step": 10325 + }, + { + "epoch": 0.84, + "learning_rate": 3.610235265583313e-05, + "loss": 3.1685, + "step": 10330 + }, + { + "epoch": 0.84, + "learning_rate": 3.609561538254238e-05, + "loss": 3.1366, + "step": 10335 + }, + { + "epoch": 0.84, + "learning_rate": 3.608887810925163e-05, + "loss": 3.1979, + "step": 10340 + }, + { + "epoch": 0.84, + "learning_rate": 3.608214083596087e-05, + "loss": 3.2472, + "step": 10345 + }, + { + "epoch": 0.84, + "learning_rate": 3.607540356267012e-05, + "loss": 3.0492, + "step": 10350 + }, + { + "epoch": 0.84, + "learning_rate": 3.606866628937936e-05, + "loss": 3.0889, + "step": 10355 + }, + { + "epoch": 0.84, + "learning_rate": 3.6061929016088605e-05, + "loss": 3.1712, + "step": 10360 + }, + { + "epoch": 0.84, + "learning_rate": 3.6055191742797854e-05, + "loss": 3.1994, + "step": 10365 + }, + { + "epoch": 0.84, + "learning_rate": 3.60484544695071e-05, + "loss": 2.9661, + "step": 10370 + }, + { + "epoch": 0.84, + "learning_rate": 3.604171719621635e-05, + "loss": 3.0717, + "step": 10375 + }, + { + "epoch": 0.84, + "learning_rate": 3.6034979922925594e-05, + "loss": 2.963, + "step": 10380 + }, + { + "epoch": 0.84, + "learning_rate": 3.602824264963484e-05, + "loss": 2.9531, + "step": 10385 + }, + { + "epoch": 0.84, + "learning_rate": 3.602150537634409e-05, + "loss": 2.7738, + "step": 10390 + }, + { + "epoch": 0.84, + "learning_rate": 3.6014768103053334e-05, + "loss": 3.2369, + "step": 10395 + }, + { + "epoch": 0.84, + "learning_rate": 3.6008030829762576e-05, + "loss": 3.0199, + "step": 10400 + }, + { + "epoch": 0.84, + "learning_rate": 3.6001293556471825e-05, + "loss": 2.9024, + "step": 10405 + }, + { + "epoch": 0.84, + "learning_rate": 3.5994556283181074e-05, + "loss": 3.4314, + "step": 10410 + }, + { + "epoch": 0.84, + "learning_rate": 3.5987819009890316e-05, + "loss": 2.9359, + "step": 10415 + }, + { + "epoch": 0.84, + "learning_rate": 3.5981081736599565e-05, + "loss": 3.0674, + "step": 10420 + }, + { + "epoch": 0.84, + "learning_rate": 3.5974344463308813e-05, + "loss": 2.8982, + "step": 10425 + }, + { + "epoch": 0.84, + "learning_rate": 3.596760719001806e-05, + "loss": 3.1686, + "step": 10430 + }, + { + "epoch": 0.84, + "learning_rate": 3.5960869916727304e-05, + "loss": 2.855, + "step": 10435 + }, + { + "epoch": 0.84, + "learning_rate": 3.595413264343655e-05, + "loss": 2.9604, + "step": 10440 + }, + { + "epoch": 0.84, + "learning_rate": 3.5947395370145795e-05, + "loss": 2.9602, + "step": 10445 + }, + { + "epoch": 0.84, + "learning_rate": 3.594065809685504e-05, + "loss": 3.1929, + "step": 10450 + }, + { + "epoch": 0.85, + "learning_rate": 3.5933920823564286e-05, + "loss": 3.1893, + "step": 10455 + }, + { + "epoch": 0.85, + "learning_rate": 3.5927183550273535e-05, + "loss": 3.0879, + "step": 10460 + }, + { + "epoch": 0.85, + "learning_rate": 3.5920446276982784e-05, + "loss": 2.8213, + "step": 10465 + }, + { + "epoch": 0.85, + "learning_rate": 3.5913709003692026e-05, + "loss": 3.1896, + "step": 10470 + }, + { + "epoch": 0.85, + "learning_rate": 3.5906971730401275e-05, + "loss": 2.8873, + "step": 10475 + }, + { + "epoch": 0.85, + "learning_rate": 3.5900234457110524e-05, + "loss": 2.7271, + "step": 10480 + }, + { + "epoch": 0.85, + "learning_rate": 3.5893497183819766e-05, + "loss": 2.9885, + "step": 10485 + }, + { + "epoch": 0.85, + "learning_rate": 3.588675991052901e-05, + "loss": 2.9931, + "step": 10490 + }, + { + "epoch": 0.85, + "learning_rate": 3.588002263723826e-05, + "loss": 3.0486, + "step": 10495 + }, + { + "epoch": 0.85, + "learning_rate": 3.5873285363947506e-05, + "loss": 2.7903, + "step": 10500 + }, + { + "epoch": 0.85, + "learning_rate": 3.586654809065675e-05, + "loss": 2.9754, + "step": 10505 + }, + { + "epoch": 0.85, + "learning_rate": 3.5859810817366e-05, + "loss": 3.2095, + "step": 10510 + }, + { + "epoch": 0.85, + "learning_rate": 3.5853073544075246e-05, + "loss": 3.1133, + "step": 10515 + }, + { + "epoch": 0.85, + "learning_rate": 3.5846336270784495e-05, + "loss": 2.7682, + "step": 10520 + }, + { + "epoch": 0.85, + "learning_rate": 3.583959899749374e-05, + "loss": 2.941, + "step": 10525 + }, + { + "epoch": 0.85, + "learning_rate": 3.583286172420298e-05, + "loss": 3.0385, + "step": 10530 + }, + { + "epoch": 0.85, + "learning_rate": 3.582612445091223e-05, + "loss": 2.8513, + "step": 10535 + }, + { + "epoch": 0.85, + "learning_rate": 3.5819387177621477e-05, + "loss": 3.1885, + "step": 10540 + }, + { + "epoch": 0.85, + "learning_rate": 3.581264990433072e-05, + "loss": 3.2157, + "step": 10545 + }, + { + "epoch": 0.85, + "learning_rate": 3.580591263103997e-05, + "loss": 2.9265, + "step": 10550 + }, + { + "epoch": 0.85, + "learning_rate": 3.5799175357749216e-05, + "loss": 2.6739, + "step": 10555 + }, + { + "epoch": 0.85, + "learning_rate": 3.579243808445846e-05, + "loss": 2.8451, + "step": 10560 + }, + { + "epoch": 0.85, + "learning_rate": 3.578570081116771e-05, + "loss": 2.9205, + "step": 10565 + }, + { + "epoch": 0.85, + "learning_rate": 3.577896353787695e-05, + "loss": 2.6902, + "step": 10570 + }, + { + "epoch": 0.85, + "learning_rate": 3.57722262645862e-05, + "loss": 2.9319, + "step": 10575 + }, + { + "epoch": 0.86, + "learning_rate": 3.576548899129544e-05, + "loss": 3.5273, + "step": 10580 + }, + { + "epoch": 0.86, + "learning_rate": 3.575875171800469e-05, + "loss": 3.2621, + "step": 10585 + }, + { + "epoch": 0.86, + "learning_rate": 3.575201444471394e-05, + "loss": 2.8811, + "step": 10590 + }, + { + "epoch": 0.86, + "learning_rate": 3.574527717142319e-05, + "loss": 3.0016, + "step": 10595 + }, + { + "epoch": 0.86, + "learning_rate": 3.573853989813243e-05, + "loss": 3.0808, + "step": 10600 + }, + { + "epoch": 0.86, + "learning_rate": 3.573180262484168e-05, + "loss": 3.0308, + "step": 10605 + }, + { + "epoch": 0.86, + "learning_rate": 3.572506535155093e-05, + "loss": 2.9164, + "step": 10610 + }, + { + "epoch": 0.86, + "learning_rate": 3.571832807826017e-05, + "loss": 2.8661, + "step": 10615 + }, + { + "epoch": 0.86, + "learning_rate": 3.571159080496941e-05, + "loss": 3.1764, + "step": 10620 + }, + { + "epoch": 0.86, + "learning_rate": 3.570485353167866e-05, + "loss": 2.9688, + "step": 10625 + }, + { + "epoch": 0.86, + "learning_rate": 3.569811625838791e-05, + "loss": 2.8292, + "step": 10630 + }, + { + "epoch": 0.86, + "learning_rate": 3.569137898509715e-05, + "loss": 3.1102, + "step": 10635 + }, + { + "epoch": 0.86, + "learning_rate": 3.56846417118064e-05, + "loss": 2.9934, + "step": 10640 + }, + { + "epoch": 0.86, + "learning_rate": 3.567790443851565e-05, + "loss": 3.2563, + "step": 10645 + }, + { + "epoch": 0.86, + "learning_rate": 3.56711671652249e-05, + "loss": 2.8752, + "step": 10650 + }, + { + "epoch": 0.86, + "learning_rate": 3.566442989193414e-05, + "loss": 2.9797, + "step": 10655 + }, + { + "epoch": 0.86, + "learning_rate": 3.565769261864338e-05, + "loss": 2.8909, + "step": 10660 + }, + { + "epoch": 0.86, + "learning_rate": 3.565095534535263e-05, + "loss": 3.0765, + "step": 10665 + }, + { + "epoch": 0.86, + "learning_rate": 3.564421807206187e-05, + "loss": 2.8784, + "step": 10670 + }, + { + "epoch": 0.86, + "learning_rate": 3.563748079877112e-05, + "loss": 3.0186, + "step": 10675 + }, + { + "epoch": 0.86, + "learning_rate": 3.563074352548037e-05, + "loss": 3.0764, + "step": 10680 + }, + { + "epoch": 0.86, + "learning_rate": 3.562400625218962e-05, + "loss": 3.015, + "step": 10685 + }, + { + "epoch": 0.86, + "learning_rate": 3.561726897889886e-05, + "loss": 2.9773, + "step": 10690 + }, + { + "epoch": 0.86, + "learning_rate": 3.561053170560811e-05, + "loss": 2.9843, + "step": 10695 + }, + { + "epoch": 0.87, + "learning_rate": 3.560379443231735e-05, + "loss": 2.8827, + "step": 10700 + }, + { + "epoch": 0.87, + "learning_rate": 3.5597057159026595e-05, + "loss": 3.1021, + "step": 10705 + }, + { + "epoch": 0.87, + "learning_rate": 3.559031988573584e-05, + "loss": 3.0825, + "step": 10710 + }, + { + "epoch": 0.87, + "learning_rate": 3.558358261244509e-05, + "loss": 2.9813, + "step": 10715 + }, + { + "epoch": 0.87, + "learning_rate": 3.557684533915434e-05, + "loss": 2.8546, + "step": 10720 + }, + { + "epoch": 0.87, + "learning_rate": 3.557010806586358e-05, + "loss": 3.0049, + "step": 10725 + }, + { + "epoch": 0.87, + "learning_rate": 3.556337079257283e-05, + "loss": 2.6026, + "step": 10730 + }, + { + "epoch": 0.87, + "learning_rate": 3.555663351928208e-05, + "loss": 3.0555, + "step": 10735 + }, + { + "epoch": 0.87, + "learning_rate": 3.554989624599133e-05, + "loss": 2.9656, + "step": 10740 + }, + { + "epoch": 0.87, + "learning_rate": 3.554315897270057e-05, + "loss": 2.9598, + "step": 10745 + }, + { + "epoch": 0.87, + "learning_rate": 3.5536421699409814e-05, + "loss": 2.9622, + "step": 10750 + }, + { + "epoch": 0.87, + "learning_rate": 3.552968442611906e-05, + "loss": 3.2513, + "step": 10755 + }, + { + "epoch": 0.87, + "learning_rate": 3.5522947152828305e-05, + "loss": 3.0568, + "step": 10760 + }, + { + "epoch": 0.87, + "learning_rate": 3.5516209879537554e-05, + "loss": 3.1684, + "step": 10765 + }, + { + "epoch": 0.87, + "learning_rate": 3.55094726062468e-05, + "loss": 3.0154, + "step": 10770 + }, + { + "epoch": 0.87, + "learning_rate": 3.550273533295605e-05, + "loss": 2.8607, + "step": 10775 + }, + { + "epoch": 0.87, + "learning_rate": 3.5495998059665294e-05, + "loss": 2.953, + "step": 10780 + }, + { + "epoch": 0.87, + "learning_rate": 3.548926078637454e-05, + "loss": 3.1728, + "step": 10785 + }, + { + "epoch": 0.87, + "learning_rate": 3.5482523513083785e-05, + "loss": 2.8006, + "step": 10790 + }, + { + "epoch": 0.87, + "learning_rate": 3.5475786239793034e-05, + "loss": 3.0427, + "step": 10795 + }, + { + "epoch": 0.87, + "learning_rate": 3.5469048966502276e-05, + "loss": 3.0487, + "step": 10800 + }, + { + "epoch": 0.87, + "learning_rate": 3.5462311693211525e-05, + "loss": 3.1628, + "step": 10805 + }, + { + "epoch": 0.87, + "learning_rate": 3.5455574419920773e-05, + "loss": 2.8268, + "step": 10810 + }, + { + "epoch": 0.87, + "learning_rate": 3.5448837146630015e-05, + "loss": 2.9314, + "step": 10815 + }, + { + "epoch": 0.87, + "learning_rate": 3.5442099873339264e-05, + "loss": 2.9674, + "step": 10820 + }, + { + "epoch": 0.88, + "learning_rate": 3.543536260004851e-05, + "loss": 3.027, + "step": 10825 + }, + { + "epoch": 0.88, + "learning_rate": 3.5428625326757755e-05, + "loss": 3.0957, + "step": 10830 + }, + { + "epoch": 0.88, + "learning_rate": 3.5421888053467e-05, + "loss": 2.9217, + "step": 10835 + }, + { + "epoch": 0.88, + "learning_rate": 3.5415150780176246e-05, + "loss": 2.8224, + "step": 10840 + }, + { + "epoch": 0.88, + "learning_rate": 3.5408413506885495e-05, + "loss": 2.9139, + "step": 10845 + }, + { + "epoch": 0.88, + "learning_rate": 3.5401676233594744e-05, + "loss": 2.8247, + "step": 10850 + }, + { + "epoch": 0.88, + "learning_rate": 3.5394938960303986e-05, + "loss": 2.9044, + "step": 10855 + }, + { + "epoch": 0.88, + "learning_rate": 3.5388201687013235e-05, + "loss": 2.8095, + "step": 10860 + }, + { + "epoch": 0.88, + "learning_rate": 3.5381464413722484e-05, + "loss": 3.1797, + "step": 10865 + }, + { + "epoch": 0.88, + "learning_rate": 3.5374727140431726e-05, + "loss": 2.7943, + "step": 10870 + }, + { + "epoch": 0.88, + "learning_rate": 3.5367989867140975e-05, + "loss": 2.8645, + "step": 10875 + }, + { + "epoch": 0.88, + "learning_rate": 3.536125259385022e-05, + "loss": 2.8684, + "step": 10880 + }, + { + "epoch": 0.88, + "learning_rate": 3.5354515320559466e-05, + "loss": 3.0959, + "step": 10885 + }, + { + "epoch": 0.88, + "learning_rate": 3.534777804726871e-05, + "loss": 3.2173, + "step": 10890 + }, + { + "epoch": 0.88, + "learning_rate": 3.534104077397796e-05, + "loss": 2.8572, + "step": 10895 + }, + { + "epoch": 0.88, + "learning_rate": 3.5334303500687206e-05, + "loss": 2.8573, + "step": 10900 + }, + { + "epoch": 0.88, + "learning_rate": 3.5327566227396455e-05, + "loss": 3.1226, + "step": 10905 + }, + { + "epoch": 0.88, + "learning_rate": 3.53208289541057e-05, + "loss": 2.8455, + "step": 10910 + }, + { + "epoch": 0.88, + "learning_rate": 3.5314091680814946e-05, + "loss": 3.0792, + "step": 10915 + }, + { + "epoch": 0.88, + "learning_rate": 3.530735440752419e-05, + "loss": 3.1342, + "step": 10920 + }, + { + "epoch": 0.88, + "learning_rate": 3.530061713423343e-05, + "loss": 2.9695, + "step": 10925 + }, + { + "epoch": 0.88, + "learning_rate": 3.529387986094268e-05, + "loss": 2.9094, + "step": 10930 + }, + { + "epoch": 0.88, + "learning_rate": 3.528714258765193e-05, + "loss": 2.8054, + "step": 10935 + }, + { + "epoch": 0.88, + "learning_rate": 3.5280405314361176e-05, + "loss": 3.2511, + "step": 10940 + }, + { + "epoch": 0.88, + "learning_rate": 3.527366804107042e-05, + "loss": 3.067, + "step": 10945 + }, + { + "epoch": 0.89, + "learning_rate": 3.526693076777967e-05, + "loss": 3.0682, + "step": 10950 + }, + { + "epoch": 0.89, + "learning_rate": 3.5260193494488916e-05, + "loss": 3.2405, + "step": 10955 + }, + { + "epoch": 0.89, + "learning_rate": 3.525345622119816e-05, + "loss": 2.8744, + "step": 10960 + }, + { + "epoch": 0.89, + "learning_rate": 3.52467189479074e-05, + "loss": 2.8158, + "step": 10965 + }, + { + "epoch": 0.89, + "learning_rate": 3.523998167461665e-05, + "loss": 3.1325, + "step": 10970 + }, + { + "epoch": 0.89, + "learning_rate": 3.52332444013259e-05, + "loss": 2.8495, + "step": 10975 + }, + { + "epoch": 0.89, + "learning_rate": 3.522650712803514e-05, + "loss": 2.9472, + "step": 10980 + }, + { + "epoch": 0.89, + "learning_rate": 3.521976985474439e-05, + "loss": 3.1621, + "step": 10985 + }, + { + "epoch": 0.89, + "learning_rate": 3.521303258145364e-05, + "loss": 2.998, + "step": 10990 + }, + { + "epoch": 0.89, + "learning_rate": 3.520629530816289e-05, + "loss": 2.9869, + "step": 10995 + }, + { + "epoch": 0.89, + "learning_rate": 3.519955803487213e-05, + "loss": 3.0895, + "step": 11000 + }, + { + "epoch": 0.89, + "learning_rate": 3.519282076158137e-05, + "loss": 2.8368, + "step": 11005 + }, + { + "epoch": 0.89, + "learning_rate": 3.518608348829062e-05, + "loss": 3.1186, + "step": 11010 + }, + { + "epoch": 0.89, + "learning_rate": 3.517934621499986e-05, + "loss": 2.9144, + "step": 11015 + }, + { + "epoch": 0.89, + "learning_rate": 3.517260894170911e-05, + "loss": 3.1524, + "step": 11020 + }, + { + "epoch": 0.89, + "learning_rate": 3.516587166841836e-05, + "loss": 2.7921, + "step": 11025 + }, + { + "epoch": 0.89, + "learning_rate": 3.515913439512761e-05, + "loss": 2.9172, + "step": 11030 + }, + { + "epoch": 0.89, + "learning_rate": 3.515239712183685e-05, + "loss": 3.1107, + "step": 11035 + }, + { + "epoch": 0.89, + "learning_rate": 3.51456598485461e-05, + "loss": 3.0204, + "step": 11040 + }, + { + "epoch": 0.89, + "learning_rate": 3.513892257525535e-05, + "loss": 2.9076, + "step": 11045 + }, + { + "epoch": 0.89, + "learning_rate": 3.513218530196459e-05, + "loss": 2.746, + "step": 11050 + }, + { + "epoch": 0.89, + "learning_rate": 3.512544802867383e-05, + "loss": 3.1643, + "step": 11055 + }, + { + "epoch": 0.89, + "learning_rate": 3.511871075538308e-05, + "loss": 2.9974, + "step": 11060 + }, + { + "epoch": 0.89, + "learning_rate": 3.511197348209233e-05, + "loss": 2.9475, + "step": 11065 + }, + { + "epoch": 0.89, + "learning_rate": 3.510523620880157e-05, + "loss": 2.8503, + "step": 11070 + }, + { + "epoch": 0.9, + "learning_rate": 3.509849893551082e-05, + "loss": 2.8255, + "step": 11075 + }, + { + "epoch": 0.9, + "learning_rate": 3.509176166222007e-05, + "loss": 3.0081, + "step": 11080 + }, + { + "epoch": 0.9, + "learning_rate": 3.508502438892932e-05, + "loss": 3.0882, + "step": 11085 + }, + { + "epoch": 0.9, + "learning_rate": 3.507828711563856e-05, + "loss": 3.3281, + "step": 11090 + }, + { + "epoch": 0.9, + "learning_rate": 3.50715498423478e-05, + "loss": 2.7693, + "step": 11095 + }, + { + "epoch": 0.9, + "learning_rate": 3.506481256905705e-05, + "loss": 2.9684, + "step": 11100 + }, + { + "epoch": 0.9, + "learning_rate": 3.50580752957663e-05, + "loss": 2.8099, + "step": 11105 + }, + { + "epoch": 0.9, + "learning_rate": 3.505133802247554e-05, + "loss": 2.9284, + "step": 11110 + }, + { + "epoch": 0.9, + "learning_rate": 3.504460074918479e-05, + "loss": 3.0644, + "step": 11115 + }, + { + "epoch": 0.9, + "learning_rate": 3.503786347589404e-05, + "loss": 3.0546, + "step": 11120 + }, + { + "epoch": 0.9, + "learning_rate": 3.503112620260328e-05, + "loss": 3.0909, + "step": 11125 + }, + { + "epoch": 0.9, + "learning_rate": 3.502438892931253e-05, + "loss": 3.0999, + "step": 11130 + }, + { + "epoch": 0.9, + "learning_rate": 3.5017651656021774e-05, + "loss": 2.9832, + "step": 11135 + }, + { + "epoch": 0.9, + "learning_rate": 3.501091438273102e-05, + "loss": 3.2219, + "step": 11140 + }, + { + "epoch": 0.9, + "learning_rate": 3.5004177109440265e-05, + "loss": 2.7767, + "step": 11145 + }, + { + "epoch": 0.9, + "learning_rate": 3.4997439836149514e-05, + "loss": 3.1374, + "step": 11150 + }, + { + "epoch": 0.9, + "learning_rate": 3.499070256285876e-05, + "loss": 2.6789, + "step": 11155 + }, + { + "epoch": 0.9, + "learning_rate": 3.498396528956801e-05, + "loss": 2.7915, + "step": 11160 + }, + { + "epoch": 0.9, + "learning_rate": 3.4977228016277254e-05, + "loss": 3.1062, + "step": 11165 + }, + { + "epoch": 0.9, + "learning_rate": 3.49704907429865e-05, + "loss": 2.7951, + "step": 11170 + }, + { + "epoch": 0.9, + "learning_rate": 3.496375346969575e-05, + "loss": 3.0343, + "step": 11175 + }, + { + "epoch": 0.9, + "learning_rate": 3.4957016196404993e-05, + "loss": 2.9418, + "step": 11180 + }, + { + "epoch": 0.9, + "learning_rate": 3.4950278923114236e-05, + "loss": 2.9457, + "step": 11185 + }, + { + "epoch": 0.9, + "learning_rate": 3.4943541649823484e-05, + "loss": 2.9444, + "step": 11190 + }, + { + "epoch": 0.91, + "learning_rate": 3.493680437653273e-05, + "loss": 2.9274, + "step": 11195 + }, + { + "epoch": 0.91, + "learning_rate": 3.4930067103241975e-05, + "loss": 2.9349, + "step": 11200 + }, + { + "epoch": 0.91, + "learning_rate": 3.4923329829951224e-05, + "loss": 2.8807, + "step": 11205 + }, + { + "epoch": 0.91, + "learning_rate": 3.491659255666047e-05, + "loss": 3.0897, + "step": 11210 + }, + { + "epoch": 0.91, + "learning_rate": 3.490985528336972e-05, + "loss": 3.0386, + "step": 11215 + }, + { + "epoch": 0.91, + "learning_rate": 3.4903118010078964e-05, + "loss": 3.0223, + "step": 11220 + }, + { + "epoch": 0.91, + "learning_rate": 3.4896380736788206e-05, + "loss": 2.9861, + "step": 11225 + }, + { + "epoch": 0.91, + "learning_rate": 3.4889643463497455e-05, + "loss": 3.4111, + "step": 11230 + }, + { + "epoch": 0.91, + "learning_rate": 3.48829061902067e-05, + "loss": 2.8458, + "step": 11235 + }, + { + "epoch": 0.91, + "learning_rate": 3.4876168916915946e-05, + "loss": 3.1452, + "step": 11240 + }, + { + "epoch": 0.91, + "learning_rate": 3.4869431643625195e-05, + "loss": 3.0766, + "step": 11245 + }, + { + "epoch": 0.91, + "learning_rate": 3.4862694370334444e-05, + "loss": 3.0981, + "step": 11250 + }, + { + "epoch": 0.91, + "learning_rate": 3.4855957097043686e-05, + "loss": 2.9809, + "step": 11255 + }, + { + "epoch": 0.91, + "learning_rate": 3.4849219823752935e-05, + "loss": 2.9177, + "step": 11260 + }, + { + "epoch": 0.91, + "learning_rate": 3.484248255046218e-05, + "loss": 2.9045, + "step": 11265 + }, + { + "epoch": 0.91, + "learning_rate": 3.483574527717142e-05, + "loss": 3.1389, + "step": 11270 + }, + { + "epoch": 0.91, + "learning_rate": 3.482900800388067e-05, + "loss": 3.273, + "step": 11275 + }, + { + "epoch": 0.91, + "learning_rate": 3.482227073058992e-05, + "loss": 3.0738, + "step": 11280 + }, + { + "epoch": 0.91, + "learning_rate": 3.4815533457299166e-05, + "loss": 3.0059, + "step": 11285 + }, + { + "epoch": 0.91, + "learning_rate": 3.480879618400841e-05, + "loss": 3.1052, + "step": 11290 + }, + { + "epoch": 0.91, + "learning_rate": 3.4802058910717657e-05, + "loss": 2.9186, + "step": 11295 + }, + { + "epoch": 0.91, + "learning_rate": 3.4795321637426905e-05, + "loss": 3.4463, + "step": 11300 + }, + { + "epoch": 0.91, + "learning_rate": 3.478858436413615e-05, + "loss": 2.9143, + "step": 11305 + }, + { + "epoch": 0.91, + "learning_rate": 3.4781847090845396e-05, + "loss": 3.0278, + "step": 11310 + }, + { + "epoch": 0.91, + "learning_rate": 3.477510981755464e-05, + "loss": 2.7317, + "step": 11315 + }, + { + "epoch": 0.92, + "learning_rate": 3.476837254426389e-05, + "loss": 3.2409, + "step": 11320 + }, + { + "epoch": 0.92, + "learning_rate": 3.476163527097313e-05, + "loss": 2.8277, + "step": 11325 + }, + { + "epoch": 0.92, + "learning_rate": 3.475489799768238e-05, + "loss": 2.7135, + "step": 11330 + }, + { + "epoch": 0.92, + "learning_rate": 3.474816072439163e-05, + "loss": 2.9345, + "step": 11335 + }, + { + "epoch": 0.92, + "learning_rate": 3.4741423451100876e-05, + "loss": 3.0272, + "step": 11340 + }, + { + "epoch": 0.92, + "learning_rate": 3.473468617781012e-05, + "loss": 3.1851, + "step": 11345 + }, + { + "epoch": 0.92, + "learning_rate": 3.472794890451937e-05, + "loss": 3.0905, + "step": 11350 + }, + { + "epoch": 0.92, + "learning_rate": 3.472121163122861e-05, + "loss": 3.0061, + "step": 11355 + }, + { + "epoch": 0.92, + "learning_rate": 3.471447435793786e-05, + "loss": 3.1029, + "step": 11360 + }, + { + "epoch": 0.92, + "learning_rate": 3.47077370846471e-05, + "loss": 3.0591, + "step": 11365 + }, + { + "epoch": 0.92, + "learning_rate": 3.470099981135635e-05, + "loss": 3.0617, + "step": 11370 + }, + { + "epoch": 0.92, + "learning_rate": 3.46942625380656e-05, + "loss": 3.0491, + "step": 11375 + }, + { + "epoch": 0.92, + "learning_rate": 3.468752526477484e-05, + "loss": 3.0714, + "step": 11380 + }, + { + "epoch": 0.92, + "learning_rate": 3.468078799148409e-05, + "loss": 2.9701, + "step": 11385 + }, + { + "epoch": 0.92, + "learning_rate": 3.467405071819334e-05, + "loss": 3.2235, + "step": 11390 + }, + { + "epoch": 0.92, + "learning_rate": 3.466731344490258e-05, + "loss": 2.8136, + "step": 11395 + }, + { + "epoch": 0.92, + "learning_rate": 3.466057617161182e-05, + "loss": 2.9353, + "step": 11400 + }, + { + "epoch": 0.92, + "learning_rate": 3.465383889832107e-05, + "loss": 3.0163, + "step": 11405 + }, + { + "epoch": 0.92, + "learning_rate": 3.464710162503032e-05, + "loss": 2.9738, + "step": 11410 + }, + { + "epoch": 0.92, + "learning_rate": 3.464036435173957e-05, + "loss": 3.0965, + "step": 11415 + }, + { + "epoch": 0.92, + "learning_rate": 3.463362707844881e-05, + "loss": 2.955, + "step": 11420 + }, + { + "epoch": 0.92, + "learning_rate": 3.462688980515806e-05, + "loss": 2.8734, + "step": 11425 + }, + { + "epoch": 0.92, + "learning_rate": 3.462015253186731e-05, + "loss": 3.0841, + "step": 11430 + }, + { + "epoch": 0.92, + "learning_rate": 3.461341525857655e-05, + "loss": 3.2559, + "step": 11435 + }, + { + "epoch": 0.92, + "learning_rate": 3.460667798528579e-05, + "loss": 2.9961, + "step": 11440 + }, + { + "epoch": 0.93, + "learning_rate": 3.459994071199504e-05, + "loss": 3.0601, + "step": 11445 + }, + { + "epoch": 0.93, + "learning_rate": 3.459320343870429e-05, + "loss": 3.4935, + "step": 11450 + }, + { + "epoch": 0.93, + "learning_rate": 3.458646616541353e-05, + "loss": 2.9135, + "step": 11455 + }, + { + "epoch": 0.93, + "learning_rate": 3.457972889212278e-05, + "loss": 2.9466, + "step": 11460 + }, + { + "epoch": 0.93, + "learning_rate": 3.457299161883203e-05, + "loss": 3.0844, + "step": 11465 + }, + { + "epoch": 0.93, + "learning_rate": 3.456625434554128e-05, + "loss": 3.1374, + "step": 11470 + }, + { + "epoch": 0.93, + "learning_rate": 3.455951707225052e-05, + "loss": 2.9251, + "step": 11475 + }, + { + "epoch": 0.93, + "learning_rate": 3.455277979895977e-05, + "loss": 3.0553, + "step": 11480 + }, + { + "epoch": 0.93, + "learning_rate": 3.454604252566901e-05, + "loss": 3.1865, + "step": 11485 + }, + { + "epoch": 0.93, + "learning_rate": 3.4539305252378254e-05, + "loss": 2.8396, + "step": 11490 + }, + { + "epoch": 0.93, + "learning_rate": 3.4533915433745656e-05, + "loss": 3.1356, + "step": 11495 + }, + { + "epoch": 0.93, + "learning_rate": 3.4527178160454905e-05, + "loss": 2.9385, + "step": 11500 + }, + { + "epoch": 0.93, + "learning_rate": 3.452044088716415e-05, + "loss": 2.909, + "step": 11505 + }, + { + "epoch": 0.93, + "learning_rate": 3.4513703613873396e-05, + "loss": 3.1132, + "step": 11510 + }, + { + "epoch": 0.93, + "learning_rate": 3.4506966340582645e-05, + "loss": 2.9435, + "step": 11515 + }, + { + "epoch": 0.93, + "learning_rate": 3.450157652195003e-05, + "loss": 3.6755, + "step": 11520 + }, + { + "epoch": 0.93, + "learning_rate": 3.449483924865928e-05, + "loss": 3.2154, + "step": 11525 + }, + { + "epoch": 0.93, + "learning_rate": 3.448810197536853e-05, + "loss": 3.0608, + "step": 11530 + }, + { + "epoch": 0.93, + "learning_rate": 3.448136470207778e-05, + "loss": 3.012, + "step": 11535 + }, + { + "epoch": 0.93, + "learning_rate": 3.447462742878702e-05, + "loss": 3.1076, + "step": 11540 + }, + { + "epoch": 0.93, + "learning_rate": 3.446789015549627e-05, + "loss": 2.9761, + "step": 11545 + }, + { + "epoch": 0.93, + "learning_rate": 3.446115288220552e-05, + "loss": 2.9443, + "step": 11550 + }, + { + "epoch": 0.93, + "learning_rate": 3.445441560891476e-05, + "loss": 2.9833, + "step": 11555 + }, + { + "epoch": 0.93, + "learning_rate": 3.4447678335624004e-05, + "loss": 2.9272, + "step": 11560 + }, + { + "epoch": 0.93, + "learning_rate": 3.444094106233325e-05, + "loss": 2.7781, + "step": 11565 + }, + { + "epoch": 0.94, + "learning_rate": 3.44342037890425e-05, + "loss": 2.8401, + "step": 11570 + }, + { + "epoch": 0.94, + "learning_rate": 3.4427466515751743e-05, + "loss": 3.3159, + "step": 11575 + }, + { + "epoch": 0.94, + "learning_rate": 3.442072924246099e-05, + "loss": 3.2537, + "step": 11580 + }, + { + "epoch": 0.94, + "learning_rate": 3.441399196917024e-05, + "loss": 2.7323, + "step": 11585 + }, + { + "epoch": 0.94, + "learning_rate": 3.440725469587949e-05, + "loss": 2.8754, + "step": 11590 + }, + { + "epoch": 0.94, + "learning_rate": 3.440051742258873e-05, + "loss": 2.7398, + "step": 11595 + }, + { + "epoch": 0.94, + "learning_rate": 3.4393780149297974e-05, + "loss": 3.2692, + "step": 11600 + }, + { + "epoch": 0.94, + "learning_rate": 3.438704287600722e-05, + "loss": 2.9441, + "step": 11605 + }, + { + "epoch": 0.94, + "learning_rate": 3.438030560271647e-05, + "loss": 3.0601, + "step": 11610 + }, + { + "epoch": 0.94, + "learning_rate": 3.4373568329425714e-05, + "loss": 2.9512, + "step": 11615 + }, + { + "epoch": 0.94, + "learning_rate": 3.436683105613496e-05, + "loss": 3.04, + "step": 11620 + }, + { + "epoch": 0.94, + "learning_rate": 3.436009378284421e-05, + "loss": 2.9893, + "step": 11625 + }, + { + "epoch": 0.94, + "learning_rate": 3.4353356509553454e-05, + "loss": 2.9824, + "step": 11630 + }, + { + "epoch": 0.94, + "learning_rate": 3.43466192362627e-05, + "loss": 2.9835, + "step": 11635 + }, + { + "epoch": 0.94, + "learning_rate": 3.4339881962971945e-05, + "loss": 2.8617, + "step": 11640 + }, + { + "epoch": 0.94, + "learning_rate": 3.4333144689681194e-05, + "loss": 2.8856, + "step": 11645 + }, + { + "epoch": 0.94, + "learning_rate": 3.4326407416390436e-05, + "loss": 3.131, + "step": 11650 + }, + { + "epoch": 0.94, + "learning_rate": 3.4319670143099685e-05, + "loss": 2.846, + "step": 11655 + }, + { + "epoch": 0.94, + "learning_rate": 3.4312932869808934e-05, + "loss": 3.2464, + "step": 11660 + }, + { + "epoch": 0.94, + "learning_rate": 3.430619559651818e-05, + "loss": 3.1764, + "step": 11665 + }, + { + "epoch": 0.94, + "learning_rate": 3.4299458323227425e-05, + "loss": 3.0205, + "step": 11670 + }, + { + "epoch": 0.94, + "learning_rate": 3.4292721049936674e-05, + "loss": 3.0125, + "step": 11675 + }, + { + "epoch": 0.94, + "learning_rate": 3.428598377664592e-05, + "loss": 3.0583, + "step": 11680 + }, + { + "epoch": 0.94, + "learning_rate": 3.4279246503355164e-05, + "loss": 3.2121, + "step": 11685 + }, + { + "epoch": 0.95, + "learning_rate": 3.4272509230064407e-05, + "loss": 3.0483, + "step": 11690 + }, + { + "epoch": 0.95, + "learning_rate": 3.4265771956773655e-05, + "loss": 2.9595, + "step": 11695 + }, + { + "epoch": 0.95, + "learning_rate": 3.4259034683482904e-05, + "loss": 2.8907, + "step": 11700 + }, + { + "epoch": 0.95, + "learning_rate": 3.4252297410192146e-05, + "loss": 3.2868, + "step": 11705 + }, + { + "epoch": 0.95, + "learning_rate": 3.4245560136901395e-05, + "loss": 2.9464, + "step": 11710 + }, + { + "epoch": 0.95, + "learning_rate": 3.4238822863610644e-05, + "loss": 3.4274, + "step": 11715 + }, + { + "epoch": 0.95, + "learning_rate": 3.423208559031989e-05, + "loss": 3.1743, + "step": 11720 + }, + { + "epoch": 0.95, + "learning_rate": 3.4225348317029135e-05, + "loss": 3.2586, + "step": 11725 + }, + { + "epoch": 0.95, + "learning_rate": 3.421861104373838e-05, + "loss": 2.7091, + "step": 11730 + }, + { + "epoch": 0.95, + "learning_rate": 3.4211873770447626e-05, + "loss": 2.7987, + "step": 11735 + }, + { + "epoch": 0.95, + "learning_rate": 3.420513649715687e-05, + "loss": 2.9936, + "step": 11740 + }, + { + "epoch": 0.95, + "learning_rate": 3.419839922386612e-05, + "loss": 3.0006, + "step": 11745 + }, + { + "epoch": 0.95, + "learning_rate": 3.4191661950575366e-05, + "loss": 3.2487, + "step": 11750 + }, + { + "epoch": 0.95, + "learning_rate": 3.4184924677284615e-05, + "loss": 2.9673, + "step": 11755 + }, + { + "epoch": 0.95, + "learning_rate": 3.417818740399386e-05, + "loss": 2.9303, + "step": 11760 + }, + { + "epoch": 0.95, + "learning_rate": 3.4171450130703106e-05, + "loss": 2.7594, + "step": 11765 + }, + { + "epoch": 0.95, + "learning_rate": 3.416471285741235e-05, + "loss": 2.9743, + "step": 11770 + }, + { + "epoch": 0.95, + "learning_rate": 3.41579755841216e-05, + "loss": 2.7895, + "step": 11775 + }, + { + "epoch": 0.95, + "learning_rate": 3.415123831083084e-05, + "loss": 3.0708, + "step": 11780 + }, + { + "epoch": 0.95, + "learning_rate": 3.414450103754009e-05, + "loss": 2.9679, + "step": 11785 + }, + { + "epoch": 0.95, + "learning_rate": 3.413776376424934e-05, + "loss": 2.998, + "step": 11790 + }, + { + "epoch": 0.95, + "learning_rate": 3.413102649095858e-05, + "loss": 2.9087, + "step": 11795 + }, + { + "epoch": 0.95, + "learning_rate": 3.412428921766783e-05, + "loss": 2.9266, + "step": 11800 + }, + { + "epoch": 0.95, + "learning_rate": 3.4117551944377076e-05, + "loss": 3.1942, + "step": 11805 + }, + { + "epoch": 0.95, + "learning_rate": 3.411081467108632e-05, + "loss": 3.1146, + "step": 11810 + }, + { + "epoch": 0.96, + "learning_rate": 3.410407739779557e-05, + "loss": 3.1195, + "step": 11815 + }, + { + "epoch": 0.96, + "learning_rate": 3.409734012450481e-05, + "loss": 2.8045, + "step": 11820 + }, + { + "epoch": 0.96, + "learning_rate": 3.409060285121406e-05, + "loss": 3.0467, + "step": 11825 + }, + { + "epoch": 0.96, + "learning_rate": 3.40838655779233e-05, + "loss": 2.8286, + "step": 11830 + }, + { + "epoch": 0.96, + "learning_rate": 3.407712830463255e-05, + "loss": 3.0586, + "step": 11835 + }, + { + "epoch": 0.96, + "learning_rate": 3.40703910313418e-05, + "loss": 2.9959, + "step": 11840 + }, + { + "epoch": 0.96, + "learning_rate": 3.406365375805105e-05, + "loss": 3.0457, + "step": 11845 + }, + { + "epoch": 0.96, + "learning_rate": 3.405691648476029e-05, + "loss": 2.9982, + "step": 11850 + }, + { + "epoch": 0.96, + "learning_rate": 3.405017921146954e-05, + "loss": 2.8829, + "step": 11855 + }, + { + "epoch": 0.96, + "learning_rate": 3.404344193817878e-05, + "loss": 3.3715, + "step": 11860 + }, + { + "epoch": 0.96, + "learning_rate": 3.403670466488803e-05, + "loss": 3.0459, + "step": 11865 + }, + { + "epoch": 0.96, + "learning_rate": 3.402996739159727e-05, + "loss": 2.8927, + "step": 11870 + }, + { + "epoch": 0.96, + "learning_rate": 3.402323011830652e-05, + "loss": 2.9431, + "step": 11875 + }, + { + "epoch": 0.96, + "learning_rate": 3.401649284501577e-05, + "loss": 2.9148, + "step": 11880 + }, + { + "epoch": 0.96, + "learning_rate": 3.400975557172501e-05, + "loss": 2.9739, + "step": 11885 + }, + { + "epoch": 0.96, + "learning_rate": 3.400301829843426e-05, + "loss": 2.8028, + "step": 11890 + }, + { + "epoch": 0.96, + "learning_rate": 3.399628102514351e-05, + "loss": 2.9279, + "step": 11895 + }, + { + "epoch": 0.96, + "learning_rate": 3.398954375185275e-05, + "loss": 3.0049, + "step": 11900 + }, + { + "epoch": 0.96, + "learning_rate": 3.398280647856199e-05, + "loss": 3.0606, + "step": 11905 + }, + { + "epoch": 0.96, + "learning_rate": 3.397606920527124e-05, + "loss": 2.9626, + "step": 11910 + }, + { + "epoch": 0.96, + "learning_rate": 3.396933193198049e-05, + "loss": 3.2141, + "step": 11915 + }, + { + "epoch": 0.96, + "learning_rate": 3.396259465868974e-05, + "loss": 2.9912, + "step": 11920 + }, + { + "epoch": 0.96, + "learning_rate": 3.395585738539898e-05, + "loss": 2.9204, + "step": 11925 + }, + { + "epoch": 0.96, + "learning_rate": 3.394912011210823e-05, + "loss": 2.7776, + "step": 11930 + }, + { + "epoch": 0.96, + "learning_rate": 3.394238283881748e-05, + "loss": 3.2191, + "step": 11935 + }, + { + "epoch": 0.97, + "learning_rate": 3.393564556552672e-05, + "loss": 2.8625, + "step": 11940 + }, + { + "epoch": 0.97, + "learning_rate": 3.3928908292235964e-05, + "loss": 3.0099, + "step": 11945 + }, + { + "epoch": 0.97, + "learning_rate": 3.392217101894521e-05, + "loss": 3.1315, + "step": 11950 + }, + { + "epoch": 0.97, + "learning_rate": 3.391543374565446e-05, + "loss": 3.0517, + "step": 11955 + }, + { + "epoch": 0.97, + "learning_rate": 3.3908696472363703e-05, + "loss": 3.1481, + "step": 11960 + }, + { + "epoch": 0.97, + "learning_rate": 3.390195919907295e-05, + "loss": 2.9481, + "step": 11965 + }, + { + "epoch": 0.97, + "learning_rate": 3.38952219257822e-05, + "loss": 3.0222, + "step": 11970 + }, + { + "epoch": 0.97, + "learning_rate": 3.388848465249145e-05, + "loss": 3.0445, + "step": 11975 + }, + { + "epoch": 0.97, + "learning_rate": 3.388174737920069e-05, + "loss": 3.1314, + "step": 11980 + }, + { + "epoch": 0.97, + "learning_rate": 3.387501010590994e-05, + "loss": 2.888, + "step": 11985 + }, + { + "epoch": 0.97, + "learning_rate": 3.386827283261918e-05, + "loss": 3.1614, + "step": 11990 + }, + { + "epoch": 0.97, + "learning_rate": 3.3861535559328425e-05, + "loss": 3.1802, + "step": 11995 + }, + { + "epoch": 0.97, + "learning_rate": 3.3854798286037674e-05, + "loss": 3.0269, + "step": 12000 + }, + { + "epoch": 0.97, + "eval_loss": 2.9837048053741455, + "eval_rouge2_fmeasure": 0.0042, + "eval_rouge2_precision": 0.01, + "eval_rouge2_recall": 0.0033, + "eval_runtime": 2865.3591, + "eval_samples_per_second": 0.096, + "eval_steps_per_second": 0.048, + "step": 12000 } ], "max_steps": 37107, "num_train_epochs": 3, - "total_flos": 3.12180234780672e+17, + "total_flos": 4.68270352171008e+17, "trial_name": null, "trial_params": null }