{ "best_metric": null, "best_model_checkpoint": null, "epoch": 29.0, "eval_steps": 500, "global_step": 969209, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 7.5e-05, "loss": 29.5796, "step": 500 }, { "epoch": 0.03, "learning_rate": 0.00015, "loss": 5.0487, "step": 1000 }, { "epoch": 0.04, "learning_rate": 0.000225, "loss": 4.5358, "step": 1500 }, { "epoch": 0.06, "learning_rate": 0.0003, "loss": 3.0616, "step": 2000 }, { "epoch": 0.07, "learning_rate": 0.00029985009444050245, "loss": 2.5781, "step": 2500 }, { "epoch": 0.09, "learning_rate": 0.00029970018888100493, "loss": 2.3254, "step": 3000 }, { "epoch": 0.1, "learning_rate": 0.0002995502833215074, "loss": 2.2124, "step": 3500 }, { "epoch": 0.12, "learning_rate": 0.0002994003777620099, "loss": 2.1224, "step": 4000 }, { "epoch": 0.13, "learning_rate": 0.00029925047220251237, "loss": 2.0343, "step": 4500 }, { "epoch": 0.15, "learning_rate": 0.0002991005666430149, "loss": 2.0035, "step": 5000 }, { "epoch": 0.16, "learning_rate": 0.00029895066108351733, "loss": 1.9224, "step": 5500 }, { "epoch": 0.18, "learning_rate": 0.00029880075552401986, "loss": 1.9103, "step": 6000 }, { "epoch": 0.19, "learning_rate": 0.00029865084996452234, "loss": 1.9074, "step": 6500 }, { "epoch": 0.21, "learning_rate": 0.0002985009444050248, "loss": 1.8261, "step": 7000 }, { "epoch": 0.22, "learning_rate": 0.0002983510388455273, "loss": 1.818, "step": 7500 }, { "epoch": 0.24, "learning_rate": 0.0002982011332860298, "loss": 1.7873, "step": 8000 }, { "epoch": 0.25, "learning_rate": 0.00029805122772653226, "loss": 1.7779, "step": 8500 }, { "epoch": 0.27, "learning_rate": 0.00029790132216703474, "loss": 1.7728, "step": 9000 }, { "epoch": 0.28, "learning_rate": 0.0002977514166075372, "loss": 1.782, "step": 9500 }, { "epoch": 0.3, "learning_rate": 0.0002976015110480397, "loss": 1.7174, "step": 10000 }, { "epoch": 0.31, "learning_rate": 0.0002974516054885422, "loss": 1.7356, "step": 10500 }, { "epoch": 0.33, "learning_rate": 0.00029730169992904466, "loss": 1.7125, "step": 11000 }, { "epoch": 0.34, "learning_rate": 0.0002971517943695472, "loss": 1.7017, "step": 11500 }, { "epoch": 0.36, "learning_rate": 0.0002970018888100496, "loss": 1.6844, "step": 12000 }, { "epoch": 0.37, "learning_rate": 0.00029685198325055215, "loss": 1.6783, "step": 12500 }, { "epoch": 0.39, "learning_rate": 0.00029670207769105463, "loss": 1.6738, "step": 13000 }, { "epoch": 0.4, "learning_rate": 0.0002965521721315571, "loss": 1.6553, "step": 13500 }, { "epoch": 0.42, "learning_rate": 0.0002964022665720596, "loss": 1.6784, "step": 14000 }, { "epoch": 0.43, "learning_rate": 0.00029625236101256207, "loss": 1.6356, "step": 14500 }, { "epoch": 0.45, "learning_rate": 0.00029610245545306455, "loss": 1.6308, "step": 15000 }, { "epoch": 0.46, "learning_rate": 0.00029595254989356703, "loss": 1.6176, "step": 15500 }, { "epoch": 0.48, "learning_rate": 0.0002958026443340695, "loss": 1.5953, "step": 16000 }, { "epoch": 0.49, "learning_rate": 0.000295652738774572, "loss": 1.6242, "step": 16500 }, { "epoch": 0.51, "learning_rate": 0.00029550283321507447, "loss": 1.5879, "step": 17000 }, { "epoch": 0.52, "learning_rate": 0.00029535292765557695, "loss": 1.626, "step": 17500 }, { "epoch": 0.54, "learning_rate": 0.0002952030220960795, "loss": 1.5977, "step": 18000 }, { "epoch": 0.55, "learning_rate": 0.0002950531165365819, "loss": 1.5809, "step": 18500 }, { "epoch": 0.57, "learning_rate": 0.00029490321097708444, "loss": 1.5621, "step": 19000 }, { "epoch": 0.58, "learning_rate": 0.0002947533054175869, "loss": 1.5742, "step": 19500 }, { "epoch": 0.6, "learning_rate": 0.00029460339985808935, "loss": 1.5925, "step": 20000 }, { "epoch": 0.61, "learning_rate": 0.0002944534942985919, "loss": 1.5554, "step": 20500 }, { "epoch": 0.63, "learning_rate": 0.0002943035887390943, "loss": 1.5545, "step": 21000 }, { "epoch": 0.64, "learning_rate": 0.00029415368317959684, "loss": 1.5381, "step": 21500 }, { "epoch": 0.66, "learning_rate": 0.0002940037776200993, "loss": 1.5622, "step": 22000 }, { "epoch": 0.67, "learning_rate": 0.0002938538720606018, "loss": 1.5643, "step": 22500 }, { "epoch": 0.69, "learning_rate": 0.0002937039665011043, "loss": 1.5613, "step": 23000 }, { "epoch": 0.7, "learning_rate": 0.00029355406094160676, "loss": 1.519, "step": 23500 }, { "epoch": 0.72, "learning_rate": 0.00029340415538210924, "loss": 1.5043, "step": 24000 }, { "epoch": 0.73, "learning_rate": 0.0002932542498226117, "loss": 1.5179, "step": 24500 }, { "epoch": 0.75, "learning_rate": 0.0002931043442631142, "loss": 1.5246, "step": 25000 }, { "epoch": 0.76, "learning_rate": 0.00029295443870361673, "loss": 1.5201, "step": 25500 }, { "epoch": 0.78, "learning_rate": 0.00029280453314411915, "loss": 1.5007, "step": 26000 }, { "epoch": 0.79, "learning_rate": 0.00029265462758462163, "loss": 1.5055, "step": 26500 }, { "epoch": 0.81, "learning_rate": 0.00029250472202512417, "loss": 1.469, "step": 27000 }, { "epoch": 0.82, "learning_rate": 0.0002923548164656266, "loss": 1.5371, "step": 27500 }, { "epoch": 0.84, "learning_rate": 0.0002922049109061291, "loss": 1.5006, "step": 28000 }, { "epoch": 0.85, "learning_rate": 0.0002920550053466316, "loss": 1.4942, "step": 28500 }, { "epoch": 0.87, "learning_rate": 0.0002919050997871341, "loss": 1.4926, "step": 29000 }, { "epoch": 0.88, "learning_rate": 0.00029175519422763657, "loss": 1.4735, "step": 29500 }, { "epoch": 0.9, "learning_rate": 0.00029160528866813905, "loss": 1.5184, "step": 30000 }, { "epoch": 0.91, "learning_rate": 0.0002914553831086415, "loss": 1.4969, "step": 30500 }, { "epoch": 0.93, "learning_rate": 0.000291305477549144, "loss": 1.4843, "step": 31000 }, { "epoch": 0.94, "learning_rate": 0.0002911555719896465, "loss": 1.4722, "step": 31500 }, { "epoch": 0.96, "learning_rate": 0.00029100566643014896, "loss": 1.4597, "step": 32000 }, { "epoch": 0.97, "learning_rate": 0.00029085576087065144, "loss": 1.4646, "step": 32500 }, { "epoch": 0.99, "learning_rate": 0.0002907058553111539, "loss": 1.5, "step": 33000 }, { "epoch": 1.0, "learning_rate": 0.00029055594975165646, "loss": 1.4608, "step": 33500 }, { "epoch": 1.02, "learning_rate": 0.0002904060441921589, "loss": 1.4222, "step": 34000 }, { "epoch": 1.03, "learning_rate": 0.0002902561386326614, "loss": 1.4288, "step": 34500 }, { "epoch": 1.05, "learning_rate": 0.0002901062330731639, "loss": 1.4357, "step": 35000 }, { "epoch": 1.06, "learning_rate": 0.0002899563275136664, "loss": 1.4275, "step": 35500 }, { "epoch": 1.08, "learning_rate": 0.00028980642195416885, "loss": 1.3738, "step": 36000 }, { "epoch": 1.09, "learning_rate": 0.00028965651639467133, "loss": 1.434, "step": 36500 }, { "epoch": 1.11, "learning_rate": 0.0002895066108351738, "loss": 1.4138, "step": 37000 }, { "epoch": 1.12, "learning_rate": 0.0002893567052756763, "loss": 1.4139, "step": 37500 }, { "epoch": 1.14, "learning_rate": 0.00028920679971617877, "loss": 1.3977, "step": 38000 }, { "epoch": 1.15, "learning_rate": 0.00028905689415668125, "loss": 1.4193, "step": 38500 }, { "epoch": 1.17, "learning_rate": 0.00028890698859718373, "loss": 1.4216, "step": 39000 }, { "epoch": 1.18, "learning_rate": 0.0002887570830376862, "loss": 1.376, "step": 39500 }, { "epoch": 1.2, "learning_rate": 0.00028860717747818874, "loss": 1.4231, "step": 40000 }, { "epoch": 1.21, "learning_rate": 0.00028845727191869117, "loss": 1.3734, "step": 40500 }, { "epoch": 1.23, "learning_rate": 0.0002883073663591937, "loss": 1.3961, "step": 41000 }, { "epoch": 1.24, "learning_rate": 0.0002881574607996962, "loss": 1.4046, "step": 41500 }, { "epoch": 1.26, "learning_rate": 0.00028800755524019866, "loss": 1.4058, "step": 42000 }, { "epoch": 1.27, "learning_rate": 0.00028785764968070114, "loss": 1.3845, "step": 42500 }, { "epoch": 1.29, "learning_rate": 0.0002877077441212036, "loss": 1.4175, "step": 43000 }, { "epoch": 1.3, "learning_rate": 0.0002875578385617061, "loss": 1.388, "step": 43500 }, { "epoch": 1.32, "learning_rate": 0.0002874079330022086, "loss": 1.3935, "step": 44000 }, { "epoch": 1.33, "learning_rate": 0.00028725802744271106, "loss": 1.3668, "step": 44500 }, { "epoch": 1.35, "learning_rate": 0.00028710812188321354, "loss": 1.4078, "step": 45000 }, { "epoch": 1.36, "learning_rate": 0.000286958216323716, "loss": 1.3631, "step": 45500 }, { "epoch": 1.38, "learning_rate": 0.0002868083107642185, "loss": 1.405, "step": 46000 }, { "epoch": 1.39, "learning_rate": 0.00028665840520472103, "loss": 1.3793, "step": 46500 }, { "epoch": 1.41, "learning_rate": 0.00028650849964522346, "loss": 1.421, "step": 47000 }, { "epoch": 1.42, "learning_rate": 0.000286358594085726, "loss": 1.4005, "step": 47500 }, { "epoch": 1.44, "learning_rate": 0.00028620868852622847, "loss": 1.3874, "step": 48000 }, { "epoch": 1.45, "learning_rate": 0.00028605878296673095, "loss": 1.43, "step": 48500 }, { "epoch": 1.47, "learning_rate": 0.00028590887740723343, "loss": 1.3763, "step": 49000 }, { "epoch": 1.48, "learning_rate": 0.0002857589718477359, "loss": 1.3586, "step": 49500 }, { "epoch": 1.5, "learning_rate": 0.0002856090662882384, "loss": 1.3848, "step": 50000 }, { "epoch": 1.51, "learning_rate": 0.00028545916072874087, "loss": 1.3846, "step": 50500 }, { "epoch": 1.53, "learning_rate": 0.00028530925516924335, "loss": 1.3709, "step": 51000 }, { "epoch": 1.54, "learning_rate": 0.00028515934960974583, "loss": 1.38, "step": 51500 }, { "epoch": 1.56, "learning_rate": 0.0002850094440502483, "loss": 1.3563, "step": 52000 }, { "epoch": 1.57, "learning_rate": 0.0002848595384907508, "loss": 1.3732, "step": 52500 }, { "epoch": 1.59, "learning_rate": 0.0002847096329312533, "loss": 1.3564, "step": 53000 }, { "epoch": 1.6, "learning_rate": 0.00028455972737175575, "loss": 1.3822, "step": 53500 }, { "epoch": 1.62, "learning_rate": 0.0002844098218122583, "loss": 1.3864, "step": 54000 }, { "epoch": 1.63, "learning_rate": 0.00028425991625276076, "loss": 1.345, "step": 54500 }, { "epoch": 1.65, "learning_rate": 0.00028411001069326324, "loss": 1.3678, "step": 55000 }, { "epoch": 1.66, "learning_rate": 0.0002839601051337657, "loss": 1.3789, "step": 55500 }, { "epoch": 1.68, "learning_rate": 0.00028381019957426814, "loss": 1.3545, "step": 56000 }, { "epoch": 1.69, "learning_rate": 0.0002836602940147707, "loss": 1.3542, "step": 56500 }, { "epoch": 1.71, "learning_rate": 0.00028351038845527316, "loss": 1.3543, "step": 57000 }, { "epoch": 1.72, "learning_rate": 0.00028336048289577564, "loss": 1.3892, "step": 57500 }, { "epoch": 1.74, "learning_rate": 0.0002832105773362781, "loss": 1.3512, "step": 58000 }, { "epoch": 1.75, "learning_rate": 0.0002830606717767806, "loss": 1.3513, "step": 58500 }, { "epoch": 1.77, "learning_rate": 0.0002829107662172831, "loss": 1.3483, "step": 59000 }, { "epoch": 1.78, "learning_rate": 0.00028276086065778556, "loss": 1.3846, "step": 59500 }, { "epoch": 1.8, "learning_rate": 0.00028261095509828803, "loss": 1.3375, "step": 60000 }, { "epoch": 1.81, "learning_rate": 0.00028246104953879057, "loss": 1.3498, "step": 60500 }, { "epoch": 1.83, "learning_rate": 0.000282311143979293, "loss": 1.3392, "step": 61000 }, { "epoch": 1.84, "learning_rate": 0.0002821612384197955, "loss": 1.3646, "step": 61500 }, { "epoch": 1.86, "learning_rate": 0.000282011332860298, "loss": 1.3565, "step": 62000 }, { "epoch": 1.87, "learning_rate": 0.00028186142730080043, "loss": 1.3866, "step": 62500 }, { "epoch": 1.89, "learning_rate": 0.00028171152174130297, "loss": 1.3409, "step": 63000 }, { "epoch": 1.9, "learning_rate": 0.00028156161618180545, "loss": 1.3469, "step": 63500 }, { "epoch": 1.91, "learning_rate": 0.0002814117106223079, "loss": 1.3585, "step": 64000 }, { "epoch": 1.93, "learning_rate": 0.0002812618050628104, "loss": 1.3729, "step": 64500 }, { "epoch": 1.94, "learning_rate": 0.0002811118995033129, "loss": 1.3655, "step": 65000 }, { "epoch": 1.96, "learning_rate": 0.00028096199394381536, "loss": 1.3744, "step": 65500 }, { "epoch": 1.97, "learning_rate": 0.00028081208838431784, "loss": 1.3685, "step": 66000 }, { "epoch": 1.99, "learning_rate": 0.0002806621828248203, "loss": 1.3334, "step": 66500 }, { "epoch": 2.0, "learning_rate": 0.00028051227726532286, "loss": 1.3497, "step": 67000 }, { "epoch": 2.02, "learning_rate": 0.0002803623717058253, "loss": 1.2964, "step": 67500 }, { "epoch": 2.03, "learning_rate": 0.00028021246614632776, "loss": 1.2764, "step": 68000 }, { "epoch": 2.05, "learning_rate": 0.0002800625605868303, "loss": 1.3203, "step": 68500 }, { "epoch": 2.06, "learning_rate": 0.0002799126550273327, "loss": 1.3164, "step": 69000 }, { "epoch": 2.08, "learning_rate": 0.00027976274946783525, "loss": 1.3063, "step": 69500 }, { "epoch": 2.09, "learning_rate": 0.00027961284390833773, "loss": 1.2936, "step": 70000 }, { "epoch": 2.11, "learning_rate": 0.0002794629383488402, "loss": 1.2928, "step": 70500 }, { "epoch": 2.12, "learning_rate": 0.0002793130327893427, "loss": 1.2756, "step": 71000 }, { "epoch": 2.14, "learning_rate": 0.0002791631272298452, "loss": 1.2719, "step": 71500 }, { "epoch": 2.15, "learning_rate": 0.00027901322167034765, "loss": 1.2868, "step": 72000 }, { "epoch": 2.17, "learning_rate": 0.00027886331611085013, "loss": 1.3048, "step": 72500 }, { "epoch": 2.18, "learning_rate": 0.0002787134105513526, "loss": 1.3064, "step": 73000 }, { "epoch": 2.2, "learning_rate": 0.0002785635049918551, "loss": 1.2907, "step": 73500 }, { "epoch": 2.21, "learning_rate": 0.00027841359943235757, "loss": 1.2787, "step": 74000 }, { "epoch": 2.23, "learning_rate": 0.00027826369387286005, "loss": 1.2823, "step": 74500 }, { "epoch": 2.24, "learning_rate": 0.0002781137883133626, "loss": 1.2931, "step": 75000 }, { "epoch": 2.26, "learning_rate": 0.000277963882753865, "loss": 1.3014, "step": 75500 }, { "epoch": 2.27, "learning_rate": 0.00027781397719436754, "loss": 1.2898, "step": 76000 }, { "epoch": 2.29, "learning_rate": 0.00027766407163487, "loss": 1.3193, "step": 76500 }, { "epoch": 2.3, "learning_rate": 0.0002775141660753725, "loss": 1.2956, "step": 77000 }, { "epoch": 2.32, "learning_rate": 0.000277364260515875, "loss": 1.3065, "step": 77500 }, { "epoch": 2.33, "learning_rate": 0.00027721435495637746, "loss": 1.2659, "step": 78000 }, { "epoch": 2.35, "learning_rate": 0.00027706444939687994, "loss": 1.2881, "step": 78500 }, { "epoch": 2.36, "learning_rate": 0.0002769145438373824, "loss": 1.3188, "step": 79000 }, { "epoch": 2.38, "learning_rate": 0.0002767646382778849, "loss": 1.3035, "step": 79500 }, { "epoch": 2.39, "learning_rate": 0.0002766147327183874, "loss": 1.3019, "step": 80000 }, { "epoch": 2.41, "learning_rate": 0.00027646482715888986, "loss": 1.2784, "step": 80500 }, { "epoch": 2.42, "learning_rate": 0.00027631492159939234, "loss": 1.2879, "step": 81000 }, { "epoch": 2.44, "learning_rate": 0.00027616501603989487, "loss": 1.2775, "step": 81500 }, { "epoch": 2.45, "learning_rate": 0.0002760151104803973, "loss": 1.2574, "step": 82000 }, { "epoch": 2.47, "learning_rate": 0.00027586520492089983, "loss": 1.2695, "step": 82500 }, { "epoch": 2.48, "learning_rate": 0.0002757152993614023, "loss": 1.2832, "step": 83000 }, { "epoch": 2.5, "learning_rate": 0.0002755653938019048, "loss": 1.2812, "step": 83500 }, { "epoch": 2.51, "learning_rate": 0.00027541548824240727, "loss": 1.2741, "step": 84000 }, { "epoch": 2.53, "learning_rate": 0.00027526558268290975, "loss": 1.269, "step": 84500 }, { "epoch": 2.54, "learning_rate": 0.00027511567712341223, "loss": 1.2734, "step": 85000 }, { "epoch": 2.56, "learning_rate": 0.0002749657715639147, "loss": 1.3023, "step": 85500 }, { "epoch": 2.57, "learning_rate": 0.0002748158660044172, "loss": 1.2889, "step": 86000 }, { "epoch": 2.59, "learning_rate": 0.00027466596044491967, "loss": 1.2759, "step": 86500 }, { "epoch": 2.6, "learning_rate": 0.00027451605488542215, "loss": 1.3041, "step": 87000 }, { "epoch": 2.62, "learning_rate": 0.0002743661493259246, "loss": 1.3103, "step": 87500 }, { "epoch": 2.63, "learning_rate": 0.00027421624376642716, "loss": 1.2742, "step": 88000 }, { "epoch": 2.65, "learning_rate": 0.0002740663382069296, "loss": 1.2909, "step": 88500 }, { "epoch": 2.66, "learning_rate": 0.0002739164326474321, "loss": 1.2887, "step": 89000 }, { "epoch": 2.68, "learning_rate": 0.0002737665270879346, "loss": 1.2792, "step": 89500 }, { "epoch": 2.69, "learning_rate": 0.0002736166215284371, "loss": 1.2846, "step": 90000 }, { "epoch": 2.71, "learning_rate": 0.00027346671596893956, "loss": 1.2781, "step": 90500 }, { "epoch": 2.72, "learning_rate": 0.00027331681040944204, "loss": 1.2864, "step": 91000 }, { "epoch": 2.74, "learning_rate": 0.0002731669048499445, "loss": 1.2685, "step": 91500 }, { "epoch": 2.75, "learning_rate": 0.000273016999290447, "loss": 1.2737, "step": 92000 }, { "epoch": 2.77, "learning_rate": 0.0002728670937309495, "loss": 1.305, "step": 92500 }, { "epoch": 2.78, "learning_rate": 0.00027271718817145196, "loss": 1.273, "step": 93000 }, { "epoch": 2.8, "learning_rate": 0.00027256728261195444, "loss": 1.2815, "step": 93500 }, { "epoch": 2.81, "learning_rate": 0.0002724173770524569, "loss": 1.2647, "step": 94000 }, { "epoch": 2.83, "learning_rate": 0.0002722674714929594, "loss": 1.2633, "step": 94500 }, { "epoch": 2.84, "learning_rate": 0.0002721175659334619, "loss": 1.2868, "step": 95000 }, { "epoch": 2.86, "learning_rate": 0.0002719676603739644, "loss": 1.3155, "step": 95500 }, { "epoch": 2.87, "learning_rate": 0.00027181775481446683, "loss": 1.2758, "step": 96000 }, { "epoch": 2.89, "learning_rate": 0.00027166784925496937, "loss": 1.2902, "step": 96500 }, { "epoch": 2.9, "learning_rate": 0.00027151794369547185, "loss": 1.2891, "step": 97000 }, { "epoch": 2.92, "learning_rate": 0.00027136803813597427, "loss": 1.2694, "step": 97500 }, { "epoch": 2.93, "learning_rate": 0.0002712181325764768, "loss": 1.2873, "step": 98000 }, { "epoch": 2.95, "learning_rate": 0.0002710682270169793, "loss": 1.2561, "step": 98500 }, { "epoch": 2.96, "learning_rate": 0.00027091832145748176, "loss": 1.2444, "step": 99000 }, { "epoch": 2.98, "learning_rate": 0.00027076841589798424, "loss": 1.2628, "step": 99500 }, { "epoch": 2.99, "learning_rate": 0.0002706185103384867, "loss": 1.2574, "step": 100000 }, { "epoch": 3.01, "learning_rate": 0.0002704686047789892, "loss": 1.2172, "step": 100500 }, { "epoch": 3.02, "learning_rate": 0.0002703186992194917, "loss": 1.2282, "step": 101000 }, { "epoch": 3.04, "learning_rate": 0.00027016879365999416, "loss": 1.2293, "step": 101500 }, { "epoch": 3.05, "learning_rate": 0.0002700188881004967, "loss": 1.2209, "step": 102000 }, { "epoch": 3.07, "learning_rate": 0.0002698689825409991, "loss": 1.2102, "step": 102500 }, { "epoch": 3.08, "learning_rate": 0.0002697190769815016, "loss": 1.2473, "step": 103000 }, { "epoch": 3.1, "learning_rate": 0.00026956917142200413, "loss": 1.2036, "step": 103500 }, { "epoch": 3.11, "learning_rate": 0.00026941926586250656, "loss": 1.2114, "step": 104000 }, { "epoch": 3.13, "learning_rate": 0.0002692693603030091, "loss": 1.2356, "step": 104500 }, { "epoch": 3.14, "learning_rate": 0.0002691194547435116, "loss": 1.202, "step": 105000 }, { "epoch": 3.16, "learning_rate": 0.00026896954918401405, "loss": 1.1923, "step": 105500 }, { "epoch": 3.17, "learning_rate": 0.00026881964362451653, "loss": 1.2245, "step": 106000 }, { "epoch": 3.19, "learning_rate": 0.000268669738065019, "loss": 1.23, "step": 106500 }, { "epoch": 3.2, "learning_rate": 0.0002685198325055215, "loss": 1.2165, "step": 107000 }, { "epoch": 3.22, "learning_rate": 0.00026836992694602397, "loss": 1.2066, "step": 107500 }, { "epoch": 3.23, "learning_rate": 0.00026822002138652645, "loss": 1.2418, "step": 108000 }, { "epoch": 3.25, "learning_rate": 0.000268070115827029, "loss": 1.2099, "step": 108500 }, { "epoch": 3.26, "learning_rate": 0.0002679202102675314, "loss": 1.2304, "step": 109000 }, { "epoch": 3.28, "learning_rate": 0.0002677703047080339, "loss": 1.2137, "step": 109500 }, { "epoch": 3.29, "learning_rate": 0.0002676203991485364, "loss": 1.2338, "step": 110000 }, { "epoch": 3.31, "learning_rate": 0.00026747049358903885, "loss": 1.2007, "step": 110500 }, { "epoch": 3.32, "learning_rate": 0.0002673205880295414, "loss": 1.2158, "step": 111000 }, { "epoch": 3.34, "learning_rate": 0.00026717068247004386, "loss": 1.2081, "step": 111500 }, { "epoch": 3.35, "learning_rate": 0.00026702077691054634, "loss": 1.2243, "step": 112000 }, { "epoch": 3.37, "learning_rate": 0.0002668708713510488, "loss": 1.2122, "step": 112500 }, { "epoch": 3.38, "learning_rate": 0.0002667209657915513, "loss": 1.199, "step": 113000 }, { "epoch": 3.4, "learning_rate": 0.0002665710602320538, "loss": 1.2292, "step": 113500 }, { "epoch": 3.41, "learning_rate": 0.00026642115467255626, "loss": 1.2217, "step": 114000 }, { "epoch": 3.43, "learning_rate": 0.00026627124911305874, "loss": 1.2275, "step": 114500 }, { "epoch": 3.44, "learning_rate": 0.0002661213435535612, "loss": 1.2443, "step": 115000 }, { "epoch": 3.46, "learning_rate": 0.0002659714379940637, "loss": 1.231, "step": 115500 }, { "epoch": 3.47, "learning_rate": 0.0002658215324345662, "loss": 1.2468, "step": 116000 }, { "epoch": 3.49, "learning_rate": 0.0002656716268750687, "loss": 1.2063, "step": 116500 }, { "epoch": 3.5, "learning_rate": 0.00026552172131557114, "loss": 1.2138, "step": 117000 }, { "epoch": 3.52, "learning_rate": 0.00026537181575607367, "loss": 1.2046, "step": 117500 }, { "epoch": 3.53, "learning_rate": 0.00026522191019657615, "loss": 1.206, "step": 118000 }, { "epoch": 3.55, "learning_rate": 0.00026507200463707863, "loss": 1.2186, "step": 118500 }, { "epoch": 3.56, "learning_rate": 0.0002649220990775811, "loss": 1.1986, "step": 119000 }, { "epoch": 3.58, "learning_rate": 0.0002647721935180836, "loss": 1.2548, "step": 119500 }, { "epoch": 3.59, "learning_rate": 0.00026462228795858607, "loss": 1.2416, "step": 120000 }, { "epoch": 3.61, "learning_rate": 0.00026447238239908855, "loss": 1.2324, "step": 120500 }, { "epoch": 3.62, "learning_rate": 0.00026432247683959103, "loss": 1.1915, "step": 121000 }, { "epoch": 3.64, "learning_rate": 0.0002641725712800935, "loss": 1.2168, "step": 121500 }, { "epoch": 3.65, "learning_rate": 0.000264022665720596, "loss": 1.23, "step": 122000 }, { "epoch": 3.67, "learning_rate": 0.00026387276016109847, "loss": 1.1844, "step": 122500 }, { "epoch": 3.68, "learning_rate": 0.000263722854601601, "loss": 1.2306, "step": 123000 }, { "epoch": 3.7, "learning_rate": 0.0002635729490421034, "loss": 1.2315, "step": 123500 }, { "epoch": 3.71, "learning_rate": 0.00026342304348260596, "loss": 1.2321, "step": 124000 }, { "epoch": 3.73, "learning_rate": 0.00026327313792310844, "loss": 1.1979, "step": 124500 }, { "epoch": 3.74, "learning_rate": 0.0002631232323636109, "loss": 1.2148, "step": 125000 }, { "epoch": 3.76, "learning_rate": 0.0002629733268041134, "loss": 1.2141, "step": 125500 }, { "epoch": 3.77, "learning_rate": 0.0002628234212446159, "loss": 1.2422, "step": 126000 }, { "epoch": 3.79, "learning_rate": 0.00026267351568511836, "loss": 1.199, "step": 126500 }, { "epoch": 3.8, "learning_rate": 0.00026252361012562084, "loss": 1.2199, "step": 127000 }, { "epoch": 3.81, "learning_rate": 0.0002623737045661233, "loss": 1.1985, "step": 127500 }, { "epoch": 3.83, "learning_rate": 0.0002622237990066258, "loss": 1.1969, "step": 128000 }, { "epoch": 3.84, "learning_rate": 0.0002620738934471283, "loss": 1.209, "step": 128500 }, { "epoch": 3.86, "learning_rate": 0.00026192398788763075, "loss": 1.2148, "step": 129000 }, { "epoch": 3.87, "learning_rate": 0.00026177408232813323, "loss": 1.2142, "step": 129500 }, { "epoch": 3.89, "learning_rate": 0.0002616241767686357, "loss": 1.1933, "step": 130000 }, { "epoch": 3.9, "learning_rate": 0.00026147427120913825, "loss": 1.1997, "step": 130500 }, { "epoch": 3.92, "learning_rate": 0.00026132436564964067, "loss": 1.2063, "step": 131000 }, { "epoch": 3.93, "learning_rate": 0.0002611744600901432, "loss": 1.1821, "step": 131500 }, { "epoch": 3.95, "learning_rate": 0.0002610245545306457, "loss": 1.1942, "step": 132000 }, { "epoch": 3.96, "learning_rate": 0.00026087464897114817, "loss": 1.19, "step": 132500 }, { "epoch": 3.98, "learning_rate": 0.00026072474341165065, "loss": 1.2073, "step": 133000 }, { "epoch": 3.99, "learning_rate": 0.0002605748378521531, "loss": 1.2181, "step": 133500 }, { "epoch": 4.01, "learning_rate": 0.0002604249322926556, "loss": 1.1659, "step": 134000 }, { "epoch": 4.02, "learning_rate": 0.0002602750267331581, "loss": 1.1496, "step": 134500 }, { "epoch": 4.04, "learning_rate": 0.00026012512117366056, "loss": 1.1468, "step": 135000 }, { "epoch": 4.05, "learning_rate": 0.00025997521561416304, "loss": 1.1483, "step": 135500 }, { "epoch": 4.07, "learning_rate": 0.0002598253100546655, "loss": 1.1771, "step": 136000 }, { "epoch": 4.08, "learning_rate": 0.000259675404495168, "loss": 1.1612, "step": 136500 }, { "epoch": 4.1, "learning_rate": 0.00025952549893567054, "loss": 1.1715, "step": 137000 }, { "epoch": 4.11, "learning_rate": 0.00025937559337617296, "loss": 1.1637, "step": 137500 }, { "epoch": 4.13, "learning_rate": 0.0002592256878166755, "loss": 1.1611, "step": 138000 }, { "epoch": 4.14, "learning_rate": 0.000259075782257178, "loss": 1.1684, "step": 138500 }, { "epoch": 4.16, "learning_rate": 0.0002589258766976804, "loss": 1.1652, "step": 139000 }, { "epoch": 4.17, "learning_rate": 0.00025877597113818293, "loss": 1.1482, "step": 139500 }, { "epoch": 4.19, "learning_rate": 0.0002586260655786854, "loss": 1.163, "step": 140000 }, { "epoch": 4.2, "learning_rate": 0.0002584761600191879, "loss": 1.1449, "step": 140500 }, { "epoch": 4.22, "learning_rate": 0.00025832625445969037, "loss": 1.1691, "step": 141000 }, { "epoch": 4.23, "learning_rate": 0.00025817634890019285, "loss": 1.1522, "step": 141500 }, { "epoch": 4.25, "learning_rate": 0.00025802644334069533, "loss": 1.178, "step": 142000 }, { "epoch": 4.26, "learning_rate": 0.0002578765377811978, "loss": 1.1656, "step": 142500 }, { "epoch": 4.28, "learning_rate": 0.0002577266322217003, "loss": 1.1786, "step": 143000 }, { "epoch": 4.29, "learning_rate": 0.0002575767266622028, "loss": 1.1548, "step": 143500 }, { "epoch": 4.31, "learning_rate": 0.00025742682110270525, "loss": 1.1459, "step": 144000 }, { "epoch": 4.32, "learning_rate": 0.00025727691554320773, "loss": 1.1766, "step": 144500 }, { "epoch": 4.34, "learning_rate": 0.00025712700998371026, "loss": 1.1512, "step": 145000 }, { "epoch": 4.35, "learning_rate": 0.0002569771044242127, "loss": 1.1965, "step": 145500 }, { "epoch": 4.37, "learning_rate": 0.0002568271988647152, "loss": 1.1772, "step": 146000 }, { "epoch": 4.38, "learning_rate": 0.0002566772933052177, "loss": 1.1584, "step": 146500 }, { "epoch": 4.4, "learning_rate": 0.0002565273877457202, "loss": 1.1792, "step": 147000 }, { "epoch": 4.41, "learning_rate": 0.00025637748218622266, "loss": 1.142, "step": 147500 }, { "epoch": 4.43, "learning_rate": 0.00025622757662672514, "loss": 1.1759, "step": 148000 }, { "epoch": 4.44, "learning_rate": 0.0002560776710672276, "loss": 1.1563, "step": 148500 }, { "epoch": 4.46, "learning_rate": 0.0002559277655077301, "loss": 1.1672, "step": 149000 }, { "epoch": 4.47, "learning_rate": 0.0002557778599482326, "loss": 1.1661, "step": 149500 }, { "epoch": 4.49, "learning_rate": 0.0002556279543887351, "loss": 1.1787, "step": 150000 }, { "epoch": 4.5, "learning_rate": 0.00025547804882923754, "loss": 1.1436, "step": 150500 }, { "epoch": 4.52, "learning_rate": 0.00025532814326974, "loss": 1.1812, "step": 151000 }, { "epoch": 4.53, "learning_rate": 0.00025517823771024255, "loss": 1.1749, "step": 151500 }, { "epoch": 4.55, "learning_rate": 0.000255028332150745, "loss": 1.1869, "step": 152000 }, { "epoch": 4.56, "learning_rate": 0.0002548784265912475, "loss": 1.1506, "step": 152500 }, { "epoch": 4.58, "learning_rate": 0.00025472852103175, "loss": 1.1592, "step": 153000 }, { "epoch": 4.59, "learning_rate": 0.00025457861547225247, "loss": 1.1639, "step": 153500 }, { "epoch": 4.61, "learning_rate": 0.00025442870991275495, "loss": 1.1498, "step": 154000 }, { "epoch": 4.62, "learning_rate": 0.00025427880435325743, "loss": 1.1832, "step": 154500 }, { "epoch": 4.64, "learning_rate": 0.0002541288987937599, "loss": 1.1682, "step": 155000 }, { "epoch": 4.65, "learning_rate": 0.0002539789932342624, "loss": 1.1543, "step": 155500 }, { "epoch": 4.67, "learning_rate": 0.00025382908767476487, "loss": 1.1572, "step": 156000 }, { "epoch": 4.68, "learning_rate": 0.00025367918211526735, "loss": 1.1744, "step": 156500 }, { "epoch": 4.7, "learning_rate": 0.0002535292765557698, "loss": 1.1685, "step": 157000 }, { "epoch": 4.71, "learning_rate": 0.0002533793709962723, "loss": 1.1561, "step": 157500 }, { "epoch": 4.73, "learning_rate": 0.00025322946543677484, "loss": 1.1457, "step": 158000 }, { "epoch": 4.74, "learning_rate": 0.00025307955987727726, "loss": 1.1987, "step": 158500 }, { "epoch": 4.76, "learning_rate": 0.0002529296543177798, "loss": 1.1663, "step": 159000 }, { "epoch": 4.77, "learning_rate": 0.0002527797487582823, "loss": 1.164, "step": 159500 }, { "epoch": 4.79, "learning_rate": 0.00025262984319878476, "loss": 1.1853, "step": 160000 }, { "epoch": 4.8, "learning_rate": 0.00025247993763928724, "loss": 1.1601, "step": 160500 }, { "epoch": 4.82, "learning_rate": 0.0002523300320797897, "loss": 1.1691, "step": 161000 }, { "epoch": 4.83, "learning_rate": 0.0002521801265202922, "loss": 1.1645, "step": 161500 }, { "epoch": 4.85, "learning_rate": 0.0002520302209607947, "loss": 1.139, "step": 162000 }, { "epoch": 4.86, "learning_rate": 0.00025188031540129716, "loss": 1.162, "step": 162500 }, { "epoch": 4.88, "learning_rate": 0.00025173040984179963, "loss": 1.1697, "step": 163000 }, { "epoch": 4.89, "learning_rate": 0.0002515805042823021, "loss": 1.159, "step": 163500 }, { "epoch": 4.91, "learning_rate": 0.0002514305987228046, "loss": 1.1414, "step": 164000 }, { "epoch": 4.92, "learning_rate": 0.00025128069316330713, "loss": 1.1484, "step": 164500 }, { "epoch": 4.94, "learning_rate": 0.00025113078760380955, "loss": 1.1821, "step": 165000 }, { "epoch": 4.95, "learning_rate": 0.0002509808820443121, "loss": 1.1406, "step": 165500 }, { "epoch": 4.97, "learning_rate": 0.0002508309764848145, "loss": 1.1538, "step": 166000 }, { "epoch": 4.98, "learning_rate": 0.00025068107092531705, "loss": 1.1307, "step": 166500 }, { "epoch": 5.0, "learning_rate": 0.0002505311653658195, "loss": 1.1903, "step": 167000 }, { "epoch": 5.01, "learning_rate": 0.000250381259806322, "loss": 1.124, "step": 167500 }, { "epoch": 5.03, "learning_rate": 0.0002502313542468245, "loss": 1.1119, "step": 168000 }, { "epoch": 5.04, "learning_rate": 0.00025008144868732696, "loss": 1.121, "step": 168500 }, { "epoch": 5.06, "learning_rate": 0.00024993154312782944, "loss": 1.1289, "step": 169000 }, { "epoch": 5.07, "learning_rate": 0.0002497816375683319, "loss": 1.0986, "step": 169500 }, { "epoch": 5.09, "learning_rate": 0.0002496317320088344, "loss": 1.1228, "step": 170000 }, { "epoch": 5.1, "learning_rate": 0.0002494818264493369, "loss": 1.1112, "step": 170500 }, { "epoch": 5.12, "learning_rate": 0.00024933192088983936, "loss": 1.1248, "step": 171000 }, { "epoch": 5.13, "learning_rate": 0.00024918201533034184, "loss": 1.1139, "step": 171500 }, { "epoch": 5.15, "learning_rate": 0.0002490321097708444, "loss": 1.1427, "step": 172000 }, { "epoch": 5.16, "learning_rate": 0.0002488822042113468, "loss": 1.1168, "step": 172500 }, { "epoch": 5.18, "learning_rate": 0.00024873229865184933, "loss": 1.1151, "step": 173000 }, { "epoch": 5.19, "learning_rate": 0.0002485823930923518, "loss": 1.1027, "step": 173500 }, { "epoch": 5.21, "learning_rate": 0.0002484324875328543, "loss": 1.1284, "step": 174000 }, { "epoch": 5.22, "learning_rate": 0.00024828258197335677, "loss": 1.1148, "step": 174500 }, { "epoch": 5.24, "learning_rate": 0.00024813267641385925, "loss": 1.1379, "step": 175000 }, { "epoch": 5.25, "learning_rate": 0.00024798277085436173, "loss": 1.1542, "step": 175500 }, { "epoch": 5.27, "learning_rate": 0.0002478328652948642, "loss": 1.1037, "step": 176000 }, { "epoch": 5.28, "learning_rate": 0.0002476829597353667, "loss": 1.1284, "step": 176500 }, { "epoch": 5.3, "learning_rate": 0.00024753305417586917, "loss": 1.1461, "step": 177000 }, { "epoch": 5.31, "learning_rate": 0.00024738314861637165, "loss": 1.1157, "step": 177500 }, { "epoch": 5.33, "learning_rate": 0.00024723324305687413, "loss": 1.1337, "step": 178000 }, { "epoch": 5.34, "learning_rate": 0.00024708333749737666, "loss": 1.1273, "step": 178500 }, { "epoch": 5.36, "learning_rate": 0.0002469334319378791, "loss": 1.1226, "step": 179000 }, { "epoch": 5.37, "learning_rate": 0.0002467835263783816, "loss": 1.1079, "step": 179500 }, { "epoch": 5.39, "learning_rate": 0.0002466336208188841, "loss": 1.1279, "step": 180000 }, { "epoch": 5.4, "learning_rate": 0.00024648371525938653, "loss": 1.1123, "step": 180500 }, { "epoch": 5.42, "learning_rate": 0.00024633380969988906, "loss": 1.0985, "step": 181000 }, { "epoch": 5.43, "learning_rate": 0.00024618390414039154, "loss": 1.1128, "step": 181500 }, { "epoch": 5.45, "learning_rate": 0.000246033998580894, "loss": 1.1228, "step": 182000 }, { "epoch": 5.46, "learning_rate": 0.0002458840930213965, "loss": 1.0973, "step": 182500 }, { "epoch": 5.48, "learning_rate": 0.000245734187461899, "loss": 1.1238, "step": 183000 }, { "epoch": 5.49, "learning_rate": 0.00024558428190240146, "loss": 1.1275, "step": 183500 }, { "epoch": 5.51, "learning_rate": 0.00024543437634290394, "loss": 1.1188, "step": 184000 }, { "epoch": 5.52, "learning_rate": 0.0002452844707834064, "loss": 1.1296, "step": 184500 }, { "epoch": 5.54, "learning_rate": 0.00024513456522390895, "loss": 1.1172, "step": 185000 }, { "epoch": 5.55, "learning_rate": 0.0002449846596644114, "loss": 1.1052, "step": 185500 }, { "epoch": 5.57, "learning_rate": 0.0002448347541049139, "loss": 1.1292, "step": 186000 }, { "epoch": 5.58, "learning_rate": 0.0002446848485454164, "loss": 1.0994, "step": 186500 }, { "epoch": 5.6, "learning_rate": 0.0002445349429859188, "loss": 1.1049, "step": 187000 }, { "epoch": 5.61, "learning_rate": 0.00024438503742642135, "loss": 1.1302, "step": 187500 }, { "epoch": 5.63, "learning_rate": 0.00024423513186692383, "loss": 1.1094, "step": 188000 }, { "epoch": 5.64, "learning_rate": 0.0002440852263074263, "loss": 1.1281, "step": 188500 }, { "epoch": 5.66, "learning_rate": 0.0002439353207479288, "loss": 1.1279, "step": 189000 }, { "epoch": 5.67, "learning_rate": 0.00024378541518843127, "loss": 1.1285, "step": 189500 }, { "epoch": 5.69, "learning_rate": 0.00024363550962893375, "loss": 1.1303, "step": 190000 }, { "epoch": 5.7, "learning_rate": 0.00024348560406943625, "loss": 1.0965, "step": 190500 }, { "epoch": 5.71, "learning_rate": 0.0002433356985099387, "loss": 1.1304, "step": 191000 }, { "epoch": 5.73, "learning_rate": 0.0002431857929504412, "loss": 1.1189, "step": 191500 }, { "epoch": 5.74, "learning_rate": 0.0002430358873909437, "loss": 1.1174, "step": 192000 }, { "epoch": 5.76, "learning_rate": 0.00024288598183144614, "loss": 1.1054, "step": 192500 }, { "epoch": 5.77, "learning_rate": 0.00024273607627194865, "loss": 1.1295, "step": 193000 }, { "epoch": 5.79, "learning_rate": 0.00024258617071245113, "loss": 1.1369, "step": 193500 }, { "epoch": 5.8, "learning_rate": 0.0002424362651529536, "loss": 1.1273, "step": 194000 }, { "epoch": 5.82, "learning_rate": 0.0002422863595934561, "loss": 1.1262, "step": 194500 }, { "epoch": 5.83, "learning_rate": 0.0002421364540339586, "loss": 1.1287, "step": 195000 }, { "epoch": 5.85, "learning_rate": 0.00024198654847446105, "loss": 1.1213, "step": 195500 }, { "epoch": 5.86, "learning_rate": 0.00024183664291496356, "loss": 1.1241, "step": 196000 }, { "epoch": 5.88, "learning_rate": 0.00024168673735546604, "loss": 1.1434, "step": 196500 }, { "epoch": 5.89, "learning_rate": 0.00024153683179596854, "loss": 1.1403, "step": 197000 }, { "epoch": 5.91, "learning_rate": 0.000241386926236471, "loss": 1.1259, "step": 197500 }, { "epoch": 5.92, "learning_rate": 0.00024123702067697347, "loss": 1.1449, "step": 198000 }, { "epoch": 5.94, "learning_rate": 0.00024108711511747598, "loss": 1.1108, "step": 198500 }, { "epoch": 5.95, "learning_rate": 0.00024093720955797843, "loss": 1.1183, "step": 199000 }, { "epoch": 5.97, "learning_rate": 0.00024078730399848094, "loss": 1.1448, "step": 199500 }, { "epoch": 5.98, "learning_rate": 0.00024063739843898342, "loss": 1.0985, "step": 200000 }, { "epoch": 6.0, "learning_rate": 0.0002404874928794859, "loss": 1.0981, "step": 200500 }, { "epoch": 6.01, "learning_rate": 0.00024033758731998838, "loss": 1.0913, "step": 201000 }, { "epoch": 6.03, "learning_rate": 0.00024018768176049089, "loss": 1.0394, "step": 201500 }, { "epoch": 6.04, "learning_rate": 0.00024003777620099334, "loss": 1.0508, "step": 202000 }, { "epoch": 6.06, "learning_rate": 0.00023988787064149584, "loss": 1.0817, "step": 202500 }, { "epoch": 6.07, "learning_rate": 0.00023973796508199832, "loss": 1.0739, "step": 203000 }, { "epoch": 6.09, "learning_rate": 0.00023958805952250083, "loss": 1.0685, "step": 203500 }, { "epoch": 6.1, "learning_rate": 0.00023943815396300328, "loss": 1.0605, "step": 204000 }, { "epoch": 6.12, "learning_rate": 0.00023928824840350576, "loss": 1.0902, "step": 204500 }, { "epoch": 6.13, "learning_rate": 0.00023913834284400827, "loss": 1.0964, "step": 205000 }, { "epoch": 6.15, "learning_rate": 0.00023898843728451072, "loss": 1.0949, "step": 205500 }, { "epoch": 6.16, "learning_rate": 0.00023883853172501323, "loss": 1.0703, "step": 206000 }, { "epoch": 6.18, "learning_rate": 0.0002386886261655157, "loss": 1.0941, "step": 206500 }, { "epoch": 6.19, "learning_rate": 0.0002385387206060182, "loss": 1.0928, "step": 207000 }, { "epoch": 6.21, "learning_rate": 0.00023838881504652067, "loss": 1.0928, "step": 207500 }, { "epoch": 6.22, "learning_rate": 0.00023823890948702317, "loss": 1.0852, "step": 208000 }, { "epoch": 6.24, "learning_rate": 0.00023808900392752563, "loss": 1.078, "step": 208500 }, { "epoch": 6.25, "learning_rate": 0.00023793909836802813, "loss": 1.0864, "step": 209000 }, { "epoch": 6.27, "learning_rate": 0.0002377891928085306, "loss": 1.1053, "step": 209500 }, { "epoch": 6.28, "learning_rate": 0.00023763928724903306, "loss": 1.0701, "step": 210000 }, { "epoch": 6.3, "learning_rate": 0.00023748938168953557, "loss": 1.0588, "step": 210500 }, { "epoch": 6.31, "learning_rate": 0.00023733947613003805, "loss": 1.0793, "step": 211000 }, { "epoch": 6.33, "learning_rate": 0.00023718957057054053, "loss": 1.0722, "step": 211500 }, { "epoch": 6.34, "learning_rate": 0.000237039665011043, "loss": 1.083, "step": 212000 }, { "epoch": 6.36, "learning_rate": 0.00023688975945154552, "loss": 1.0791, "step": 212500 }, { "epoch": 6.37, "learning_rate": 0.00023673985389204797, "loss": 1.0621, "step": 213000 }, { "epoch": 6.39, "learning_rate": 0.00023658994833255048, "loss": 1.0754, "step": 213500 }, { "epoch": 6.4, "learning_rate": 0.00023644004277305296, "loss": 1.0701, "step": 214000 }, { "epoch": 6.42, "learning_rate": 0.00023629013721355546, "loss": 1.1036, "step": 214500 }, { "epoch": 6.43, "learning_rate": 0.00023614023165405791, "loss": 1.077, "step": 215000 }, { "epoch": 6.45, "learning_rate": 0.00023599032609456042, "loss": 1.0959, "step": 215500 }, { "epoch": 6.46, "learning_rate": 0.0002358404205350629, "loss": 1.099, "step": 216000 }, { "epoch": 6.48, "learning_rate": 0.00023569051497556535, "loss": 1.0847, "step": 216500 }, { "epoch": 6.49, "learning_rate": 0.00023554060941606786, "loss": 1.0704, "step": 217000 }, { "epoch": 6.51, "learning_rate": 0.00023539070385657034, "loss": 1.0949, "step": 217500 }, { "epoch": 6.52, "learning_rate": 0.00023524079829707282, "loss": 1.043, "step": 218000 }, { "epoch": 6.54, "learning_rate": 0.0002350908927375753, "loss": 1.0851, "step": 218500 }, { "epoch": 6.55, "learning_rate": 0.0002349409871780778, "loss": 1.0905, "step": 219000 }, { "epoch": 6.57, "learning_rate": 0.00023479108161858026, "loss": 1.1033, "step": 219500 }, { "epoch": 6.58, "learning_rate": 0.00023464117605908276, "loss": 1.0843, "step": 220000 }, { "epoch": 6.6, "learning_rate": 0.00023449127049958524, "loss": 1.0866, "step": 220500 }, { "epoch": 6.61, "learning_rate": 0.00023434136494008775, "loss": 1.083, "step": 221000 }, { "epoch": 6.63, "learning_rate": 0.0002341914593805902, "loss": 1.0626, "step": 221500 }, { "epoch": 6.64, "learning_rate": 0.00023404155382109268, "loss": 1.0883, "step": 222000 }, { "epoch": 6.66, "learning_rate": 0.0002338916482615952, "loss": 1.0814, "step": 222500 }, { "epoch": 6.67, "learning_rate": 0.00023374174270209764, "loss": 1.0786, "step": 223000 }, { "epoch": 6.69, "learning_rate": 0.00023359183714260015, "loss": 1.0918, "step": 223500 }, { "epoch": 6.7, "learning_rate": 0.00023344193158310263, "loss": 1.1123, "step": 224000 }, { "epoch": 6.72, "learning_rate": 0.0002332920260236051, "loss": 1.0977, "step": 224500 }, { "epoch": 6.73, "learning_rate": 0.0002331421204641076, "loss": 1.0824, "step": 225000 }, { "epoch": 6.75, "learning_rate": 0.0002329922149046101, "loss": 1.0794, "step": 225500 }, { "epoch": 6.76, "learning_rate": 0.00023284230934511255, "loss": 1.0786, "step": 226000 }, { "epoch": 6.78, "learning_rate": 0.00023269240378561505, "loss": 1.0803, "step": 226500 }, { "epoch": 6.79, "learning_rate": 0.00023254249822611753, "loss": 1.0705, "step": 227000 }, { "epoch": 6.81, "learning_rate": 0.00023239259266662004, "loss": 1.0924, "step": 227500 }, { "epoch": 6.82, "learning_rate": 0.0002322426871071225, "loss": 1.114, "step": 228000 }, { "epoch": 6.84, "learning_rate": 0.00023209278154762497, "loss": 1.084, "step": 228500 }, { "epoch": 6.85, "learning_rate": 0.00023194287598812745, "loss": 1.0792, "step": 229000 }, { "epoch": 6.87, "learning_rate": 0.00023179297042862993, "loss": 1.0865, "step": 229500 }, { "epoch": 6.88, "learning_rate": 0.00023164306486913244, "loss": 1.0994, "step": 230000 }, { "epoch": 6.9, "learning_rate": 0.0002314931593096349, "loss": 1.0989, "step": 230500 }, { "epoch": 6.91, "learning_rate": 0.0002313432537501374, "loss": 1.0918, "step": 231000 }, { "epoch": 6.93, "learning_rate": 0.00023119334819063987, "loss": 1.0877, "step": 231500 }, { "epoch": 6.94, "learning_rate": 0.00023104344263114238, "loss": 1.1128, "step": 232000 }, { "epoch": 6.96, "learning_rate": 0.00023089353707164483, "loss": 1.1044, "step": 232500 }, { "epoch": 6.97, "learning_rate": 0.00023074363151214734, "loss": 1.1081, "step": 233000 }, { "epoch": 6.99, "learning_rate": 0.00023059372595264982, "loss": 1.1013, "step": 233500 }, { "epoch": 7.0, "learning_rate": 0.00023044382039315227, "loss": 1.0976, "step": 234000 }, { "epoch": 7.02, "learning_rate": 0.00023029391483365478, "loss": 1.0356, "step": 234500 }, { "epoch": 7.03, "learning_rate": 0.00023014400927415726, "loss": 1.0288, "step": 235000 }, { "epoch": 7.05, "learning_rate": 0.00022999410371465974, "loss": 1.0486, "step": 235500 }, { "epoch": 7.06, "learning_rate": 0.00022984419815516222, "loss": 1.025, "step": 236000 }, { "epoch": 7.08, "learning_rate": 0.00022969429259566472, "loss": 1.0447, "step": 236500 }, { "epoch": 7.09, "learning_rate": 0.00022954438703616718, "loss": 1.0225, "step": 237000 }, { "epoch": 7.11, "learning_rate": 0.00022939448147666968, "loss": 1.0404, "step": 237500 }, { "epoch": 7.12, "learning_rate": 0.00022924457591717216, "loss": 1.0592, "step": 238000 }, { "epoch": 7.14, "learning_rate": 0.00022909467035767467, "loss": 1.0669, "step": 238500 }, { "epoch": 7.15, "learning_rate": 0.00022894476479817712, "loss": 1.0136, "step": 239000 }, { "epoch": 7.17, "learning_rate": 0.0002287948592386796, "loss": 1.0531, "step": 239500 }, { "epoch": 7.18, "learning_rate": 0.0002286449536791821, "loss": 1.0587, "step": 240000 }, { "epoch": 7.2, "learning_rate": 0.00022849504811968456, "loss": 1.0354, "step": 240500 }, { "epoch": 7.21, "learning_rate": 0.00022834514256018707, "loss": 1.0317, "step": 241000 }, { "epoch": 7.23, "learning_rate": 0.00022819523700068955, "loss": 1.049, "step": 241500 }, { "epoch": 7.24, "learning_rate": 0.00022804533144119203, "loss": 1.0428, "step": 242000 }, { "epoch": 7.26, "learning_rate": 0.0002278954258816945, "loss": 1.0389, "step": 242500 }, { "epoch": 7.27, "learning_rate": 0.000227745520322197, "loss": 1.0639, "step": 243000 }, { "epoch": 7.29, "learning_rate": 0.00022759561476269947, "loss": 1.055, "step": 243500 }, { "epoch": 7.3, "learning_rate": 0.00022744570920320197, "loss": 1.0589, "step": 244000 }, { "epoch": 7.32, "learning_rate": 0.00022729580364370445, "loss": 1.0759, "step": 244500 }, { "epoch": 7.33, "learning_rate": 0.00022714589808420696, "loss": 1.0566, "step": 245000 }, { "epoch": 7.35, "learning_rate": 0.0002269959925247094, "loss": 1.0576, "step": 245500 }, { "epoch": 7.36, "learning_rate": 0.0002268460869652119, "loss": 1.0446, "step": 246000 }, { "epoch": 7.38, "learning_rate": 0.00022669618140571437, "loss": 1.0614, "step": 246500 }, { "epoch": 7.39, "learning_rate": 0.00022654627584621685, "loss": 1.0674, "step": 247000 }, { "epoch": 7.41, "learning_rate": 0.00022639637028671936, "loss": 1.0605, "step": 247500 }, { "epoch": 7.42, "learning_rate": 0.0002262464647272218, "loss": 1.0616, "step": 248000 }, { "epoch": 7.44, "learning_rate": 0.00022609655916772432, "loss": 1.0473, "step": 248500 }, { "epoch": 7.45, "learning_rate": 0.0002259466536082268, "loss": 1.0488, "step": 249000 }, { "epoch": 7.47, "learning_rate": 0.0002257967480487293, "loss": 1.0662, "step": 249500 }, { "epoch": 7.48, "learning_rate": 0.00022564684248923175, "loss": 1.0475, "step": 250000 }, { "epoch": 7.5, "learning_rate": 0.00022549693692973426, "loss": 1.0448, "step": 250500 }, { "epoch": 7.51, "learning_rate": 0.00022534703137023674, "loss": 1.0371, "step": 251000 }, { "epoch": 7.53, "learning_rate": 0.0002251971258107392, "loss": 1.047, "step": 251500 }, { "epoch": 7.54, "learning_rate": 0.0002250472202512417, "loss": 1.0632, "step": 252000 }, { "epoch": 7.56, "learning_rate": 0.00022489731469174418, "loss": 1.0787, "step": 252500 }, { "epoch": 7.57, "learning_rate": 0.00022474740913224666, "loss": 1.0454, "step": 253000 }, { "epoch": 7.59, "learning_rate": 0.00022459750357274914, "loss": 1.0528, "step": 253500 }, { "epoch": 7.6, "learning_rate": 0.00022444759801325164, "loss": 1.0629, "step": 254000 }, { "epoch": 7.61, "learning_rate": 0.0002242976924537541, "loss": 1.0607, "step": 254500 }, { "epoch": 7.63, "learning_rate": 0.0002241477868942566, "loss": 1.0579, "step": 255000 }, { "epoch": 7.64, "learning_rate": 0.00022399788133475908, "loss": 1.057, "step": 255500 }, { "epoch": 7.66, "learning_rate": 0.0002238479757752616, "loss": 1.0746, "step": 256000 }, { "epoch": 7.67, "learning_rate": 0.00022369807021576404, "loss": 1.0578, "step": 256500 }, { "epoch": 7.69, "learning_rate": 0.00022354816465626655, "loss": 1.0726, "step": 257000 }, { "epoch": 7.7, "learning_rate": 0.00022339825909676903, "loss": 1.0731, "step": 257500 }, { "epoch": 7.72, "learning_rate": 0.00022324835353727148, "loss": 1.0652, "step": 258000 }, { "epoch": 7.73, "learning_rate": 0.000223098447977774, "loss": 1.0546, "step": 258500 }, { "epoch": 7.75, "learning_rate": 0.00022294854241827647, "loss": 1.049, "step": 259000 }, { "epoch": 7.76, "learning_rate": 0.00022279863685877895, "loss": 1.0479, "step": 259500 }, { "epoch": 7.78, "learning_rate": 0.00022264873129928143, "loss": 1.057, "step": 260000 }, { "epoch": 7.79, "learning_rate": 0.00022249882573978393, "loss": 1.0626, "step": 260500 }, { "epoch": 7.81, "learning_rate": 0.00022234892018028639, "loss": 1.0556, "step": 261000 }, { "epoch": 7.82, "learning_rate": 0.0002221990146207889, "loss": 1.0423, "step": 261500 }, { "epoch": 7.84, "learning_rate": 0.00022204910906129137, "loss": 1.0678, "step": 262000 }, { "epoch": 7.85, "learning_rate": 0.00022189920350179388, "loss": 1.0418, "step": 262500 }, { "epoch": 7.87, "learning_rate": 0.00022174929794229633, "loss": 1.0566, "step": 263000 }, { "epoch": 7.88, "learning_rate": 0.0002215993923827988, "loss": 1.0467, "step": 263500 }, { "epoch": 7.9, "learning_rate": 0.00022144948682330132, "loss": 1.0397, "step": 264000 }, { "epoch": 7.91, "learning_rate": 0.00022129958126380377, "loss": 1.0795, "step": 264500 }, { "epoch": 7.93, "learning_rate": 0.00022114967570430628, "loss": 1.05, "step": 265000 }, { "epoch": 7.94, "learning_rate": 0.00022099977014480873, "loss": 1.0511, "step": 265500 }, { "epoch": 7.96, "learning_rate": 0.00022084986458531123, "loss": 1.0666, "step": 266000 }, { "epoch": 7.97, "learning_rate": 0.00022069995902581371, "loss": 1.0601, "step": 266500 }, { "epoch": 7.99, "learning_rate": 0.00022055005346631622, "loss": 1.0815, "step": 267000 }, { "epoch": 8.0, "learning_rate": 0.00022040014790681867, "loss": 1.0539, "step": 267500 }, { "epoch": 8.02, "learning_rate": 0.00022025024234732118, "loss": 1.0139, "step": 268000 }, { "epoch": 8.03, "learning_rate": 0.00022010033678782366, "loss": 1.0292, "step": 268500 }, { "epoch": 8.05, "learning_rate": 0.00021995043122832614, "loss": 1.0055, "step": 269000 }, { "epoch": 8.06, "learning_rate": 0.00021980052566882862, "loss": 1.0114, "step": 269500 }, { "epoch": 8.08, "learning_rate": 0.0002196506201093311, "loss": 1.0318, "step": 270000 }, { "epoch": 8.09, "learning_rate": 0.00021950071454983358, "loss": 1.0284, "step": 270500 }, { "epoch": 8.11, "learning_rate": 0.00021935080899033606, "loss": 1.0077, "step": 271000 }, { "epoch": 8.12, "learning_rate": 0.00021920090343083856, "loss": 1.0129, "step": 271500 }, { "epoch": 8.14, "learning_rate": 0.00021905099787134102, "loss": 1.0188, "step": 272000 }, { "epoch": 8.15, "learning_rate": 0.00021890109231184352, "loss": 1.008, "step": 272500 }, { "epoch": 8.17, "learning_rate": 0.000218751186752346, "loss": 1.0319, "step": 273000 }, { "epoch": 8.18, "learning_rate": 0.0002186012811928485, "loss": 1.0362, "step": 273500 }, { "epoch": 8.2, "learning_rate": 0.00021845137563335096, "loss": 1.0303, "step": 274000 }, { "epoch": 8.21, "learning_rate": 0.00021830147007385347, "loss": 1.0276, "step": 274500 }, { "epoch": 8.23, "learning_rate": 0.00021815156451435595, "loss": 1.0256, "step": 275000 }, { "epoch": 8.24, "learning_rate": 0.0002180016589548584, "loss": 1.0215, "step": 275500 }, { "epoch": 8.26, "learning_rate": 0.0002178517533953609, "loss": 1.012, "step": 276000 }, { "epoch": 8.27, "learning_rate": 0.0002177018478358634, "loss": 1.0229, "step": 276500 }, { "epoch": 8.29, "learning_rate": 0.00021755194227636587, "loss": 1.0079, "step": 277000 }, { "epoch": 8.3, "learning_rate": 0.00021740203671686835, "loss": 1.0275, "step": 277500 }, { "epoch": 8.32, "learning_rate": 0.00021725213115737085, "loss": 1.0022, "step": 278000 }, { "epoch": 8.33, "learning_rate": 0.0002171022255978733, "loss": 1.0108, "step": 278500 }, { "epoch": 8.35, "learning_rate": 0.0002169523200383758, "loss": 1.0248, "step": 279000 }, { "epoch": 8.36, "learning_rate": 0.0002168024144788783, "loss": 1.025, "step": 279500 }, { "epoch": 8.38, "learning_rate": 0.0002166525089193808, "loss": 1.0151, "step": 280000 }, { "epoch": 8.39, "learning_rate": 0.00021650260335988325, "loss": 1.0081, "step": 280500 }, { "epoch": 8.41, "learning_rate": 0.00021635269780038573, "loss": 1.0151, "step": 281000 }, { "epoch": 8.42, "learning_rate": 0.00021620279224088824, "loss": 1.0098, "step": 281500 }, { "epoch": 8.44, "learning_rate": 0.0002160528866813907, "loss": 1.0255, "step": 282000 }, { "epoch": 8.45, "learning_rate": 0.0002159029811218932, "loss": 1.0216, "step": 282500 }, { "epoch": 8.47, "learning_rate": 0.00021575307556239565, "loss": 1.0285, "step": 283000 }, { "epoch": 8.48, "learning_rate": 0.00021560317000289815, "loss": 1.0195, "step": 283500 }, { "epoch": 8.5, "learning_rate": 0.00021545326444340063, "loss": 1.0269, "step": 284000 }, { "epoch": 8.51, "learning_rate": 0.00021530335888390314, "loss": 1.0348, "step": 284500 }, { "epoch": 8.53, "learning_rate": 0.0002151534533244056, "loss": 1.0037, "step": 285000 }, { "epoch": 8.54, "learning_rate": 0.0002150035477649081, "loss": 1.0407, "step": 285500 }, { "epoch": 8.56, "learning_rate": 0.00021485364220541058, "loss": 1.0273, "step": 286000 }, { "epoch": 8.57, "learning_rate": 0.00021470373664591306, "loss": 1.0197, "step": 286500 }, { "epoch": 8.59, "learning_rate": 0.00021455383108641554, "loss": 1.0156, "step": 287000 }, { "epoch": 8.6, "learning_rate": 0.00021440392552691802, "loss": 1.0333, "step": 287500 }, { "epoch": 8.62, "learning_rate": 0.0002142540199674205, "loss": 1.0113, "step": 288000 }, { "epoch": 8.63, "learning_rate": 0.00021410411440792298, "loss": 1.0421, "step": 288500 }, { "epoch": 8.65, "learning_rate": 0.00021395420884842548, "loss": 1.0202, "step": 289000 }, { "epoch": 8.66, "learning_rate": 0.00021380430328892794, "loss": 1.0197, "step": 289500 }, { "epoch": 8.68, "learning_rate": 0.00021365439772943044, "loss": 1.0391, "step": 290000 }, { "epoch": 8.69, "learning_rate": 0.00021350449216993292, "loss": 1.0372, "step": 290500 }, { "epoch": 8.71, "learning_rate": 0.00021335458661043543, "loss": 1.0432, "step": 291000 }, { "epoch": 8.72, "learning_rate": 0.00021320468105093788, "loss": 1.0229, "step": 291500 }, { "epoch": 8.74, "learning_rate": 0.0002130547754914404, "loss": 1.0282, "step": 292000 }, { "epoch": 8.75, "learning_rate": 0.00021290486993194287, "loss": 1.0188, "step": 292500 }, { "epoch": 8.77, "learning_rate": 0.00021275496437244532, "loss": 1.0605, "step": 293000 }, { "epoch": 8.78, "learning_rate": 0.00021260505881294783, "loss": 1.0407, "step": 293500 }, { "epoch": 8.8, "learning_rate": 0.0002124551532534503, "loss": 1.0132, "step": 294000 }, { "epoch": 8.81, "learning_rate": 0.00021230524769395279, "loss": 1.0232, "step": 294500 }, { "epoch": 8.83, "learning_rate": 0.00021215534213445527, "loss": 1.0321, "step": 295000 }, { "epoch": 8.84, "learning_rate": 0.00021200543657495777, "loss": 1.0252, "step": 295500 }, { "epoch": 8.86, "learning_rate": 0.00021185553101546022, "loss": 1.0205, "step": 296000 }, { "epoch": 8.87, "learning_rate": 0.00021170562545596273, "loss": 1.0223, "step": 296500 }, { "epoch": 8.89, "learning_rate": 0.0002115557198964652, "loss": 1.0235, "step": 297000 }, { "epoch": 8.9, "learning_rate": 0.00021140581433696772, "loss": 1.0271, "step": 297500 }, { "epoch": 8.92, "learning_rate": 0.00021125590877747017, "loss": 1.0192, "step": 298000 }, { "epoch": 8.93, "learning_rate": 0.00021110600321797268, "loss": 1.0331, "step": 298500 }, { "epoch": 8.95, "learning_rate": 0.00021095609765847516, "loss": 1.0256, "step": 299000 }, { "epoch": 8.96, "learning_rate": 0.0002108061920989776, "loss": 1.035, "step": 299500 }, { "epoch": 8.98, "learning_rate": 0.00021065628653948011, "loss": 1.0241, "step": 300000 }, { "epoch": 8.99, "learning_rate": 0.00021050638097998257, "loss": 1.0195, "step": 300500 }, { "epoch": 9.01, "learning_rate": 0.00021035647542048507, "loss": 1.0039, "step": 301000 }, { "epoch": 9.02, "learning_rate": 0.00021020656986098755, "loss": 1.0019, "step": 301500 }, { "epoch": 9.04, "learning_rate": 0.00021005666430149006, "loss": 0.9851, "step": 302000 }, { "epoch": 9.05, "learning_rate": 0.0002099067587419925, "loss": 0.9638, "step": 302500 }, { "epoch": 9.07, "learning_rate": 0.00020975685318249502, "loss": 0.996, "step": 303000 }, { "epoch": 9.08, "learning_rate": 0.0002096069476229975, "loss": 0.9858, "step": 303500 }, { "epoch": 9.1, "learning_rate": 0.00020945704206349998, "loss": 0.9689, "step": 304000 }, { "epoch": 9.11, "learning_rate": 0.00020930713650400246, "loss": 0.9925, "step": 304500 }, { "epoch": 9.13, "learning_rate": 0.00020915723094450494, "loss": 0.9896, "step": 305000 }, { "epoch": 9.14, "learning_rate": 0.00020900732538500742, "loss": 0.9741, "step": 305500 }, { "epoch": 9.16, "learning_rate": 0.0002088574198255099, "loss": 0.9792, "step": 306000 }, { "epoch": 9.17, "learning_rate": 0.0002087075142660124, "loss": 0.9736, "step": 306500 }, { "epoch": 9.19, "learning_rate": 0.00020855760870651486, "loss": 0.9702, "step": 307000 }, { "epoch": 9.2, "learning_rate": 0.00020840770314701736, "loss": 0.9688, "step": 307500 }, { "epoch": 9.22, "learning_rate": 0.00020825779758751984, "loss": 0.9854, "step": 308000 }, { "epoch": 9.23, "learning_rate": 0.00020810789202802235, "loss": 0.9834, "step": 308500 }, { "epoch": 9.25, "learning_rate": 0.0002079579864685248, "loss": 0.9926, "step": 309000 }, { "epoch": 9.26, "learning_rate": 0.0002078080809090273, "loss": 1.0002, "step": 309500 }, { "epoch": 9.28, "learning_rate": 0.0002076581753495298, "loss": 1.0092, "step": 310000 }, { "epoch": 9.29, "learning_rate": 0.00020750826979003227, "loss": 0.9872, "step": 310500 }, { "epoch": 9.31, "learning_rate": 0.00020735836423053475, "loss": 0.9807, "step": 311000 }, { "epoch": 9.32, "learning_rate": 0.00020720845867103723, "loss": 0.9805, "step": 311500 }, { "epoch": 9.34, "learning_rate": 0.0002070585531115397, "loss": 0.9743, "step": 312000 }, { "epoch": 9.35, "learning_rate": 0.00020690864755204218, "loss": 1.0109, "step": 312500 }, { "epoch": 9.37, "learning_rate": 0.0002067587419925447, "loss": 1.0164, "step": 313000 }, { "epoch": 9.38, "learning_rate": 0.00020660883643304714, "loss": 0.9788, "step": 313500 }, { "epoch": 9.4, "learning_rate": 0.00020645893087354965, "loss": 0.9881, "step": 314000 }, { "epoch": 9.41, "learning_rate": 0.00020630902531405213, "loss": 0.9781, "step": 314500 }, { "epoch": 9.43, "learning_rate": 0.00020615911975455464, "loss": 0.9985, "step": 315000 }, { "epoch": 9.44, "learning_rate": 0.0002060092141950571, "loss": 0.9937, "step": 315500 }, { "epoch": 9.46, "learning_rate": 0.0002058593086355596, "loss": 0.9835, "step": 316000 }, { "epoch": 9.47, "learning_rate": 0.00020570940307606208, "loss": 0.9791, "step": 316500 }, { "epoch": 9.49, "learning_rate": 0.00020555949751656453, "loss": 0.9924, "step": 317000 }, { "epoch": 9.5, "learning_rate": 0.00020540959195706703, "loss": 0.9722, "step": 317500 }, { "epoch": 9.51, "learning_rate": 0.0002052596863975695, "loss": 1.0106, "step": 318000 }, { "epoch": 9.53, "learning_rate": 0.000205109780838072, "loss": 1.0008, "step": 318500 }, { "epoch": 9.54, "learning_rate": 0.00020495987527857447, "loss": 0.9777, "step": 319000 }, { "epoch": 9.56, "learning_rate": 0.00020480996971907698, "loss": 1.0173, "step": 319500 }, { "epoch": 9.57, "learning_rate": 0.00020466006415957943, "loss": 0.9985, "step": 320000 }, { "epoch": 9.59, "learning_rate": 0.00020451015860008194, "loss": 1.0091, "step": 320500 }, { "epoch": 9.6, "learning_rate": 0.00020436025304058442, "loss": 1.002, "step": 321000 }, { "epoch": 9.62, "learning_rate": 0.0002042103474810869, "loss": 1.0042, "step": 321500 }, { "epoch": 9.63, "learning_rate": 0.00020406044192158938, "loss": 0.9925, "step": 322000 }, { "epoch": 9.65, "learning_rate": 0.00020391053636209188, "loss": 0.9938, "step": 322500 }, { "epoch": 9.66, "learning_rate": 0.00020376063080259434, "loss": 0.9866, "step": 323000 }, { "epoch": 9.68, "learning_rate": 0.00020361072524309682, "loss": 0.9834, "step": 323500 }, { "epoch": 9.69, "learning_rate": 0.00020346081968359932, "loss": 0.994, "step": 324000 }, { "epoch": 9.71, "learning_rate": 0.00020331091412410178, "loss": 0.9763, "step": 324500 }, { "epoch": 9.72, "learning_rate": 0.00020316100856460428, "loss": 0.9965, "step": 325000 }, { "epoch": 9.74, "learning_rate": 0.00020301110300510676, "loss": 0.9956, "step": 325500 }, { "epoch": 9.75, "learning_rate": 0.00020286119744560927, "loss": 1.0259, "step": 326000 }, { "epoch": 9.77, "learning_rate": 0.00020271129188611172, "loss": 1.0093, "step": 326500 }, { "epoch": 9.78, "learning_rate": 0.00020256138632661423, "loss": 1.0012, "step": 327000 }, { "epoch": 9.8, "learning_rate": 0.0002024114807671167, "loss": 0.9822, "step": 327500 }, { "epoch": 9.81, "learning_rate": 0.0002022615752076192, "loss": 1.0148, "step": 328000 }, { "epoch": 9.83, "learning_rate": 0.00020211166964812167, "loss": 1.0118, "step": 328500 }, { "epoch": 9.84, "learning_rate": 0.00020196176408862415, "loss": 1.0119, "step": 329000 }, { "epoch": 9.86, "learning_rate": 0.00020181185852912663, "loss": 0.9932, "step": 329500 }, { "epoch": 9.87, "learning_rate": 0.0002016619529696291, "loss": 0.9958, "step": 330000 }, { "epoch": 9.89, "learning_rate": 0.0002015120474101316, "loss": 0.987, "step": 330500 }, { "epoch": 9.9, "learning_rate": 0.00020136214185063406, "loss": 0.9916, "step": 331000 }, { "epoch": 9.92, "learning_rate": 0.00020121223629113657, "loss": 0.9936, "step": 331500 }, { "epoch": 9.93, "learning_rate": 0.00020106233073163905, "loss": 0.9892, "step": 332000 }, { "epoch": 9.95, "learning_rate": 0.00020091242517214156, "loss": 0.9884, "step": 332500 }, { "epoch": 9.96, "learning_rate": 0.000200762519612644, "loss": 0.9871, "step": 333000 }, { "epoch": 9.98, "learning_rate": 0.00020061261405314652, "loss": 0.9911, "step": 333500 }, { "epoch": 9.99, "learning_rate": 0.000200462708493649, "loss": 1.005, "step": 334000 }, { "epoch": 10.01, "learning_rate": 0.00020031280293415145, "loss": 0.9681, "step": 334500 }, { "epoch": 10.02, "learning_rate": 0.00020016289737465395, "loss": 0.941, "step": 335000 }, { "epoch": 10.04, "learning_rate": 0.00020001299181515643, "loss": 0.9515, "step": 335500 }, { "epoch": 10.05, "learning_rate": 0.0001998630862556589, "loss": 0.9457, "step": 336000 }, { "epoch": 10.07, "learning_rate": 0.0001997131806961614, "loss": 0.9595, "step": 336500 }, { "epoch": 10.08, "learning_rate": 0.0001995632751366639, "loss": 0.9628, "step": 337000 }, { "epoch": 10.1, "learning_rate": 0.00019941336957716635, "loss": 0.9668, "step": 337500 }, { "epoch": 10.11, "learning_rate": 0.00019926346401766886, "loss": 0.9714, "step": 338000 }, { "epoch": 10.13, "learning_rate": 0.00019911355845817134, "loss": 0.9626, "step": 338500 }, { "epoch": 10.14, "learning_rate": 0.00019896365289867382, "loss": 0.9666, "step": 339000 }, { "epoch": 10.16, "learning_rate": 0.0001988137473391763, "loss": 0.9846, "step": 339500 }, { "epoch": 10.17, "learning_rate": 0.0001986638417796788, "loss": 0.9748, "step": 340000 }, { "epoch": 10.19, "learning_rate": 0.00019851393622018126, "loss": 0.9658, "step": 340500 }, { "epoch": 10.2, "learning_rate": 0.00019836403066068374, "loss": 0.9439, "step": 341000 }, { "epoch": 10.22, "learning_rate": 0.00019821412510118624, "loss": 0.9577, "step": 341500 }, { "epoch": 10.23, "learning_rate": 0.0001980642195416887, "loss": 0.9623, "step": 342000 }, { "epoch": 10.25, "learning_rate": 0.0001979143139821912, "loss": 0.9688, "step": 342500 }, { "epoch": 10.26, "learning_rate": 0.00019776440842269368, "loss": 0.9634, "step": 343000 }, { "epoch": 10.28, "learning_rate": 0.0001976145028631962, "loss": 0.9597, "step": 343500 }, { "epoch": 10.29, "learning_rate": 0.00019746459730369864, "loss": 0.9426, "step": 344000 }, { "epoch": 10.31, "learning_rate": 0.00019731469174420115, "loss": 0.9573, "step": 344500 }, { "epoch": 10.32, "learning_rate": 0.00019716478618470363, "loss": 0.9767, "step": 345000 }, { "epoch": 10.34, "learning_rate": 0.0001970148806252061, "loss": 0.9874, "step": 345500 }, { "epoch": 10.35, "learning_rate": 0.00019686497506570859, "loss": 0.9517, "step": 346000 }, { "epoch": 10.37, "learning_rate": 0.00019671506950621107, "loss": 0.9413, "step": 346500 }, { "epoch": 10.38, "learning_rate": 0.00019656516394671354, "loss": 0.9585, "step": 347000 }, { "epoch": 10.4, "learning_rate": 0.00019641525838721602, "loss": 0.9686, "step": 347500 }, { "epoch": 10.41, "learning_rate": 0.00019626535282771853, "loss": 0.9761, "step": 348000 }, { "epoch": 10.43, "learning_rate": 0.00019611544726822098, "loss": 0.9653, "step": 348500 }, { "epoch": 10.44, "learning_rate": 0.0001959655417087235, "loss": 0.9879, "step": 349000 }, { "epoch": 10.46, "learning_rate": 0.00019581563614922597, "loss": 0.9566, "step": 349500 }, { "epoch": 10.47, "learning_rate": 0.00019566573058972848, "loss": 0.9423, "step": 350000 }, { "epoch": 10.49, "learning_rate": 0.00019551582503023093, "loss": 0.961, "step": 350500 }, { "epoch": 10.5, "learning_rate": 0.00019536591947073344, "loss": 0.9736, "step": 351000 }, { "epoch": 10.52, "learning_rate": 0.00019521601391123591, "loss": 0.9619, "step": 351500 }, { "epoch": 10.53, "learning_rate": 0.0001950661083517384, "loss": 0.9978, "step": 352000 }, { "epoch": 10.55, "learning_rate": 0.00019491620279224087, "loss": 0.9906, "step": 352500 }, { "epoch": 10.56, "learning_rate": 0.00019476629723274335, "loss": 0.9671, "step": 353000 }, { "epoch": 10.58, "learning_rate": 0.00019461639167324583, "loss": 0.9574, "step": 353500 }, { "epoch": 10.59, "learning_rate": 0.0001944664861137483, "loss": 0.9554, "step": 354000 }, { "epoch": 10.61, "learning_rate": 0.00019431658055425082, "loss": 0.9729, "step": 354500 }, { "epoch": 10.62, "learning_rate": 0.00019416667499475327, "loss": 0.9697, "step": 355000 }, { "epoch": 10.64, "learning_rate": 0.00019401676943525578, "loss": 0.9532, "step": 355500 }, { "epoch": 10.65, "learning_rate": 0.00019386686387575826, "loss": 0.9498, "step": 356000 }, { "epoch": 10.67, "learning_rate": 0.00019371695831626074, "loss": 0.9542, "step": 356500 }, { "epoch": 10.68, "learning_rate": 0.00019356705275676322, "loss": 0.9728, "step": 357000 }, { "epoch": 10.7, "learning_rate": 0.00019341714719726572, "loss": 0.9584, "step": 357500 }, { "epoch": 10.71, "learning_rate": 0.00019326724163776818, "loss": 0.9891, "step": 358000 }, { "epoch": 10.73, "learning_rate": 0.00019311733607827066, "loss": 0.9618, "step": 358500 }, { "epoch": 10.74, "learning_rate": 0.00019296743051877316, "loss": 0.9722, "step": 359000 }, { "epoch": 10.76, "learning_rate": 0.00019281752495927561, "loss": 0.9755, "step": 359500 }, { "epoch": 10.77, "learning_rate": 0.00019266761939977812, "loss": 0.9829, "step": 360000 }, { "epoch": 10.79, "learning_rate": 0.0001925177138402806, "loss": 0.9789, "step": 360500 }, { "epoch": 10.8, "learning_rate": 0.0001923678082807831, "loss": 0.9987, "step": 361000 }, { "epoch": 10.82, "learning_rate": 0.00019221790272128556, "loss": 0.9822, "step": 361500 }, { "epoch": 10.83, "learning_rate": 0.00019206799716178807, "loss": 0.9569, "step": 362000 }, { "epoch": 10.85, "learning_rate": 0.00019191809160229055, "loss": 0.9708, "step": 362500 }, { "epoch": 10.86, "learning_rate": 0.00019176818604279303, "loss": 0.9615, "step": 363000 }, { "epoch": 10.88, "learning_rate": 0.0001916182804832955, "loss": 0.9736, "step": 363500 }, { "epoch": 10.89, "learning_rate": 0.000191468374923798, "loss": 0.9682, "step": 364000 }, { "epoch": 10.91, "learning_rate": 0.00019131846936430046, "loss": 0.9811, "step": 364500 }, { "epoch": 10.92, "learning_rate": 0.00019116856380480294, "loss": 0.9694, "step": 365000 }, { "epoch": 10.94, "learning_rate": 0.00019101865824530545, "loss": 0.9779, "step": 365500 }, { "epoch": 10.95, "learning_rate": 0.0001908687526858079, "loss": 0.9342, "step": 366000 }, { "epoch": 10.97, "learning_rate": 0.0001907188471263104, "loss": 0.9632, "step": 366500 }, { "epoch": 10.98, "learning_rate": 0.0001905689415668129, "loss": 0.9736, "step": 367000 }, { "epoch": 11.0, "learning_rate": 0.0001904190360073154, "loss": 0.985, "step": 367500 }, { "epoch": 11.01, "learning_rate": 0.00019026913044781785, "loss": 0.9425, "step": 368000 }, { "epoch": 11.03, "learning_rate": 0.00019011922488832036, "loss": 0.9354, "step": 368500 }, { "epoch": 11.04, "learning_rate": 0.00018996931932882283, "loss": 0.9302, "step": 369000 }, { "epoch": 11.06, "learning_rate": 0.00018981941376932531, "loss": 0.9397, "step": 369500 }, { "epoch": 11.07, "learning_rate": 0.0001896695082098278, "loss": 0.9397, "step": 370000 }, { "epoch": 11.09, "learning_rate": 0.00018951960265033027, "loss": 0.9249, "step": 370500 }, { "epoch": 11.1, "learning_rate": 0.00018936969709083275, "loss": 0.9421, "step": 371000 }, { "epoch": 11.12, "learning_rate": 0.00018921979153133523, "loss": 0.9346, "step": 371500 }, { "epoch": 11.13, "learning_rate": 0.00018906988597183774, "loss": 0.9198, "step": 372000 }, { "epoch": 11.15, "learning_rate": 0.0001889199804123402, "loss": 0.9489, "step": 372500 }, { "epoch": 11.16, "learning_rate": 0.0001887700748528427, "loss": 0.9155, "step": 373000 }, { "epoch": 11.18, "learning_rate": 0.00018862016929334518, "loss": 0.9275, "step": 373500 }, { "epoch": 11.19, "learning_rate": 0.00018847026373384766, "loss": 0.9397, "step": 374000 }, { "epoch": 11.21, "learning_rate": 0.00018832035817435014, "loss": 0.9401, "step": 374500 }, { "epoch": 11.22, "learning_rate": 0.00018817045261485264, "loss": 0.9301, "step": 375000 }, { "epoch": 11.24, "learning_rate": 0.0001880205470553551, "loss": 0.9641, "step": 375500 }, { "epoch": 11.25, "learning_rate": 0.00018787064149585758, "loss": 0.9419, "step": 376000 }, { "epoch": 11.27, "learning_rate": 0.00018772073593636008, "loss": 0.9402, "step": 376500 }, { "epoch": 11.28, "learning_rate": 0.00018757083037686253, "loss": 0.9323, "step": 377000 }, { "epoch": 11.3, "learning_rate": 0.00018742092481736504, "loss": 0.9392, "step": 377500 }, { "epoch": 11.31, "learning_rate": 0.00018727101925786752, "loss": 0.9338, "step": 378000 }, { "epoch": 11.33, "learning_rate": 0.00018712111369837003, "loss": 0.9176, "step": 378500 }, { "epoch": 11.34, "learning_rate": 0.00018697120813887248, "loss": 0.9402, "step": 379000 }, { "epoch": 11.36, "learning_rate": 0.00018682130257937499, "loss": 0.9319, "step": 379500 }, { "epoch": 11.37, "learning_rate": 0.00018667139701987747, "loss": 0.9298, "step": 380000 }, { "epoch": 11.39, "learning_rate": 0.00018652149146037995, "loss": 0.9318, "step": 380500 }, { "epoch": 11.4, "learning_rate": 0.00018637158590088243, "loss": 0.9349, "step": 381000 }, { "epoch": 11.41, "learning_rate": 0.00018622168034138493, "loss": 0.9365, "step": 381500 }, { "epoch": 11.43, "learning_rate": 0.00018607177478188738, "loss": 0.9404, "step": 382000 }, { "epoch": 11.44, "learning_rate": 0.00018592186922238986, "loss": 0.9246, "step": 382500 }, { "epoch": 11.46, "learning_rate": 0.00018577196366289237, "loss": 0.948, "step": 383000 }, { "epoch": 11.47, "learning_rate": 0.00018562205810339482, "loss": 0.9426, "step": 383500 }, { "epoch": 11.49, "learning_rate": 0.00018547215254389733, "loss": 0.9429, "step": 384000 }, { "epoch": 11.5, "learning_rate": 0.0001853222469843998, "loss": 0.9294, "step": 384500 }, { "epoch": 11.52, "learning_rate": 0.00018517234142490232, "loss": 0.9402, "step": 385000 }, { "epoch": 11.53, "learning_rate": 0.00018502243586540477, "loss": 0.9434, "step": 385500 }, { "epoch": 11.55, "learning_rate": 0.00018487253030590727, "loss": 0.9512, "step": 386000 }, { "epoch": 11.56, "learning_rate": 0.00018472262474640975, "loss": 0.9245, "step": 386500 }, { "epoch": 11.58, "learning_rate": 0.00018457271918691223, "loss": 0.9354, "step": 387000 }, { "epoch": 11.59, "learning_rate": 0.0001844228136274147, "loss": 0.9534, "step": 387500 }, { "epoch": 11.61, "learning_rate": 0.0001842729080679172, "loss": 0.9501, "step": 388000 }, { "epoch": 11.62, "learning_rate": 0.00018412300250841967, "loss": 0.9676, "step": 388500 }, { "epoch": 11.64, "learning_rate": 0.00018397309694892215, "loss": 0.9375, "step": 389000 }, { "epoch": 11.65, "learning_rate": 0.00018382319138942466, "loss": 0.9536, "step": 389500 }, { "epoch": 11.67, "learning_rate": 0.0001836732858299271, "loss": 0.9287, "step": 390000 }, { "epoch": 11.68, "learning_rate": 0.00018352338027042962, "loss": 0.9583, "step": 390500 }, { "epoch": 11.7, "learning_rate": 0.0001833734747109321, "loss": 0.9223, "step": 391000 }, { "epoch": 11.71, "learning_rate": 0.0001832235691514346, "loss": 0.9448, "step": 391500 }, { "epoch": 11.73, "learning_rate": 0.00018307366359193706, "loss": 0.928, "step": 392000 }, { "epoch": 11.74, "learning_rate": 0.00018292375803243956, "loss": 0.9339, "step": 392500 }, { "epoch": 11.76, "learning_rate": 0.00018277385247294202, "loss": 0.9231, "step": 393000 }, { "epoch": 11.77, "learning_rate": 0.00018262394691344452, "loss": 0.9589, "step": 393500 }, { "epoch": 11.79, "learning_rate": 0.000182474041353947, "loss": 0.9459, "step": 394000 }, { "epoch": 11.8, "learning_rate": 0.00018232413579444945, "loss": 0.9415, "step": 394500 }, { "epoch": 11.82, "learning_rate": 0.00018217423023495196, "loss": 0.9438, "step": 395000 }, { "epoch": 11.83, "learning_rate": 0.00018202432467545444, "loss": 0.943, "step": 395500 }, { "epoch": 11.85, "learning_rate": 0.00018187441911595695, "loss": 0.9526, "step": 396000 }, { "epoch": 11.86, "learning_rate": 0.0001817245135564594, "loss": 0.9404, "step": 396500 }, { "epoch": 11.88, "learning_rate": 0.0001815746079969619, "loss": 0.9394, "step": 397000 }, { "epoch": 11.89, "learning_rate": 0.00018142470243746439, "loss": 0.9484, "step": 397500 }, { "epoch": 11.91, "learning_rate": 0.00018127479687796687, "loss": 0.9396, "step": 398000 }, { "epoch": 11.92, "learning_rate": 0.00018112489131846934, "loss": 0.9565, "step": 398500 }, { "epoch": 11.94, "learning_rate": 0.00018097498575897185, "loss": 0.9344, "step": 399000 }, { "epoch": 11.95, "learning_rate": 0.0001808250801994743, "loss": 0.946, "step": 399500 }, { "epoch": 11.97, "learning_rate": 0.00018067517463997678, "loss": 0.9487, "step": 400000 }, { "epoch": 11.98, "learning_rate": 0.0001805252690804793, "loss": 0.9571, "step": 400500 }, { "epoch": 12.0, "learning_rate": 0.00018037536352098174, "loss": 0.931, "step": 401000 }, { "epoch": 12.01, "learning_rate": 0.00018022545796148425, "loss": 0.9103, "step": 401500 }, { "epoch": 12.03, "learning_rate": 0.00018007555240198673, "loss": 0.9013, "step": 402000 }, { "epoch": 12.04, "learning_rate": 0.00017992564684248924, "loss": 0.8962, "step": 402500 }, { "epoch": 12.06, "learning_rate": 0.0001797757412829917, "loss": 0.9058, "step": 403000 }, { "epoch": 12.07, "learning_rate": 0.0001796258357234942, "loss": 0.8948, "step": 403500 }, { "epoch": 12.09, "learning_rate": 0.00017947593016399667, "loss": 0.9097, "step": 404000 }, { "epoch": 12.1, "learning_rate": 0.00017932602460449915, "loss": 0.9052, "step": 404500 }, { "epoch": 12.12, "learning_rate": 0.00017917611904500163, "loss": 0.9029, "step": 405000 }, { "epoch": 12.13, "learning_rate": 0.00017902621348550414, "loss": 0.9128, "step": 405500 }, { "epoch": 12.15, "learning_rate": 0.0001788763079260066, "loss": 0.8998, "step": 406000 }, { "epoch": 12.16, "learning_rate": 0.00017872640236650907, "loss": 0.9071, "step": 406500 }, { "epoch": 12.18, "learning_rate": 0.00017857649680701158, "loss": 0.9057, "step": 407000 }, { "epoch": 12.19, "learning_rate": 0.00017842659124751403, "loss": 0.9013, "step": 407500 }, { "epoch": 12.21, "learning_rate": 0.00017827668568801654, "loss": 0.909, "step": 408000 }, { "epoch": 12.22, "learning_rate": 0.00017812678012851902, "loss": 0.9145, "step": 408500 }, { "epoch": 12.24, "learning_rate": 0.00017797687456902152, "loss": 0.9153, "step": 409000 }, { "epoch": 12.25, "learning_rate": 0.00017782696900952398, "loss": 0.8972, "step": 409500 }, { "epoch": 12.27, "learning_rate": 0.00017767706345002648, "loss": 0.9036, "step": 410000 }, { "epoch": 12.28, "learning_rate": 0.00017752715789052894, "loss": 0.8955, "step": 410500 }, { "epoch": 12.3, "learning_rate": 0.00017737725233103144, "loss": 0.91, "step": 411000 }, { "epoch": 12.31, "learning_rate": 0.00017722734677153392, "loss": 0.9116, "step": 411500 }, { "epoch": 12.33, "learning_rate": 0.00017707744121203637, "loss": 0.9182, "step": 412000 }, { "epoch": 12.34, "learning_rate": 0.00017692753565253888, "loss": 0.9106, "step": 412500 }, { "epoch": 12.36, "learning_rate": 0.00017677763009304136, "loss": 0.9066, "step": 413000 }, { "epoch": 12.37, "learning_rate": 0.00017662772453354387, "loss": 0.8996, "step": 413500 }, { "epoch": 12.39, "learning_rate": 0.00017647781897404632, "loss": 0.9269, "step": 414000 }, { "epoch": 12.4, "learning_rate": 0.00017632791341454883, "loss": 0.9236, "step": 414500 }, { "epoch": 12.42, "learning_rate": 0.0001761780078550513, "loss": 0.9161, "step": 415000 }, { "epoch": 12.43, "learning_rate": 0.00017602810229555378, "loss": 0.9081, "step": 415500 }, { "epoch": 12.45, "learning_rate": 0.00017587819673605626, "loss": 0.8983, "step": 416000 }, { "epoch": 12.46, "learning_rate": 0.00017572829117655877, "loss": 0.928, "step": 416500 }, { "epoch": 12.48, "learning_rate": 0.00017557838561706122, "loss": 0.9179, "step": 417000 }, { "epoch": 12.49, "learning_rate": 0.0001754284800575637, "loss": 0.9006, "step": 417500 }, { "epoch": 12.51, "learning_rate": 0.0001752785744980662, "loss": 0.916, "step": 418000 }, { "epoch": 12.52, "learning_rate": 0.00017512866893856866, "loss": 0.8964, "step": 418500 }, { "epoch": 12.54, "learning_rate": 0.00017497876337907117, "loss": 0.9402, "step": 419000 }, { "epoch": 12.55, "learning_rate": 0.00017482885781957365, "loss": 0.8994, "step": 419500 }, { "epoch": 12.57, "learning_rate": 0.00017467895226007616, "loss": 0.9059, "step": 420000 }, { "epoch": 12.58, "learning_rate": 0.0001745290467005786, "loss": 0.9007, "step": 420500 }, { "epoch": 12.6, "learning_rate": 0.00017437914114108111, "loss": 0.9265, "step": 421000 }, { "epoch": 12.61, "learning_rate": 0.0001742292355815836, "loss": 0.9151, "step": 421500 }, { "epoch": 12.63, "learning_rate": 0.00017407933002208607, "loss": 0.9292, "step": 422000 }, { "epoch": 12.64, "learning_rate": 0.00017392942446258855, "loss": 0.9086, "step": 422500 }, { "epoch": 12.66, "learning_rate": 0.00017377951890309106, "loss": 0.9122, "step": 423000 }, { "epoch": 12.67, "learning_rate": 0.0001736296133435935, "loss": 0.922, "step": 423500 }, { "epoch": 12.69, "learning_rate": 0.000173479707784096, "loss": 0.9351, "step": 424000 }, { "epoch": 12.7, "learning_rate": 0.0001733298022245985, "loss": 0.9161, "step": 424500 }, { "epoch": 12.72, "learning_rate": 0.00017317989666510095, "loss": 0.9264, "step": 425000 }, { "epoch": 12.73, "learning_rate": 0.00017302999110560346, "loss": 0.9007, "step": 425500 }, { "epoch": 12.75, "learning_rate": 0.00017288008554610594, "loss": 0.916, "step": 426000 }, { "epoch": 12.76, "learning_rate": 0.00017273017998660844, "loss": 0.9292, "step": 426500 }, { "epoch": 12.78, "learning_rate": 0.0001725802744271109, "loss": 0.9327, "step": 427000 }, { "epoch": 12.79, "learning_rate": 0.0001724303688676134, "loss": 0.9064, "step": 427500 }, { "epoch": 12.81, "learning_rate": 0.00017228046330811585, "loss": 0.9498, "step": 428000 }, { "epoch": 12.82, "learning_rate": 0.00017213055774861836, "loss": 0.9193, "step": 428500 }, { "epoch": 12.84, "learning_rate": 0.00017198065218912084, "loss": 0.9282, "step": 429000 }, { "epoch": 12.85, "learning_rate": 0.0001718307466296233, "loss": 0.9219, "step": 429500 }, { "epoch": 12.87, "learning_rate": 0.0001716808410701258, "loss": 0.9236, "step": 430000 }, { "epoch": 12.88, "learning_rate": 0.00017153093551062828, "loss": 0.9184, "step": 430500 }, { "epoch": 12.9, "learning_rate": 0.00017138102995113079, "loss": 0.9215, "step": 431000 }, { "epoch": 12.91, "learning_rate": 0.00017123112439163324, "loss": 0.9065, "step": 431500 }, { "epoch": 12.93, "learning_rate": 0.00017108121883213575, "loss": 0.9278, "step": 432000 }, { "epoch": 12.94, "learning_rate": 0.00017093131327263823, "loss": 0.9045, "step": 432500 }, { "epoch": 12.96, "learning_rate": 0.0001707814077131407, "loss": 0.9374, "step": 433000 }, { "epoch": 12.97, "learning_rate": 0.00017063150215364318, "loss": 0.9255, "step": 433500 }, { "epoch": 12.99, "learning_rate": 0.0001704815965941457, "loss": 0.9168, "step": 434000 }, { "epoch": 13.0, "learning_rate": 0.00017033169103464814, "loss": 0.9244, "step": 434500 }, { "epoch": 13.02, "learning_rate": 0.00017018178547515065, "loss": 0.8795, "step": 435000 }, { "epoch": 13.03, "learning_rate": 0.00017003187991565313, "loss": 0.8548, "step": 435500 }, { "epoch": 13.05, "learning_rate": 0.00016988197435615558, "loss": 0.887, "step": 436000 }, { "epoch": 13.06, "learning_rate": 0.0001697320687966581, "loss": 0.8818, "step": 436500 }, { "epoch": 13.08, "learning_rate": 0.00016958216323716057, "loss": 0.8991, "step": 437000 }, { "epoch": 13.09, "learning_rate": 0.00016943225767766307, "loss": 0.8855, "step": 437500 }, { "epoch": 13.11, "learning_rate": 0.00016928235211816553, "loss": 0.8916, "step": 438000 }, { "epoch": 13.12, "learning_rate": 0.00016913244655866803, "loss": 0.873, "step": 438500 }, { "epoch": 13.14, "learning_rate": 0.0001689825409991705, "loss": 0.87, "step": 439000 }, { "epoch": 13.15, "learning_rate": 0.000168832635439673, "loss": 0.8893, "step": 439500 }, { "epoch": 13.17, "learning_rate": 0.00016868272988017547, "loss": 0.8951, "step": 440000 }, { "epoch": 13.18, "learning_rate": 0.00016853282432067798, "loss": 0.9063, "step": 440500 }, { "epoch": 13.2, "learning_rate": 0.00016838291876118043, "loss": 0.8915, "step": 441000 }, { "epoch": 13.21, "learning_rate": 0.0001682330132016829, "loss": 0.8849, "step": 441500 }, { "epoch": 13.23, "learning_rate": 0.00016808310764218542, "loss": 0.8909, "step": 442000 }, { "epoch": 13.24, "learning_rate": 0.00016793320208268787, "loss": 0.9017, "step": 442500 }, { "epoch": 13.26, "learning_rate": 0.00016778329652319038, "loss": 0.8993, "step": 443000 }, { "epoch": 13.27, "learning_rate": 0.00016763339096369286, "loss": 0.8929, "step": 443500 }, { "epoch": 13.29, "learning_rate": 0.00016748348540419536, "loss": 0.8822, "step": 444000 }, { "epoch": 13.3, "learning_rate": 0.00016733357984469782, "loss": 0.8886, "step": 444500 }, { "epoch": 13.31, "learning_rate": 0.00016718367428520032, "loss": 0.8884, "step": 445000 }, { "epoch": 13.33, "learning_rate": 0.00016703376872570277, "loss": 0.8747, "step": 445500 }, { "epoch": 13.34, "learning_rate": 0.00016688386316620528, "loss": 0.8814, "step": 446000 }, { "epoch": 13.36, "learning_rate": 0.00016673395760670776, "loss": 0.8915, "step": 446500 }, { "epoch": 13.37, "learning_rate": 0.00016658405204721027, "loss": 0.8929, "step": 447000 }, { "epoch": 13.39, "learning_rate": 0.00016643414648771272, "loss": 0.874, "step": 447500 }, { "epoch": 13.4, "learning_rate": 0.0001662842409282152, "loss": 0.8707, "step": 448000 }, { "epoch": 13.42, "learning_rate": 0.0001661343353687177, "loss": 0.9065, "step": 448500 }, { "epoch": 13.43, "learning_rate": 0.00016598442980922016, "loss": 0.8994, "step": 449000 }, { "epoch": 13.45, "learning_rate": 0.00016583452424972267, "loss": 0.8945, "step": 449500 }, { "epoch": 13.46, "learning_rate": 0.00016568461869022514, "loss": 0.8753, "step": 450000 }, { "epoch": 13.48, "learning_rate": 0.00016553471313072762, "loss": 0.894, "step": 450500 }, { "epoch": 13.49, "learning_rate": 0.0001653848075712301, "loss": 0.8811, "step": 451000 }, { "epoch": 13.51, "learning_rate": 0.0001652349020117326, "loss": 0.8717, "step": 451500 }, { "epoch": 13.52, "learning_rate": 0.00016508499645223506, "loss": 0.9009, "step": 452000 }, { "epoch": 13.54, "learning_rate": 0.00016493509089273757, "loss": 0.901, "step": 452500 }, { "epoch": 13.55, "learning_rate": 0.00016478518533324005, "loss": 0.9092, "step": 453000 }, { "epoch": 13.57, "learning_rate": 0.0001646352797737425, "loss": 0.892, "step": 453500 }, { "epoch": 13.58, "learning_rate": 0.000164485374214245, "loss": 0.8891, "step": 454000 }, { "epoch": 13.6, "learning_rate": 0.0001643354686547475, "loss": 0.8965, "step": 454500 }, { "epoch": 13.61, "learning_rate": 0.00016418556309525, "loss": 0.8944, "step": 455000 }, { "epoch": 13.63, "learning_rate": 0.00016403565753575245, "loss": 0.905, "step": 455500 }, { "epoch": 13.64, "learning_rate": 0.00016388575197625495, "loss": 0.8966, "step": 456000 }, { "epoch": 13.66, "learning_rate": 0.00016373584641675743, "loss": 0.8835, "step": 456500 }, { "epoch": 13.67, "learning_rate": 0.0001635859408572599, "loss": 0.908, "step": 457000 }, { "epoch": 13.69, "learning_rate": 0.0001634360352977624, "loss": 0.9004, "step": 457500 }, { "epoch": 13.7, "learning_rate": 0.0001632861297382649, "loss": 0.8757, "step": 458000 }, { "epoch": 13.72, "learning_rate": 0.00016313622417876735, "loss": 0.8926, "step": 458500 }, { "epoch": 13.73, "learning_rate": 0.00016298631861926986, "loss": 0.884, "step": 459000 }, { "epoch": 13.75, "learning_rate": 0.00016283641305977234, "loss": 0.8753, "step": 459500 }, { "epoch": 13.76, "learning_rate": 0.0001626865075002748, "loss": 0.8954, "step": 460000 }, { "epoch": 13.78, "learning_rate": 0.0001625366019407773, "loss": 0.8842, "step": 460500 }, { "epoch": 13.79, "learning_rate": 0.00016238669638127978, "loss": 0.8919, "step": 461000 }, { "epoch": 13.81, "learning_rate": 0.00016223679082178228, "loss": 0.9095, "step": 461500 }, { "epoch": 13.82, "learning_rate": 0.00016208688526228474, "loss": 0.9141, "step": 462000 }, { "epoch": 13.84, "learning_rate": 0.00016193697970278724, "loss": 0.8925, "step": 462500 }, { "epoch": 13.85, "learning_rate": 0.0001617870741432897, "loss": 0.9005, "step": 463000 }, { "epoch": 13.87, "learning_rate": 0.0001616371685837922, "loss": 0.9009, "step": 463500 }, { "epoch": 13.88, "learning_rate": 0.00016148726302429468, "loss": 0.8958, "step": 464000 }, { "epoch": 13.9, "learning_rate": 0.0001613373574647972, "loss": 0.8925, "step": 464500 }, { "epoch": 13.91, "learning_rate": 0.00016118745190529964, "loss": 0.8725, "step": 465000 }, { "epoch": 13.93, "learning_rate": 0.00016103754634580212, "loss": 0.8792, "step": 465500 }, { "epoch": 13.94, "learning_rate": 0.00016088764078630463, "loss": 0.8859, "step": 466000 }, { "epoch": 13.96, "learning_rate": 0.00016073773522680708, "loss": 0.8717, "step": 466500 }, { "epoch": 13.97, "learning_rate": 0.00016058782966730958, "loss": 0.8951, "step": 467000 }, { "epoch": 13.99, "learning_rate": 0.00016043792410781206, "loss": 0.9077, "step": 467500 }, { "epoch": 14.0, "learning_rate": 0.00016028801854831454, "loss": 0.8778, "step": 468000 }, { "epoch": 14.02, "learning_rate": 0.00016013811298881702, "loss": 0.8637, "step": 468500 }, { "epoch": 14.03, "learning_rate": 0.00015998820742931953, "loss": 0.8469, "step": 469000 }, { "epoch": 14.05, "learning_rate": 0.00015983830186982198, "loss": 0.848, "step": 469500 }, { "epoch": 14.06, "learning_rate": 0.0001596883963103245, "loss": 0.8591, "step": 470000 }, { "epoch": 14.08, "learning_rate": 0.00015953849075082697, "loss": 0.8624, "step": 470500 }, { "epoch": 14.09, "learning_rate": 0.00015938858519132942, "loss": 0.8571, "step": 471000 }, { "epoch": 14.11, "learning_rate": 0.00015923867963183193, "loss": 0.8636, "step": 471500 }, { "epoch": 14.12, "learning_rate": 0.0001590887740723344, "loss": 0.8616, "step": 472000 }, { "epoch": 14.14, "learning_rate": 0.00015893886851283691, "loss": 0.8613, "step": 472500 }, { "epoch": 14.15, "learning_rate": 0.00015878896295333937, "loss": 0.866, "step": 473000 }, { "epoch": 14.17, "learning_rate": 0.00015863905739384187, "loss": 0.8645, "step": 473500 }, { "epoch": 14.18, "learning_rate": 0.00015848915183434435, "loss": 0.8551, "step": 474000 }, { "epoch": 14.2, "learning_rate": 0.00015833924627484683, "loss": 0.8524, "step": 474500 }, { "epoch": 14.21, "learning_rate": 0.0001581893407153493, "loss": 0.8559, "step": 475000 }, { "epoch": 14.23, "learning_rate": 0.00015803943515585182, "loss": 0.853, "step": 475500 }, { "epoch": 14.24, "learning_rate": 0.00015788952959635427, "loss": 0.8808, "step": 476000 }, { "epoch": 14.26, "learning_rate": 0.00015773962403685678, "loss": 0.8758, "step": 476500 }, { "epoch": 14.27, "learning_rate": 0.00015758971847735926, "loss": 0.8381, "step": 477000 }, { "epoch": 14.29, "learning_rate": 0.0001574398129178617, "loss": 0.853, "step": 477500 }, { "epoch": 14.3, "learning_rate": 0.00015728990735836422, "loss": 0.8603, "step": 478000 }, { "epoch": 14.32, "learning_rate": 0.0001571400017988667, "loss": 0.8534, "step": 478500 }, { "epoch": 14.33, "learning_rate": 0.0001569900962393692, "loss": 0.8671, "step": 479000 }, { "epoch": 14.35, "learning_rate": 0.00015684019067987165, "loss": 0.8505, "step": 479500 }, { "epoch": 14.36, "learning_rate": 0.00015669028512037416, "loss": 0.881, "step": 480000 }, { "epoch": 14.38, "learning_rate": 0.00015654037956087664, "loss": 0.8569, "step": 480500 }, { "epoch": 14.39, "learning_rate": 0.00015639047400137912, "loss": 0.8722, "step": 481000 }, { "epoch": 14.41, "learning_rate": 0.0001562405684418816, "loss": 0.8788, "step": 481500 }, { "epoch": 14.42, "learning_rate": 0.0001560906628823841, "loss": 0.8657, "step": 482000 }, { "epoch": 14.44, "learning_rate": 0.00015594075732288656, "loss": 0.8841, "step": 482500 }, { "epoch": 14.45, "learning_rate": 0.00015579085176338904, "loss": 0.8502, "step": 483000 }, { "epoch": 14.47, "learning_rate": 0.00015564094620389155, "loss": 0.8658, "step": 483500 }, { "epoch": 14.48, "learning_rate": 0.000155491040644394, "loss": 0.8626, "step": 484000 }, { "epoch": 14.5, "learning_rate": 0.0001553411350848965, "loss": 0.8678, "step": 484500 }, { "epoch": 14.51, "learning_rate": 0.00015519122952539898, "loss": 0.8488, "step": 485000 }, { "epoch": 14.53, "learning_rate": 0.00015504132396590146, "loss": 0.8703, "step": 485500 }, { "epoch": 14.54, "learning_rate": 0.00015489141840640394, "loss": 0.86, "step": 486000 }, { "epoch": 14.56, "learning_rate": 0.00015474151284690645, "loss": 0.8576, "step": 486500 }, { "epoch": 14.57, "learning_rate": 0.0001545916072874089, "loss": 0.8711, "step": 487000 }, { "epoch": 14.59, "learning_rate": 0.0001544417017279114, "loss": 0.8735, "step": 487500 }, { "epoch": 14.6, "learning_rate": 0.0001542917961684139, "loss": 0.8646, "step": 488000 }, { "epoch": 14.62, "learning_rate": 0.0001541418906089164, "loss": 0.8547, "step": 488500 }, { "epoch": 14.63, "learning_rate": 0.00015399198504941885, "loss": 0.8827, "step": 489000 }, { "epoch": 14.65, "learning_rate": 0.00015384207948992133, "loss": 0.8632, "step": 489500 }, { "epoch": 14.66, "learning_rate": 0.00015369217393042383, "loss": 0.8675, "step": 490000 }, { "epoch": 14.68, "learning_rate": 0.00015354226837092629, "loss": 0.866, "step": 490500 }, { "epoch": 14.69, "learning_rate": 0.0001533923628114288, "loss": 0.8879, "step": 491000 }, { "epoch": 14.71, "learning_rate": 0.00015324245725193127, "loss": 0.878, "step": 491500 }, { "epoch": 14.72, "learning_rate": 0.00015309255169243375, "loss": 0.8786, "step": 492000 }, { "epoch": 14.74, "learning_rate": 0.00015294264613293623, "loss": 0.8686, "step": 492500 }, { "epoch": 14.75, "learning_rate": 0.00015279274057343874, "loss": 0.8693, "step": 493000 }, { "epoch": 14.77, "learning_rate": 0.0001526428350139412, "loss": 0.8709, "step": 493500 }, { "epoch": 14.78, "learning_rate": 0.0001524929294544437, "loss": 0.8924, "step": 494000 }, { "epoch": 14.8, "learning_rate": 0.00015234302389494618, "loss": 0.8855, "step": 494500 }, { "epoch": 14.81, "learning_rate": 0.00015219311833544863, "loss": 0.8685, "step": 495000 }, { "epoch": 14.83, "learning_rate": 0.00015204321277595114, "loss": 0.8675, "step": 495500 }, { "epoch": 14.84, "learning_rate": 0.00015189330721645362, "loss": 0.8694, "step": 496000 }, { "epoch": 14.86, "learning_rate": 0.00015174340165695612, "loss": 0.8793, "step": 496500 }, { "epoch": 14.87, "learning_rate": 0.00015159349609745857, "loss": 0.8753, "step": 497000 }, { "epoch": 14.89, "learning_rate": 0.00015144359053796108, "loss": 0.8651, "step": 497500 }, { "epoch": 14.9, "learning_rate": 0.00015129368497846356, "loss": 0.8686, "step": 498000 }, { "epoch": 14.92, "learning_rate": 0.00015114377941896604, "loss": 0.8725, "step": 498500 }, { "epoch": 14.93, "learning_rate": 0.00015099387385946852, "loss": 0.8623, "step": 499000 }, { "epoch": 14.95, "learning_rate": 0.00015084396829997103, "loss": 0.8735, "step": 499500 }, { "epoch": 14.96, "learning_rate": 0.00015069406274047348, "loss": 0.886, "step": 500000 }, { "epoch": 14.98, "learning_rate": 0.00015054415718097599, "loss": 0.8643, "step": 500500 }, { "epoch": 14.99, "learning_rate": 0.00015039425162147847, "loss": 0.8696, "step": 501000 }, { "epoch": 15.01, "learning_rate": 0.00015024434606198092, "loss": 0.8534, "step": 501500 }, { "epoch": 15.02, "learning_rate": 0.00015009444050248342, "loss": 0.8438, "step": 502000 }, { "epoch": 15.04, "learning_rate": 0.0001499445349429859, "loss": 0.8503, "step": 502500 }, { "epoch": 15.05, "learning_rate": 0.00014979462938348838, "loss": 0.8536, "step": 503000 }, { "epoch": 15.07, "learning_rate": 0.0001496447238239909, "loss": 0.8408, "step": 503500 }, { "epoch": 15.08, "learning_rate": 0.00014949481826449334, "loss": 0.842, "step": 504000 }, { "epoch": 15.1, "learning_rate": 0.00014934491270499582, "loss": 0.841, "step": 504500 }, { "epoch": 15.11, "learning_rate": 0.00014919500714549833, "loss": 0.8277, "step": 505000 }, { "epoch": 15.13, "learning_rate": 0.0001490451015860008, "loss": 0.8472, "step": 505500 }, { "epoch": 15.14, "learning_rate": 0.0001488951960265033, "loss": 0.8271, "step": 506000 }, { "epoch": 15.16, "learning_rate": 0.00014874529046700577, "loss": 0.8331, "step": 506500 }, { "epoch": 15.17, "learning_rate": 0.00014859538490750825, "loss": 0.8436, "step": 507000 }, { "epoch": 15.19, "learning_rate": 0.00014844547934801075, "loss": 0.8388, "step": 507500 }, { "epoch": 15.2, "learning_rate": 0.00014829557378851323, "loss": 0.8495, "step": 508000 }, { "epoch": 15.21, "learning_rate": 0.0001481456682290157, "loss": 0.8436, "step": 508500 }, { "epoch": 15.23, "learning_rate": 0.0001479957626695182, "loss": 0.834, "step": 509000 }, { "epoch": 15.24, "learning_rate": 0.00014784585711002067, "loss": 0.8232, "step": 509500 }, { "epoch": 15.26, "learning_rate": 0.00014769595155052315, "loss": 0.8357, "step": 510000 }, { "epoch": 15.27, "learning_rate": 0.00014754604599102563, "loss": 0.8504, "step": 510500 }, { "epoch": 15.29, "learning_rate": 0.0001473961404315281, "loss": 0.8328, "step": 511000 }, { "epoch": 15.3, "learning_rate": 0.00014724623487203062, "loss": 0.8639, "step": 511500 }, { "epoch": 15.32, "learning_rate": 0.0001470963293125331, "loss": 0.8546, "step": 512000 }, { "epoch": 15.33, "learning_rate": 0.00014694642375303558, "loss": 0.8487, "step": 512500 }, { "epoch": 15.35, "learning_rate": 0.00014679651819353806, "loss": 0.8203, "step": 513000 }, { "epoch": 15.36, "learning_rate": 0.00014664661263404054, "loss": 0.8422, "step": 513500 }, { "epoch": 15.38, "learning_rate": 0.00014649670707454304, "loss": 0.8515, "step": 514000 }, { "epoch": 15.39, "learning_rate": 0.00014634680151504552, "loss": 0.8362, "step": 514500 }, { "epoch": 15.41, "learning_rate": 0.000146196895955548, "loss": 0.8327, "step": 515000 }, { "epoch": 15.42, "learning_rate": 0.00014604699039605048, "loss": 0.8561, "step": 515500 }, { "epoch": 15.44, "learning_rate": 0.00014589708483655296, "loss": 0.8647, "step": 516000 }, { "epoch": 15.45, "learning_rate": 0.00014574717927705544, "loss": 0.8487, "step": 516500 }, { "epoch": 15.47, "learning_rate": 0.00014559727371755792, "loss": 0.8379, "step": 517000 }, { "epoch": 15.48, "learning_rate": 0.0001454473681580604, "loss": 0.8607, "step": 517500 }, { "epoch": 15.5, "learning_rate": 0.00014529746259856288, "loss": 0.8548, "step": 518000 }, { "epoch": 15.51, "learning_rate": 0.00014514755703906538, "loss": 0.8504, "step": 518500 }, { "epoch": 15.53, "learning_rate": 0.00014499765147956786, "loss": 0.8485, "step": 519000 }, { "epoch": 15.54, "learning_rate": 0.00014484774592007034, "loss": 0.8419, "step": 519500 }, { "epoch": 15.56, "learning_rate": 0.00014469784036057282, "loss": 0.8509, "step": 520000 }, { "epoch": 15.57, "learning_rate": 0.0001445479348010753, "loss": 0.8377, "step": 520500 }, { "epoch": 15.59, "learning_rate": 0.0001443980292415778, "loss": 0.8417, "step": 521000 }, { "epoch": 15.6, "learning_rate": 0.00014424812368208026, "loss": 0.8422, "step": 521500 }, { "epoch": 15.62, "learning_rate": 0.00014409821812258274, "loss": 0.8228, "step": 522000 }, { "epoch": 15.63, "learning_rate": 0.00014394831256308525, "loss": 0.8544, "step": 522500 }, { "epoch": 15.65, "learning_rate": 0.00014379840700358773, "loss": 0.8495, "step": 523000 }, { "epoch": 15.66, "learning_rate": 0.0001436485014440902, "loss": 0.8498, "step": 523500 }, { "epoch": 15.68, "learning_rate": 0.0001434985958845927, "loss": 0.8513, "step": 524000 }, { "epoch": 15.69, "learning_rate": 0.00014334869032509517, "loss": 0.8416, "step": 524500 }, { "epoch": 15.71, "learning_rate": 0.00014319878476559767, "loss": 0.8563, "step": 525000 }, { "epoch": 15.72, "learning_rate": 0.00014304887920610015, "loss": 0.8463, "step": 525500 }, { "epoch": 15.74, "learning_rate": 0.00014289897364660263, "loss": 0.8371, "step": 526000 }, { "epoch": 15.75, "learning_rate": 0.0001427490680871051, "loss": 0.8415, "step": 526500 }, { "epoch": 15.77, "learning_rate": 0.0001425991625276076, "loss": 0.8303, "step": 527000 }, { "epoch": 15.78, "learning_rate": 0.00014244925696811007, "loss": 0.8581, "step": 527500 }, { "epoch": 15.8, "learning_rate": 0.00014229935140861255, "loss": 0.8418, "step": 528000 }, { "epoch": 15.81, "learning_rate": 0.00014214944584911503, "loss": 0.8296, "step": 528500 }, { "epoch": 15.83, "learning_rate": 0.00014199954028961754, "loss": 0.8515, "step": 529000 }, { "epoch": 15.84, "learning_rate": 0.00014184963473012002, "loss": 0.8382, "step": 529500 }, { "epoch": 15.86, "learning_rate": 0.0001416997291706225, "loss": 0.8395, "step": 530000 }, { "epoch": 15.87, "learning_rate": 0.00014154982361112498, "loss": 0.823, "step": 530500 }, { "epoch": 15.89, "learning_rate": 0.00014139991805162745, "loss": 0.8454, "step": 531000 }, { "epoch": 15.9, "learning_rate": 0.00014125001249212996, "loss": 0.844, "step": 531500 }, { "epoch": 15.92, "learning_rate": 0.00014110010693263244, "loss": 0.8375, "step": 532000 }, { "epoch": 15.93, "learning_rate": 0.00014095020137313492, "loss": 0.8549, "step": 532500 }, { "epoch": 15.95, "learning_rate": 0.0001408002958136374, "loss": 0.8322, "step": 533000 }, { "epoch": 15.96, "learning_rate": 0.00014065039025413988, "loss": 0.8346, "step": 533500 }, { "epoch": 15.98, "learning_rate": 0.00014050048469464236, "loss": 0.848, "step": 534000 }, { "epoch": 15.99, "learning_rate": 0.00014035057913514484, "loss": 0.8605, "step": 534500 }, { "epoch": 16.01, "learning_rate": 0.00014020067357564732, "loss": 0.8431, "step": 535000 }, { "epoch": 16.02, "learning_rate": 0.0001400507680161498, "loss": 0.7915, "step": 535500 }, { "epoch": 16.04, "learning_rate": 0.0001399008624566523, "loss": 0.8093, "step": 536000 }, { "epoch": 16.05, "learning_rate": 0.00013975095689715478, "loss": 0.8242, "step": 536500 }, { "epoch": 16.07, "learning_rate": 0.00013960105133765726, "loss": 0.8114, "step": 537000 }, { "epoch": 16.08, "learning_rate": 0.00013945114577815974, "loss": 0.7946, "step": 537500 }, { "epoch": 16.1, "learning_rate": 0.00013930124021866222, "loss": 0.8027, "step": 538000 }, { "epoch": 16.11, "learning_rate": 0.00013915133465916473, "loss": 0.8051, "step": 538500 }, { "epoch": 16.13, "learning_rate": 0.0001390014290996672, "loss": 0.8243, "step": 539000 }, { "epoch": 16.14, "learning_rate": 0.00013885152354016966, "loss": 0.821, "step": 539500 }, { "epoch": 16.16, "learning_rate": 0.00013870161798067217, "loss": 0.8271, "step": 540000 }, { "epoch": 16.17, "learning_rate": 0.00013855171242117465, "loss": 0.8185, "step": 540500 }, { "epoch": 16.19, "learning_rate": 0.00013840180686167713, "loss": 0.8149, "step": 541000 }, { "epoch": 16.2, "learning_rate": 0.0001382519013021796, "loss": 0.7955, "step": 541500 }, { "epoch": 16.22, "learning_rate": 0.00013810199574268209, "loss": 0.8273, "step": 542000 }, { "epoch": 16.23, "learning_rate": 0.0001379520901831846, "loss": 0.8367, "step": 542500 }, { "epoch": 16.25, "learning_rate": 0.00013780218462368707, "loss": 0.8414, "step": 543000 }, { "epoch": 16.26, "learning_rate": 0.00013765227906418955, "loss": 0.8162, "step": 543500 }, { "epoch": 16.28, "learning_rate": 0.00013750237350469203, "loss": 0.8197, "step": 544000 }, { "epoch": 16.29, "learning_rate": 0.0001373524679451945, "loss": 0.8085, "step": 544500 }, { "epoch": 16.31, "learning_rate": 0.00013720256238569702, "loss": 0.8006, "step": 545000 }, { "epoch": 16.32, "learning_rate": 0.00013705265682619947, "loss": 0.8421, "step": 545500 }, { "epoch": 16.34, "learning_rate": 0.00013690275126670195, "loss": 0.8027, "step": 546000 }, { "epoch": 16.35, "learning_rate": 0.00013675284570720446, "loss": 0.8164, "step": 546500 }, { "epoch": 16.37, "learning_rate": 0.00013660294014770694, "loss": 0.8271, "step": 547000 }, { "epoch": 16.38, "learning_rate": 0.00013645303458820942, "loss": 0.844, "step": 547500 }, { "epoch": 16.4, "learning_rate": 0.0001363031290287119, "loss": 0.8177, "step": 548000 }, { "epoch": 16.41, "learning_rate": 0.00013615322346921437, "loss": 0.8193, "step": 548500 }, { "epoch": 16.43, "learning_rate": 0.00013600331790971688, "loss": 0.829, "step": 549000 }, { "epoch": 16.44, "learning_rate": 0.00013585341235021936, "loss": 0.8306, "step": 549500 }, { "epoch": 16.46, "learning_rate": 0.00013570350679072184, "loss": 0.8298, "step": 550000 }, { "epoch": 16.47, "learning_rate": 0.00013555360123122432, "loss": 0.8027, "step": 550500 }, { "epoch": 16.49, "learning_rate": 0.0001354036956717268, "loss": 0.8251, "step": 551000 }, { "epoch": 16.5, "learning_rate": 0.00013525379011222928, "loss": 0.8216, "step": 551500 }, { "epoch": 16.52, "learning_rate": 0.00013510388455273176, "loss": 0.8337, "step": 552000 }, { "epoch": 16.53, "learning_rate": 0.00013495397899323424, "loss": 0.8322, "step": 552500 }, { "epoch": 16.55, "learning_rate": 0.00013480407343373674, "loss": 0.8357, "step": 553000 }, { "epoch": 16.56, "learning_rate": 0.00013465416787423922, "loss": 0.8244, "step": 553500 }, { "epoch": 16.58, "learning_rate": 0.0001345042623147417, "loss": 0.8214, "step": 554000 }, { "epoch": 16.59, "learning_rate": 0.00013435435675524418, "loss": 0.84, "step": 554500 }, { "epoch": 16.61, "learning_rate": 0.00013420445119574666, "loss": 0.8311, "step": 555000 }, { "epoch": 16.62, "learning_rate": 0.00013405454563624914, "loss": 0.8384, "step": 555500 }, { "epoch": 16.64, "learning_rate": 0.00013390464007675165, "loss": 0.8237, "step": 556000 }, { "epoch": 16.65, "learning_rate": 0.00013375473451725413, "loss": 0.8081, "step": 556500 }, { "epoch": 16.67, "learning_rate": 0.00013360482895775658, "loss": 0.8184, "step": 557000 }, { "epoch": 16.68, "learning_rate": 0.0001334549233982591, "loss": 0.8093, "step": 557500 }, { "epoch": 16.7, "learning_rate": 0.00013330501783876157, "loss": 0.8157, "step": 558000 }, { "epoch": 16.71, "learning_rate": 0.00013315511227926405, "loss": 0.8073, "step": 558500 }, { "epoch": 16.73, "learning_rate": 0.00013300520671976653, "loss": 0.8238, "step": 559000 }, { "epoch": 16.74, "learning_rate": 0.000132855301160269, "loss": 0.8137, "step": 559500 }, { "epoch": 16.76, "learning_rate": 0.0001327053956007715, "loss": 0.8181, "step": 560000 }, { "epoch": 16.77, "learning_rate": 0.000132555490041274, "loss": 0.8108, "step": 560500 }, { "epoch": 16.79, "learning_rate": 0.00013240558448177647, "loss": 0.829, "step": 561000 }, { "epoch": 16.8, "learning_rate": 0.00013225567892227895, "loss": 0.846, "step": 561500 }, { "epoch": 16.82, "learning_rate": 0.00013210577336278143, "loss": 0.7963, "step": 562000 }, { "epoch": 16.83, "learning_rate": 0.00013195586780328394, "loss": 0.8091, "step": 562500 }, { "epoch": 16.85, "learning_rate": 0.0001318059622437864, "loss": 0.8276, "step": 563000 }, { "epoch": 16.86, "learning_rate": 0.00013165605668428887, "loss": 0.8359, "step": 563500 }, { "epoch": 16.88, "learning_rate": 0.00013150615112479138, "loss": 0.8228, "step": 564000 }, { "epoch": 16.89, "learning_rate": 0.00013135624556529386, "loss": 0.83, "step": 564500 }, { "epoch": 16.91, "learning_rate": 0.00013120634000579634, "loss": 0.8201, "step": 565000 }, { "epoch": 16.92, "learning_rate": 0.00013105643444629881, "loss": 0.8283, "step": 565500 }, { "epoch": 16.94, "learning_rate": 0.0001309065288868013, "loss": 0.8175, "step": 566000 }, { "epoch": 16.95, "learning_rate": 0.0001307566233273038, "loss": 0.8318, "step": 566500 }, { "epoch": 16.97, "learning_rate": 0.00013060671776780628, "loss": 0.7978, "step": 567000 }, { "epoch": 16.98, "learning_rate": 0.00013045681220830876, "loss": 0.8185, "step": 567500 }, { "epoch": 17.0, "learning_rate": 0.00013030690664881124, "loss": 0.8267, "step": 568000 }, { "epoch": 17.01, "learning_rate": 0.00013015700108931372, "loss": 0.7983, "step": 568500 }, { "epoch": 17.03, "learning_rate": 0.0001300070955298162, "loss": 0.7953, "step": 569000 }, { "epoch": 17.04, "learning_rate": 0.00012985718997031868, "loss": 0.7757, "step": 569500 }, { "epoch": 17.06, "learning_rate": 0.00012970728441082116, "loss": 0.7859, "step": 570000 }, { "epoch": 17.07, "learning_rate": 0.00012955737885132366, "loss": 0.7836, "step": 570500 }, { "epoch": 17.09, "learning_rate": 0.00012940747329182614, "loss": 0.7968, "step": 571000 }, { "epoch": 17.1, "learning_rate": 0.00012925756773232862, "loss": 0.7894, "step": 571500 }, { "epoch": 17.11, "learning_rate": 0.0001291076621728311, "loss": 0.7958, "step": 572000 }, { "epoch": 17.13, "learning_rate": 0.00012895775661333358, "loss": 0.7711, "step": 572500 }, { "epoch": 17.14, "learning_rate": 0.00012880785105383606, "loss": 0.7975, "step": 573000 }, { "epoch": 17.16, "learning_rate": 0.00012865794549433857, "loss": 0.7934, "step": 573500 }, { "epoch": 17.17, "learning_rate": 0.00012850803993484105, "loss": 0.81, "step": 574000 }, { "epoch": 17.19, "learning_rate": 0.00012835813437534353, "loss": 0.7923, "step": 574500 }, { "epoch": 17.2, "learning_rate": 0.000128208228815846, "loss": 0.7944, "step": 575000 }, { "epoch": 17.22, "learning_rate": 0.0001280583232563485, "loss": 0.8038, "step": 575500 }, { "epoch": 17.23, "learning_rate": 0.00012790841769685097, "loss": 0.7916, "step": 576000 }, { "epoch": 17.25, "learning_rate": 0.00012775851213735345, "loss": 0.8171, "step": 576500 }, { "epoch": 17.26, "learning_rate": 0.00012760860657785593, "loss": 0.79, "step": 577000 }, { "epoch": 17.28, "learning_rate": 0.00012745870101835843, "loss": 0.7968, "step": 577500 }, { "epoch": 17.29, "learning_rate": 0.0001273087954588609, "loss": 0.7882, "step": 578000 }, { "epoch": 17.31, "learning_rate": 0.0001271588898993634, "loss": 0.7863, "step": 578500 }, { "epoch": 17.32, "learning_rate": 0.00012700898433986587, "loss": 0.8008, "step": 579000 }, { "epoch": 17.34, "learning_rate": 0.00012685907878036835, "loss": 0.7904, "step": 579500 }, { "epoch": 17.35, "learning_rate": 0.00012670917322087086, "loss": 0.8133, "step": 580000 }, { "epoch": 17.37, "learning_rate": 0.00012655926766137334, "loss": 0.8, "step": 580500 }, { "epoch": 17.38, "learning_rate": 0.0001264093621018758, "loss": 0.8, "step": 581000 }, { "epoch": 17.4, "learning_rate": 0.0001262594565423783, "loss": 0.7994, "step": 581500 }, { "epoch": 17.41, "learning_rate": 0.00012610955098288078, "loss": 0.8093, "step": 582000 }, { "epoch": 17.43, "learning_rate": 0.00012595964542338325, "loss": 0.8263, "step": 582500 }, { "epoch": 17.44, "learning_rate": 0.00012580973986388573, "loss": 0.7971, "step": 583000 }, { "epoch": 17.46, "learning_rate": 0.00012565983430438821, "loss": 0.8205, "step": 583500 }, { "epoch": 17.47, "learning_rate": 0.00012550992874489072, "loss": 0.8018, "step": 584000 }, { "epoch": 17.49, "learning_rate": 0.0001253600231853932, "loss": 0.7923, "step": 584500 }, { "epoch": 17.5, "learning_rate": 0.00012521011762589568, "loss": 0.7821, "step": 585000 }, { "epoch": 17.52, "learning_rate": 0.00012506021206639816, "loss": 0.7972, "step": 585500 }, { "epoch": 17.53, "learning_rate": 0.00012491030650690064, "loss": 0.8023, "step": 586000 }, { "epoch": 17.55, "learning_rate": 0.00012476040094740315, "loss": 0.791, "step": 586500 }, { "epoch": 17.56, "learning_rate": 0.0001246104953879056, "loss": 0.7786, "step": 587000 }, { "epoch": 17.58, "learning_rate": 0.00012446058982840808, "loss": 0.8098, "step": 587500 }, { "epoch": 17.59, "learning_rate": 0.00012431068426891058, "loss": 0.7725, "step": 588000 }, { "epoch": 17.61, "learning_rate": 0.00012416077870941306, "loss": 0.8116, "step": 588500 }, { "epoch": 17.62, "learning_rate": 0.00012401087314991554, "loss": 0.7904, "step": 589000 }, { "epoch": 17.64, "learning_rate": 0.00012386096759041802, "loss": 0.7895, "step": 589500 }, { "epoch": 17.65, "learning_rate": 0.0001237110620309205, "loss": 0.8055, "step": 590000 }, { "epoch": 17.67, "learning_rate": 0.00012356115647142298, "loss": 0.8156, "step": 590500 }, { "epoch": 17.68, "learning_rate": 0.0001234112509119255, "loss": 0.8047, "step": 591000 }, { "epoch": 17.7, "learning_rate": 0.00012326134535242797, "loss": 0.8095, "step": 591500 }, { "epoch": 17.71, "learning_rate": 0.00012311143979293045, "loss": 0.796, "step": 592000 }, { "epoch": 17.73, "learning_rate": 0.00012296153423343293, "loss": 0.8166, "step": 592500 }, { "epoch": 17.74, "learning_rate": 0.0001228116286739354, "loss": 0.8087, "step": 593000 }, { "epoch": 17.76, "learning_rate": 0.00012266172311443789, "loss": 0.8061, "step": 593500 }, { "epoch": 17.77, "learning_rate": 0.00012251181755494037, "loss": 0.8024, "step": 594000 }, { "epoch": 17.79, "learning_rate": 0.00012236191199544285, "loss": 0.7689, "step": 594500 }, { "epoch": 17.8, "learning_rate": 0.00012221200643594535, "loss": 0.8206, "step": 595000 }, { "epoch": 17.82, "learning_rate": 0.00012206210087644783, "loss": 0.8092, "step": 595500 }, { "epoch": 17.83, "learning_rate": 0.00012191219531695031, "loss": 0.7948, "step": 596000 }, { "epoch": 17.85, "learning_rate": 0.00012176228975745279, "loss": 0.7896, "step": 596500 }, { "epoch": 17.86, "learning_rate": 0.00012161238419795528, "loss": 0.7985, "step": 597000 }, { "epoch": 17.88, "learning_rate": 0.00012146247863845776, "loss": 0.8219, "step": 597500 }, { "epoch": 17.89, "learning_rate": 0.00012131257307896026, "loss": 0.7926, "step": 598000 }, { "epoch": 17.91, "learning_rate": 0.00012116266751946274, "loss": 0.8145, "step": 598500 }, { "epoch": 17.92, "learning_rate": 0.0001210127619599652, "loss": 0.7882, "step": 599000 }, { "epoch": 17.94, "learning_rate": 0.0001208628564004677, "loss": 0.8075, "step": 599500 }, { "epoch": 17.95, "learning_rate": 0.00012071295084097017, "loss": 0.8136, "step": 600000 }, { "epoch": 17.97, "learning_rate": 0.00012056304528147265, "loss": 0.8207, "step": 600500 }, { "epoch": 17.98, "learning_rate": 0.00012041313972197515, "loss": 0.7927, "step": 601000 }, { "epoch": 18.0, "learning_rate": 0.00012026323416247763, "loss": 0.7825, "step": 601500 }, { "epoch": 18.01, "learning_rate": 0.00012011332860298012, "loss": 0.7775, "step": 602000 }, { "epoch": 18.03, "learning_rate": 0.0001199634230434826, "loss": 0.7673, "step": 602500 }, { "epoch": 18.04, "learning_rate": 0.00011981351748398508, "loss": 0.772, "step": 603000 }, { "epoch": 18.06, "learning_rate": 0.00011966361192448757, "loss": 0.7624, "step": 603500 }, { "epoch": 18.07, "learning_rate": 0.00011951370636499005, "loss": 0.7818, "step": 604000 }, { "epoch": 18.09, "learning_rate": 0.00011936380080549252, "loss": 0.7454, "step": 604500 }, { "epoch": 18.1, "learning_rate": 0.00011921389524599501, "loss": 0.7679, "step": 605000 }, { "epoch": 18.12, "learning_rate": 0.00011906398968649749, "loss": 0.7645, "step": 605500 }, { "epoch": 18.13, "learning_rate": 0.00011891408412699997, "loss": 0.7758, "step": 606000 }, { "epoch": 18.15, "learning_rate": 0.00011876417856750246, "loss": 0.7485, "step": 606500 }, { "epoch": 18.16, "learning_rate": 0.00011861427300800494, "loss": 0.7823, "step": 607000 }, { "epoch": 18.18, "learning_rate": 0.00011846436744850744, "loss": 0.7966, "step": 607500 }, { "epoch": 18.19, "learning_rate": 0.00011831446188900991, "loss": 0.789, "step": 608000 }, { "epoch": 18.21, "learning_rate": 0.0001181645563295124, "loss": 0.7774, "step": 608500 }, { "epoch": 18.22, "learning_rate": 0.00011801465077001489, "loss": 0.7918, "step": 609000 }, { "epoch": 18.24, "learning_rate": 0.00011786474521051737, "loss": 0.7693, "step": 609500 }, { "epoch": 18.25, "learning_rate": 0.00011771483965101986, "loss": 0.7592, "step": 610000 }, { "epoch": 18.27, "learning_rate": 0.00011756493409152233, "loss": 0.7838, "step": 610500 }, { "epoch": 18.28, "learning_rate": 0.0001174150285320248, "loss": 0.7846, "step": 611000 }, { "epoch": 18.3, "learning_rate": 0.0001172651229725273, "loss": 0.7693, "step": 611500 }, { "epoch": 18.31, "learning_rate": 0.00011711521741302978, "loss": 0.7825, "step": 612000 }, { "epoch": 18.33, "learning_rate": 0.00011696531185353226, "loss": 0.7787, "step": 612500 }, { "epoch": 18.34, "learning_rate": 0.00011681540629403475, "loss": 0.7682, "step": 613000 }, { "epoch": 18.36, "learning_rate": 0.00011666550073453723, "loss": 0.7638, "step": 613500 }, { "epoch": 18.37, "learning_rate": 0.00011651559517503971, "loss": 0.7683, "step": 614000 }, { "epoch": 18.39, "learning_rate": 0.0001163656896155422, "loss": 0.7636, "step": 614500 }, { "epoch": 18.4, "learning_rate": 0.00011621578405604468, "loss": 0.7691, "step": 615000 }, { "epoch": 18.42, "learning_rate": 0.00011606587849654718, "loss": 0.7897, "step": 615500 }, { "epoch": 18.43, "learning_rate": 0.00011591597293704966, "loss": 0.7826, "step": 616000 }, { "epoch": 18.45, "learning_rate": 0.00011576606737755212, "loss": 0.7832, "step": 616500 }, { "epoch": 18.46, "learning_rate": 0.00011561616181805461, "loss": 0.7815, "step": 617000 }, { "epoch": 18.48, "learning_rate": 0.0001154662562585571, "loss": 0.7783, "step": 617500 }, { "epoch": 18.49, "learning_rate": 0.00011531635069905957, "loss": 0.7811, "step": 618000 }, { "epoch": 18.51, "learning_rate": 0.00011516644513956207, "loss": 0.7856, "step": 618500 }, { "epoch": 18.52, "learning_rate": 0.00011501653958006455, "loss": 0.7954, "step": 619000 }, { "epoch": 18.54, "learning_rate": 0.00011486663402056704, "loss": 0.7587, "step": 619500 }, { "epoch": 18.55, "learning_rate": 0.00011471672846106952, "loss": 0.7876, "step": 620000 }, { "epoch": 18.57, "learning_rate": 0.000114566822901572, "loss": 0.77, "step": 620500 }, { "epoch": 18.58, "learning_rate": 0.00011441691734207449, "loss": 0.7741, "step": 621000 }, { "epoch": 18.6, "learning_rate": 0.00011426701178257697, "loss": 0.7718, "step": 621500 }, { "epoch": 18.61, "learning_rate": 0.00011411710622307946, "loss": 0.7674, "step": 622000 }, { "epoch": 18.63, "learning_rate": 0.00011396720066358193, "loss": 0.7605, "step": 622500 }, { "epoch": 18.64, "learning_rate": 0.00011381729510408441, "loss": 0.7886, "step": 623000 }, { "epoch": 18.66, "learning_rate": 0.00011366738954458689, "loss": 0.7866, "step": 623500 }, { "epoch": 18.67, "learning_rate": 0.00011351748398508938, "loss": 0.7777, "step": 624000 }, { "epoch": 18.69, "learning_rate": 0.00011336757842559186, "loss": 0.7976, "step": 624500 }, { "epoch": 18.7, "learning_rate": 0.00011321767286609436, "loss": 0.7877, "step": 625000 }, { "epoch": 18.72, "learning_rate": 0.00011306776730659683, "loss": 0.7687, "step": 625500 }, { "epoch": 18.73, "learning_rate": 0.00011291786174709931, "loss": 0.767, "step": 626000 }, { "epoch": 18.75, "learning_rate": 0.00011276795618760181, "loss": 0.7761, "step": 626500 }, { "epoch": 18.76, "learning_rate": 0.00011261805062810429, "loss": 0.7931, "step": 627000 }, { "epoch": 18.78, "learning_rate": 0.00011246814506860678, "loss": 0.7701, "step": 627500 }, { "epoch": 18.79, "learning_rate": 0.00011231823950910926, "loss": 0.7706, "step": 628000 }, { "epoch": 18.81, "learning_rate": 0.00011216833394961173, "loss": 0.78, "step": 628500 }, { "epoch": 18.82, "learning_rate": 0.00011201842839011422, "loss": 0.7881, "step": 629000 }, { "epoch": 18.84, "learning_rate": 0.0001118685228306167, "loss": 0.7588, "step": 629500 }, { "epoch": 18.85, "learning_rate": 0.00011171861727111918, "loss": 0.7747, "step": 630000 }, { "epoch": 18.87, "learning_rate": 0.00011156871171162167, "loss": 0.787, "step": 630500 }, { "epoch": 18.88, "learning_rate": 0.00011141880615212415, "loss": 0.7741, "step": 631000 }, { "epoch": 18.9, "learning_rate": 0.00011126890059262663, "loss": 0.7881, "step": 631500 }, { "epoch": 18.91, "learning_rate": 0.00011111899503312912, "loss": 0.7621, "step": 632000 }, { "epoch": 18.93, "learning_rate": 0.0001109690894736316, "loss": 0.7885, "step": 632500 }, { "epoch": 18.94, "learning_rate": 0.0001108191839141341, "loss": 0.7976, "step": 633000 }, { "epoch": 18.96, "learning_rate": 0.00011066927835463658, "loss": 0.7878, "step": 633500 }, { "epoch": 18.97, "learning_rate": 0.00011051937279513905, "loss": 0.7923, "step": 634000 }, { "epoch": 18.99, "learning_rate": 0.00011036946723564153, "loss": 0.7846, "step": 634500 }, { "epoch": 19.0, "learning_rate": 0.00011021956167614401, "loss": 0.7589, "step": 635000 }, { "epoch": 19.01, "learning_rate": 0.0001100696561166465, "loss": 0.7466, "step": 635500 }, { "epoch": 19.03, "learning_rate": 0.00010991975055714899, "loss": 0.7361, "step": 636000 }, { "epoch": 19.04, "learning_rate": 0.00010976984499765147, "loss": 0.7505, "step": 636500 }, { "epoch": 19.06, "learning_rate": 0.00010961993943815396, "loss": 0.7444, "step": 637000 }, { "epoch": 19.07, "learning_rate": 0.00010947003387865644, "loss": 0.754, "step": 637500 }, { "epoch": 19.09, "learning_rate": 0.00010932012831915892, "loss": 0.7534, "step": 638000 }, { "epoch": 19.1, "learning_rate": 0.00010917022275966141, "loss": 0.7478, "step": 638500 }, { "epoch": 19.12, "learning_rate": 0.00010902031720016389, "loss": 0.7344, "step": 639000 }, { "epoch": 19.13, "learning_rate": 0.00010887041164066638, "loss": 0.7494, "step": 639500 }, { "epoch": 19.15, "learning_rate": 0.00010872050608116886, "loss": 0.7639, "step": 640000 }, { "epoch": 19.16, "learning_rate": 0.00010857060052167133, "loss": 0.7593, "step": 640500 }, { "epoch": 19.18, "learning_rate": 0.00010842069496217381, "loss": 0.747, "step": 641000 }, { "epoch": 19.19, "learning_rate": 0.0001082707894026763, "loss": 0.7525, "step": 641500 }, { "epoch": 19.21, "learning_rate": 0.00010812088384317878, "loss": 0.7525, "step": 642000 }, { "epoch": 19.22, "learning_rate": 0.00010797097828368127, "loss": 0.7513, "step": 642500 }, { "epoch": 19.24, "learning_rate": 0.00010782107272418375, "loss": 0.7505, "step": 643000 }, { "epoch": 19.25, "learning_rate": 0.00010767116716468623, "loss": 0.7652, "step": 643500 }, { "epoch": 19.27, "learning_rate": 0.00010752126160518873, "loss": 0.7597, "step": 644000 }, { "epoch": 19.28, "learning_rate": 0.0001073713560456912, "loss": 0.7486, "step": 644500 }, { "epoch": 19.3, "learning_rate": 0.0001072214504861937, "loss": 0.7477, "step": 645000 }, { "epoch": 19.31, "learning_rate": 0.00010707154492669618, "loss": 0.7661, "step": 645500 }, { "epoch": 19.33, "learning_rate": 0.00010692163936719866, "loss": 0.7683, "step": 646000 }, { "epoch": 19.34, "learning_rate": 0.00010677173380770114, "loss": 0.7677, "step": 646500 }, { "epoch": 19.36, "learning_rate": 0.00010662182824820362, "loss": 0.7516, "step": 647000 }, { "epoch": 19.37, "learning_rate": 0.0001064719226887061, "loss": 0.7573, "step": 647500 }, { "epoch": 19.39, "learning_rate": 0.00010632201712920859, "loss": 0.7576, "step": 648000 }, { "epoch": 19.4, "learning_rate": 0.00010617211156971107, "loss": 0.7436, "step": 648500 }, { "epoch": 19.42, "learning_rate": 0.00010602220601021355, "loss": 0.7434, "step": 649000 }, { "epoch": 19.43, "learning_rate": 0.00010587230045071604, "loss": 0.7356, "step": 649500 }, { "epoch": 19.45, "learning_rate": 0.00010572239489121852, "loss": 0.7596, "step": 650000 }, { "epoch": 19.46, "learning_rate": 0.00010557248933172102, "loss": 0.7531, "step": 650500 }, { "epoch": 19.48, "learning_rate": 0.0001054225837722235, "loss": 0.7313, "step": 651000 }, { "epoch": 19.49, "learning_rate": 0.00010527267821272597, "loss": 0.7474, "step": 651500 }, { "epoch": 19.51, "learning_rate": 0.00010512277265322845, "loss": 0.7477, "step": 652000 }, { "epoch": 19.52, "learning_rate": 0.00010497286709373093, "loss": 0.7572, "step": 652500 }, { "epoch": 19.54, "learning_rate": 0.00010482296153423341, "loss": 0.7561, "step": 653000 }, { "epoch": 19.55, "learning_rate": 0.0001046730559747359, "loss": 0.7451, "step": 653500 }, { "epoch": 19.57, "learning_rate": 0.00010452315041523839, "loss": 0.7607, "step": 654000 }, { "epoch": 19.58, "learning_rate": 0.00010437324485574088, "loss": 0.7335, "step": 654500 }, { "epoch": 19.6, "learning_rate": 0.00010422333929624336, "loss": 0.7619, "step": 655000 }, { "epoch": 19.61, "learning_rate": 0.00010407343373674584, "loss": 0.763, "step": 655500 }, { "epoch": 19.63, "learning_rate": 0.00010392352817724833, "loss": 0.7391, "step": 656000 }, { "epoch": 19.64, "learning_rate": 0.00010377362261775081, "loss": 0.7686, "step": 656500 }, { "epoch": 19.66, "learning_rate": 0.0001036237170582533, "loss": 0.7568, "step": 657000 }, { "epoch": 19.67, "learning_rate": 0.00010347381149875578, "loss": 0.7549, "step": 657500 }, { "epoch": 19.69, "learning_rate": 0.00010332390593925825, "loss": 0.7675, "step": 658000 }, { "epoch": 19.7, "learning_rate": 0.00010317400037976073, "loss": 0.7607, "step": 658500 }, { "epoch": 19.72, "learning_rate": 0.00010302409482026322, "loss": 0.7639, "step": 659000 }, { "epoch": 19.73, "learning_rate": 0.0001028741892607657, "loss": 0.7437, "step": 659500 }, { "epoch": 19.75, "learning_rate": 0.0001027242837012682, "loss": 0.7487, "step": 660000 }, { "epoch": 19.76, "learning_rate": 0.00010257437814177067, "loss": 0.7677, "step": 660500 }, { "epoch": 19.78, "learning_rate": 0.00010242447258227315, "loss": 0.7553, "step": 661000 }, { "epoch": 19.79, "learning_rate": 0.00010227456702277565, "loss": 0.7717, "step": 661500 }, { "epoch": 19.81, "learning_rate": 0.00010212466146327813, "loss": 0.7373, "step": 662000 }, { "epoch": 19.82, "learning_rate": 0.00010197475590378062, "loss": 0.7541, "step": 662500 }, { "epoch": 19.84, "learning_rate": 0.0001018248503442831, "loss": 0.7634, "step": 663000 }, { "epoch": 19.85, "learning_rate": 0.00010167494478478558, "loss": 0.7672, "step": 663500 }, { "epoch": 19.87, "learning_rate": 0.00010152503922528806, "loss": 0.7467, "step": 664000 }, { "epoch": 19.88, "learning_rate": 0.00010137513366579054, "loss": 0.7622, "step": 664500 }, { "epoch": 19.9, "learning_rate": 0.00010122522810629302, "loss": 0.7727, "step": 665000 }, { "epoch": 19.91, "learning_rate": 0.00010107532254679551, "loss": 0.7628, "step": 665500 }, { "epoch": 19.93, "learning_rate": 0.00010092541698729799, "loss": 0.7881, "step": 666000 }, { "epoch": 19.94, "learning_rate": 0.00010077551142780048, "loss": 0.7446, "step": 666500 }, { "epoch": 19.96, "learning_rate": 0.00010062560586830296, "loss": 0.7576, "step": 667000 }, { "epoch": 19.97, "learning_rate": 0.00010047570030880544, "loss": 0.7571, "step": 667500 }, { "epoch": 19.99, "learning_rate": 0.00010032579474930793, "loss": 0.7719, "step": 668000 }, { "epoch": 20.0, "learning_rate": 0.00010017588918981041, "loss": 0.7767, "step": 668500 }, { "epoch": 20.02, "learning_rate": 0.0001000259836303129, "loss": 0.7249, "step": 669000 }, { "epoch": 20.03, "learning_rate": 9.987607807081539e-05, "loss": 0.7378, "step": 669500 }, { "epoch": 20.05, "learning_rate": 9.972617251131785e-05, "loss": 0.73, "step": 670000 }, { "epoch": 20.06, "learning_rate": 9.957626695182033e-05, "loss": 0.733, "step": 670500 }, { "epoch": 20.08, "learning_rate": 9.942636139232283e-05, "loss": 0.7267, "step": 671000 }, { "epoch": 20.09, "learning_rate": 9.92764558328253e-05, "loss": 0.7461, "step": 671500 }, { "epoch": 20.11, "learning_rate": 9.91265502733278e-05, "loss": 0.7247, "step": 672000 }, { "epoch": 20.12, "learning_rate": 9.897664471383028e-05, "loss": 0.7317, "step": 672500 }, { "epoch": 20.14, "learning_rate": 9.882673915433276e-05, "loss": 0.738, "step": 673000 }, { "epoch": 20.15, "learning_rate": 9.867683359483525e-05, "loss": 0.7435, "step": 673500 }, { "epoch": 20.17, "learning_rate": 9.852692803533773e-05, "loss": 0.7352, "step": 674000 }, { "epoch": 20.18, "learning_rate": 9.837702247584022e-05, "loss": 0.7399, "step": 674500 }, { "epoch": 20.2, "learning_rate": 9.82271169163427e-05, "loss": 0.7315, "step": 675000 }, { "epoch": 20.21, "learning_rate": 9.807721135684518e-05, "loss": 0.717, "step": 675500 }, { "epoch": 20.23, "learning_rate": 9.792730579734765e-05, "loss": 0.7247, "step": 676000 }, { "epoch": 20.24, "learning_rate": 9.777740023785014e-05, "loss": 0.7433, "step": 676500 }, { "epoch": 20.26, "learning_rate": 9.762749467835262e-05, "loss": 0.7346, "step": 677000 }, { "epoch": 20.27, "learning_rate": 9.747758911885511e-05, "loss": 0.7363, "step": 677500 }, { "epoch": 20.29, "learning_rate": 9.73276835593576e-05, "loss": 0.7297, "step": 678000 }, { "epoch": 20.3, "learning_rate": 9.717777799986007e-05, "loss": 0.7156, "step": 678500 }, { "epoch": 20.32, "learning_rate": 9.702787244036257e-05, "loss": 0.7128, "step": 679000 }, { "epoch": 20.33, "learning_rate": 9.687796688086505e-05, "loss": 0.7357, "step": 679500 }, { "epoch": 20.35, "learning_rate": 9.672806132136754e-05, "loss": 0.7323, "step": 680000 }, { "epoch": 20.36, "learning_rate": 9.657815576187002e-05, "loss": 0.741, "step": 680500 }, { "epoch": 20.38, "learning_rate": 9.64282502023725e-05, "loss": 0.7149, "step": 681000 }, { "epoch": 20.39, "learning_rate": 9.627834464287499e-05, "loss": 0.7354, "step": 681500 }, { "epoch": 20.41, "learning_rate": 9.612843908337746e-05, "loss": 0.7245, "step": 682000 }, { "epoch": 20.42, "learning_rate": 9.597853352387994e-05, "loss": 0.7278, "step": 682500 }, { "epoch": 20.44, "learning_rate": 9.582862796438243e-05, "loss": 0.7288, "step": 683000 }, { "epoch": 20.45, "learning_rate": 9.567872240488491e-05, "loss": 0.7536, "step": 683500 }, { "epoch": 20.47, "learning_rate": 9.55288168453874e-05, "loss": 0.751, "step": 684000 }, { "epoch": 20.48, "learning_rate": 9.537891128588988e-05, "loss": 0.7343, "step": 684500 }, { "epoch": 20.5, "learning_rate": 9.522900572639236e-05, "loss": 0.7224, "step": 685000 }, { "epoch": 20.51, "learning_rate": 9.507910016689485e-05, "loss": 0.7421, "step": 685500 }, { "epoch": 20.53, "learning_rate": 9.492919460739733e-05, "loss": 0.7356, "step": 686000 }, { "epoch": 20.54, "learning_rate": 9.477928904789981e-05, "loss": 0.7373, "step": 686500 }, { "epoch": 20.56, "learning_rate": 9.462938348840231e-05, "loss": 0.7455, "step": 687000 }, { "epoch": 20.57, "learning_rate": 9.447947792890479e-05, "loss": 0.7364, "step": 687500 }, { "epoch": 20.59, "learning_rate": 9.432957236940725e-05, "loss": 0.7259, "step": 688000 }, { "epoch": 20.6, "learning_rate": 9.417966680990975e-05, "loss": 0.7167, "step": 688500 }, { "epoch": 20.62, "learning_rate": 9.402976125041223e-05, "loss": 0.7317, "step": 689000 }, { "epoch": 20.63, "learning_rate": 9.387985569091472e-05, "loss": 0.7246, "step": 689500 }, { "epoch": 20.65, "learning_rate": 9.37299501314172e-05, "loss": 0.7543, "step": 690000 }, { "epoch": 20.66, "learning_rate": 9.358004457191968e-05, "loss": 0.7507, "step": 690500 }, { "epoch": 20.68, "learning_rate": 9.343013901242217e-05, "loss": 0.7432, "step": 691000 }, { "epoch": 20.69, "learning_rate": 9.328023345292465e-05, "loss": 0.7396, "step": 691500 }, { "epoch": 20.71, "learning_rate": 9.313032789342714e-05, "loss": 0.7179, "step": 692000 }, { "epoch": 20.72, "learning_rate": 9.298042233392962e-05, "loss": 0.7368, "step": 692500 }, { "epoch": 20.74, "learning_rate": 9.28305167744321e-05, "loss": 0.7348, "step": 693000 }, { "epoch": 20.75, "learning_rate": 9.26806112149346e-05, "loss": 0.7389, "step": 693500 }, { "epoch": 20.77, "learning_rate": 9.253070565543706e-05, "loss": 0.7338, "step": 694000 }, { "epoch": 20.78, "learning_rate": 9.238080009593954e-05, "loss": 0.7407, "step": 694500 }, { "epoch": 20.8, "learning_rate": 9.223089453644203e-05, "loss": 0.724, "step": 695000 }, { "epoch": 20.81, "learning_rate": 9.208098897694451e-05, "loss": 0.7441, "step": 695500 }, { "epoch": 20.83, "learning_rate": 9.193108341744699e-05, "loss": 0.73, "step": 696000 }, { "epoch": 20.84, "learning_rate": 9.178117785794949e-05, "loss": 0.7175, "step": 696500 }, { "epoch": 20.86, "learning_rate": 9.163127229845197e-05, "loss": 0.7557, "step": 697000 }, { "epoch": 20.87, "learning_rate": 9.148136673895446e-05, "loss": 0.7302, "step": 697500 }, { "epoch": 20.89, "learning_rate": 9.133146117945694e-05, "loss": 0.7429, "step": 698000 }, { "epoch": 20.9, "learning_rate": 9.118155561995942e-05, "loss": 0.7428, "step": 698500 }, { "epoch": 20.91, "learning_rate": 9.103165006046191e-05, "loss": 0.7224, "step": 699000 }, { "epoch": 20.93, "learning_rate": 9.088174450096438e-05, "loss": 0.7218, "step": 699500 }, { "epoch": 20.94, "learning_rate": 9.073183894146686e-05, "loss": 0.7353, "step": 700000 }, { "epoch": 20.96, "learning_rate": 9.058193338196935e-05, "loss": 0.7264, "step": 700500 }, { "epoch": 20.97, "learning_rate": 9.043202782247183e-05, "loss": 0.7373, "step": 701000 }, { "epoch": 20.99, "learning_rate": 9.028212226297432e-05, "loss": 0.7362, "step": 701500 }, { "epoch": 21.0, "learning_rate": 9.01322167034768e-05, "loss": 0.7394, "step": 702000 }, { "epoch": 21.02, "learning_rate": 8.998231114397928e-05, "loss": 0.6919, "step": 702500 }, { "epoch": 21.03, "learning_rate": 8.983240558448177e-05, "loss": 0.7024, "step": 703000 }, { "epoch": 21.05, "learning_rate": 8.968250002498425e-05, "loss": 0.7343, "step": 703500 }, { "epoch": 21.06, "learning_rate": 8.953259446548673e-05, "loss": 0.7225, "step": 704000 }, { "epoch": 21.08, "learning_rate": 8.938268890598923e-05, "loss": 0.7039, "step": 704500 }, { "epoch": 21.09, "learning_rate": 8.92327833464917e-05, "loss": 0.7019, "step": 705000 }, { "epoch": 21.11, "learning_rate": 8.908287778699417e-05, "loss": 0.6986, "step": 705500 }, { "epoch": 21.12, "learning_rate": 8.893297222749667e-05, "loss": 0.7156, "step": 706000 }, { "epoch": 21.14, "learning_rate": 8.878306666799914e-05, "loss": 0.7253, "step": 706500 }, { "epoch": 21.15, "learning_rate": 8.863316110850164e-05, "loss": 0.7047, "step": 707000 }, { "epoch": 21.17, "learning_rate": 8.848325554900412e-05, "loss": 0.7133, "step": 707500 }, { "epoch": 21.18, "learning_rate": 8.83333499895066e-05, "loss": 0.7129, "step": 708000 }, { "epoch": 21.2, "learning_rate": 8.818344443000909e-05, "loss": 0.7113, "step": 708500 }, { "epoch": 21.21, "learning_rate": 8.803353887051157e-05, "loss": 0.7238, "step": 709000 }, { "epoch": 21.23, "learning_rate": 8.788363331101406e-05, "loss": 0.7064, "step": 709500 }, { "epoch": 21.24, "learning_rate": 8.773372775151654e-05, "loss": 0.7324, "step": 710000 }, { "epoch": 21.26, "learning_rate": 8.758382219201902e-05, "loss": 0.6991, "step": 710500 }, { "epoch": 21.27, "learning_rate": 8.743391663252151e-05, "loss": 0.701, "step": 711000 }, { "epoch": 21.29, "learning_rate": 8.728401107302398e-05, "loss": 0.705, "step": 711500 }, { "epoch": 21.3, "learning_rate": 8.713410551352646e-05, "loss": 0.7189, "step": 712000 }, { "epoch": 21.32, "learning_rate": 8.698419995402895e-05, "loss": 0.721, "step": 712500 }, { "epoch": 21.33, "learning_rate": 8.683429439453143e-05, "loss": 0.6843, "step": 713000 }, { "epoch": 21.35, "learning_rate": 8.668438883503391e-05, "loss": 0.7105, "step": 713500 }, { "epoch": 21.36, "learning_rate": 8.65344832755364e-05, "loss": 0.7219, "step": 714000 }, { "epoch": 21.38, "learning_rate": 8.638457771603889e-05, "loss": 0.7165, "step": 714500 }, { "epoch": 21.39, "learning_rate": 8.623467215654138e-05, "loss": 0.6908, "step": 715000 }, { "epoch": 21.41, "learning_rate": 8.608476659704386e-05, "loss": 0.7187, "step": 715500 }, { "epoch": 21.42, "learning_rate": 8.593486103754634e-05, "loss": 0.7191, "step": 716000 }, { "epoch": 21.44, "learning_rate": 8.578495547804883e-05, "loss": 0.718, "step": 716500 }, { "epoch": 21.45, "learning_rate": 8.563504991855131e-05, "loss": 0.7116, "step": 717000 }, { "epoch": 21.47, "learning_rate": 8.548514435905378e-05, "loss": 0.7347, "step": 717500 }, { "epoch": 21.48, "learning_rate": 8.533523879955627e-05, "loss": 0.715, "step": 718000 }, { "epoch": 21.5, "learning_rate": 8.518533324005875e-05, "loss": 0.7124, "step": 718500 }, { "epoch": 21.51, "learning_rate": 8.503542768056124e-05, "loss": 0.7114, "step": 719000 }, { "epoch": 21.53, "learning_rate": 8.488552212106372e-05, "loss": 0.7477, "step": 719500 }, { "epoch": 21.54, "learning_rate": 8.47356165615662e-05, "loss": 0.7172, "step": 720000 }, { "epoch": 21.56, "learning_rate": 8.45857110020687e-05, "loss": 0.7105, "step": 720500 }, { "epoch": 21.57, "learning_rate": 8.443580544257117e-05, "loss": 0.7283, "step": 721000 }, { "epoch": 21.59, "learning_rate": 8.428589988307365e-05, "loss": 0.721, "step": 721500 }, { "epoch": 21.6, "learning_rate": 8.413599432357615e-05, "loss": 0.7017, "step": 722000 }, { "epoch": 21.62, "learning_rate": 8.398608876407863e-05, "loss": 0.7044, "step": 722500 }, { "epoch": 21.63, "learning_rate": 8.383618320458112e-05, "loss": 0.7124, "step": 723000 }, { "epoch": 21.65, "learning_rate": 8.368627764508358e-05, "loss": 0.7166, "step": 723500 }, { "epoch": 21.66, "learning_rate": 8.353637208558606e-05, "loss": 0.7204, "step": 724000 }, { "epoch": 21.68, "learning_rate": 8.338646652608856e-05, "loss": 0.7228, "step": 724500 }, { "epoch": 21.69, "learning_rate": 8.323656096659104e-05, "loss": 0.7058, "step": 725000 }, { "epoch": 21.71, "learning_rate": 8.308665540709352e-05, "loss": 0.6931, "step": 725500 }, { "epoch": 21.72, "learning_rate": 8.293674984759601e-05, "loss": 0.7113, "step": 726000 }, { "epoch": 21.74, "learning_rate": 8.278684428809849e-05, "loss": 0.6993, "step": 726500 }, { "epoch": 21.75, "learning_rate": 8.263693872860098e-05, "loss": 0.7311, "step": 727000 }, { "epoch": 21.77, "learning_rate": 8.248703316910346e-05, "loss": 0.7121, "step": 727500 }, { "epoch": 21.78, "learning_rate": 8.233712760960594e-05, "loss": 0.718, "step": 728000 }, { "epoch": 21.8, "learning_rate": 8.218722205010843e-05, "loss": 0.7217, "step": 728500 }, { "epoch": 21.81, "learning_rate": 8.203731649061091e-05, "loss": 0.7172, "step": 729000 }, { "epoch": 21.83, "learning_rate": 8.188741093111338e-05, "loss": 0.7158, "step": 729500 }, { "epoch": 21.84, "learning_rate": 8.173750537161587e-05, "loss": 0.724, "step": 730000 }, { "epoch": 21.86, "learning_rate": 8.158759981211835e-05, "loss": 0.7099, "step": 730500 }, { "epoch": 21.87, "learning_rate": 8.143769425262083e-05, "loss": 0.6909, "step": 731000 }, { "epoch": 21.89, "learning_rate": 8.128778869312333e-05, "loss": 0.7204, "step": 731500 }, { "epoch": 21.9, "learning_rate": 8.11378831336258e-05, "loss": 0.7063, "step": 732000 }, { "epoch": 21.92, "learning_rate": 8.09879775741283e-05, "loss": 0.7079, "step": 732500 }, { "epoch": 21.93, "learning_rate": 8.083807201463078e-05, "loss": 0.7238, "step": 733000 }, { "epoch": 21.95, "learning_rate": 8.068816645513326e-05, "loss": 0.7334, "step": 733500 }, { "epoch": 21.96, "learning_rate": 8.053826089563575e-05, "loss": 0.7228, "step": 734000 }, { "epoch": 21.98, "learning_rate": 8.038835533613823e-05, "loss": 0.7316, "step": 734500 }, { "epoch": 21.99, "learning_rate": 8.023844977664072e-05, "loss": 0.7232, "step": 735000 }, { "epoch": 22.01, "learning_rate": 8.008854421714319e-05, "loss": 0.7005, "step": 735500 }, { "epoch": 22.02, "learning_rate": 7.993863865764567e-05, "loss": 0.6997, "step": 736000 }, { "epoch": 22.04, "learning_rate": 7.978873309814816e-05, "loss": 0.7029, "step": 736500 }, { "epoch": 22.05, "learning_rate": 7.963882753865064e-05, "loss": 0.7038, "step": 737000 }, { "epoch": 22.07, "learning_rate": 7.948892197915312e-05, "loss": 0.6937, "step": 737500 }, { "epoch": 22.08, "learning_rate": 7.933901641965561e-05, "loss": 0.6759, "step": 738000 }, { "epoch": 22.1, "learning_rate": 7.91891108601581e-05, "loss": 0.6865, "step": 738500 }, { "epoch": 22.11, "learning_rate": 7.903920530066059e-05, "loss": 0.6777, "step": 739000 }, { "epoch": 22.13, "learning_rate": 7.888929974116307e-05, "loss": 0.7043, "step": 739500 }, { "epoch": 22.14, "learning_rate": 7.873939418166555e-05, "loss": 0.699, "step": 740000 }, { "epoch": 22.16, "learning_rate": 7.858948862216804e-05, "loss": 0.6759, "step": 740500 }, { "epoch": 22.17, "learning_rate": 7.84395830626705e-05, "loss": 0.7029, "step": 741000 }, { "epoch": 22.19, "learning_rate": 7.828967750317298e-05, "loss": 0.696, "step": 741500 }, { "epoch": 22.2, "learning_rate": 7.813977194367548e-05, "loss": 0.7077, "step": 742000 }, { "epoch": 22.22, "learning_rate": 7.798986638417796e-05, "loss": 0.7031, "step": 742500 }, { "epoch": 22.23, "learning_rate": 7.783996082468044e-05, "loss": 0.6837, "step": 743000 }, { "epoch": 22.25, "learning_rate": 7.769005526518293e-05, "loss": 0.6885, "step": 743500 }, { "epoch": 22.26, "learning_rate": 7.754014970568541e-05, "loss": 0.6833, "step": 744000 }, { "epoch": 22.28, "learning_rate": 7.73902441461879e-05, "loss": 0.6837, "step": 744500 }, { "epoch": 22.29, "learning_rate": 7.724033858669038e-05, "loss": 0.6739, "step": 745000 }, { "epoch": 22.31, "learning_rate": 7.709043302719286e-05, "loss": 0.6823, "step": 745500 }, { "epoch": 22.32, "learning_rate": 7.694052746769535e-05, "loss": 0.6946, "step": 746000 }, { "epoch": 22.34, "learning_rate": 7.679062190819783e-05, "loss": 0.6924, "step": 746500 }, { "epoch": 22.35, "learning_rate": 7.66407163487003e-05, "loss": 0.6822, "step": 747000 }, { "epoch": 22.37, "learning_rate": 7.649081078920279e-05, "loss": 0.7175, "step": 747500 }, { "epoch": 22.38, "learning_rate": 7.634090522970527e-05, "loss": 0.699, "step": 748000 }, { "epoch": 22.4, "learning_rate": 7.619099967020775e-05, "loss": 0.7055, "step": 748500 }, { "epoch": 22.41, "learning_rate": 7.604109411071025e-05, "loss": 0.6743, "step": 749000 }, { "epoch": 22.43, "learning_rate": 7.589118855121272e-05, "loss": 0.6927, "step": 749500 }, { "epoch": 22.44, "learning_rate": 7.574128299171522e-05, "loss": 0.6966, "step": 750000 }, { "epoch": 22.46, "learning_rate": 7.55913774322177e-05, "loss": 0.6933, "step": 750500 }, { "epoch": 22.47, "learning_rate": 7.544147187272018e-05, "loss": 0.6986, "step": 751000 }, { "epoch": 22.49, "learning_rate": 7.529156631322267e-05, "loss": 0.7019, "step": 751500 }, { "epoch": 22.5, "learning_rate": 7.514166075372515e-05, "loss": 0.6708, "step": 752000 }, { "epoch": 22.52, "learning_rate": 7.499175519422763e-05, "loss": 0.6831, "step": 752500 }, { "epoch": 22.53, "learning_rate": 7.484184963473011e-05, "loss": 0.7063, "step": 753000 }, { "epoch": 22.55, "learning_rate": 7.46919440752326e-05, "loss": 0.7093, "step": 753500 }, { "epoch": 22.56, "learning_rate": 7.454203851573508e-05, "loss": 0.7024, "step": 754000 }, { "epoch": 22.58, "learning_rate": 7.439213295623756e-05, "loss": 0.6713, "step": 754500 }, { "epoch": 22.59, "learning_rate": 7.424222739674004e-05, "loss": 0.6837, "step": 755000 }, { "epoch": 22.61, "learning_rate": 7.409232183724253e-05, "loss": 0.6941, "step": 755500 }, { "epoch": 22.62, "learning_rate": 7.394241627774501e-05, "loss": 0.6916, "step": 756000 }, { "epoch": 22.64, "learning_rate": 7.37925107182475e-05, "loss": 0.6994, "step": 756500 }, { "epoch": 22.65, "learning_rate": 7.364260515874999e-05, "loss": 0.7029, "step": 757000 }, { "epoch": 22.67, "learning_rate": 7.349269959925247e-05, "loss": 0.6912, "step": 757500 }, { "epoch": 22.68, "learning_rate": 7.334279403975494e-05, "loss": 0.701, "step": 758000 }, { "epoch": 22.7, "learning_rate": 7.319288848025742e-05, "loss": 0.6997, "step": 758500 }, { "epoch": 22.71, "learning_rate": 7.304298292075992e-05, "loss": 0.7171, "step": 759000 }, { "epoch": 22.73, "learning_rate": 7.28930773612624e-05, "loss": 0.6932, "step": 759500 }, { "epoch": 22.74, "learning_rate": 7.274317180176489e-05, "loss": 0.6957, "step": 760000 }, { "epoch": 22.76, "learning_rate": 7.259326624226736e-05, "loss": 0.6995, "step": 760500 }, { "epoch": 22.77, "learning_rate": 7.244336068276985e-05, "loss": 0.7083, "step": 761000 }, { "epoch": 22.79, "learning_rate": 7.229345512327233e-05, "loss": 0.6979, "step": 761500 }, { "epoch": 22.8, "learning_rate": 7.214354956377482e-05, "loss": 0.6856, "step": 762000 }, { "epoch": 22.81, "learning_rate": 7.19936440042773e-05, "loss": 0.6783, "step": 762500 }, { "epoch": 22.83, "learning_rate": 7.184373844477978e-05, "loss": 0.7028, "step": 763000 }, { "epoch": 22.84, "learning_rate": 7.169383288528226e-05, "loss": 0.6838, "step": 763500 }, { "epoch": 22.86, "learning_rate": 7.154392732578475e-05, "loss": 0.7014, "step": 764000 }, { "epoch": 22.87, "learning_rate": 7.139402176628723e-05, "loss": 0.7034, "step": 764500 }, { "epoch": 22.89, "learning_rate": 7.124411620678971e-05, "loss": 0.6974, "step": 765000 }, { "epoch": 22.9, "learning_rate": 7.10942106472922e-05, "loss": 0.7062, "step": 765500 }, { "epoch": 22.92, "learning_rate": 7.094430508779469e-05, "loss": 0.6995, "step": 766000 }, { "epoch": 22.93, "learning_rate": 7.079439952829716e-05, "loss": 0.6957, "step": 766500 }, { "epoch": 22.95, "learning_rate": 7.064449396879964e-05, "loss": 0.7066, "step": 767000 }, { "epoch": 22.96, "learning_rate": 7.049458840930214e-05, "loss": 0.6975, "step": 767500 }, { "epoch": 22.98, "learning_rate": 7.034468284980462e-05, "loss": 0.6834, "step": 768000 }, { "epoch": 22.99, "learning_rate": 7.01947772903071e-05, "loss": 0.7014, "step": 768500 }, { "epoch": 23.01, "learning_rate": 7.004487173080959e-05, "loss": 0.674, "step": 769000 }, { "epoch": 23.02, "learning_rate": 6.989496617131207e-05, "loss": 0.6863, "step": 769500 }, { "epoch": 23.04, "learning_rate": 6.974506061181455e-05, "loss": 0.6731, "step": 770000 }, { "epoch": 23.05, "learning_rate": 6.959515505231703e-05, "loss": 0.6623, "step": 770500 }, { "epoch": 23.07, "learning_rate": 6.944524949281952e-05, "loss": 0.6691, "step": 771000 }, { "epoch": 23.08, "learning_rate": 6.9295343933322e-05, "loss": 0.6734, "step": 771500 }, { "epoch": 23.1, "learning_rate": 6.91454383738245e-05, "loss": 0.6675, "step": 772000 }, { "epoch": 23.11, "learning_rate": 6.899553281432696e-05, "loss": 0.6671, "step": 772500 }, { "epoch": 23.13, "learning_rate": 6.884562725482945e-05, "loss": 0.6767, "step": 773000 }, { "epoch": 23.14, "learning_rate": 6.869572169533193e-05, "loss": 0.6718, "step": 773500 }, { "epoch": 23.16, "learning_rate": 6.854581613583443e-05, "loss": 0.6682, "step": 774000 }, { "epoch": 23.17, "learning_rate": 6.83959105763369e-05, "loss": 0.6724, "step": 774500 }, { "epoch": 23.19, "learning_rate": 6.824600501683938e-05, "loss": 0.6714, "step": 775000 }, { "epoch": 23.2, "learning_rate": 6.809609945734186e-05, "loss": 0.6848, "step": 775500 }, { "epoch": 23.22, "learning_rate": 6.794619389784434e-05, "loss": 0.6646, "step": 776000 }, { "epoch": 23.23, "learning_rate": 6.779628833834684e-05, "loss": 0.6828, "step": 776500 }, { "epoch": 23.25, "learning_rate": 6.764638277884932e-05, "loss": 0.6647, "step": 777000 }, { "epoch": 23.26, "learning_rate": 6.749647721935181e-05, "loss": 0.6773, "step": 777500 }, { "epoch": 23.28, "learning_rate": 6.734657165985429e-05, "loss": 0.6844, "step": 778000 }, { "epoch": 23.29, "learning_rate": 6.719666610035677e-05, "loss": 0.6624, "step": 778500 }, { "epoch": 23.31, "learning_rate": 6.704676054085925e-05, "loss": 0.6539, "step": 779000 }, { "epoch": 23.32, "learning_rate": 6.689685498136174e-05, "loss": 0.6695, "step": 779500 }, { "epoch": 23.34, "learning_rate": 6.674694942186422e-05, "loss": 0.6765, "step": 780000 }, { "epoch": 23.35, "learning_rate": 6.65970438623667e-05, "loss": 0.6733, "step": 780500 }, { "epoch": 23.37, "learning_rate": 6.64471383028692e-05, "loss": 0.6826, "step": 781000 }, { "epoch": 23.38, "learning_rate": 6.629723274337167e-05, "loss": 0.6798, "step": 781500 }, { "epoch": 23.4, "learning_rate": 6.614732718387415e-05, "loss": 0.6649, "step": 782000 }, { "epoch": 23.41, "learning_rate": 6.599742162437663e-05, "loss": 0.6744, "step": 782500 }, { "epoch": 23.43, "learning_rate": 6.584751606487913e-05, "loss": 0.6759, "step": 783000 }, { "epoch": 23.44, "learning_rate": 6.56976105053816e-05, "loss": 0.6707, "step": 783500 }, { "epoch": 23.46, "learning_rate": 6.55477049458841e-05, "loss": 0.676, "step": 784000 }, { "epoch": 23.47, "learning_rate": 6.539779938638656e-05, "loss": 0.6724, "step": 784500 }, { "epoch": 23.49, "learning_rate": 6.524789382688906e-05, "loss": 0.6804, "step": 785000 }, { "epoch": 23.5, "learning_rate": 6.509798826739154e-05, "loss": 0.6862, "step": 785500 }, { "epoch": 23.52, "learning_rate": 6.494808270789402e-05, "loss": 0.6835, "step": 786000 }, { "epoch": 23.53, "learning_rate": 6.479817714839651e-05, "loss": 0.6967, "step": 786500 }, { "epoch": 23.55, "learning_rate": 6.464827158889899e-05, "loss": 0.6882, "step": 787000 }, { "epoch": 23.56, "learning_rate": 6.449836602940147e-05, "loss": 0.685, "step": 787500 }, { "epoch": 23.58, "learning_rate": 6.434846046990395e-05, "loss": 0.6631, "step": 788000 }, { "epoch": 23.59, "learning_rate": 6.419855491040644e-05, "loss": 0.682, "step": 788500 }, { "epoch": 23.61, "learning_rate": 6.404864935090892e-05, "loss": 0.6891, "step": 789000 }, { "epoch": 23.62, "learning_rate": 6.389874379141141e-05, "loss": 0.6896, "step": 789500 }, { "epoch": 23.64, "learning_rate": 6.374883823191388e-05, "loss": 0.6729, "step": 790000 }, { "epoch": 23.65, "learning_rate": 6.359893267241637e-05, "loss": 0.6832, "step": 790500 }, { "epoch": 23.67, "learning_rate": 6.344902711291885e-05, "loss": 0.6603, "step": 791000 }, { "epoch": 23.68, "learning_rate": 6.329912155342135e-05, "loss": 0.6817, "step": 791500 }, { "epoch": 23.7, "learning_rate": 6.314921599392382e-05, "loss": 0.6767, "step": 792000 }, { "epoch": 23.71, "learning_rate": 6.29993104344263e-05, "loss": 0.6723, "step": 792500 }, { "epoch": 23.73, "learning_rate": 6.284940487492878e-05, "loss": 0.6728, "step": 793000 }, { "epoch": 23.74, "learning_rate": 6.269949931543126e-05, "loss": 0.6846, "step": 793500 }, { "epoch": 23.76, "learning_rate": 6.254959375593376e-05, "loss": 0.678, "step": 794000 }, { "epoch": 23.77, "learning_rate": 6.239968819643624e-05, "loss": 0.6821, "step": 794500 }, { "epoch": 23.79, "learning_rate": 6.224978263693873e-05, "loss": 0.6712, "step": 795000 }, { "epoch": 23.8, "learning_rate": 6.209987707744121e-05, "loss": 0.6722, "step": 795500 }, { "epoch": 23.82, "learning_rate": 6.194997151794369e-05, "loss": 0.673, "step": 796000 }, { "epoch": 23.83, "learning_rate": 6.180006595844617e-05, "loss": 0.6891, "step": 796500 }, { "epoch": 23.85, "learning_rate": 6.165016039894866e-05, "loss": 0.6592, "step": 797000 }, { "epoch": 23.86, "learning_rate": 6.150025483945114e-05, "loss": 0.6606, "step": 797500 }, { "epoch": 23.88, "learning_rate": 6.135034927995362e-05, "loss": 0.673, "step": 798000 }, { "epoch": 23.89, "learning_rate": 6.120044372045611e-05, "loss": 0.6724, "step": 798500 }, { "epoch": 23.91, "learning_rate": 6.105053816095859e-05, "loss": 0.6778, "step": 799000 }, { "epoch": 23.92, "learning_rate": 6.090063260146107e-05, "loss": 0.6786, "step": 799500 }, { "epoch": 23.94, "learning_rate": 6.075072704196356e-05, "loss": 0.6836, "step": 800000 }, { "epoch": 23.95, "learning_rate": 6.060082148246604e-05, "loss": 0.6673, "step": 800500 }, { "epoch": 23.97, "learning_rate": 6.0450915922968525e-05, "loss": 0.6786, "step": 801000 }, { "epoch": 23.98, "learning_rate": 6.030101036347101e-05, "loss": 0.6797, "step": 801500 }, { "epoch": 24.0, "learning_rate": 6.015110480397349e-05, "loss": 0.6699, "step": 802000 }, { "epoch": 24.01, "learning_rate": 6.000119924447597e-05, "loss": 0.6681, "step": 802500 }, { "epoch": 24.03, "learning_rate": 5.9851293684978456e-05, "loss": 0.6418, "step": 803000 }, { "epoch": 24.04, "learning_rate": 5.970138812548094e-05, "loss": 0.658, "step": 803500 }, { "epoch": 24.06, "learning_rate": 5.955148256598343e-05, "loss": 0.6584, "step": 804000 }, { "epoch": 24.07, "learning_rate": 5.9401577006485915e-05, "loss": 0.6565, "step": 804500 }, { "epoch": 24.09, "learning_rate": 5.925167144698839e-05, "loss": 0.6668, "step": 805000 }, { "epoch": 24.1, "learning_rate": 5.9101765887490874e-05, "loss": 0.6486, "step": 805500 }, { "epoch": 24.12, "learning_rate": 5.895186032799336e-05, "loss": 0.6773, "step": 806000 }, { "epoch": 24.13, "learning_rate": 5.880195476849584e-05, "loss": 0.6601, "step": 806500 }, { "epoch": 24.15, "learning_rate": 5.8652049208998327e-05, "loss": 0.6572, "step": 807000 }, { "epoch": 24.16, "learning_rate": 5.850214364950081e-05, "loss": 0.6722, "step": 807500 }, { "epoch": 24.18, "learning_rate": 5.835223809000329e-05, "loss": 0.6493, "step": 808000 }, { "epoch": 24.19, "learning_rate": 5.820233253050577e-05, "loss": 0.6439, "step": 808500 }, { "epoch": 24.21, "learning_rate": 5.805242697100826e-05, "loss": 0.6486, "step": 809000 }, { "epoch": 24.22, "learning_rate": 5.7902521411510745e-05, "loss": 0.6545, "step": 809500 }, { "epoch": 24.24, "learning_rate": 5.775261585201323e-05, "loss": 0.6602, "step": 810000 }, { "epoch": 24.25, "learning_rate": 5.760271029251571e-05, "loss": 0.6505, "step": 810500 }, { "epoch": 24.27, "learning_rate": 5.745280473301819e-05, "loss": 0.6624, "step": 811000 }, { "epoch": 24.28, "learning_rate": 5.7302899173520676e-05, "loss": 0.6454, "step": 811500 }, { "epoch": 24.3, "learning_rate": 5.715299361402316e-05, "loss": 0.6359, "step": 812000 }, { "epoch": 24.31, "learning_rate": 5.700308805452564e-05, "loss": 0.6581, "step": 812500 }, { "epoch": 24.33, "learning_rate": 5.685318249502813e-05, "loss": 0.6429, "step": 813000 }, { "epoch": 24.34, "learning_rate": 5.6703276935530615e-05, "loss": 0.6763, "step": 813500 }, { "epoch": 24.36, "learning_rate": 5.655337137603309e-05, "loss": 0.6614, "step": 814000 }, { "epoch": 24.37, "learning_rate": 5.6403465816535574e-05, "loss": 0.6569, "step": 814500 }, { "epoch": 24.39, "learning_rate": 5.625356025703806e-05, "loss": 0.6869, "step": 815000 }, { "epoch": 24.4, "learning_rate": 5.610365469754055e-05, "loss": 0.6609, "step": 815500 }, { "epoch": 24.42, "learning_rate": 5.595374913804303e-05, "loss": 0.6502, "step": 816000 }, { "epoch": 24.43, "learning_rate": 5.580384357854551e-05, "loss": 0.6613, "step": 816500 }, { "epoch": 24.45, "learning_rate": 5.565393801904799e-05, "loss": 0.6481, "step": 817000 }, { "epoch": 24.46, "learning_rate": 5.550403245955048e-05, "loss": 0.6377, "step": 817500 }, { "epoch": 24.48, "learning_rate": 5.5354126900052965e-05, "loss": 0.6694, "step": 818000 }, { "epoch": 24.49, "learning_rate": 5.5204221340555444e-05, "loss": 0.6472, "step": 818500 }, { "epoch": 24.51, "learning_rate": 5.505431578105793e-05, "loss": 0.6602, "step": 819000 }, { "epoch": 24.52, "learning_rate": 5.490441022156042e-05, "loss": 0.6638, "step": 819500 }, { "epoch": 24.54, "learning_rate": 5.475450466206289e-05, "loss": 0.6743, "step": 820000 }, { "epoch": 24.55, "learning_rate": 5.4604599102565376e-05, "loss": 0.6461, "step": 820500 }, { "epoch": 24.57, "learning_rate": 5.445469354306786e-05, "loss": 0.6516, "step": 821000 }, { "epoch": 24.58, "learning_rate": 5.430478798357035e-05, "loss": 0.6662, "step": 821500 }, { "epoch": 24.6, "learning_rate": 5.4154882424072835e-05, "loss": 0.655, "step": 822000 }, { "epoch": 24.61, "learning_rate": 5.4004976864575314e-05, "loss": 0.6693, "step": 822500 }, { "epoch": 24.63, "learning_rate": 5.3855071305077794e-05, "loss": 0.661, "step": 823000 }, { "epoch": 24.64, "learning_rate": 5.370516574558028e-05, "loss": 0.66, "step": 823500 }, { "epoch": 24.66, "learning_rate": 5.355526018608276e-05, "loss": 0.6533, "step": 824000 }, { "epoch": 24.67, "learning_rate": 5.3405354626585246e-05, "loss": 0.6472, "step": 824500 }, { "epoch": 24.69, "learning_rate": 5.325544906708773e-05, "loss": 0.6688, "step": 825000 }, { "epoch": 24.7, "learning_rate": 5.310554350759022e-05, "loss": 0.6508, "step": 825500 }, { "epoch": 24.71, "learning_rate": 5.295563794809269e-05, "loss": 0.6564, "step": 826000 }, { "epoch": 24.73, "learning_rate": 5.280573238859518e-05, "loss": 0.6636, "step": 826500 }, { "epoch": 24.74, "learning_rate": 5.2655826829097664e-05, "loss": 0.6558, "step": 827000 }, { "epoch": 24.76, "learning_rate": 5.250592126960015e-05, "loss": 0.642, "step": 827500 }, { "epoch": 24.77, "learning_rate": 5.235601571010263e-05, "loss": 0.6522, "step": 828000 }, { "epoch": 24.79, "learning_rate": 5.2206110150605117e-05, "loss": 0.649, "step": 828500 }, { "epoch": 24.8, "learning_rate": 5.2056204591107596e-05, "loss": 0.6446, "step": 829000 }, { "epoch": 24.82, "learning_rate": 5.190629903161008e-05, "loss": 0.6692, "step": 829500 }, { "epoch": 24.83, "learning_rate": 5.175639347211256e-05, "loss": 0.6634, "step": 830000 }, { "epoch": 24.85, "learning_rate": 5.160648791261505e-05, "loss": 0.6762, "step": 830500 }, { "epoch": 24.86, "learning_rate": 5.1456582353117535e-05, "loss": 0.6498, "step": 831000 }, { "epoch": 24.88, "learning_rate": 5.130667679362002e-05, "loss": 0.6635, "step": 831500 }, { "epoch": 24.89, "learning_rate": 5.1156771234122494e-05, "loss": 0.6404, "step": 832000 }, { "epoch": 24.91, "learning_rate": 5.100686567462498e-05, "loss": 0.6523, "step": 832500 }, { "epoch": 24.92, "learning_rate": 5.0856960115127466e-05, "loss": 0.6744, "step": 833000 }, { "epoch": 24.94, "learning_rate": 5.070705455562995e-05, "loss": 0.6463, "step": 833500 }, { "epoch": 24.95, "learning_rate": 5.055714899613243e-05, "loss": 0.6713, "step": 834000 }, { "epoch": 24.97, "learning_rate": 5.040724343663491e-05, "loss": 0.6611, "step": 834500 }, { "epoch": 24.98, "learning_rate": 5.02573378771374e-05, "loss": 0.6516, "step": 835000 }, { "epoch": 25.0, "learning_rate": 5.0107432317639884e-05, "loss": 0.6663, "step": 835500 }, { "epoch": 25.01, "learning_rate": 4.9957526758142364e-05, "loss": 0.635, "step": 836000 }, { "epoch": 25.03, "learning_rate": 4.980762119864485e-05, "loss": 0.6347, "step": 836500 }, { "epoch": 25.04, "learning_rate": 4.9657715639147337e-05, "loss": 0.6458, "step": 837000 }, { "epoch": 25.06, "learning_rate": 4.950781007964981e-05, "loss": 0.6342, "step": 837500 }, { "epoch": 25.07, "learning_rate": 4.9357904520152296e-05, "loss": 0.6544, "step": 838000 }, { "epoch": 25.09, "learning_rate": 4.920799896065478e-05, "loss": 0.6318, "step": 838500 }, { "epoch": 25.1, "learning_rate": 4.905809340115727e-05, "loss": 0.6476, "step": 839000 }, { "epoch": 25.12, "learning_rate": 4.8908187841659755e-05, "loss": 0.6437, "step": 839500 }, { "epoch": 25.13, "learning_rate": 4.8758282282162234e-05, "loss": 0.6314, "step": 840000 }, { "epoch": 25.15, "learning_rate": 4.8608376722664714e-05, "loss": 0.6457, "step": 840500 }, { "epoch": 25.16, "learning_rate": 4.84584711631672e-05, "loss": 0.6558, "step": 841000 }, { "epoch": 25.18, "learning_rate": 4.830856560366968e-05, "loss": 0.6464, "step": 841500 }, { "epoch": 25.19, "learning_rate": 4.8158660044172166e-05, "loss": 0.6371, "step": 842000 }, { "epoch": 25.21, "learning_rate": 4.800875448467465e-05, "loss": 0.6244, "step": 842500 }, { "epoch": 25.22, "learning_rate": 4.785884892517714e-05, "loss": 0.6327, "step": 843000 }, { "epoch": 25.24, "learning_rate": 4.770894336567961e-05, "loss": 0.6447, "step": 843500 }, { "epoch": 25.25, "learning_rate": 4.75590378061821e-05, "loss": 0.6261, "step": 844000 }, { "epoch": 25.27, "learning_rate": 4.7409132246684584e-05, "loss": 0.6267, "step": 844500 }, { "epoch": 25.28, "learning_rate": 4.725922668718707e-05, "loss": 0.6354, "step": 845000 }, { "epoch": 25.3, "learning_rate": 4.710932112768955e-05, "loss": 0.651, "step": 845500 }, { "epoch": 25.31, "learning_rate": 4.6959415568192036e-05, "loss": 0.654, "step": 846000 }, { "epoch": 25.33, "learning_rate": 4.6809510008694516e-05, "loss": 0.6394, "step": 846500 }, { "epoch": 25.34, "learning_rate": 4.6659604449197e-05, "loss": 0.6439, "step": 847000 }, { "epoch": 25.36, "learning_rate": 4.650969888969948e-05, "loss": 0.6441, "step": 847500 }, { "epoch": 25.37, "learning_rate": 4.635979333020197e-05, "loss": 0.6197, "step": 848000 }, { "epoch": 25.39, "learning_rate": 4.6209887770704454e-05, "loss": 0.6318, "step": 848500 }, { "epoch": 25.4, "learning_rate": 4.605998221120694e-05, "loss": 0.6511, "step": 849000 }, { "epoch": 25.42, "learning_rate": 4.591007665170941e-05, "loss": 0.6532, "step": 849500 }, { "epoch": 25.43, "learning_rate": 4.57601710922119e-05, "loss": 0.6495, "step": 850000 }, { "epoch": 25.45, "learning_rate": 4.5610265532714386e-05, "loss": 0.6375, "step": 850500 }, { "epoch": 25.46, "learning_rate": 4.546035997321687e-05, "loss": 0.6347, "step": 851000 }, { "epoch": 25.48, "learning_rate": 4.531045441371935e-05, "loss": 0.6443, "step": 851500 }, { "epoch": 25.49, "learning_rate": 4.516054885422184e-05, "loss": 0.6396, "step": 852000 }, { "epoch": 25.51, "learning_rate": 4.501064329472432e-05, "loss": 0.6271, "step": 852500 }, { "epoch": 25.52, "learning_rate": 4.4860737735226804e-05, "loss": 0.6305, "step": 853000 }, { "epoch": 25.54, "learning_rate": 4.4710832175729284e-05, "loss": 0.642, "step": 853500 }, { "epoch": 25.55, "learning_rate": 4.456092661623177e-05, "loss": 0.6298, "step": 854000 }, { "epoch": 25.57, "learning_rate": 4.4411021056734256e-05, "loss": 0.6554, "step": 854500 }, { "epoch": 25.58, "learning_rate": 4.426111549723674e-05, "loss": 0.6406, "step": 855000 }, { "epoch": 25.6, "learning_rate": 4.4111209937739215e-05, "loss": 0.6407, "step": 855500 }, { "epoch": 25.61, "learning_rate": 4.39613043782417e-05, "loss": 0.6408, "step": 856000 }, { "epoch": 25.63, "learning_rate": 4.381139881874419e-05, "loss": 0.6378, "step": 856500 }, { "epoch": 25.64, "learning_rate": 4.3661493259246674e-05, "loss": 0.658, "step": 857000 }, { "epoch": 25.66, "learning_rate": 4.3511587699749154e-05, "loss": 0.6379, "step": 857500 }, { "epoch": 25.67, "learning_rate": 4.336168214025164e-05, "loss": 0.6436, "step": 858000 }, { "epoch": 25.69, "learning_rate": 4.321177658075412e-05, "loss": 0.6418, "step": 858500 }, { "epoch": 25.7, "learning_rate": 4.30618710212566e-05, "loss": 0.6297, "step": 859000 }, { "epoch": 25.72, "learning_rate": 4.2911965461759086e-05, "loss": 0.6468, "step": 859500 }, { "epoch": 25.73, "learning_rate": 4.276205990226157e-05, "loss": 0.643, "step": 860000 }, { "epoch": 25.75, "learning_rate": 4.261215434276406e-05, "loss": 0.6405, "step": 860500 }, { "epoch": 25.76, "learning_rate": 4.2462248783266545e-05, "loss": 0.6549, "step": 861000 }, { "epoch": 25.78, "learning_rate": 4.231234322376902e-05, "loss": 0.6577, "step": 861500 }, { "epoch": 25.79, "learning_rate": 4.2162437664271504e-05, "loss": 0.6362, "step": 862000 }, { "epoch": 25.81, "learning_rate": 4.201253210477399e-05, "loss": 0.6412, "step": 862500 }, { "epoch": 25.82, "learning_rate": 4.1862626545276476e-05, "loss": 0.6265, "step": 863000 }, { "epoch": 25.84, "learning_rate": 4.1712720985778956e-05, "loss": 0.6356, "step": 863500 }, { "epoch": 25.85, "learning_rate": 4.156281542628144e-05, "loss": 0.6583, "step": 864000 }, { "epoch": 25.87, "learning_rate": 4.141290986678392e-05, "loss": 0.647, "step": 864500 }, { "epoch": 25.88, "learning_rate": 4.12630043072864e-05, "loss": 0.6412, "step": 865000 }, { "epoch": 25.9, "learning_rate": 4.111309874778889e-05, "loss": 0.6503, "step": 865500 }, { "epoch": 25.91, "learning_rate": 4.0963193188291374e-05, "loss": 0.6381, "step": 866000 }, { "epoch": 25.93, "learning_rate": 4.081328762879386e-05, "loss": 0.6299, "step": 866500 }, { "epoch": 25.94, "learning_rate": 4.0663382069296347e-05, "loss": 0.6442, "step": 867000 }, { "epoch": 25.96, "learning_rate": 4.051347650979882e-05, "loss": 0.6178, "step": 867500 }, { "epoch": 25.97, "learning_rate": 4.0363570950301306e-05, "loss": 0.6469, "step": 868000 }, { "epoch": 25.99, "learning_rate": 4.021366539080379e-05, "loss": 0.6421, "step": 868500 }, { "epoch": 26.0, "learning_rate": 4.006375983130627e-05, "loss": 0.6367, "step": 869000 }, { "epoch": 26.02, "learning_rate": 3.991385427180876e-05, "loss": 0.6152, "step": 869500 }, { "epoch": 26.03, "learning_rate": 3.9763948712311244e-05, "loss": 0.6387, "step": 870000 }, { "epoch": 26.05, "learning_rate": 3.9614043152813724e-05, "loss": 0.6327, "step": 870500 }, { "epoch": 26.06, "learning_rate": 3.94641375933162e-05, "loss": 0.6142, "step": 871000 }, { "epoch": 26.08, "learning_rate": 3.931423203381869e-05, "loss": 0.6437, "step": 871500 }, { "epoch": 26.09, "learning_rate": 3.9164326474321176e-05, "loss": 0.6211, "step": 872000 }, { "epoch": 26.11, "learning_rate": 3.901442091482366e-05, "loss": 0.6181, "step": 872500 }, { "epoch": 26.12, "learning_rate": 3.886451535532614e-05, "loss": 0.6129, "step": 873000 }, { "epoch": 26.14, "learning_rate": 3.871460979582862e-05, "loss": 0.637, "step": 873500 }, { "epoch": 26.15, "learning_rate": 3.856470423633111e-05, "loss": 0.6191, "step": 874000 }, { "epoch": 26.17, "learning_rate": 3.8414798676833594e-05, "loss": 0.6342, "step": 874500 }, { "epoch": 26.18, "learning_rate": 3.8264893117336074e-05, "loss": 0.6314, "step": 875000 }, { "epoch": 26.2, "learning_rate": 3.811498755783856e-05, "loss": 0.6259, "step": 875500 }, { "epoch": 26.21, "learning_rate": 3.7965081998341046e-05, "loss": 0.6446, "step": 876000 }, { "epoch": 26.23, "learning_rate": 3.7815176438843526e-05, "loss": 0.6377, "step": 876500 }, { "epoch": 26.24, "learning_rate": 3.7665270879346005e-05, "loss": 0.6086, "step": 877000 }, { "epoch": 26.26, "learning_rate": 3.751536531984849e-05, "loss": 0.6378, "step": 877500 }, { "epoch": 26.27, "learning_rate": 3.736545976035098e-05, "loss": 0.6313, "step": 878000 }, { "epoch": 26.29, "learning_rate": 3.721555420085346e-05, "loss": 0.6377, "step": 878500 }, { "epoch": 26.3, "learning_rate": 3.7065648641355944e-05, "loss": 0.6311, "step": 879000 }, { "epoch": 26.32, "learning_rate": 3.691574308185843e-05, "loss": 0.6219, "step": 879500 }, { "epoch": 26.33, "learning_rate": 3.676583752236091e-05, "loss": 0.6131, "step": 880000 }, { "epoch": 26.35, "learning_rate": 3.6615931962863396e-05, "loss": 0.6142, "step": 880500 }, { "epoch": 26.36, "learning_rate": 3.6466026403365876e-05, "loss": 0.6062, "step": 881000 }, { "epoch": 26.38, "learning_rate": 3.6316120843868355e-05, "loss": 0.6393, "step": 881500 }, { "epoch": 26.39, "learning_rate": 3.616621528437084e-05, "loss": 0.618, "step": 882000 }, { "epoch": 26.41, "learning_rate": 3.601630972487333e-05, "loss": 0.6238, "step": 882500 }, { "epoch": 26.42, "learning_rate": 3.586640416537581e-05, "loss": 0.6266, "step": 883000 }, { "epoch": 26.44, "learning_rate": 3.5716498605878294e-05, "loss": 0.6211, "step": 883500 }, { "epoch": 26.45, "learning_rate": 3.556659304638078e-05, "loss": 0.6272, "step": 884000 }, { "epoch": 26.47, "learning_rate": 3.541668748688326e-05, "loss": 0.625, "step": 884500 }, { "epoch": 26.48, "learning_rate": 3.5266781927385746e-05, "loss": 0.6209, "step": 885000 }, { "epoch": 26.5, "learning_rate": 3.511687636788823e-05, "loss": 0.6407, "step": 885500 }, { "epoch": 26.51, "learning_rate": 3.496697080839071e-05, "loss": 0.6077, "step": 886000 }, { "epoch": 26.53, "learning_rate": 3.481706524889319e-05, "loss": 0.6272, "step": 886500 }, { "epoch": 26.54, "learning_rate": 3.466715968939568e-05, "loss": 0.6299, "step": 887000 }, { "epoch": 26.56, "learning_rate": 3.451725412989816e-05, "loss": 0.6205, "step": 887500 }, { "epoch": 26.57, "learning_rate": 3.4367348570400643e-05, "loss": 0.6117, "step": 888000 }, { "epoch": 26.59, "learning_rate": 3.421744301090313e-05, "loss": 0.641, "step": 888500 }, { "epoch": 26.6, "learning_rate": 3.406753745140561e-05, "loss": 0.6289, "step": 889000 }, { "epoch": 26.62, "learning_rate": 3.3917631891908096e-05, "loss": 0.6102, "step": 889500 }, { "epoch": 26.63, "learning_rate": 3.376772633241058e-05, "loss": 0.6362, "step": 890000 }, { "epoch": 26.64, "learning_rate": 3.361782077291306e-05, "loss": 0.621, "step": 890500 }, { "epoch": 26.66, "learning_rate": 3.346791521341555e-05, "loss": 0.6283, "step": 891000 }, { "epoch": 26.67, "learning_rate": 3.331800965391803e-05, "loss": 0.6255, "step": 891500 }, { "epoch": 26.69, "learning_rate": 3.3168104094420514e-05, "loss": 0.6345, "step": 892000 }, { "epoch": 26.7, "learning_rate": 3.301819853492299e-05, "loss": 0.6232, "step": 892500 }, { "epoch": 26.72, "learning_rate": 3.286829297542548e-05, "loss": 0.634, "step": 893000 }, { "epoch": 26.73, "learning_rate": 3.271838741592796e-05, "loss": 0.6113, "step": 893500 }, { "epoch": 26.75, "learning_rate": 3.2568481856430445e-05, "loss": 0.6278, "step": 894000 }, { "epoch": 26.76, "learning_rate": 3.241857629693293e-05, "loss": 0.6257, "step": 894500 }, { "epoch": 26.78, "learning_rate": 3.226867073743541e-05, "loss": 0.63, "step": 895000 }, { "epoch": 26.79, "learning_rate": 3.21187651779379e-05, "loss": 0.6132, "step": 895500 }, { "epoch": 26.81, "learning_rate": 3.1968859618440384e-05, "loss": 0.6146, "step": 896000 }, { "epoch": 26.82, "learning_rate": 3.1818954058942863e-05, "loss": 0.6183, "step": 896500 }, { "epoch": 26.84, "learning_rate": 3.166904849944535e-05, "loss": 0.6271, "step": 897000 }, { "epoch": 26.85, "learning_rate": 3.151914293994783e-05, "loss": 0.6213, "step": 897500 }, { "epoch": 26.87, "learning_rate": 3.1369237380450316e-05, "loss": 0.6187, "step": 898000 }, { "epoch": 26.88, "learning_rate": 3.1219331820952795e-05, "loss": 0.6358, "step": 898500 }, { "epoch": 26.9, "learning_rate": 3.106942626145528e-05, "loss": 0.6257, "step": 899000 }, { "epoch": 26.91, "learning_rate": 3.091952070195776e-05, "loss": 0.6058, "step": 899500 }, { "epoch": 26.93, "learning_rate": 3.076961514246025e-05, "loss": 0.6168, "step": 900000 }, { "epoch": 26.94, "learning_rate": 3.0619709582962734e-05, "loss": 0.6181, "step": 900500 }, { "epoch": 26.96, "learning_rate": 3.0469804023465213e-05, "loss": 0.6164, "step": 901000 }, { "epoch": 26.97, "learning_rate": 3.03198984639677e-05, "loss": 0.6293, "step": 901500 }, { "epoch": 26.99, "learning_rate": 3.016999290447018e-05, "loss": 0.631, "step": 902000 }, { "epoch": 27.0, "learning_rate": 3.0020087344972665e-05, "loss": 0.6212, "step": 902500 }, { "epoch": 27.02, "learning_rate": 2.987018178547515e-05, "loss": 0.5994, "step": 903000 }, { "epoch": 27.03, "learning_rate": 2.9720276225977628e-05, "loss": 0.6311, "step": 903500 }, { "epoch": 27.05, "learning_rate": 2.9570370666480114e-05, "loss": 0.6059, "step": 904000 }, { "epoch": 27.06, "learning_rate": 2.94204651069826e-05, "loss": 0.6046, "step": 904500 }, { "epoch": 27.08, "learning_rate": 2.927055954748508e-05, "loss": 0.6107, "step": 905000 }, { "epoch": 27.09, "learning_rate": 2.9120653987987563e-05, "loss": 0.6181, "step": 905500 }, { "epoch": 27.11, "learning_rate": 2.897074842849005e-05, "loss": 0.615, "step": 906000 }, { "epoch": 27.12, "learning_rate": 2.882084286899253e-05, "loss": 0.6037, "step": 906500 }, { "epoch": 27.14, "learning_rate": 2.8670937309495015e-05, "loss": 0.6293, "step": 907000 }, { "epoch": 27.15, "learning_rate": 2.85210317499975e-05, "loss": 0.6253, "step": 907500 }, { "epoch": 27.17, "learning_rate": 2.837112619049998e-05, "loss": 0.6161, "step": 908000 }, { "epoch": 27.18, "learning_rate": 2.8221220631002464e-05, "loss": 0.6192, "step": 908500 }, { "epoch": 27.2, "learning_rate": 2.807131507150495e-05, "loss": 0.6139, "step": 909000 }, { "epoch": 27.21, "learning_rate": 2.792140951200743e-05, "loss": 0.6053, "step": 909500 }, { "epoch": 27.23, "learning_rate": 2.7771503952509916e-05, "loss": 0.5955, "step": 910000 }, { "epoch": 27.24, "learning_rate": 2.76215983930124e-05, "loss": 0.6129, "step": 910500 }, { "epoch": 27.26, "learning_rate": 2.7471692833514882e-05, "loss": 0.6196, "step": 911000 }, { "epoch": 27.27, "learning_rate": 2.7321787274017365e-05, "loss": 0.6237, "step": 911500 }, { "epoch": 27.29, "learning_rate": 2.717188171451985e-05, "loss": 0.5945, "step": 912000 }, { "epoch": 27.3, "learning_rate": 2.702197615502233e-05, "loss": 0.6236, "step": 912500 }, { "epoch": 27.32, "learning_rate": 2.6872070595524817e-05, "loss": 0.614, "step": 913000 }, { "epoch": 27.33, "learning_rate": 2.67221650360273e-05, "loss": 0.6084, "step": 913500 }, { "epoch": 27.35, "learning_rate": 2.6572259476529783e-05, "loss": 0.593, "step": 914000 }, { "epoch": 27.36, "learning_rate": 2.6422353917032266e-05, "loss": 0.6267, "step": 914500 }, { "epoch": 27.38, "learning_rate": 2.6272448357534752e-05, "loss": 0.6058, "step": 915000 }, { "epoch": 27.39, "learning_rate": 2.6122542798037232e-05, "loss": 0.6168, "step": 915500 }, { "epoch": 27.41, "learning_rate": 2.5972637238539718e-05, "loss": 0.5989, "step": 916000 }, { "epoch": 27.42, "learning_rate": 2.58227316790422e-05, "loss": 0.6066, "step": 916500 }, { "epoch": 27.44, "learning_rate": 2.5672826119544684e-05, "loss": 0.6248, "step": 917000 }, { "epoch": 27.45, "learning_rate": 2.5522920560047167e-05, "loss": 0.6318, "step": 917500 }, { "epoch": 27.47, "learning_rate": 2.5373015000549653e-05, "loss": 0.6234, "step": 918000 }, { "epoch": 27.48, "learning_rate": 2.5223109441052133e-05, "loss": 0.6226, "step": 918500 }, { "epoch": 27.5, "learning_rate": 2.507320388155462e-05, "loss": 0.6342, "step": 919000 }, { "epoch": 27.51, "learning_rate": 2.4923298322057102e-05, "loss": 0.5974, "step": 919500 }, { "epoch": 27.53, "learning_rate": 2.4773392762559585e-05, "loss": 0.6084, "step": 920000 }, { "epoch": 27.54, "learning_rate": 2.4623487203062068e-05, "loss": 0.6086, "step": 920500 }, { "epoch": 27.56, "learning_rate": 2.4473581643564554e-05, "loss": 0.6073, "step": 921000 }, { "epoch": 27.57, "learning_rate": 2.4323676084067034e-05, "loss": 0.6144, "step": 921500 }, { "epoch": 27.59, "learning_rate": 2.417377052456952e-05, "loss": 0.6047, "step": 922000 }, { "epoch": 27.6, "learning_rate": 2.4023864965072003e-05, "loss": 0.6012, "step": 922500 }, { "epoch": 27.62, "learning_rate": 2.3873959405574486e-05, "loss": 0.6015, "step": 923000 }, { "epoch": 27.63, "learning_rate": 2.372405384607697e-05, "loss": 0.6039, "step": 923500 }, { "epoch": 27.65, "learning_rate": 2.3574148286579455e-05, "loss": 0.6067, "step": 924000 }, { "epoch": 27.66, "learning_rate": 2.3424242727081935e-05, "loss": 0.6227, "step": 924500 }, { "epoch": 27.68, "learning_rate": 2.327433716758442e-05, "loss": 0.6116, "step": 925000 }, { "epoch": 27.69, "learning_rate": 2.3124431608086904e-05, "loss": 0.606, "step": 925500 }, { "epoch": 27.71, "learning_rate": 2.2974526048589384e-05, "loss": 0.6118, "step": 926000 }, { "epoch": 27.72, "learning_rate": 2.282462048909187e-05, "loss": 0.6126, "step": 926500 }, { "epoch": 27.74, "learning_rate": 2.2674714929594356e-05, "loss": 0.6072, "step": 927000 }, { "epoch": 27.75, "learning_rate": 2.2524809370096836e-05, "loss": 0.6037, "step": 927500 }, { "epoch": 27.77, "learning_rate": 2.237490381059932e-05, "loss": 0.6106, "step": 928000 }, { "epoch": 27.78, "learning_rate": 2.2224998251101805e-05, "loss": 0.6031, "step": 928500 }, { "epoch": 27.8, "learning_rate": 2.2075092691604285e-05, "loss": 0.6117, "step": 929000 }, { "epoch": 27.81, "learning_rate": 2.192518713210677e-05, "loss": 0.6292, "step": 929500 }, { "epoch": 27.83, "learning_rate": 2.1775281572609257e-05, "loss": 0.6186, "step": 930000 }, { "epoch": 27.84, "learning_rate": 2.1625376013111737e-05, "loss": 0.5992, "step": 930500 }, { "epoch": 27.86, "learning_rate": 2.147547045361422e-05, "loss": 0.6177, "step": 931000 }, { "epoch": 27.87, "learning_rate": 2.1325564894116706e-05, "loss": 0.6232, "step": 931500 }, { "epoch": 27.89, "learning_rate": 2.1175659334619186e-05, "loss": 0.6028, "step": 932000 }, { "epoch": 27.9, "learning_rate": 2.1025753775121672e-05, "loss": 0.5986, "step": 932500 }, { "epoch": 27.92, "learning_rate": 2.0875848215624155e-05, "loss": 0.6152, "step": 933000 }, { "epoch": 27.93, "learning_rate": 2.0725942656126638e-05, "loss": 0.5985, "step": 933500 }, { "epoch": 27.95, "learning_rate": 2.057603709662912e-05, "loss": 0.6113, "step": 934000 }, { "epoch": 27.96, "learning_rate": 2.0426131537131607e-05, "loss": 0.6099, "step": 934500 }, { "epoch": 27.98, "learning_rate": 2.0276225977634087e-05, "loss": 0.5843, "step": 935000 }, { "epoch": 27.99, "learning_rate": 2.0126320418136573e-05, "loss": 0.6036, "step": 935500 }, { "epoch": 28.01, "learning_rate": 1.9976414858639056e-05, "loss": 0.6014, "step": 936000 }, { "epoch": 28.02, "learning_rate": 1.982650929914154e-05, "loss": 0.5958, "step": 936500 }, { "epoch": 28.04, "learning_rate": 1.9676603739644022e-05, "loss": 0.6049, "step": 937000 }, { "epoch": 28.05, "learning_rate": 1.9526698180146508e-05, "loss": 0.6062, "step": 937500 }, { "epoch": 28.07, "learning_rate": 1.9376792620648988e-05, "loss": 0.5972, "step": 938000 }, { "epoch": 28.08, "learning_rate": 1.9226887061151474e-05, "loss": 0.5964, "step": 938500 }, { "epoch": 28.1, "learning_rate": 1.9076981501653957e-05, "loss": 0.6094, "step": 939000 }, { "epoch": 28.11, "learning_rate": 1.892707594215644e-05, "loss": 0.5902, "step": 939500 }, { "epoch": 28.13, "learning_rate": 1.8777170382658923e-05, "loss": 0.6009, "step": 940000 }, { "epoch": 28.14, "learning_rate": 1.8627264823161406e-05, "loss": 0.5857, "step": 940500 }, { "epoch": 28.16, "learning_rate": 1.847735926366389e-05, "loss": 0.6009, "step": 941000 }, { "epoch": 28.17, "learning_rate": 1.8327453704166375e-05, "loss": 0.6036, "step": 941500 }, { "epoch": 28.19, "learning_rate": 1.8177548144668858e-05, "loss": 0.5965, "step": 942000 }, { "epoch": 28.2, "learning_rate": 1.802764258517134e-05, "loss": 0.6016, "step": 942500 }, { "epoch": 28.22, "learning_rate": 1.7877737025673824e-05, "loss": 0.603, "step": 943000 }, { "epoch": 28.23, "learning_rate": 1.7727831466176307e-05, "loss": 0.6095, "step": 943500 }, { "epoch": 28.25, "learning_rate": 1.757792590667879e-05, "loss": 0.5984, "step": 944000 }, { "epoch": 28.26, "learning_rate": 1.7428020347181276e-05, "loss": 0.5945, "step": 944500 }, { "epoch": 28.28, "learning_rate": 1.727811478768376e-05, "loss": 0.6038, "step": 945000 }, { "epoch": 28.29, "learning_rate": 1.7128209228186242e-05, "loss": 0.5933, "step": 945500 }, { "epoch": 28.31, "learning_rate": 1.6978303668688725e-05, "loss": 0.6037, "step": 946000 }, { "epoch": 28.32, "learning_rate": 1.6828398109191208e-05, "loss": 0.6082, "step": 946500 }, { "epoch": 28.34, "learning_rate": 1.667849254969369e-05, "loss": 0.5862, "step": 947000 }, { "epoch": 28.35, "learning_rate": 1.6528586990196177e-05, "loss": 0.598, "step": 947500 }, { "epoch": 28.37, "learning_rate": 1.637868143069866e-05, "loss": 0.591, "step": 948000 }, { "epoch": 28.38, "learning_rate": 1.622877587120114e-05, "loss": 0.5798, "step": 948500 }, { "epoch": 28.4, "learning_rate": 1.6078870311703626e-05, "loss": 0.5916, "step": 949000 }, { "epoch": 28.41, "learning_rate": 1.592896475220611e-05, "loss": 0.6024, "step": 949500 }, { "epoch": 28.43, "learning_rate": 1.5779059192708592e-05, "loss": 0.5936, "step": 950000 }, { "epoch": 28.44, "learning_rate": 1.5629153633211078e-05, "loss": 0.6059, "step": 950500 }, { "epoch": 28.46, "learning_rate": 1.5479248073713558e-05, "loss": 0.5938, "step": 951000 }, { "epoch": 28.47, "learning_rate": 1.532934251421604e-05, "loss": 0.5886, "step": 951500 }, { "epoch": 28.49, "learning_rate": 1.5179436954718527e-05, "loss": 0.5988, "step": 952000 }, { "epoch": 28.5, "learning_rate": 1.502953139522101e-05, "loss": 0.6229, "step": 952500 }, { "epoch": 28.52, "learning_rate": 1.4879625835723493e-05, "loss": 0.5935, "step": 953000 }, { "epoch": 28.53, "learning_rate": 1.4729720276225977e-05, "loss": 0.6019, "step": 953500 }, { "epoch": 28.54, "learning_rate": 1.457981471672846e-05, "loss": 0.6029, "step": 954000 }, { "epoch": 28.56, "learning_rate": 1.4429909157230943e-05, "loss": 0.5877, "step": 954500 }, { "epoch": 28.57, "learning_rate": 1.4280003597733428e-05, "loss": 0.6127, "step": 955000 }, { "epoch": 28.59, "learning_rate": 1.4130098038235911e-05, "loss": 0.6104, "step": 955500 }, { "epoch": 28.6, "learning_rate": 1.3980192478738394e-05, "loss": 0.5947, "step": 956000 }, { "epoch": 28.62, "learning_rate": 1.3830286919240878e-05, "loss": 0.6072, "step": 956500 }, { "epoch": 28.63, "learning_rate": 1.3680381359743361e-05, "loss": 0.6168, "step": 957000 }, { "epoch": 28.65, "learning_rate": 1.3530475800245844e-05, "loss": 0.5884, "step": 957500 }, { "epoch": 28.66, "learning_rate": 1.3380570240748329e-05, "loss": 0.6068, "step": 958000 }, { "epoch": 28.68, "learning_rate": 1.3230664681250812e-05, "loss": 0.5842, "step": 958500 }, { "epoch": 28.69, "learning_rate": 1.3080759121753293e-05, "loss": 0.592, "step": 959000 }, { "epoch": 28.71, "learning_rate": 1.2930853562255776e-05, "loss": 0.592, "step": 959500 }, { "epoch": 28.72, "learning_rate": 1.2780948002758262e-05, "loss": 0.6008, "step": 960000 }, { "epoch": 28.74, "learning_rate": 1.2631042443260744e-05, "loss": 0.6014, "step": 960500 }, { "epoch": 28.75, "learning_rate": 1.2481136883763227e-05, "loss": 0.6173, "step": 961000 }, { "epoch": 28.77, "learning_rate": 1.2331231324265711e-05, "loss": 0.6105, "step": 961500 }, { "epoch": 28.78, "learning_rate": 1.2181325764768194e-05, "loss": 0.5961, "step": 962000 }, { "epoch": 28.8, "learning_rate": 1.2031420205270677e-05, "loss": 0.6072, "step": 962500 }, { "epoch": 28.81, "learning_rate": 1.1881514645773162e-05, "loss": 0.6136, "step": 963000 }, { "epoch": 28.83, "learning_rate": 1.1731609086275645e-05, "loss": 0.6051, "step": 963500 }, { "epoch": 28.84, "learning_rate": 1.1581703526778128e-05, "loss": 0.5956, "step": 964000 }, { "epoch": 28.86, "learning_rate": 1.1431797967280612e-05, "loss": 0.5941, "step": 964500 }, { "epoch": 28.87, "learning_rate": 1.1281892407783095e-05, "loss": 0.6001, "step": 965000 }, { "epoch": 28.89, "learning_rate": 1.1131986848285578e-05, "loss": 0.5945, "step": 965500 }, { "epoch": 28.9, "learning_rate": 1.0982081288788063e-05, "loss": 0.6144, "step": 966000 }, { "epoch": 28.92, "learning_rate": 1.0832175729290546e-05, "loss": 0.606, "step": 966500 }, { "epoch": 28.93, "learning_rate": 1.0682270169793029e-05, "loss": 0.5952, "step": 967000 }, { "epoch": 28.95, "learning_rate": 1.0532364610295513e-05, "loss": 0.6121, "step": 967500 }, { "epoch": 28.96, "learning_rate": 1.0382459050797996e-05, "loss": 0.5926, "step": 968000 }, { "epoch": 28.98, "learning_rate": 1.0232553491300479e-05, "loss": 0.5864, "step": 968500 }, { "epoch": 28.99, "learning_rate": 1.0082647931802964e-05, "loss": 0.5794, "step": 969000 } ], "logging_steps": 500, "max_steps": 1002630, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 500, "total_flos": 2.4453843078595006e+21, "train_batch_size": 2, "trial_name": null, "trial_params": null }