diff --git "a/last-checkpoint/trainer_state.json" "b/last-checkpoint/trainer_state.json" --- "a/last-checkpoint/trainer_state.json" +++ "b/last-checkpoint/trainer_state.json" @@ -1,8 +1,8 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 0.3233760459193985, - "global_step": 4000, + "epoch": 0.646752091838797, + "global_step": 8000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, @@ -4916,11 +4916,4822 @@ "eval_samples_per_second": 0.092, "eval_steps_per_second": 0.046, "step": 4000 + }, + { + "epoch": 0.32, + "learning_rate": 4.461557118602959e-05, + "loss": 2.9516, + "step": 4005 + }, + { + "epoch": 0.32, + "learning_rate": 4.460883391273884e-05, + "loss": 3.2431, + "step": 4010 + }, + { + "epoch": 0.32, + "learning_rate": 4.460209663944808e-05, + "loss": 3.0873, + "step": 4015 + }, + { + "epoch": 0.32, + "learning_rate": 4.459535936615733e-05, + "loss": 2.8684, + "step": 4020 + }, + { + "epoch": 0.33, + "learning_rate": 4.458862209286658e-05, + "loss": 3.1442, + "step": 4025 + }, + { + "epoch": 0.33, + "learning_rate": 4.458188481957582e-05, + "loss": 3.1723, + "step": 4030 + }, + { + "epoch": 0.33, + "learning_rate": 4.4575147546285065e-05, + "loss": 3.4121, + "step": 4035 + }, + { + "epoch": 0.33, + "learning_rate": 4.4568410272994313e-05, + "loss": 2.8923, + "step": 4040 + }, + { + "epoch": 0.33, + "learning_rate": 4.456167299970356e-05, + "loss": 3.3188, + "step": 4045 + }, + { + "epoch": 0.33, + "learning_rate": 4.455493572641281e-05, + "loss": 3.3509, + "step": 4050 + }, + { + "epoch": 0.33, + "learning_rate": 4.454819845312205e-05, + "loss": 2.9415, + "step": 4055 + }, + { + "epoch": 0.33, + "learning_rate": 4.45414611798313e-05, + "loss": 3.1391, + "step": 4060 + }, + { + "epoch": 0.33, + "learning_rate": 4.453472390654055e-05, + "loss": 3.2693, + "step": 4065 + }, + { + "epoch": 0.33, + "learning_rate": 4.452798663324979e-05, + "loss": 3.1826, + "step": 4070 + }, + { + "epoch": 0.33, + "learning_rate": 4.452124935995904e-05, + "loss": 2.9731, + "step": 4075 + }, + { + "epoch": 0.33, + "learning_rate": 4.4514512086668284e-05, + "loss": 3.0303, + "step": 4080 + }, + { + "epoch": 0.33, + "learning_rate": 4.450777481337753e-05, + "loss": 3.1995, + "step": 4085 + }, + { + "epoch": 0.33, + "learning_rate": 4.4501037540086775e-05, + "loss": 3.0824, + "step": 4090 + }, + { + "epoch": 0.33, + "learning_rate": 4.4494300266796024e-05, + "loss": 3.1416, + "step": 4095 + }, + { + "epoch": 0.33, + "learning_rate": 4.448756299350527e-05, + "loss": 3.0063, + "step": 4100 + }, + { + "epoch": 0.33, + "learning_rate": 4.448082572021452e-05, + "loss": 2.9765, + "step": 4105 + }, + { + "epoch": 0.33, + "learning_rate": 4.4474088446923764e-05, + "loss": 3.1823, + "step": 4110 + }, + { + "epoch": 0.33, + "learning_rate": 4.446735117363301e-05, + "loss": 3.255, + "step": 4115 + }, + { + "epoch": 0.33, + "learning_rate": 4.4460613900342255e-05, + "loss": 3.2149, + "step": 4120 + }, + { + "epoch": 0.33, + "learning_rate": 4.44538766270515e-05, + "loss": 3.0531, + "step": 4125 + }, + { + "epoch": 0.33, + "learning_rate": 4.4447139353760746e-05, + "loss": 3.0287, + "step": 4130 + }, + { + "epoch": 0.33, + "learning_rate": 4.4440402080469995e-05, + "loss": 2.8681, + "step": 4135 + }, + { + "epoch": 0.33, + "learning_rate": 4.4433664807179243e-05, + "loss": 3.2937, + "step": 4140 + }, + { + "epoch": 0.34, + "learning_rate": 4.4426927533888486e-05, + "loss": 3.1149, + "step": 4145 + }, + { + "epoch": 0.34, + "learning_rate": 4.4420190260597734e-05, + "loss": 3.1983, + "step": 4150 + }, + { + "epoch": 0.34, + "learning_rate": 4.441345298730698e-05, + "loss": 3.3325, + "step": 4155 + }, + { + "epoch": 0.34, + "learning_rate": 4.4406715714016225e-05, + "loss": 3.1252, + "step": 4160 + }, + { + "epoch": 0.34, + "learning_rate": 4.439997844072547e-05, + "loss": 3.2575, + "step": 4165 + }, + { + "epoch": 0.34, + "learning_rate": 4.4393241167434716e-05, + "loss": 3.1463, + "step": 4170 + }, + { + "epoch": 0.34, + "learning_rate": 4.4386503894143965e-05, + "loss": 3.002, + "step": 4175 + }, + { + "epoch": 0.34, + "learning_rate": 4.437976662085321e-05, + "loss": 3.3059, + "step": 4180 + }, + { + "epoch": 0.34, + "learning_rate": 4.4373029347562456e-05, + "loss": 3.2497, + "step": 4185 + }, + { + "epoch": 0.34, + "learning_rate": 4.4366292074271705e-05, + "loss": 2.9685, + "step": 4190 + }, + { + "epoch": 0.34, + "learning_rate": 4.4359554800980954e-05, + "loss": 2.9922, + "step": 4195 + }, + { + "epoch": 0.34, + "learning_rate": 4.4352817527690196e-05, + "loss": 3.261, + "step": 4200 + }, + { + "epoch": 0.34, + "learning_rate": 4.434608025439944e-05, + "loss": 3.0169, + "step": 4205 + }, + { + "epoch": 0.34, + "learning_rate": 4.433934298110869e-05, + "loss": 3.0149, + "step": 4210 + }, + { + "epoch": 0.34, + "learning_rate": 4.433260570781793e-05, + "loss": 3.0762, + "step": 4215 + }, + { + "epoch": 0.34, + "learning_rate": 4.432586843452718e-05, + "loss": 3.1659, + "step": 4220 + }, + { + "epoch": 0.34, + "learning_rate": 4.431913116123643e-05, + "loss": 3.1697, + "step": 4225 + }, + { + "epoch": 0.34, + "learning_rate": 4.4312393887945676e-05, + "loss": 3.2132, + "step": 4230 + }, + { + "epoch": 0.34, + "learning_rate": 4.430565661465492e-05, + "loss": 2.9254, + "step": 4235 + }, + { + "epoch": 0.34, + "learning_rate": 4.429891934136417e-05, + "loss": 3.1275, + "step": 4240 + }, + { + "epoch": 0.34, + "learning_rate": 4.4292182068073416e-05, + "loss": 3.298, + "step": 4245 + }, + { + "epoch": 0.34, + "learning_rate": 4.428544479478266e-05, + "loss": 2.9652, + "step": 4250 + }, + { + "epoch": 0.34, + "learning_rate": 4.42787075214919e-05, + "loss": 2.8456, + "step": 4255 + }, + { + "epoch": 0.34, + "learning_rate": 4.427197024820115e-05, + "loss": 2.9501, + "step": 4260 + }, + { + "epoch": 0.34, + "learning_rate": 4.42652329749104e-05, + "loss": 2.9993, + "step": 4265 + }, + { + "epoch": 0.35, + "learning_rate": 4.425849570161964e-05, + "loss": 3.1575, + "step": 4270 + }, + { + "epoch": 0.35, + "learning_rate": 4.425175842832889e-05, + "loss": 2.9604, + "step": 4275 + }, + { + "epoch": 0.35, + "learning_rate": 4.424502115503814e-05, + "loss": 3.1482, + "step": 4280 + }, + { + "epoch": 0.35, + "learning_rate": 4.4238283881747386e-05, + "loss": 3.2198, + "step": 4285 + }, + { + "epoch": 0.35, + "learning_rate": 4.423154660845663e-05, + "loss": 3.3901, + "step": 4290 + }, + { + "epoch": 0.35, + "learning_rate": 4.422480933516587e-05, + "loss": 3.1776, + "step": 4295 + }, + { + "epoch": 0.35, + "learning_rate": 4.421807206187512e-05, + "loss": 3.0075, + "step": 4300 + }, + { + "epoch": 0.35, + "learning_rate": 4.421133478858437e-05, + "loss": 2.9798, + "step": 4305 + }, + { + "epoch": 0.35, + "learning_rate": 4.420459751529361e-05, + "loss": 2.9517, + "step": 4310 + }, + { + "epoch": 0.35, + "learning_rate": 4.419786024200286e-05, + "loss": 3.1629, + "step": 4315 + }, + { + "epoch": 0.35, + "learning_rate": 4.419112296871211e-05, + "loss": 3.1076, + "step": 4320 + }, + { + "epoch": 0.35, + "learning_rate": 4.418438569542135e-05, + "loss": 2.9667, + "step": 4325 + }, + { + "epoch": 0.35, + "learning_rate": 4.41776484221306e-05, + "loss": 3.1091, + "step": 4330 + }, + { + "epoch": 0.35, + "learning_rate": 4.417091114883984e-05, + "loss": 2.9619, + "step": 4335 + }, + { + "epoch": 0.35, + "learning_rate": 4.416417387554909e-05, + "loss": 3.2804, + "step": 4340 + }, + { + "epoch": 0.35, + "learning_rate": 4.415743660225833e-05, + "loss": 2.9225, + "step": 4345 + }, + { + "epoch": 0.35, + "learning_rate": 4.415069932896758e-05, + "loss": 2.8438, + "step": 4350 + }, + { + "epoch": 0.35, + "learning_rate": 4.414396205567683e-05, + "loss": 3.2374, + "step": 4355 + }, + { + "epoch": 0.35, + "learning_rate": 4.413722478238608e-05, + "loss": 3.027, + "step": 4360 + }, + { + "epoch": 0.35, + "learning_rate": 4.413048750909532e-05, + "loss": 3.1018, + "step": 4365 + }, + { + "epoch": 0.35, + "learning_rate": 4.412375023580457e-05, + "loss": 3.0387, + "step": 4370 + }, + { + "epoch": 0.35, + "learning_rate": 4.411701296251382e-05, + "loss": 3.1002, + "step": 4375 + }, + { + "epoch": 0.35, + "learning_rate": 4.411027568922306e-05, + "loss": 3.0958, + "step": 4380 + }, + { + "epoch": 0.35, + "learning_rate": 4.41035384159323e-05, + "loss": 3.0154, + "step": 4385 + }, + { + "epoch": 0.35, + "learning_rate": 4.409680114264155e-05, + "loss": 2.8887, + "step": 4390 + }, + { + "epoch": 0.36, + "learning_rate": 4.40900638693508e-05, + "loss": 3.3108, + "step": 4395 + }, + { + "epoch": 0.36, + "learning_rate": 4.408332659606004e-05, + "loss": 3.1558, + "step": 4400 + }, + { + "epoch": 0.36, + "learning_rate": 4.407658932276929e-05, + "loss": 3.0019, + "step": 4405 + }, + { + "epoch": 0.36, + "learning_rate": 4.406985204947854e-05, + "loss": 3.1111, + "step": 4410 + }, + { + "epoch": 0.36, + "learning_rate": 4.406311477618779e-05, + "loss": 3.0919, + "step": 4415 + }, + { + "epoch": 0.36, + "learning_rate": 4.405637750289703e-05, + "loss": 3.1809, + "step": 4420 + }, + { + "epoch": 0.36, + "learning_rate": 4.404964022960627e-05, + "loss": 3.211, + "step": 4425 + }, + { + "epoch": 0.36, + "learning_rate": 4.404290295631552e-05, + "loss": 3.0161, + "step": 4430 + }, + { + "epoch": 0.36, + "learning_rate": 4.4036165683024764e-05, + "loss": 3.0918, + "step": 4435 + }, + { + "epoch": 0.36, + "learning_rate": 4.402942840973401e-05, + "loss": 3.1814, + "step": 4440 + }, + { + "epoch": 0.36, + "learning_rate": 4.402269113644326e-05, + "loss": 2.9785, + "step": 4445 + }, + { + "epoch": 0.36, + "learning_rate": 4.401595386315251e-05, + "loss": 3.1153, + "step": 4450 + }, + { + "epoch": 0.36, + "learning_rate": 4.400921658986175e-05, + "loss": 2.8357, + "step": 4455 + }, + { + "epoch": 0.36, + "learning_rate": 4.4002479316571e-05, + "loss": 3.0285, + "step": 4460 + }, + { + "epoch": 0.36, + "learning_rate": 4.3995742043280244e-05, + "loss": 3.2999, + "step": 4465 + }, + { + "epoch": 0.36, + "learning_rate": 4.398900476998949e-05, + "loss": 3.0133, + "step": 4470 + }, + { + "epoch": 0.36, + "learning_rate": 4.3982267496698735e-05, + "loss": 3.3555, + "step": 4475 + }, + { + "epoch": 0.36, + "learning_rate": 4.3975530223407984e-05, + "loss": 3.296, + "step": 4480 + }, + { + "epoch": 0.36, + "learning_rate": 4.396879295011723e-05, + "loss": 3.4159, + "step": 4485 + }, + { + "epoch": 0.36, + "learning_rate": 4.3962055676826475e-05, + "loss": 3.1615, + "step": 4490 + }, + { + "epoch": 0.36, + "learning_rate": 4.3955318403535724e-05, + "loss": 3.1965, + "step": 4495 + }, + { + "epoch": 0.36, + "learning_rate": 4.394858113024497e-05, + "loss": 3.0436, + "step": 4500 + }, + { + "epoch": 0.36, + "learning_rate": 4.3941843856954215e-05, + "loss": 3.1806, + "step": 4505 + }, + { + "epoch": 0.36, + "learning_rate": 4.393510658366346e-05, + "loss": 3.2201, + "step": 4510 + }, + { + "epoch": 0.37, + "learning_rate": 4.3928369310372706e-05, + "loss": 3.0641, + "step": 4515 + }, + { + "epoch": 0.37, + "learning_rate": 4.3921632037081954e-05, + "loss": 2.8647, + "step": 4520 + }, + { + "epoch": 0.37, + "learning_rate": 4.3914894763791197e-05, + "loss": 2.8806, + "step": 4525 + }, + { + "epoch": 0.37, + "learning_rate": 4.3908157490500445e-05, + "loss": 2.9292, + "step": 4530 + }, + { + "epoch": 0.37, + "learning_rate": 4.3901420217209694e-05, + "loss": 3.0328, + "step": 4535 + }, + { + "epoch": 0.37, + "learning_rate": 4.389468294391894e-05, + "loss": 3.1128, + "step": 4540 + }, + { + "epoch": 0.37, + "learning_rate": 4.3887945670628185e-05, + "loss": 3.0104, + "step": 4545 + }, + { + "epoch": 0.37, + "learning_rate": 4.3881208397337434e-05, + "loss": 2.7728, + "step": 4550 + }, + { + "epoch": 0.37, + "learning_rate": 4.3874471124046676e-05, + "loss": 3.2391, + "step": 4555 + }, + { + "epoch": 0.37, + "learning_rate": 4.3867733850755925e-05, + "loss": 2.8613, + "step": 4560 + }, + { + "epoch": 0.37, + "learning_rate": 4.386099657746517e-05, + "loss": 3.3067, + "step": 4565 + }, + { + "epoch": 0.37, + "learning_rate": 4.3854259304174416e-05, + "loss": 2.9091, + "step": 4570 + }, + { + "epoch": 0.37, + "learning_rate": 4.3847522030883665e-05, + "loss": 2.8817, + "step": 4575 + }, + { + "epoch": 0.37, + "learning_rate": 4.384078475759291e-05, + "loss": 2.9741, + "step": 4580 + }, + { + "epoch": 0.37, + "learning_rate": 4.3834047484302156e-05, + "loss": 3.0892, + "step": 4585 + }, + { + "epoch": 0.37, + "learning_rate": 4.3827310211011405e-05, + "loss": 3.2429, + "step": 4590 + }, + { + "epoch": 0.37, + "learning_rate": 4.382057293772065e-05, + "loss": 2.9408, + "step": 4595 + }, + { + "epoch": 0.37, + "learning_rate": 4.381383566442989e-05, + "loss": 3.0757, + "step": 4600 + }, + { + "epoch": 0.37, + "learning_rate": 4.380709839113914e-05, + "loss": 2.8738, + "step": 4605 + }, + { + "epoch": 0.37, + "learning_rate": 4.380036111784839e-05, + "loss": 3.2346, + "step": 4610 + }, + { + "epoch": 0.37, + "learning_rate": 4.3793623844557636e-05, + "loss": 2.9399, + "step": 4615 + }, + { + "epoch": 0.37, + "learning_rate": 4.378688657126688e-05, + "loss": 3.195, + "step": 4620 + }, + { + "epoch": 0.37, + "learning_rate": 4.3780149297976127e-05, + "loss": 2.7772, + "step": 4625 + }, + { + "epoch": 0.37, + "learning_rate": 4.3773412024685375e-05, + "loss": 3.4355, + "step": 4630 + }, + { + "epoch": 0.37, + "learning_rate": 4.376667475139462e-05, + "loss": 3.2062, + "step": 4635 + }, + { + "epoch": 0.38, + "learning_rate": 4.375993747810386e-05, + "loss": 3.1914, + "step": 4640 + }, + { + "epoch": 0.38, + "learning_rate": 4.375320020481311e-05, + "loss": 3.1476, + "step": 4645 + }, + { + "epoch": 0.38, + "learning_rate": 4.374646293152236e-05, + "loss": 3.0083, + "step": 4650 + }, + { + "epoch": 0.38, + "learning_rate": 4.37397256582316e-05, + "loss": 3.2261, + "step": 4655 + }, + { + "epoch": 0.38, + "learning_rate": 4.373298838494085e-05, + "loss": 3.2371, + "step": 4660 + }, + { + "epoch": 0.38, + "learning_rate": 4.37262511116501e-05, + "loss": 2.9125, + "step": 4665 + }, + { + "epoch": 0.38, + "learning_rate": 4.3719513838359346e-05, + "loss": 3.3789, + "step": 4670 + }, + { + "epoch": 0.38, + "learning_rate": 4.371277656506859e-05, + "loss": 3.1501, + "step": 4675 + }, + { + "epoch": 0.38, + "learning_rate": 4.370603929177784e-05, + "loss": 3.1306, + "step": 4680 + }, + { + "epoch": 0.38, + "learning_rate": 4.369930201848708e-05, + "loss": 3.0257, + "step": 4685 + }, + { + "epoch": 0.38, + "learning_rate": 4.369256474519632e-05, + "loss": 3.1348, + "step": 4690 + }, + { + "epoch": 0.38, + "learning_rate": 4.368582747190557e-05, + "loss": 3.1772, + "step": 4695 + }, + { + "epoch": 0.38, + "learning_rate": 4.367909019861482e-05, + "loss": 2.9573, + "step": 4700 + }, + { + "epoch": 0.38, + "learning_rate": 4.367235292532407e-05, + "loss": 3.0109, + "step": 4705 + }, + { + "epoch": 0.38, + "learning_rate": 4.366561565203331e-05, + "loss": 3.1603, + "step": 4710 + }, + { + "epoch": 0.38, + "learning_rate": 4.365887837874256e-05, + "loss": 3.0631, + "step": 4715 + }, + { + "epoch": 0.38, + "learning_rate": 4.365214110545181e-05, + "loss": 3.2663, + "step": 4720 + }, + { + "epoch": 0.38, + "learning_rate": 4.364540383216105e-05, + "loss": 3.216, + "step": 4725 + }, + { + "epoch": 0.38, + "learning_rate": 4.363866655887029e-05, + "loss": 3.1248, + "step": 4730 + }, + { + "epoch": 0.38, + "learning_rate": 4.363192928557954e-05, + "loss": 3.0795, + "step": 4735 + }, + { + "epoch": 0.38, + "learning_rate": 4.362519201228879e-05, + "loss": 3.2579, + "step": 4740 + }, + { + "epoch": 0.38, + "learning_rate": 4.361845473899803e-05, + "loss": 2.9879, + "step": 4745 + }, + { + "epoch": 0.38, + "learning_rate": 4.361171746570728e-05, + "loss": 3.0311, + "step": 4750 + }, + { + "epoch": 0.38, + "learning_rate": 4.360498019241653e-05, + "loss": 2.9413, + "step": 4755 + }, + { + "epoch": 0.38, + "learning_rate": 4.359824291912578e-05, + "loss": 2.9917, + "step": 4760 + }, + { + "epoch": 0.39, + "learning_rate": 4.359150564583502e-05, + "loss": 2.9656, + "step": 4765 + }, + { + "epoch": 0.39, + "learning_rate": 4.358476837254426e-05, + "loss": 2.833, + "step": 4770 + }, + { + "epoch": 0.39, + "learning_rate": 4.357803109925351e-05, + "loss": 3.1525, + "step": 4775 + }, + { + "epoch": 0.39, + "learning_rate": 4.357129382596276e-05, + "loss": 3.1786, + "step": 4780 + }, + { + "epoch": 0.39, + "learning_rate": 4.3564556552672e-05, + "loss": 3.0506, + "step": 4785 + }, + { + "epoch": 0.39, + "learning_rate": 4.355781927938125e-05, + "loss": 3.3234, + "step": 4790 + }, + { + "epoch": 0.39, + "learning_rate": 4.35510820060905e-05, + "loss": 3.3562, + "step": 4795 + }, + { + "epoch": 0.39, + "learning_rate": 4.354434473279974e-05, + "loss": 3.1701, + "step": 4800 + }, + { + "epoch": 0.39, + "learning_rate": 4.353760745950899e-05, + "loss": 2.8417, + "step": 4805 + }, + { + "epoch": 0.39, + "learning_rate": 4.353087018621823e-05, + "loss": 3.1097, + "step": 4810 + }, + { + "epoch": 0.39, + "learning_rate": 4.352413291292748e-05, + "loss": 3.3805, + "step": 4815 + }, + { + "epoch": 0.39, + "learning_rate": 4.3517395639636724e-05, + "loss": 2.9212, + "step": 4820 + }, + { + "epoch": 0.39, + "learning_rate": 4.351065836634597e-05, + "loss": 2.9774, + "step": 4825 + }, + { + "epoch": 0.39, + "learning_rate": 4.350392109305522e-05, + "loss": 3.2141, + "step": 4830 + }, + { + "epoch": 0.39, + "learning_rate": 4.3497183819764464e-05, + "loss": 3.2205, + "step": 4835 + }, + { + "epoch": 0.39, + "learning_rate": 4.349044654647371e-05, + "loss": 3.0818, + "step": 4840 + }, + { + "epoch": 0.39, + "learning_rate": 4.348370927318296e-05, + "loss": 3.0723, + "step": 4845 + }, + { + "epoch": 0.39, + "learning_rate": 4.347697199989221e-05, + "loss": 3.3671, + "step": 4850 + }, + { + "epoch": 0.39, + "learning_rate": 4.347023472660145e-05, + "loss": 3.2076, + "step": 4855 + }, + { + "epoch": 0.39, + "learning_rate": 4.3463497453310695e-05, + "loss": 3.2495, + "step": 4860 + }, + { + "epoch": 0.39, + "learning_rate": 4.3456760180019944e-05, + "loss": 2.7959, + "step": 4865 + }, + { + "epoch": 0.39, + "learning_rate": 4.345002290672919e-05, + "loss": 3.1347, + "step": 4870 + }, + { + "epoch": 0.39, + "learning_rate": 4.3443285633438435e-05, + "loss": 2.9321, + "step": 4875 + }, + { + "epoch": 0.39, + "learning_rate": 4.3436548360147684e-05, + "loss": 2.9697, + "step": 4880 + }, + { + "epoch": 0.39, + "learning_rate": 4.342981108685693e-05, + "loss": 3.0605, + "step": 4885 + }, + { + "epoch": 0.4, + "learning_rate": 4.3423073813566175e-05, + "loss": 3.1212, + "step": 4890 + }, + { + "epoch": 0.4, + "learning_rate": 4.3416336540275423e-05, + "loss": 2.9201, + "step": 4895 + }, + { + "epoch": 0.4, + "learning_rate": 4.3409599266984666e-05, + "loss": 2.9829, + "step": 4900 + }, + { + "epoch": 0.4, + "learning_rate": 4.3402861993693914e-05, + "loss": 3.3439, + "step": 4905 + }, + { + "epoch": 0.4, + "learning_rate": 4.3396124720403157e-05, + "loss": 3.0339, + "step": 4910 + }, + { + "epoch": 0.4, + "learning_rate": 4.3389387447112405e-05, + "loss": 3.0685, + "step": 4915 + }, + { + "epoch": 0.4, + "learning_rate": 4.3382650173821654e-05, + "loss": 3.0576, + "step": 4920 + }, + { + "epoch": 0.4, + "learning_rate": 4.33759129005309e-05, + "loss": 3.0614, + "step": 4925 + }, + { + "epoch": 0.4, + "learning_rate": 4.3369175627240145e-05, + "loss": 3.1312, + "step": 4930 + }, + { + "epoch": 0.4, + "learning_rate": 4.3362438353949394e-05, + "loss": 3.1098, + "step": 4935 + }, + { + "epoch": 0.4, + "learning_rate": 4.3355701080658636e-05, + "loss": 3.2032, + "step": 4940 + }, + { + "epoch": 0.4, + "learning_rate": 4.334896380736788e-05, + "loss": 3.0584, + "step": 4945 + }, + { + "epoch": 0.4, + "learning_rate": 4.334222653407713e-05, + "loss": 2.87, + "step": 4950 + }, + { + "epoch": 0.4, + "learning_rate": 4.3335489260786376e-05, + "loss": 2.872, + "step": 4955 + }, + { + "epoch": 0.4, + "learning_rate": 4.3328751987495625e-05, + "loss": 3.0233, + "step": 4960 + }, + { + "epoch": 0.4, + "learning_rate": 4.332201471420487e-05, + "loss": 3.1497, + "step": 4965 + }, + { + "epoch": 0.4, + "learning_rate": 4.3315277440914116e-05, + "loss": 3.3478, + "step": 4970 + }, + { + "epoch": 0.4, + "learning_rate": 4.3308540167623365e-05, + "loss": 2.6513, + "step": 4975 + }, + { + "epoch": 0.4, + "learning_rate": 4.3301802894332614e-05, + "loss": 3.1054, + "step": 4980 + }, + { + "epoch": 0.4, + "learning_rate": 4.3295065621041856e-05, + "loss": 3.1679, + "step": 4985 + }, + { + "epoch": 0.4, + "learning_rate": 4.32883283477511e-05, + "loss": 2.9424, + "step": 4990 + }, + { + "epoch": 0.4, + "learning_rate": 4.328159107446035e-05, + "loss": 2.9678, + "step": 4995 + }, + { + "epoch": 0.4, + "learning_rate": 4.327485380116959e-05, + "loss": 2.8093, + "step": 5000 + }, + { + "epoch": 0.4, + "learning_rate": 4.326811652787884e-05, + "loss": 3.1868, + "step": 5005 + }, + { + "epoch": 0.41, + "learning_rate": 4.3261379254588087e-05, + "loss": 3.1751, + "step": 5010 + }, + { + "epoch": 0.41, + "learning_rate": 4.3254641981297335e-05, + "loss": 3.1772, + "step": 5015 + }, + { + "epoch": 0.41, + "learning_rate": 4.324790470800658e-05, + "loss": 2.9964, + "step": 5020 + }, + { + "epoch": 0.41, + "learning_rate": 4.3241167434715826e-05, + "loss": 2.8733, + "step": 5025 + }, + { + "epoch": 0.41, + "learning_rate": 4.323443016142507e-05, + "loss": 2.9992, + "step": 5030 + }, + { + "epoch": 0.41, + "learning_rate": 4.322769288813432e-05, + "loss": 3.0003, + "step": 5035 + }, + { + "epoch": 0.41, + "learning_rate": 4.322095561484356e-05, + "loss": 3.0774, + "step": 5040 + }, + { + "epoch": 0.41, + "learning_rate": 4.321421834155281e-05, + "loss": 2.9182, + "step": 5045 + }, + { + "epoch": 0.41, + "learning_rate": 4.320748106826206e-05, + "loss": 3.325, + "step": 5050 + }, + { + "epoch": 0.41, + "learning_rate": 4.32007437949713e-05, + "loss": 2.7606, + "step": 5055 + }, + { + "epoch": 0.41, + "learning_rate": 4.319400652168055e-05, + "loss": 2.9947, + "step": 5060 + }, + { + "epoch": 0.41, + "learning_rate": 4.31872692483898e-05, + "loss": 3.024, + "step": 5065 + }, + { + "epoch": 0.41, + "learning_rate": 4.318053197509904e-05, + "loss": 3.0752, + "step": 5070 + }, + { + "epoch": 0.41, + "learning_rate": 4.317379470180828e-05, + "loss": 3.1484, + "step": 5075 + }, + { + "epoch": 0.41, + "learning_rate": 4.316705742851753e-05, + "loss": 3.0559, + "step": 5080 + }, + { + "epoch": 0.41, + "learning_rate": 4.316032015522678e-05, + "loss": 3.1059, + "step": 5085 + }, + { + "epoch": 0.41, + "learning_rate": 4.315358288193602e-05, + "loss": 3.062, + "step": 5090 + }, + { + "epoch": 0.41, + "learning_rate": 4.314684560864527e-05, + "loss": 2.8981, + "step": 5095 + }, + { + "epoch": 0.41, + "learning_rate": 4.314010833535452e-05, + "loss": 3.0228, + "step": 5100 + }, + { + "epoch": 0.41, + "learning_rate": 4.313337106206377e-05, + "loss": 3.1474, + "step": 5105 + }, + { + "epoch": 0.41, + "learning_rate": 4.312663378877301e-05, + "loss": 3.053, + "step": 5110 + }, + { + "epoch": 0.41, + "learning_rate": 4.311989651548226e-05, + "loss": 2.8585, + "step": 5115 + }, + { + "epoch": 0.41, + "learning_rate": 4.31131592421915e-05, + "loss": 2.927, + "step": 5120 + }, + { + "epoch": 0.41, + "learning_rate": 4.310642196890075e-05, + "loss": 2.9696, + "step": 5125 + }, + { + "epoch": 0.41, + "learning_rate": 4.309968469560999e-05, + "loss": 3.0903, + "step": 5130 + }, + { + "epoch": 0.42, + "learning_rate": 4.309294742231924e-05, + "loss": 3.1462, + "step": 5135 + }, + { + "epoch": 0.42, + "learning_rate": 4.308621014902849e-05, + "loss": 3.0195, + "step": 5140 + }, + { + "epoch": 0.42, + "learning_rate": 4.307947287573773e-05, + "loss": 2.931, + "step": 5145 + }, + { + "epoch": 0.42, + "learning_rate": 4.307273560244698e-05, + "loss": 3.1945, + "step": 5150 + }, + { + "epoch": 0.42, + "learning_rate": 4.306599832915623e-05, + "loss": 2.9829, + "step": 5155 + }, + { + "epoch": 0.42, + "learning_rate": 4.305926105586547e-05, + "loss": 3.0799, + "step": 5160 + }, + { + "epoch": 0.42, + "learning_rate": 4.3052523782574714e-05, + "loss": 2.9349, + "step": 5165 + }, + { + "epoch": 0.42, + "learning_rate": 4.304578650928396e-05, + "loss": 3.1081, + "step": 5170 + }, + { + "epoch": 0.42, + "learning_rate": 4.303904923599321e-05, + "loss": 2.9171, + "step": 5175 + }, + { + "epoch": 0.42, + "learning_rate": 4.303231196270246e-05, + "loss": 3.1681, + "step": 5180 + }, + { + "epoch": 0.42, + "learning_rate": 4.30255746894117e-05, + "loss": 3.1256, + "step": 5185 + }, + { + "epoch": 0.42, + "learning_rate": 4.301883741612095e-05, + "loss": 3.0517, + "step": 5190 + }, + { + "epoch": 0.42, + "learning_rate": 4.30121001428302e-05, + "loss": 3.1481, + "step": 5195 + }, + { + "epoch": 0.42, + "learning_rate": 4.300536286953944e-05, + "loss": 3.1047, + "step": 5200 + }, + { + "epoch": 0.42, + "learning_rate": 4.2998625596248684e-05, + "loss": 3.0044, + "step": 5205 + }, + { + "epoch": 0.42, + "learning_rate": 4.299188832295793e-05, + "loss": 3.2102, + "step": 5210 + }, + { + "epoch": 0.42, + "learning_rate": 4.298515104966718e-05, + "loss": 3.383, + "step": 5215 + }, + { + "epoch": 0.42, + "learning_rate": 4.2978413776376424e-05, + "loss": 3.2182, + "step": 5220 + }, + { + "epoch": 0.42, + "learning_rate": 4.297167650308567e-05, + "loss": 3.0297, + "step": 5225 + }, + { + "epoch": 0.42, + "learning_rate": 4.296493922979492e-05, + "loss": 3.2297, + "step": 5230 + }, + { + "epoch": 0.42, + "learning_rate": 4.295820195650417e-05, + "loss": 3.2441, + "step": 5235 + }, + { + "epoch": 0.42, + "learning_rate": 4.295146468321341e-05, + "loss": 3.0492, + "step": 5240 + }, + { + "epoch": 0.42, + "learning_rate": 4.2944727409922655e-05, + "loss": 2.9498, + "step": 5245 + }, + { + "epoch": 0.42, + "learning_rate": 4.2937990136631904e-05, + "loss": 2.8979, + "step": 5250 + }, + { + "epoch": 0.42, + "learning_rate": 4.2931252863341146e-05, + "loss": 3.0763, + "step": 5255 + }, + { + "epoch": 0.43, + "learning_rate": 4.2924515590050395e-05, + "loss": 3.071, + "step": 5260 + }, + { + "epoch": 0.43, + "learning_rate": 4.2917778316759644e-05, + "loss": 3.1906, + "step": 5265 + }, + { + "epoch": 0.43, + "learning_rate": 4.291238849812704e-05, + "loss": 3.9365, + "step": 5270 + }, + { + "epoch": 0.43, + "learning_rate": 4.290565122483629e-05, + "loss": 3.0687, + "step": 5275 + }, + { + "epoch": 0.43, + "learning_rate": 4.2898913951545536e-05, + "loss": 2.8605, + "step": 5280 + }, + { + "epoch": 0.43, + "learning_rate": 4.289217667825478e-05, + "loss": 3.0541, + "step": 5285 + }, + { + "epoch": 0.43, + "learning_rate": 4.288543940496402e-05, + "loss": 3.1296, + "step": 5290 + }, + { + "epoch": 0.43, + "learning_rate": 4.287870213167327e-05, + "loss": 3.1929, + "step": 5295 + }, + { + "epoch": 0.43, + "learning_rate": 4.287196485838252e-05, + "loss": 3.0888, + "step": 5300 + }, + { + "epoch": 0.43, + "learning_rate": 4.286522758509177e-05, + "loss": 3.1341, + "step": 5305 + }, + { + "epoch": 0.43, + "learning_rate": 4.285849031180101e-05, + "loss": 3.2743, + "step": 5310 + }, + { + "epoch": 0.43, + "learning_rate": 4.285175303851026e-05, + "loss": 2.9379, + "step": 5315 + }, + { + "epoch": 0.43, + "learning_rate": 4.284501576521951e-05, + "loss": 2.93, + "step": 5320 + }, + { + "epoch": 0.43, + "learning_rate": 4.283827849192875e-05, + "loss": 2.9956, + "step": 5325 + }, + { + "epoch": 0.43, + "learning_rate": 4.283154121863799e-05, + "loss": 2.9685, + "step": 5330 + }, + { + "epoch": 0.43, + "learning_rate": 4.282480394534724e-05, + "loss": 3.1619, + "step": 5335 + }, + { + "epoch": 0.43, + "learning_rate": 4.281806667205649e-05, + "loss": 3.085, + "step": 5340 + }, + { + "epoch": 0.43, + "learning_rate": 4.281132939876573e-05, + "loss": 2.9773, + "step": 5345 + }, + { + "epoch": 0.43, + "learning_rate": 4.280459212547498e-05, + "loss": 3.2456, + "step": 5350 + }, + { + "epoch": 0.43, + "learning_rate": 4.279785485218423e-05, + "loss": 2.9965, + "step": 5355 + }, + { + "epoch": 0.43, + "learning_rate": 4.279111757889348e-05, + "loss": 3.0508, + "step": 5360 + }, + { + "epoch": 0.43, + "learning_rate": 4.278438030560272e-05, + "loss": 3.123, + "step": 5365 + }, + { + "epoch": 0.43, + "learning_rate": 4.277764303231196e-05, + "loss": 3.0023, + "step": 5370 + }, + { + "epoch": 0.43, + "learning_rate": 4.277090575902121e-05, + "loss": 3.3, + "step": 5375 + }, + { + "epoch": 0.43, + "learning_rate": 4.276416848573045e-05, + "loss": 3.139, + "step": 5380 + }, + { + "epoch": 0.44, + "learning_rate": 4.27574312124397e-05, + "loss": 3.1068, + "step": 5385 + }, + { + "epoch": 0.44, + "learning_rate": 4.275069393914895e-05, + "loss": 3.07, + "step": 5390 + }, + { + "epoch": 0.44, + "learning_rate": 4.27439566658582e-05, + "loss": 3.1072, + "step": 5395 + }, + { + "epoch": 0.44, + "learning_rate": 4.273721939256744e-05, + "loss": 3.0112, + "step": 5400 + }, + { + "epoch": 0.44, + "learning_rate": 4.273048211927669e-05, + "loss": 3.1176, + "step": 5405 + }, + { + "epoch": 0.44, + "learning_rate": 4.272374484598593e-05, + "loss": 2.7726, + "step": 5410 + }, + { + "epoch": 0.44, + "learning_rate": 4.271700757269518e-05, + "loss": 2.8491, + "step": 5415 + }, + { + "epoch": 0.44, + "learning_rate": 4.2710270299404423e-05, + "loss": 2.8852, + "step": 5420 + }, + { + "epoch": 0.44, + "learning_rate": 4.270353302611367e-05, + "loss": 2.7856, + "step": 5425 + }, + { + "epoch": 0.44, + "learning_rate": 4.269679575282292e-05, + "loss": 3.1395, + "step": 5430 + }, + { + "epoch": 0.44, + "learning_rate": 4.269005847953216e-05, + "loss": 3.2047, + "step": 5435 + }, + { + "epoch": 0.44, + "learning_rate": 4.268332120624141e-05, + "loss": 3.033, + "step": 5440 + }, + { + "epoch": 0.44, + "learning_rate": 4.267658393295066e-05, + "loss": 2.987, + "step": 5445 + }, + { + "epoch": 0.44, + "learning_rate": 4.266984665965991e-05, + "loss": 3.0016, + "step": 5450 + }, + { + "epoch": 0.44, + "learning_rate": 4.266310938636915e-05, + "loss": 3.1097, + "step": 5455 + }, + { + "epoch": 0.44, + "learning_rate": 4.2656372113078394e-05, + "loss": 3.0633, + "step": 5460 + }, + { + "epoch": 0.44, + "learning_rate": 4.264963483978764e-05, + "loss": 2.8894, + "step": 5465 + }, + { + "epoch": 0.44, + "learning_rate": 4.264289756649689e-05, + "loss": 3.026, + "step": 5470 + }, + { + "epoch": 0.44, + "learning_rate": 4.2636160293206134e-05, + "loss": 2.9712, + "step": 5475 + }, + { + "epoch": 0.44, + "learning_rate": 4.262942301991538e-05, + "loss": 3.2726, + "step": 5480 + }, + { + "epoch": 0.44, + "learning_rate": 4.262268574662463e-05, + "loss": 3.1535, + "step": 5485 + }, + { + "epoch": 0.44, + "learning_rate": 4.2615948473333874e-05, + "loss": 3.0379, + "step": 5490 + }, + { + "epoch": 0.44, + "learning_rate": 4.260921120004312e-05, + "loss": 3.3582, + "step": 5495 + }, + { + "epoch": 0.44, + "learning_rate": 4.2602473926752365e-05, + "loss": 2.9169, + "step": 5500 + }, + { + "epoch": 0.45, + "learning_rate": 4.2595736653461614e-05, + "loss": 3.0502, + "step": 5505 + }, + { + "epoch": 0.45, + "learning_rate": 4.2588999380170856e-05, + "loss": 3.2374, + "step": 5510 + }, + { + "epoch": 0.45, + "learning_rate": 4.2582262106880105e-05, + "loss": 3.0465, + "step": 5515 + }, + { + "epoch": 0.45, + "learning_rate": 4.2575524833589353e-05, + "loss": 3.4409, + "step": 5520 + }, + { + "epoch": 0.45, + "learning_rate": 4.2568787560298596e-05, + "loss": 2.8746, + "step": 5525 + }, + { + "epoch": 0.45, + "learning_rate": 4.2562050287007844e-05, + "loss": 2.9985, + "step": 5530 + }, + { + "epoch": 0.45, + "learning_rate": 4.255531301371709e-05, + "loss": 3.2513, + "step": 5535 + }, + { + "epoch": 0.45, + "learning_rate": 4.2548575740426335e-05, + "loss": 3.142, + "step": 5540 + }, + { + "epoch": 0.45, + "learning_rate": 4.254183846713558e-05, + "loss": 2.9062, + "step": 5545 + }, + { + "epoch": 0.45, + "learning_rate": 4.2535101193844826e-05, + "loss": 2.9976, + "step": 5550 + }, + { + "epoch": 0.45, + "learning_rate": 4.2528363920554075e-05, + "loss": 3.2293, + "step": 5555 + }, + { + "epoch": 0.45, + "learning_rate": 4.2521626647263324e-05, + "loss": 2.7799, + "step": 5560 + }, + { + "epoch": 0.45, + "learning_rate": 4.2514889373972566e-05, + "loss": 2.9451, + "step": 5565 + }, + { + "epoch": 0.45, + "learning_rate": 4.2508152100681815e-05, + "loss": 2.9368, + "step": 5570 + }, + { + "epoch": 0.45, + "learning_rate": 4.2501414827391064e-05, + "loss": 2.7972, + "step": 5575 + }, + { + "epoch": 0.45, + "learning_rate": 4.2494677554100306e-05, + "loss": 2.9851, + "step": 5580 + }, + { + "epoch": 0.45, + "learning_rate": 4.2487940280809555e-05, + "loss": 2.9721, + "step": 5585 + }, + { + "epoch": 0.45, + "learning_rate": 4.24812030075188e-05, + "loss": 3.0178, + "step": 5590 + }, + { + "epoch": 0.45, + "learning_rate": 4.2474465734228046e-05, + "loss": 3.0758, + "step": 5595 + }, + { + "epoch": 0.45, + "learning_rate": 4.246772846093729e-05, + "loss": 3.1676, + "step": 5600 + }, + { + "epoch": 0.45, + "learning_rate": 4.246099118764654e-05, + "loss": 3.232, + "step": 5605 + }, + { + "epoch": 0.45, + "learning_rate": 4.2454253914355786e-05, + "loss": 3.1686, + "step": 5610 + }, + { + "epoch": 0.45, + "learning_rate": 4.2447516641065035e-05, + "loss": 3.1155, + "step": 5615 + }, + { + "epoch": 0.45, + "learning_rate": 4.244077936777428e-05, + "loss": 3.328, + "step": 5620 + }, + { + "epoch": 0.45, + "learning_rate": 4.2434042094483526e-05, + "loss": 2.904, + "step": 5625 + }, + { + "epoch": 0.46, + "learning_rate": 4.242730482119277e-05, + "loss": 2.9753, + "step": 5630 + }, + { + "epoch": 0.46, + "learning_rate": 4.242056754790201e-05, + "loss": 2.9243, + "step": 5635 + }, + { + "epoch": 0.46, + "learning_rate": 4.241383027461126e-05, + "loss": 3.1304, + "step": 5640 + }, + { + "epoch": 0.46, + "learning_rate": 4.240709300132051e-05, + "loss": 3.0722, + "step": 5645 + }, + { + "epoch": 0.46, + "learning_rate": 4.2400355728029756e-05, + "loss": 3.0274, + "step": 5650 + }, + { + "epoch": 0.46, + "learning_rate": 4.2393618454739e-05, + "loss": 2.9222, + "step": 5655 + }, + { + "epoch": 0.46, + "learning_rate": 4.238688118144825e-05, + "loss": 3.0571, + "step": 5660 + }, + { + "epoch": 0.46, + "learning_rate": 4.2380143908157496e-05, + "loss": 2.8809, + "step": 5665 + }, + { + "epoch": 0.46, + "learning_rate": 4.237340663486674e-05, + "loss": 3.025, + "step": 5670 + }, + { + "epoch": 0.46, + "learning_rate": 4.236666936157598e-05, + "loss": 3.1147, + "step": 5675 + }, + { + "epoch": 0.46, + "learning_rate": 4.235993208828523e-05, + "loss": 3.1962, + "step": 5680 + }, + { + "epoch": 0.46, + "learning_rate": 4.235319481499448e-05, + "loss": 3.2954, + "step": 5685 + }, + { + "epoch": 0.46, + "learning_rate": 4.234645754170372e-05, + "loss": 3.0492, + "step": 5690 + }, + { + "epoch": 0.46, + "learning_rate": 4.233972026841297e-05, + "loss": 2.9564, + "step": 5695 + }, + { + "epoch": 0.46, + "learning_rate": 4.233298299512222e-05, + "loss": 2.8588, + "step": 5700 + }, + { + "epoch": 0.46, + "learning_rate": 4.232624572183147e-05, + "loss": 2.9818, + "step": 5705 + }, + { + "epoch": 0.46, + "learning_rate": 4.231950844854071e-05, + "loss": 3.3102, + "step": 5710 + }, + { + "epoch": 0.46, + "learning_rate": 4.231277117524995e-05, + "loss": 3.0969, + "step": 5715 + }, + { + "epoch": 0.46, + "learning_rate": 4.23060339019592e-05, + "loss": 3.1605, + "step": 5720 + }, + { + "epoch": 0.46, + "learning_rate": 4.229929662866845e-05, + "loss": 3.1074, + "step": 5725 + }, + { + "epoch": 0.46, + "learning_rate": 4.229255935537769e-05, + "loss": 3.5156, + "step": 5730 + }, + { + "epoch": 0.46, + "learning_rate": 4.228582208208694e-05, + "loss": 3.0901, + "step": 5735 + }, + { + "epoch": 0.46, + "learning_rate": 4.227908480879619e-05, + "loss": 3.1218, + "step": 5740 + }, + { + "epoch": 0.46, + "learning_rate": 4.227234753550543e-05, + "loss": 2.7888, + "step": 5745 + }, + { + "epoch": 0.46, + "learning_rate": 4.226561026221468e-05, + "loss": 2.8804, + "step": 5750 + }, + { + "epoch": 0.47, + "learning_rate": 4.225887298892393e-05, + "loss": 3.1825, + "step": 5755 + }, + { + "epoch": 0.47, + "learning_rate": 4.225213571563317e-05, + "loss": 3.0061, + "step": 5760 + }, + { + "epoch": 0.47, + "learning_rate": 4.224539844234241e-05, + "loss": 3.0918, + "step": 5765 + }, + { + "epoch": 0.47, + "learning_rate": 4.223866116905166e-05, + "loss": 2.8022, + "step": 5770 + }, + { + "epoch": 0.47, + "learning_rate": 4.223192389576091e-05, + "loss": 3.1484, + "step": 5775 + }, + { + "epoch": 0.47, + "learning_rate": 4.222518662247016e-05, + "loss": 2.9748, + "step": 5780 + }, + { + "epoch": 0.47, + "learning_rate": 4.22184493491794e-05, + "loss": 3.053, + "step": 5785 + }, + { + "epoch": 0.47, + "learning_rate": 4.221171207588865e-05, + "loss": 3.1931, + "step": 5790 + }, + { + "epoch": 0.47, + "learning_rate": 4.22049748025979e-05, + "loss": 3.5065, + "step": 5795 + }, + { + "epoch": 0.47, + "learning_rate": 4.219823752930714e-05, + "loss": 2.9901, + "step": 5800 + }, + { + "epoch": 0.47, + "learning_rate": 4.219150025601638e-05, + "loss": 3.3565, + "step": 5805 + }, + { + "epoch": 0.47, + "learning_rate": 4.218476298272563e-05, + "loss": 2.9501, + "step": 5810 + }, + { + "epoch": 0.47, + "learning_rate": 4.217802570943488e-05, + "loss": 2.9877, + "step": 5815 + }, + { + "epoch": 0.47, + "learning_rate": 4.217128843614412e-05, + "loss": 3.5543, + "step": 5820 + }, + { + "epoch": 0.47, + "learning_rate": 4.216455116285337e-05, + "loss": 3.1097, + "step": 5825 + }, + { + "epoch": 0.47, + "learning_rate": 4.215781388956262e-05, + "loss": 2.9987, + "step": 5830 + }, + { + "epoch": 0.47, + "learning_rate": 4.215107661627186e-05, + "loss": 3.0965, + "step": 5835 + }, + { + "epoch": 0.47, + "learning_rate": 4.214433934298111e-05, + "loss": 3.0131, + "step": 5840 + }, + { + "epoch": 0.47, + "learning_rate": 4.2137602069690354e-05, + "loss": 3.4088, + "step": 5845 + }, + { + "epoch": 0.47, + "learning_rate": 4.21308647963996e-05, + "loss": 2.932, + "step": 5850 + }, + { + "epoch": 0.47, + "learning_rate": 4.2124127523108845e-05, + "loss": 3.1906, + "step": 5855 + }, + { + "epoch": 0.47, + "learning_rate": 4.2117390249818094e-05, + "loss": 3.3301, + "step": 5860 + }, + { + "epoch": 0.47, + "learning_rate": 4.211065297652734e-05, + "loss": 3.0456, + "step": 5865 + }, + { + "epoch": 0.47, + "learning_rate": 4.210391570323659e-05, + "loss": 2.8092, + "step": 5870 + }, + { + "epoch": 0.47, + "learning_rate": 4.2097178429945834e-05, + "loss": 3.1395, + "step": 5875 + }, + { + "epoch": 0.48, + "learning_rate": 4.209044115665508e-05, + "loss": 2.8979, + "step": 5880 + }, + { + "epoch": 0.48, + "learning_rate": 4.208370388336433e-05, + "loss": 3.1059, + "step": 5885 + }, + { + "epoch": 0.48, + "learning_rate": 4.2076966610073574e-05, + "loss": 3.2633, + "step": 5890 + }, + { + "epoch": 0.48, + "learning_rate": 4.2070229336782816e-05, + "loss": 2.9585, + "step": 5895 + }, + { + "epoch": 0.48, + "learning_rate": 4.2063492063492065e-05, + "loss": 3.2317, + "step": 5900 + }, + { + "epoch": 0.48, + "learning_rate": 4.2056754790201313e-05, + "loss": 2.9057, + "step": 5905 + }, + { + "epoch": 0.48, + "learning_rate": 4.2050017516910556e-05, + "loss": 2.8944, + "step": 5910 + }, + { + "epoch": 0.48, + "learning_rate": 4.2043280243619804e-05, + "loss": 3.0617, + "step": 5915 + }, + { + "epoch": 0.48, + "learning_rate": 4.203654297032905e-05, + "loss": 3.1542, + "step": 5920 + }, + { + "epoch": 0.48, + "learning_rate": 4.20298056970383e-05, + "loss": 3.0791, + "step": 5925 + }, + { + "epoch": 0.48, + "learning_rate": 4.2023068423747544e-05, + "loss": 3.2551, + "step": 5930 + }, + { + "epoch": 0.48, + "learning_rate": 4.2016331150456786e-05, + "loss": 3.2556, + "step": 5935 + }, + { + "epoch": 0.48, + "learning_rate": 4.2009593877166035e-05, + "loss": 2.9848, + "step": 5940 + }, + { + "epoch": 0.48, + "learning_rate": 4.200285660387528e-05, + "loss": 3.2682, + "step": 5945 + }, + { + "epoch": 0.48, + "learning_rate": 4.1996119330584526e-05, + "loss": 2.9622, + "step": 5950 + }, + { + "epoch": 0.48, + "learning_rate": 4.1989382057293775e-05, + "loss": 3.4463, + "step": 5955 + }, + { + "epoch": 0.48, + "learning_rate": 4.1982644784003024e-05, + "loss": 3.197, + "step": 5960 + }, + { + "epoch": 0.48, + "learning_rate": 4.1975907510712266e-05, + "loss": 2.9737, + "step": 5965 + }, + { + "epoch": 0.48, + "learning_rate": 4.1969170237421515e-05, + "loss": 3.1124, + "step": 5970 + }, + { + "epoch": 0.48, + "learning_rate": 4.196243296413076e-05, + "loss": 3.067, + "step": 5975 + }, + { + "epoch": 0.48, + "learning_rate": 4.1955695690840006e-05, + "loss": 3.224, + "step": 5980 + }, + { + "epoch": 0.48, + "learning_rate": 4.194895841754925e-05, + "loss": 3.2847, + "step": 5985 + }, + { + "epoch": 0.48, + "learning_rate": 4.19422211442585e-05, + "loss": 3.0231, + "step": 5990 + }, + { + "epoch": 0.48, + "learning_rate": 4.1935483870967746e-05, + "loss": 2.8844, + "step": 5995 + }, + { + "epoch": 0.49, + "learning_rate": 4.192874659767699e-05, + "loss": 3.0196, + "step": 6000 + }, + { + "epoch": 0.49, + "learning_rate": 4.192200932438624e-05, + "loss": 2.9583, + "step": 6005 + }, + { + "epoch": 0.49, + "learning_rate": 4.1915272051095486e-05, + "loss": 3.0375, + "step": 6010 + }, + { + "epoch": 0.49, + "learning_rate": 4.1908534777804734e-05, + "loss": 2.9743, + "step": 6015 + }, + { + "epoch": 0.49, + "learning_rate": 4.1901797504513977e-05, + "loss": 2.8241, + "step": 6020 + }, + { + "epoch": 0.49, + "learning_rate": 4.189506023122322e-05, + "loss": 2.836, + "step": 6025 + }, + { + "epoch": 0.49, + "learning_rate": 4.188832295793247e-05, + "loss": 3.1118, + "step": 6030 + }, + { + "epoch": 0.49, + "learning_rate": 4.1881585684641716e-05, + "loss": 2.8837, + "step": 6035 + }, + { + "epoch": 0.49, + "learning_rate": 4.187484841135096e-05, + "loss": 3.093, + "step": 6040 + }, + { + "epoch": 0.49, + "learning_rate": 4.186811113806021e-05, + "loss": 2.9789, + "step": 6045 + }, + { + "epoch": 0.49, + "learning_rate": 4.1861373864769456e-05, + "loss": 2.9944, + "step": 6050 + }, + { + "epoch": 0.49, + "learning_rate": 4.18546365914787e-05, + "loss": 3.1287, + "step": 6055 + }, + { + "epoch": 0.49, + "learning_rate": 4.184789931818795e-05, + "loss": 3.1213, + "step": 6060 + }, + { + "epoch": 0.49, + "learning_rate": 4.184116204489719e-05, + "loss": 3.2943, + "step": 6065 + }, + { + "epoch": 0.49, + "learning_rate": 4.183442477160644e-05, + "loss": 3.1028, + "step": 6070 + }, + { + "epoch": 0.49, + "learning_rate": 4.182768749831568e-05, + "loss": 3.243, + "step": 6075 + }, + { + "epoch": 0.49, + "learning_rate": 4.182095022502493e-05, + "loss": 2.8142, + "step": 6080 + }, + { + "epoch": 0.49, + "learning_rate": 4.181421295173418e-05, + "loss": 3.1794, + "step": 6085 + }, + { + "epoch": 0.49, + "learning_rate": 4.180747567844342e-05, + "loss": 3.3118, + "step": 6090 + }, + { + "epoch": 0.49, + "learning_rate": 4.180073840515267e-05, + "loss": 3.1506, + "step": 6095 + }, + { + "epoch": 0.49, + "learning_rate": 4.179400113186192e-05, + "loss": 3.177, + "step": 6100 + }, + { + "epoch": 0.49, + "learning_rate": 4.178726385857116e-05, + "loss": 3.0311, + "step": 6105 + }, + { + "epoch": 0.49, + "learning_rate": 4.17805265852804e-05, + "loss": 3.0866, + "step": 6110 + }, + { + "epoch": 0.49, + "learning_rate": 4.177378931198965e-05, + "loss": 3.5669, + "step": 6115 + }, + { + "epoch": 0.49, + "learning_rate": 4.17670520386989e-05, + "loss": 2.942, + "step": 6120 + }, + { + "epoch": 0.5, + "learning_rate": 4.176031476540815e-05, + "loss": 3.1499, + "step": 6125 + }, + { + "epoch": 0.5, + "learning_rate": 4.175357749211739e-05, + "loss": 3.0345, + "step": 6130 + }, + { + "epoch": 0.5, + "learning_rate": 4.174684021882664e-05, + "loss": 2.9467, + "step": 6135 + }, + { + "epoch": 0.5, + "learning_rate": 4.174010294553589e-05, + "loss": 3.0161, + "step": 6140 + }, + { + "epoch": 0.5, + "learning_rate": 4.173336567224513e-05, + "loss": 3.2259, + "step": 6145 + }, + { + "epoch": 0.5, + "learning_rate": 4.172662839895437e-05, + "loss": 2.9918, + "step": 6150 + }, + { + "epoch": 0.5, + "learning_rate": 4.171989112566362e-05, + "loss": 2.7739, + "step": 6155 + }, + { + "epoch": 0.5, + "learning_rate": 4.171315385237287e-05, + "loss": 3.0321, + "step": 6160 + }, + { + "epoch": 0.5, + "learning_rate": 4.170641657908211e-05, + "loss": 3.378, + "step": 6165 + }, + { + "epoch": 0.5, + "learning_rate": 4.169967930579136e-05, + "loss": 3.0588, + "step": 6170 + }, + { + "epoch": 0.5, + "learning_rate": 4.169294203250061e-05, + "loss": 2.8973, + "step": 6175 + }, + { + "epoch": 0.5, + "learning_rate": 4.168620475920986e-05, + "loss": 3.0668, + "step": 6180 + }, + { + "epoch": 0.5, + "learning_rate": 4.16794674859191e-05, + "loss": 3.1018, + "step": 6185 + }, + { + "epoch": 0.5, + "learning_rate": 4.167273021262835e-05, + "loss": 2.729, + "step": 6190 + }, + { + "epoch": 0.5, + "learning_rate": 4.166599293933759e-05, + "loss": 3.2127, + "step": 6195 + }, + { + "epoch": 0.5, + "learning_rate": 4.1659255666046834e-05, + "loss": 3.0852, + "step": 6200 + }, + { + "epoch": 0.5, + "learning_rate": 4.165251839275608e-05, + "loss": 2.9025, + "step": 6205 + }, + { + "epoch": 0.5, + "learning_rate": 4.164578111946533e-05, + "loss": 3.0034, + "step": 6210 + }, + { + "epoch": 0.5, + "learning_rate": 4.163904384617458e-05, + "loss": 2.9844, + "step": 6215 + }, + { + "epoch": 0.5, + "learning_rate": 4.163230657288382e-05, + "loss": 2.8532, + "step": 6220 + }, + { + "epoch": 0.5, + "learning_rate": 4.162556929959307e-05, + "loss": 3.0747, + "step": 6225 + }, + { + "epoch": 0.5, + "learning_rate": 4.161883202630232e-05, + "loss": 2.9875, + "step": 6230 + }, + { + "epoch": 0.5, + "learning_rate": 4.161209475301156e-05, + "loss": 3.0558, + "step": 6235 + }, + { + "epoch": 0.5, + "learning_rate": 4.1605357479720805e-05, + "loss": 2.968, + "step": 6240 + }, + { + "epoch": 0.5, + "learning_rate": 4.1598620206430054e-05, + "loss": 2.9673, + "step": 6245 + }, + { + "epoch": 0.51, + "learning_rate": 4.15918829331393e-05, + "loss": 2.948, + "step": 6250 + }, + { + "epoch": 0.51, + "learning_rate": 4.1585145659848545e-05, + "loss": 3.173, + "step": 6255 + }, + { + "epoch": 0.51, + "learning_rate": 4.1578408386557794e-05, + "loss": 3.1657, + "step": 6260 + }, + { + "epoch": 0.51, + "learning_rate": 4.157167111326704e-05, + "loss": 3.011, + "step": 6265 + }, + { + "epoch": 0.51, + "learning_rate": 4.156493383997629e-05, + "loss": 3.2625, + "step": 6270 + }, + { + "epoch": 0.51, + "learning_rate": 4.1558196566685534e-05, + "loss": 3.1201, + "step": 6275 + }, + { + "epoch": 0.51, + "learning_rate": 4.1551459293394776e-05, + "loss": 3.258, + "step": 6280 + }, + { + "epoch": 0.51, + "learning_rate": 4.1544722020104024e-05, + "loss": 3.428, + "step": 6285 + }, + { + "epoch": 0.51, + "learning_rate": 4.153798474681327e-05, + "loss": 3.257, + "step": 6290 + }, + { + "epoch": 0.51, + "learning_rate": 4.1531247473522515e-05, + "loss": 3.2086, + "step": 6295 + }, + { + "epoch": 0.51, + "learning_rate": 4.1524510200231764e-05, + "loss": 3.2408, + "step": 6300 + }, + { + "epoch": 0.51, + "learning_rate": 4.151777292694101e-05, + "loss": 3.0592, + "step": 6305 + }, + { + "epoch": 0.51, + "learning_rate": 4.1511035653650255e-05, + "loss": 3.2017, + "step": 6310 + }, + { + "epoch": 0.51, + "learning_rate": 4.1504298380359504e-05, + "loss": 2.9562, + "step": 6315 + }, + { + "epoch": 0.51, + "learning_rate": 4.149756110706875e-05, + "loss": 3.2331, + "step": 6320 + }, + { + "epoch": 0.51, + "learning_rate": 4.1490823833777995e-05, + "loss": 3.1174, + "step": 6325 + }, + { + "epoch": 0.51, + "learning_rate": 4.148408656048724e-05, + "loss": 3.0513, + "step": 6330 + }, + { + "epoch": 0.51, + "learning_rate": 4.1477349287196486e-05, + "loss": 3.0752, + "step": 6335 + }, + { + "epoch": 0.51, + "learning_rate": 4.1470612013905735e-05, + "loss": 2.9025, + "step": 6340 + }, + { + "epoch": 0.51, + "learning_rate": 4.1463874740614984e-05, + "loss": 3.0994, + "step": 6345 + }, + { + "epoch": 0.51, + "learning_rate": 4.1457137467324226e-05, + "loss": 3.1386, + "step": 6350 + }, + { + "epoch": 0.51, + "learning_rate": 4.1450400194033475e-05, + "loss": 3.0328, + "step": 6355 + }, + { + "epoch": 0.51, + "learning_rate": 4.1443662920742724e-05, + "loss": 2.7842, + "step": 6360 + }, + { + "epoch": 0.51, + "learning_rate": 4.1436925647451966e-05, + "loss": 2.8813, + "step": 6365 + }, + { + "epoch": 0.51, + "learning_rate": 4.143018837416121e-05, + "loss": 3.3991, + "step": 6370 + }, + { + "epoch": 0.52, + "learning_rate": 4.142345110087046e-05, + "loss": 3.0849, + "step": 6375 + }, + { + "epoch": 0.52, + "learning_rate": 4.1416713827579706e-05, + "loss": 3.5411, + "step": 6380 + }, + { + "epoch": 0.52, + "learning_rate": 4.140997655428895e-05, + "loss": 2.8933, + "step": 6385 + }, + { + "epoch": 0.52, + "learning_rate": 4.1403239280998197e-05, + "loss": 2.9984, + "step": 6390 + }, + { + "epoch": 0.52, + "learning_rate": 4.1396502007707445e-05, + "loss": 3.1318, + "step": 6395 + }, + { + "epoch": 0.52, + "learning_rate": 4.138976473441669e-05, + "loss": 3.2986, + "step": 6400 + }, + { + "epoch": 0.52, + "learning_rate": 4.1383027461125936e-05, + "loss": 2.9299, + "step": 6405 + }, + { + "epoch": 0.52, + "learning_rate": 4.137629018783518e-05, + "loss": 3.0908, + "step": 6410 + }, + { + "epoch": 0.52, + "learning_rate": 4.136955291454443e-05, + "loss": 3.0864, + "step": 6415 + }, + { + "epoch": 0.52, + "learning_rate": 4.136281564125367e-05, + "loss": 2.9794, + "step": 6420 + }, + { + "epoch": 0.52, + "learning_rate": 4.135607836796292e-05, + "loss": 3.1128, + "step": 6425 + }, + { + "epoch": 0.52, + "learning_rate": 4.134934109467217e-05, + "loss": 2.9951, + "step": 6430 + }, + { + "epoch": 0.52, + "learning_rate": 4.1342603821381416e-05, + "loss": 3.187, + "step": 6435 + }, + { + "epoch": 0.52, + "learning_rate": 4.133586654809066e-05, + "loss": 3.2286, + "step": 6440 + }, + { + "epoch": 0.52, + "learning_rate": 4.132912927479991e-05, + "loss": 2.9073, + "step": 6445 + }, + { + "epoch": 0.52, + "learning_rate": 4.132239200150915e-05, + "loss": 2.9606, + "step": 6450 + }, + { + "epoch": 0.52, + "learning_rate": 4.13156547282184e-05, + "loss": 3.3927, + "step": 6455 + }, + { + "epoch": 0.52, + "learning_rate": 4.130891745492764e-05, + "loss": 2.9814, + "step": 6460 + }, + { + "epoch": 0.52, + "learning_rate": 4.130218018163689e-05, + "loss": 3.134, + "step": 6465 + }, + { + "epoch": 0.52, + "learning_rate": 4.129544290834614e-05, + "loss": 3.1801, + "step": 6470 + }, + { + "epoch": 0.52, + "learning_rate": 4.128870563505538e-05, + "loss": 3.3085, + "step": 6475 + }, + { + "epoch": 0.52, + "learning_rate": 4.128196836176463e-05, + "loss": 3.1814, + "step": 6480 + }, + { + "epoch": 0.52, + "learning_rate": 4.127523108847388e-05, + "loss": 3.2744, + "step": 6485 + }, + { + "epoch": 0.52, + "learning_rate": 4.126849381518313e-05, + "loss": 2.9528, + "step": 6490 + }, + { + "epoch": 0.53, + "learning_rate": 4.126175654189237e-05, + "loss": 3.1222, + "step": 6495 + }, + { + "epoch": 0.53, + "learning_rate": 4.125501926860161e-05, + "loss": 2.8767, + "step": 6500 + }, + { + "epoch": 0.53, + "learning_rate": 4.124828199531086e-05, + "loss": 2.9187, + "step": 6505 + }, + { + "epoch": 0.53, + "learning_rate": 4.12415447220201e-05, + "loss": 3.1005, + "step": 6510 + }, + { + "epoch": 0.53, + "learning_rate": 4.123480744872935e-05, + "loss": 3.0222, + "step": 6515 + }, + { + "epoch": 0.53, + "learning_rate": 4.12280701754386e-05, + "loss": 3.0137, + "step": 6520 + }, + { + "epoch": 0.53, + "learning_rate": 4.122133290214785e-05, + "loss": 3.1305, + "step": 6525 + }, + { + "epoch": 0.53, + "learning_rate": 4.121459562885709e-05, + "loss": 2.9975, + "step": 6530 + }, + { + "epoch": 0.53, + "learning_rate": 4.120785835556634e-05, + "loss": 2.8521, + "step": 6535 + }, + { + "epoch": 0.53, + "learning_rate": 4.120112108227558e-05, + "loss": 2.8654, + "step": 6540 + }, + { + "epoch": 0.53, + "learning_rate": 4.119438380898483e-05, + "loss": 3.0614, + "step": 6545 + }, + { + "epoch": 0.53, + "learning_rate": 4.118764653569407e-05, + "loss": 2.9413, + "step": 6550 + }, + { + "epoch": 0.53, + "learning_rate": 4.118090926240332e-05, + "loss": 2.9269, + "step": 6555 + }, + { + "epoch": 0.53, + "learning_rate": 4.117417198911257e-05, + "loss": 3.1327, + "step": 6560 + }, + { + "epoch": 0.53, + "learning_rate": 4.116743471582181e-05, + "loss": 3.1815, + "step": 6565 + }, + { + "epoch": 0.53, + "learning_rate": 4.116069744253106e-05, + "loss": 3.3745, + "step": 6570 + }, + { + "epoch": 0.53, + "learning_rate": 4.115396016924031e-05, + "loss": 3.0903, + "step": 6575 + }, + { + "epoch": 0.53, + "learning_rate": 4.114722289594955e-05, + "loss": 3.0028, + "step": 6580 + }, + { + "epoch": 0.53, + "learning_rate": 4.1140485622658794e-05, + "loss": 3.1658, + "step": 6585 + }, + { + "epoch": 0.53, + "learning_rate": 4.113374834936804e-05, + "loss": 3.0588, + "step": 6590 + }, + { + "epoch": 0.53, + "learning_rate": 4.112701107607729e-05, + "loss": 2.9141, + "step": 6595 + }, + { + "epoch": 0.53, + "learning_rate": 4.112027380278654e-05, + "loss": 3.0221, + "step": 6600 + }, + { + "epoch": 0.53, + "learning_rate": 4.111353652949578e-05, + "loss": 3.0241, + "step": 6605 + }, + { + "epoch": 0.53, + "learning_rate": 4.110679925620503e-05, + "loss": 3.1218, + "step": 6610 + }, + { + "epoch": 0.53, + "learning_rate": 4.110006198291428e-05, + "loss": 3.1217, + "step": 6615 + }, + { + "epoch": 0.54, + "learning_rate": 4.109332470962352e-05, + "loss": 2.8752, + "step": 6620 + }, + { + "epoch": 0.54, + "learning_rate": 4.108658743633277e-05, + "loss": 3.3187, + "step": 6625 + }, + { + "epoch": 0.54, + "learning_rate": 4.1079850163042014e-05, + "loss": 3.1058, + "step": 6630 + }, + { + "epoch": 0.54, + "learning_rate": 4.107311288975126e-05, + "loss": 3.142, + "step": 6635 + }, + { + "epoch": 0.54, + "learning_rate": 4.1066375616460505e-05, + "loss": 3.048, + "step": 6640 + }, + { + "epoch": 0.54, + "learning_rate": 4.1059638343169754e-05, + "loss": 2.75, + "step": 6645 + }, + { + "epoch": 0.54, + "learning_rate": 4.1052901069879e-05, + "loss": 3.0339, + "step": 6650 + }, + { + "epoch": 0.54, + "learning_rate": 4.104616379658825e-05, + "loss": 3.113, + "step": 6655 + }, + { + "epoch": 0.54, + "learning_rate": 4.1039426523297493e-05, + "loss": 2.8537, + "step": 6660 + }, + { + "epoch": 0.54, + "learning_rate": 4.103268925000674e-05, + "loss": 3.1454, + "step": 6665 + }, + { + "epoch": 0.54, + "learning_rate": 4.1025951976715984e-05, + "loss": 2.9773, + "step": 6670 + }, + { + "epoch": 0.54, + "learning_rate": 4.1019214703425227e-05, + "loss": 2.959, + "step": 6675 + }, + { + "epoch": 0.54, + "learning_rate": 4.1012477430134475e-05, + "loss": 3.2781, + "step": 6680 + }, + { + "epoch": 0.54, + "learning_rate": 4.1005740156843724e-05, + "loss": 3.3036, + "step": 6685 + }, + { + "epoch": 0.54, + "learning_rate": 4.099900288355297e-05, + "loss": 3.0932, + "step": 6690 + }, + { + "epoch": 0.54, + "learning_rate": 4.0992265610262215e-05, + "loss": 2.9254, + "step": 6695 + }, + { + "epoch": 0.54, + "learning_rate": 4.0985528336971464e-05, + "loss": 2.9804, + "step": 6700 + }, + { + "epoch": 0.54, + "learning_rate": 4.097879106368071e-05, + "loss": 2.8821, + "step": 6705 + }, + { + "epoch": 0.54, + "learning_rate": 4.0972053790389955e-05, + "loss": 3.245, + "step": 6710 + }, + { + "epoch": 0.54, + "learning_rate": 4.09653165170992e-05, + "loss": 3.04, + "step": 6715 + }, + { + "epoch": 0.54, + "learning_rate": 4.0958579243808446e-05, + "loss": 3.2381, + "step": 6720 + }, + { + "epoch": 0.54, + "learning_rate": 4.0951841970517695e-05, + "loss": 3.176, + "step": 6725 + }, + { + "epoch": 0.54, + "learning_rate": 4.094510469722694e-05, + "loss": 3.1522, + "step": 6730 + }, + { + "epoch": 0.54, + "learning_rate": 4.0938367423936186e-05, + "loss": 3.071, + "step": 6735 + }, + { + "epoch": 0.54, + "learning_rate": 4.0931630150645435e-05, + "loss": 3.195, + "step": 6740 + }, + { + "epoch": 0.55, + "learning_rate": 4.0924892877354684e-05, + "loss": 3.317, + "step": 6745 + }, + { + "epoch": 0.55, + "learning_rate": 4.0918155604063926e-05, + "loss": 3.0664, + "step": 6750 + }, + { + "epoch": 0.55, + "learning_rate": 4.0911418330773175e-05, + "loss": 3.04, + "step": 6755 + }, + { + "epoch": 0.55, + "learning_rate": 4.090468105748242e-05, + "loss": 2.9613, + "step": 6760 + }, + { + "epoch": 0.55, + "learning_rate": 4.089794378419166e-05, + "loss": 3.2902, + "step": 6765 + }, + { + "epoch": 0.55, + "learning_rate": 4.089120651090091e-05, + "loss": 2.9059, + "step": 6770 + }, + { + "epoch": 0.55, + "learning_rate": 4.0884469237610157e-05, + "loss": 3.0708, + "step": 6775 + }, + { + "epoch": 0.55, + "learning_rate": 4.0877731964319405e-05, + "loss": 3.2298, + "step": 6780 + }, + { + "epoch": 0.55, + "learning_rate": 4.08723421456868e-05, + "loss": 3.3366, + "step": 6785 + }, + { + "epoch": 0.55, + "learning_rate": 4.086560487239605e-05, + "loss": 2.985, + "step": 6790 + }, + { + "epoch": 0.55, + "learning_rate": 4.085886759910529e-05, + "loss": 2.9827, + "step": 6795 + }, + { + "epoch": 0.55, + "learning_rate": 4.0852130325814534e-05, + "loss": 3.1584, + "step": 6800 + }, + { + "epoch": 0.55, + "learning_rate": 4.084539305252378e-05, + "loss": 2.8316, + "step": 6805 + }, + { + "epoch": 0.55, + "learning_rate": 4.083865577923303e-05, + "loss": 3.1549, + "step": 6810 + }, + { + "epoch": 0.55, + "learning_rate": 4.083191850594228e-05, + "loss": 3.2837, + "step": 6815 + }, + { + "epoch": 0.55, + "learning_rate": 4.082518123265152e-05, + "loss": 2.9758, + "step": 6820 + }, + { + "epoch": 0.55, + "learning_rate": 4.081844395936077e-05, + "loss": 3.2667, + "step": 6825 + }, + { + "epoch": 0.55, + "learning_rate": 4.081170668607002e-05, + "loss": 3.1173, + "step": 6830 + }, + { + "epoch": 0.55, + "learning_rate": 4.080496941277926e-05, + "loss": 3.0736, + "step": 6835 + }, + { + "epoch": 0.55, + "learning_rate": 4.0798232139488504e-05, + "loss": 2.9801, + "step": 6840 + }, + { + "epoch": 0.55, + "learning_rate": 4.079149486619775e-05, + "loss": 2.9754, + "step": 6845 + }, + { + "epoch": 0.55, + "learning_rate": 4.0784757592907e-05, + "loss": 2.8494, + "step": 6850 + }, + { + "epoch": 0.55, + "learning_rate": 4.0778020319616244e-05, + "loss": 3.2473, + "step": 6855 + }, + { + "epoch": 0.55, + "learning_rate": 4.077128304632549e-05, + "loss": 2.8755, + "step": 6860 + }, + { + "epoch": 0.55, + "learning_rate": 4.076454577303474e-05, + "loss": 3.0215, + "step": 6865 + }, + { + "epoch": 0.56, + "learning_rate": 4.075780849974399e-05, + "loss": 2.9058, + "step": 6870 + }, + { + "epoch": 0.56, + "learning_rate": 4.075107122645323e-05, + "loss": 3.2889, + "step": 6875 + }, + { + "epoch": 0.56, + "learning_rate": 4.0744333953162475e-05, + "loss": 2.8474, + "step": 6880 + }, + { + "epoch": 0.56, + "learning_rate": 4.0737596679871724e-05, + "loss": 2.9836, + "step": 6885 + }, + { + "epoch": 0.56, + "learning_rate": 4.0730859406580966e-05, + "loss": 2.9131, + "step": 6890 + }, + { + "epoch": 0.56, + "learning_rate": 4.0724122133290215e-05, + "loss": 2.9475, + "step": 6895 + }, + { + "epoch": 0.56, + "learning_rate": 4.0717384859999464e-05, + "loss": 3.168, + "step": 6900 + }, + { + "epoch": 0.56, + "learning_rate": 4.071064758670871e-05, + "loss": 3.2896, + "step": 6905 + }, + { + "epoch": 0.56, + "learning_rate": 4.0703910313417955e-05, + "loss": 2.8471, + "step": 6910 + }, + { + "epoch": 0.56, + "learning_rate": 4.06971730401272e-05, + "loss": 2.9173, + "step": 6915 + }, + { + "epoch": 0.56, + "learning_rate": 4.069043576683645e-05, + "loss": 3.1674, + "step": 6920 + }, + { + "epoch": 0.56, + "learning_rate": 4.0683698493545694e-05, + "loss": 3.1038, + "step": 6925 + }, + { + "epoch": 0.56, + "learning_rate": 4.0676961220254936e-05, + "loss": 3.0604, + "step": 6930 + }, + { + "epoch": 0.56, + "learning_rate": 4.0670223946964185e-05, + "loss": 2.9545, + "step": 6935 + }, + { + "epoch": 0.56, + "learning_rate": 4.0663486673673434e-05, + "loss": 3.1475, + "step": 6940 + }, + { + "epoch": 0.56, + "learning_rate": 4.0656749400382676e-05, + "loss": 2.9257, + "step": 6945 + }, + { + "epoch": 0.56, + "learning_rate": 4.0650012127091925e-05, + "loss": 2.6838, + "step": 6950 + }, + { + "epoch": 0.56, + "learning_rate": 4.0643274853801174e-05, + "loss": 3.1874, + "step": 6955 + }, + { + "epoch": 0.56, + "learning_rate": 4.063653758051042e-05, + "loss": 3.2852, + "step": 6960 + }, + { + "epoch": 0.56, + "learning_rate": 4.0629800307219665e-05, + "loss": 2.8721, + "step": 6965 + }, + { + "epoch": 0.56, + "learning_rate": 4.062306303392891e-05, + "loss": 3.1328, + "step": 6970 + }, + { + "epoch": 0.56, + "learning_rate": 4.0616325760638156e-05, + "loss": 3.207, + "step": 6975 + }, + { + "epoch": 0.56, + "learning_rate": 4.0609588487347405e-05, + "loss": 3.2351, + "step": 6980 + }, + { + "epoch": 0.56, + "learning_rate": 4.060285121405665e-05, + "loss": 3.0004, + "step": 6985 + }, + { + "epoch": 0.57, + "learning_rate": 4.0596113940765896e-05, + "loss": 3.0114, + "step": 6990 + }, + { + "epoch": 0.57, + "learning_rate": 4.0589376667475145e-05, + "loss": 3.3165, + "step": 6995 + }, + { + "epoch": 0.57, + "learning_rate": 4.058263939418439e-05, + "loss": 3.212, + "step": 7000 + }, + { + "epoch": 0.57, + "learning_rate": 4.0575902120893636e-05, + "loss": 2.9537, + "step": 7005 + }, + { + "epoch": 0.57, + "learning_rate": 4.056916484760288e-05, + "loss": 3.0058, + "step": 7010 + }, + { + "epoch": 0.57, + "learning_rate": 4.056242757431213e-05, + "loss": 2.9632, + "step": 7015 + }, + { + "epoch": 0.57, + "learning_rate": 4.055569030102137e-05, + "loss": 3.0076, + "step": 7020 + }, + { + "epoch": 0.57, + "learning_rate": 4.054895302773062e-05, + "loss": 3.118, + "step": 7025 + }, + { + "epoch": 0.57, + "learning_rate": 4.0542215754439866e-05, + "loss": 3.0387, + "step": 7030 + }, + { + "epoch": 0.57, + "learning_rate": 4.0535478481149115e-05, + "loss": 3.0755, + "step": 7035 + }, + { + "epoch": 0.57, + "learning_rate": 4.053008866251651e-05, + "loss": 3.5368, + "step": 7040 + }, + { + "epoch": 0.57, + "learning_rate": 4.052335138922575e-05, + "loss": 2.8257, + "step": 7045 + }, + { + "epoch": 0.57, + "learning_rate": 4.0516614115935e-05, + "loss": 3.0709, + "step": 7050 + }, + { + "epoch": 0.57, + "learning_rate": 4.0509876842644243e-05, + "loss": 2.8406, + "step": 7055 + }, + { + "epoch": 0.57, + "learning_rate": 4.050313956935349e-05, + "loss": 3.1349, + "step": 7060 + }, + { + "epoch": 0.57, + "learning_rate": 4.049640229606274e-05, + "loss": 2.9948, + "step": 7065 + }, + { + "epoch": 0.57, + "learning_rate": 4.048966502277198e-05, + "loss": 2.9537, + "step": 7070 + }, + { + "epoch": 0.57, + "learning_rate": 4.048292774948123e-05, + "loss": 2.7327, + "step": 7075 + }, + { + "epoch": 0.57, + "learning_rate": 4.047619047619048e-05, + "loss": 2.9098, + "step": 7080 + }, + { + "epoch": 0.57, + "learning_rate": 4.046945320289972e-05, + "loss": 3.1846, + "step": 7085 + }, + { + "epoch": 0.57, + "learning_rate": 4.046271592960897e-05, + "loss": 3.046, + "step": 7090 + }, + { + "epoch": 0.57, + "learning_rate": 4.0455978656318214e-05, + "loss": 3.0464, + "step": 7095 + }, + { + "epoch": 0.57, + "learning_rate": 4.044924138302746e-05, + "loss": 3.3597, + "step": 7100 + }, + { + "epoch": 0.57, + "learning_rate": 4.044250410973671e-05, + "loss": 3.1111, + "step": 7105 + }, + { + "epoch": 0.57, + "learning_rate": 4.0435766836445954e-05, + "loss": 3.187, + "step": 7110 + }, + { + "epoch": 0.58, + "learning_rate": 4.04290295631552e-05, + "loss": 2.8657, + "step": 7115 + }, + { + "epoch": 0.58, + "learning_rate": 4.042229228986445e-05, + "loss": 3.3056, + "step": 7120 + }, + { + "epoch": 0.58, + "learning_rate": 4.0415555016573694e-05, + "loss": 2.8097, + "step": 7125 + }, + { + "epoch": 0.58, + "learning_rate": 4.040881774328294e-05, + "loss": 3.078, + "step": 7130 + }, + { + "epoch": 0.58, + "learning_rate": 4.0402080469992185e-05, + "loss": 3.1158, + "step": 7135 + }, + { + "epoch": 0.58, + "learning_rate": 4.0395343196701434e-05, + "loss": 2.904, + "step": 7140 + }, + { + "epoch": 0.58, + "learning_rate": 4.0388605923410676e-05, + "loss": 3.0502, + "step": 7145 + }, + { + "epoch": 0.58, + "learning_rate": 4.0381868650119925e-05, + "loss": 3.0258, + "step": 7150 + }, + { + "epoch": 0.58, + "learning_rate": 4.0375131376829173e-05, + "loss": 3.079, + "step": 7155 + }, + { + "epoch": 0.58, + "learning_rate": 4.036839410353842e-05, + "loss": 3.3828, + "step": 7160 + }, + { + "epoch": 0.58, + "learning_rate": 4.0361656830247664e-05, + "loss": 3.3249, + "step": 7165 + }, + { + "epoch": 0.58, + "learning_rate": 4.035491955695691e-05, + "loss": 3.1637, + "step": 7170 + }, + { + "epoch": 0.58, + "learning_rate": 4.0348182283666155e-05, + "loss": 3.3418, + "step": 7175 + }, + { + "epoch": 0.58, + "learning_rate": 4.03414450103754e-05, + "loss": 3.0859, + "step": 7180 + }, + { + "epoch": 0.58, + "learning_rate": 4.0334707737084646e-05, + "loss": 3.1996, + "step": 7185 + }, + { + "epoch": 0.58, + "learning_rate": 4.0327970463793895e-05, + "loss": 3.1428, + "step": 7190 + }, + { + "epoch": 0.58, + "learning_rate": 4.0321233190503144e-05, + "loss": 3.2682, + "step": 7195 + }, + { + "epoch": 0.58, + "learning_rate": 4.0314495917212386e-05, + "loss": 2.856, + "step": 7200 + }, + { + "epoch": 0.58, + "learning_rate": 4.0307758643921635e-05, + "loss": 3.2575, + "step": 7205 + }, + { + "epoch": 0.58, + "learning_rate": 4.0301021370630884e-05, + "loss": 3.1416, + "step": 7210 + }, + { + "epoch": 0.58, + "learning_rate": 4.0294284097340126e-05, + "loss": 2.8951, + "step": 7215 + }, + { + "epoch": 0.58, + "learning_rate": 4.028754682404937e-05, + "loss": 3.0688, + "step": 7220 + }, + { + "epoch": 0.58, + "learning_rate": 4.028080955075862e-05, + "loss": 2.9216, + "step": 7225 + }, + { + "epoch": 0.58, + "learning_rate": 4.0274072277467866e-05, + "loss": 3.0731, + "step": 7230 + }, + { + "epoch": 0.58, + "learning_rate": 4.026733500417711e-05, + "loss": 2.8017, + "step": 7235 + }, + { + "epoch": 0.59, + "learning_rate": 4.026059773088636e-05, + "loss": 3.1149, + "step": 7240 + }, + { + "epoch": 0.59, + "learning_rate": 4.0253860457595606e-05, + "loss": 2.9981, + "step": 7245 + }, + { + "epoch": 0.59, + "learning_rate": 4.0247123184304855e-05, + "loss": 3.094, + "step": 7250 + }, + { + "epoch": 0.59, + "learning_rate": 4.02403859110141e-05, + "loss": 3.0985, + "step": 7255 + }, + { + "epoch": 0.59, + "learning_rate": 4.0233648637723346e-05, + "loss": 3.1172, + "step": 7260 + }, + { + "epoch": 0.59, + "learning_rate": 4.022691136443259e-05, + "loss": 3.1349, + "step": 7265 + }, + { + "epoch": 0.59, + "learning_rate": 4.022017409114183e-05, + "loss": 2.9795, + "step": 7270 + }, + { + "epoch": 0.59, + "learning_rate": 4.021343681785108e-05, + "loss": 3.1452, + "step": 7275 + }, + { + "epoch": 0.59, + "learning_rate": 4.020669954456033e-05, + "loss": 3.0341, + "step": 7280 + }, + { + "epoch": 0.59, + "learning_rate": 4.0199962271269576e-05, + "loss": 3.3302, + "step": 7285 + }, + { + "epoch": 0.59, + "learning_rate": 4.019322499797882e-05, + "loss": 3.282, + "step": 7290 + }, + { + "epoch": 0.59, + "learning_rate": 4.018648772468807e-05, + "loss": 3.1143, + "step": 7295 + }, + { + "epoch": 0.59, + "learning_rate": 4.0179750451397316e-05, + "loss": 3.047, + "step": 7300 + }, + { + "epoch": 0.59, + "learning_rate": 4.017301317810656e-05, + "loss": 2.9338, + "step": 7305 + }, + { + "epoch": 0.59, + "learning_rate": 4.01662759048158e-05, + "loss": 3.0096, + "step": 7310 + }, + { + "epoch": 0.59, + "learning_rate": 4.015953863152505e-05, + "loss": 2.8297, + "step": 7315 + }, + { + "epoch": 0.59, + "learning_rate": 4.01528013582343e-05, + "loss": 3.3948, + "step": 7320 + }, + { + "epoch": 0.59, + "learning_rate": 4.014606408494354e-05, + "loss": 3.1253, + "step": 7325 + }, + { + "epoch": 0.59, + "learning_rate": 4.013932681165279e-05, + "loss": 3.2223, + "step": 7330 + }, + { + "epoch": 0.59, + "learning_rate": 4.013258953836204e-05, + "loss": 2.956, + "step": 7335 + }, + { + "epoch": 0.59, + "learning_rate": 4.012585226507129e-05, + "loss": 3.335, + "step": 7340 + }, + { + "epoch": 0.59, + "learning_rate": 4.011911499178053e-05, + "loss": 3.1048, + "step": 7345 + }, + { + "epoch": 0.59, + "learning_rate": 4.011237771848977e-05, + "loss": 3.0714, + "step": 7350 + }, + { + "epoch": 0.59, + "learning_rate": 4.010564044519902e-05, + "loss": 3.0881, + "step": 7355 + }, + { + "epoch": 0.6, + "learning_rate": 4.009890317190827e-05, + "loss": 2.8853, + "step": 7360 + }, + { + "epoch": 0.6, + "learning_rate": 4.009216589861751e-05, + "loss": 2.93, + "step": 7365 + }, + { + "epoch": 0.6, + "learning_rate": 4.008542862532676e-05, + "loss": 3.0098, + "step": 7370 + }, + { + "epoch": 0.6, + "learning_rate": 4.007869135203601e-05, + "loss": 2.8795, + "step": 7375 + }, + { + "epoch": 0.6, + "learning_rate": 4.007195407874525e-05, + "loss": 3.1682, + "step": 7380 + }, + { + "epoch": 0.6, + "learning_rate": 4.00652168054545e-05, + "loss": 2.9576, + "step": 7385 + }, + { + "epoch": 0.6, + "learning_rate": 4.005847953216375e-05, + "loss": 3.1748, + "step": 7390 + }, + { + "epoch": 0.6, + "learning_rate": 4.005174225887299e-05, + "loss": 2.9084, + "step": 7395 + }, + { + "epoch": 0.6, + "learning_rate": 4.004500498558223e-05, + "loss": 3.3471, + "step": 7400 + }, + { + "epoch": 0.6, + "learning_rate": 4.003826771229148e-05, + "loss": 3.0671, + "step": 7405 + }, + { + "epoch": 0.6, + "learning_rate": 4.003153043900073e-05, + "loss": 2.7733, + "step": 7410 + }, + { + "epoch": 0.6, + "learning_rate": 4.002479316570998e-05, + "loss": 2.9595, + "step": 7415 + }, + { + "epoch": 0.6, + "learning_rate": 4.001805589241922e-05, + "loss": 3.3316, + "step": 7420 + }, + { + "epoch": 0.6, + "learning_rate": 4.001131861912847e-05, + "loss": 2.9916, + "step": 7425 + }, + { + "epoch": 0.6, + "learning_rate": 4.000458134583772e-05, + "loss": 3.11, + "step": 7430 + }, + { + "epoch": 0.6, + "learning_rate": 3.999784407254696e-05, + "loss": 2.9916, + "step": 7435 + }, + { + "epoch": 0.6, + "learning_rate": 3.99911067992562e-05, + "loss": 2.9348, + "step": 7440 + }, + { + "epoch": 0.6, + "learning_rate": 3.998436952596545e-05, + "loss": 3.1977, + "step": 7445 + }, + { + "epoch": 0.6, + "learning_rate": 3.99776322526747e-05, + "loss": 3.1294, + "step": 7450 + }, + { + "epoch": 0.6, + "learning_rate": 3.997089497938394e-05, + "loss": 2.9127, + "step": 7455 + }, + { + "epoch": 0.6, + "learning_rate": 3.996415770609319e-05, + "loss": 2.996, + "step": 7460 + }, + { + "epoch": 0.6, + "learning_rate": 3.995742043280244e-05, + "loss": 3.0576, + "step": 7465 + }, + { + "epoch": 0.6, + "learning_rate": 3.995068315951169e-05, + "loss": 3.1013, + "step": 7470 + }, + { + "epoch": 0.6, + "learning_rate": 3.994394588622093e-05, + "loss": 2.8199, + "step": 7475 + }, + { + "epoch": 0.6, + "learning_rate": 3.9937208612930174e-05, + "loss": 3.0425, + "step": 7480 + }, + { + "epoch": 0.61, + "learning_rate": 3.993047133963942e-05, + "loss": 2.7993, + "step": 7485 + }, + { + "epoch": 0.61, + "learning_rate": 3.9923734066348665e-05, + "loss": 3.192, + "step": 7490 + }, + { + "epoch": 0.61, + "learning_rate": 3.9916996793057914e-05, + "loss": 2.9459, + "step": 7495 + }, + { + "epoch": 0.61, + "learning_rate": 3.991025951976716e-05, + "loss": 3.0381, + "step": 7500 + }, + { + "epoch": 0.61, + "learning_rate": 3.990352224647641e-05, + "loss": 3.3137, + "step": 7505 + }, + { + "epoch": 0.61, + "learning_rate": 3.9896784973185654e-05, + "loss": 2.9695, + "step": 7510 + }, + { + "epoch": 0.61, + "learning_rate": 3.98900476998949e-05, + "loss": 2.8945, + "step": 7515 + }, + { + "epoch": 0.61, + "learning_rate": 3.9883310426604145e-05, + "loss": 3.0771, + "step": 7520 + }, + { + "epoch": 0.61, + "learning_rate": 3.987657315331339e-05, + "loss": 2.8429, + "step": 7525 + }, + { + "epoch": 0.61, + "learning_rate": 3.9869835880022636e-05, + "loss": 3.35, + "step": 7530 + }, + { + "epoch": 0.61, + "learning_rate": 3.986444606139004e-05, + "loss": 3.2072, + "step": 7535 + }, + { + "epoch": 0.61, + "learning_rate": 3.9857708788099286e-05, + "loss": 3.4036, + "step": 7540 + }, + { + "epoch": 0.61, + "learning_rate": 3.985097151480853e-05, + "loss": 2.8693, + "step": 7545 + }, + { + "epoch": 0.61, + "learning_rate": 3.984423424151778e-05, + "loss": 3.1092, + "step": 7550 + }, + { + "epoch": 0.61, + "learning_rate": 3.983749696822702e-05, + "loss": 2.8871, + "step": 7555 + }, + { + "epoch": 0.61, + "learning_rate": 3.983075969493627e-05, + "loss": 2.8921, + "step": 7560 + }, + { + "epoch": 0.61, + "learning_rate": 3.982402242164551e-05, + "loss": 3.216, + "step": 7565 + }, + { + "epoch": 0.61, + "learning_rate": 3.981728514835476e-05, + "loss": 3.0464, + "step": 7570 + }, + { + "epoch": 0.61, + "learning_rate": 3.981054787506401e-05, + "loss": 2.952, + "step": 7575 + }, + { + "epoch": 0.61, + "learning_rate": 3.980381060177325e-05, + "loss": 2.8887, + "step": 7580 + }, + { + "epoch": 0.61, + "learning_rate": 3.97970733284825e-05, + "loss": 3.2435, + "step": 7585 + }, + { + "epoch": 0.61, + "learning_rate": 3.979033605519175e-05, + "loss": 3.0364, + "step": 7590 + }, + { + "epoch": 0.61, + "learning_rate": 3.9783598781901e-05, + "loss": 3.008, + "step": 7595 + }, + { + "epoch": 0.61, + "learning_rate": 3.977686150861024e-05, + "loss": 2.9321, + "step": 7600 + }, + { + "epoch": 0.61, + "learning_rate": 3.977012423531948e-05, + "loss": 3.0257, + "step": 7605 + }, + { + "epoch": 0.62, + "learning_rate": 3.976338696202873e-05, + "loss": 3.3022, + "step": 7610 + }, + { + "epoch": 0.62, + "learning_rate": 3.975664968873797e-05, + "loss": 3.286, + "step": 7615 + }, + { + "epoch": 0.62, + "learning_rate": 3.974991241544722e-05, + "loss": 3.1355, + "step": 7620 + }, + { + "epoch": 0.62, + "learning_rate": 3.974317514215647e-05, + "loss": 3.1771, + "step": 7625 + }, + { + "epoch": 0.62, + "learning_rate": 3.973643786886572e-05, + "loss": 3.1195, + "step": 7630 + }, + { + "epoch": 0.62, + "learning_rate": 3.972970059557496e-05, + "loss": 2.8496, + "step": 7635 + }, + { + "epoch": 0.62, + "learning_rate": 3.972296332228421e-05, + "loss": 3.3008, + "step": 7640 + }, + { + "epoch": 0.62, + "learning_rate": 3.971622604899345e-05, + "loss": 3.0312, + "step": 7645 + }, + { + "epoch": 0.62, + "learning_rate": 3.9709488775702694e-05, + "loss": 3.4856, + "step": 7650 + }, + { + "epoch": 0.62, + "learning_rate": 3.970275150241194e-05, + "loss": 3.1483, + "step": 7655 + }, + { + "epoch": 0.62, + "learning_rate": 3.969601422912119e-05, + "loss": 2.8638, + "step": 7660 + }, + { + "epoch": 0.62, + "learning_rate": 3.968927695583044e-05, + "loss": 3.1239, + "step": 7665 + }, + { + "epoch": 0.62, + "learning_rate": 3.968253968253968e-05, + "loss": 3.0082, + "step": 7670 + }, + { + "epoch": 0.62, + "learning_rate": 3.967580240924893e-05, + "loss": 2.9481, + "step": 7675 + }, + { + "epoch": 0.62, + "learning_rate": 3.966906513595818e-05, + "loss": 2.9585, + "step": 7680 + }, + { + "epoch": 0.62, + "learning_rate": 3.966232786266742e-05, + "loss": 2.8524, + "step": 7685 + }, + { + "epoch": 0.62, + "learning_rate": 3.9655590589376664e-05, + "loss": 3.0689, + "step": 7690 + }, + { + "epoch": 0.62, + "learning_rate": 3.964885331608591e-05, + "loss": 3.0235, + "step": 7695 + }, + { + "epoch": 0.62, + "learning_rate": 3.964211604279516e-05, + "loss": 3.1008, + "step": 7700 + }, + { + "epoch": 0.62, + "learning_rate": 3.9635378769504404e-05, + "loss": 3.1541, + "step": 7705 + }, + { + "epoch": 0.62, + "learning_rate": 3.962864149621365e-05, + "loss": 3.2348, + "step": 7710 + }, + { + "epoch": 0.62, + "learning_rate": 3.96219042229229e-05, + "loss": 3.0871, + "step": 7715 + }, + { + "epoch": 0.62, + "learning_rate": 3.961516694963215e-05, + "loss": 2.8276, + "step": 7720 + }, + { + "epoch": 0.62, + "learning_rate": 3.960842967634139e-05, + "loss": 2.9168, + "step": 7725 + }, + { + "epoch": 0.62, + "learning_rate": 3.960169240305064e-05, + "loss": 3.037, + "step": 7730 + }, + { + "epoch": 0.63, + "learning_rate": 3.9594955129759884e-05, + "loss": 3.1009, + "step": 7735 + }, + { + "epoch": 0.63, + "learning_rate": 3.958821785646913e-05, + "loss": 2.8545, + "step": 7740 + }, + { + "epoch": 0.63, + "learning_rate": 3.9581480583178375e-05, + "loss": 3.2073, + "step": 7745 + }, + { + "epoch": 0.63, + "learning_rate": 3.9574743309887624e-05, + "loss": 3.1771, + "step": 7750 + }, + { + "epoch": 0.63, + "learning_rate": 3.956800603659687e-05, + "loss": 3.0321, + "step": 7755 + }, + { + "epoch": 0.63, + "learning_rate": 3.9561268763306115e-05, + "loss": 2.9031, + "step": 7760 + }, + { + "epoch": 0.63, + "learning_rate": 3.9554531490015364e-05, + "loss": 2.9989, + "step": 7765 + }, + { + "epoch": 0.63, + "learning_rate": 3.954779421672461e-05, + "loss": 2.9791, + "step": 7770 + }, + { + "epoch": 0.63, + "learning_rate": 3.9541056943433855e-05, + "loss": 3.1055, + "step": 7775 + }, + { + "epoch": 0.63, + "learning_rate": 3.95343196701431e-05, + "loss": 3.2512, + "step": 7780 + }, + { + "epoch": 0.63, + "learning_rate": 3.9527582396852346e-05, + "loss": 2.939, + "step": 7785 + }, + { + "epoch": 0.63, + "learning_rate": 3.9520845123561594e-05, + "loss": 3.0405, + "step": 7790 + }, + { + "epoch": 0.63, + "learning_rate": 3.951410785027084e-05, + "loss": 3.4549, + "step": 7795 + }, + { + "epoch": 0.63, + "learning_rate": 3.9507370576980085e-05, + "loss": 3.0582, + "step": 7800 + }, + { + "epoch": 0.63, + "learning_rate": 3.9500633303689334e-05, + "loss": 2.8642, + "step": 7805 + }, + { + "epoch": 0.63, + "learning_rate": 3.949389603039858e-05, + "loss": 2.9274, + "step": 7810 + }, + { + "epoch": 0.63, + "learning_rate": 3.9487158757107825e-05, + "loss": 3.3588, + "step": 7815 + }, + { + "epoch": 0.63, + "learning_rate": 3.948042148381707e-05, + "loss": 3.177, + "step": 7820 + }, + { + "epoch": 0.63, + "learning_rate": 3.9473684210526316e-05, + "loss": 2.9691, + "step": 7825 + }, + { + "epoch": 0.63, + "learning_rate": 3.9466946937235565e-05, + "loss": 3.4174, + "step": 7830 + }, + { + "epoch": 0.63, + "learning_rate": 3.946020966394481e-05, + "loss": 2.9425, + "step": 7835 + }, + { + "epoch": 0.63, + "learning_rate": 3.9453472390654056e-05, + "loss": 2.8462, + "step": 7840 + }, + { + "epoch": 0.63, + "learning_rate": 3.9446735117363305e-05, + "loss": 3.307, + "step": 7845 + }, + { + "epoch": 0.63, + "learning_rate": 3.9439997844072554e-05, + "loss": 2.9172, + "step": 7850 + }, + { + "epoch": 0.64, + "learning_rate": 3.9433260570781796e-05, + "loss": 2.9545, + "step": 7855 + }, + { + "epoch": 0.64, + "learning_rate": 3.9426523297491045e-05, + "loss": 3.1286, + "step": 7860 + }, + { + "epoch": 0.64, + "learning_rate": 3.941978602420029e-05, + "loss": 2.924, + "step": 7865 + }, + { + "epoch": 0.64, + "learning_rate": 3.941304875090953e-05, + "loss": 2.8875, + "step": 7870 + }, + { + "epoch": 0.64, + "learning_rate": 3.940631147761878e-05, + "loss": 3.1005, + "step": 7875 + }, + { + "epoch": 0.64, + "learning_rate": 3.939957420432803e-05, + "loss": 2.9367, + "step": 7880 + }, + { + "epoch": 0.64, + "learning_rate": 3.9392836931037276e-05, + "loss": 2.9924, + "step": 7885 + }, + { + "epoch": 0.64, + "learning_rate": 3.938609965774652e-05, + "loss": 3.1627, + "step": 7890 + }, + { + "epoch": 0.64, + "learning_rate": 3.9379362384455767e-05, + "loss": 2.8434, + "step": 7895 + }, + { + "epoch": 0.64, + "learning_rate": 3.9372625111165015e-05, + "loss": 3.1066, + "step": 7900 + }, + { + "epoch": 0.64, + "learning_rate": 3.936588783787426e-05, + "loss": 3.3607, + "step": 7905 + }, + { + "epoch": 0.64, + "learning_rate": 3.93591505645835e-05, + "loss": 2.9483, + "step": 7910 + }, + { + "epoch": 0.64, + "learning_rate": 3.935241329129275e-05, + "loss": 3.1395, + "step": 7915 + }, + { + "epoch": 0.64, + "learning_rate": 3.9345676018002e-05, + "loss": 2.8039, + "step": 7920 + }, + { + "epoch": 0.64, + "learning_rate": 3.933893874471124e-05, + "loss": 2.9289, + "step": 7925 + }, + { + "epoch": 0.64, + "learning_rate": 3.933220147142049e-05, + "loss": 3.1207, + "step": 7930 + }, + { + "epoch": 0.64, + "learning_rate": 3.932546419812974e-05, + "loss": 2.956, + "step": 7935 + }, + { + "epoch": 0.64, + "learning_rate": 3.9318726924838986e-05, + "loss": 3.1368, + "step": 7940 + }, + { + "epoch": 0.64, + "learning_rate": 3.931198965154823e-05, + "loss": 3.0236, + "step": 7945 + }, + { + "epoch": 0.64, + "learning_rate": 3.930525237825747e-05, + "loss": 3.1166, + "step": 7950 + }, + { + "epoch": 0.64, + "learning_rate": 3.929851510496672e-05, + "loss": 2.8586, + "step": 7955 + }, + { + "epoch": 0.64, + "learning_rate": 3.929177783167596e-05, + "loss": 3.1254, + "step": 7960 + }, + { + "epoch": 0.64, + "learning_rate": 3.928504055838521e-05, + "loss": 2.9588, + "step": 7965 + }, + { + "epoch": 0.64, + "learning_rate": 3.927830328509446e-05, + "loss": 2.9815, + "step": 7970 + }, + { + "epoch": 0.64, + "learning_rate": 3.927156601180371e-05, + "loss": 3.0071, + "step": 7975 + }, + { + "epoch": 0.65, + "learning_rate": 3.926482873851295e-05, + "loss": 3.0211, + "step": 7980 + }, + { + "epoch": 0.65, + "learning_rate": 3.92580914652222e-05, + "loss": 2.8963, + "step": 7985 + }, + { + "epoch": 0.65, + "learning_rate": 3.925135419193144e-05, + "loss": 3.0395, + "step": 7990 + }, + { + "epoch": 0.65, + "learning_rate": 3.924461691864069e-05, + "loss": 2.8795, + "step": 7995 + }, + { + "epoch": 0.65, + "learning_rate": 3.923787964534993e-05, + "loss": 3.0102, + "step": 8000 + }, + { + "epoch": 0.65, + "eval_loss": 3.0175957679748535, + "eval_rouge2_fmeasure": 0.0061, + "eval_rouge2_precision": 0.0093, + "eval_rouge2_recall": 0.0066, + "eval_runtime": 2672.3133, + "eval_samples_per_second": 0.103, + "eval_steps_per_second": 0.052, + "step": 8000 } ], "max_steps": 37107, "num_train_epochs": 3, - "total_flos": 1.56090117390336e+17, + "total_flos": 3.12180234780672e+17, "trial_name": null, "trial_params": null }