diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,19102 @@ +{ + "best_metric": 2.7569446563720703, + "best_model_checkpoint": "outputs/llama2_7b_darulm_unigram_tie_2e_16_11_23/checkpoint-260000", + "epoch": 1.9999927672239521, + "eval_steps": 1000, + "global_step": 276518, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 1.9994503030566046e-05, + "loss": 10.303, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 1.9987342504592864e-05, + "loss": 8.0353, + "step": 200 + }, + { + "epoch": 0.0, + "learning_rate": 1.99801096500745e-05, + "loss": 7.283, + "step": 300 + }, + { + "epoch": 0.0, + "learning_rate": 1.9972876795556137e-05, + "loss": 6.7724, + "step": 400 + }, + { + "epoch": 0.0, + "learning_rate": 1.996564394103777e-05, + "loss": 6.3252, + "step": 500 + }, + { + "epoch": 0.0, + "learning_rate": 1.995848341506459e-05, + "loss": 5.9418, + "step": 600 + }, + { + "epoch": 0.01, + "learning_rate": 1.9951250560546228e-05, + "loss": 5.6204, + "step": 700 + }, + { + "epoch": 0.01, + "learning_rate": 1.9944017706027864e-05, + "loss": 5.3454, + "step": 800 + }, + { + "epoch": 0.01, + "learning_rate": 1.9936784851509498e-05, + "loss": 5.1143, + "step": 900 + }, + { + "epoch": 0.01, + "learning_rate": 1.9929551996991134e-05, + "loss": 4.9167, + "step": 1000 + }, + { + "epoch": 0.01, + "eval_accuracy": 0.2686138407237027, + "eval_loss": 4.864671230316162, + "eval_runtime": 28.3398, + "eval_samples_per_second": 228.759, + "eval_steps_per_second": 2.399, + "step": 1000 + }, + { + "epoch": 0.01, + "learning_rate": 1.992231914247277e-05, + "loss": 4.7595, + "step": 1100 + }, + { + "epoch": 0.01, + "learning_rate": 1.9915086287954407e-05, + "loss": 4.6176, + "step": 1200 + }, + { + "epoch": 0.01, + "learning_rate": 1.990785343343604e-05, + "loss": 4.492, + "step": 1300 + }, + { + "epoch": 0.01, + "learning_rate": 1.9900620578917677e-05, + "loss": 4.3949, + "step": 1400 + }, + { + "epoch": 0.01, + "learning_rate": 1.9893387724399313e-05, + "loss": 4.3078, + "step": 1500 + }, + { + "epoch": 0.01, + "learning_rate": 1.988615486988095e-05, + "loss": 4.2192, + "step": 1600 + }, + { + "epoch": 0.01, + "learning_rate": 1.9878922015362586e-05, + "loss": 4.1453, + "step": 1700 + }, + { + "epoch": 0.01, + "learning_rate": 1.987168916084422e-05, + "loss": 4.0878, + "step": 1800 + }, + { + "epoch": 0.01, + "learning_rate": 1.9864456306325856e-05, + "loss": 4.0281, + "step": 1900 + }, + { + "epoch": 0.01, + "learning_rate": 1.9857223451807492e-05, + "loss": 3.9697, + "step": 2000 + }, + { + "epoch": 0.01, + "eval_accuracy": 0.34088309394404065, + "eval_loss": 3.9705445766448975, + "eval_runtime": 29.5273, + "eval_samples_per_second": 219.56, + "eval_steps_per_second": 2.303, + "step": 2000 + }, + { + "epoch": 0.02, + "learning_rate": 1.984999059728913e-05, + "loss": 3.9334, + "step": 2100 + }, + { + "epoch": 0.02, + "learning_rate": 1.9842757742770765e-05, + "loss": 3.8789, + "step": 2200 + }, + { + "epoch": 0.02, + "learning_rate": 1.98355248882524e-05, + "loss": 3.8488, + "step": 2300 + }, + { + "epoch": 0.02, + "learning_rate": 1.9828292033734035e-05, + "loss": 3.8073, + "step": 2400 + }, + { + "epoch": 0.02, + "learning_rate": 1.982105917921567e-05, + "loss": 3.7793, + "step": 2500 + }, + { + "epoch": 0.02, + "learning_rate": 1.9813826324697308e-05, + "loss": 3.7395, + "step": 2600 + }, + { + "epoch": 0.02, + "learning_rate": 1.9806593470178945e-05, + "loss": 3.7196, + "step": 2700 + }, + { + "epoch": 0.02, + "learning_rate": 1.9799360615660578e-05, + "loss": 3.6915, + "step": 2800 + }, + { + "epoch": 0.02, + "learning_rate": 1.9792127761142214e-05, + "loss": 3.6569, + "step": 2900 + }, + { + "epoch": 0.02, + "learning_rate": 1.978489490662385e-05, + "loss": 3.6398, + "step": 3000 + }, + { + "epoch": 0.02, + "eval_accuracy": 0.3693563558386489, + "eval_loss": 3.6475651264190674, + "eval_runtime": 28.718, + "eval_samples_per_second": 225.747, + "eval_steps_per_second": 2.368, + "step": 3000 + }, + { + "epoch": 0.02, + "learning_rate": 1.9777662052105487e-05, + "loss": 3.6159, + "step": 3100 + }, + { + "epoch": 0.02, + "learning_rate": 1.977042919758712e-05, + "loss": 3.5994, + "step": 3200 + }, + { + "epoch": 0.02, + "learning_rate": 1.9763196343068757e-05, + "loss": 3.5769, + "step": 3300 + }, + { + "epoch": 0.02, + "learning_rate": 1.9755963488550393e-05, + "loss": 3.5605, + "step": 3400 + }, + { + "epoch": 0.03, + "learning_rate": 1.974873063403203e-05, + "loss": 3.5432, + "step": 3500 + }, + { + "epoch": 0.03, + "learning_rate": 1.9741497779513667e-05, + "loss": 3.5282, + "step": 3600 + }, + { + "epoch": 0.03, + "learning_rate": 1.97342649249953e-05, + "loss": 3.508, + "step": 3700 + }, + { + "epoch": 0.03, + "learning_rate": 1.9727032070476936e-05, + "loss": 3.4947, + "step": 3800 + }, + { + "epoch": 0.03, + "learning_rate": 1.9719799215958573e-05, + "loss": 3.4862, + "step": 3900 + }, + { + "epoch": 0.03, + "learning_rate": 1.971256636144021e-05, + "loss": 3.468, + "step": 4000 + }, + { + "epoch": 0.03, + "eval_accuracy": 0.38501903923685776, + "eval_loss": 3.4784200191497803, + "eval_runtime": 29.0726, + "eval_samples_per_second": 222.994, + "eval_steps_per_second": 2.339, + "step": 4000 + }, + { + "epoch": 0.03, + "learning_rate": 1.9705333506921846e-05, + "loss": 3.454, + "step": 4100 + }, + { + "epoch": 0.03, + "learning_rate": 1.969810065240348e-05, + "loss": 3.4428, + "step": 4200 + }, + { + "epoch": 0.03, + "learning_rate": 1.9690867797885115e-05, + "loss": 3.4316, + "step": 4300 + }, + { + "epoch": 0.03, + "learning_rate": 1.9683634943366752e-05, + "loss": 3.4171, + "step": 4400 + }, + { + "epoch": 0.03, + "learning_rate": 1.967640208884839e-05, + "loss": 3.4073, + "step": 4500 + }, + { + "epoch": 0.03, + "learning_rate": 1.9669241562875206e-05, + "loss": 3.4011, + "step": 4600 + }, + { + "epoch": 0.03, + "learning_rate": 1.966200870835684e-05, + "loss": 3.389, + "step": 4700 + }, + { + "epoch": 0.03, + "learning_rate": 1.9654775853838476e-05, + "loss": 3.3863, + "step": 4800 + }, + { + "epoch": 0.04, + "learning_rate": 1.9647542999320113e-05, + "loss": 3.3624, + "step": 4900 + }, + { + "epoch": 0.04, + "learning_rate": 1.964031014480175e-05, + "loss": 3.3567, + "step": 5000 + }, + { + "epoch": 0.04, + "eval_accuracy": 0.39525576696820947, + "eval_loss": 3.3732998371124268, + "eval_runtime": 28.1701, + "eval_samples_per_second": 230.137, + "eval_steps_per_second": 2.414, + "step": 5000 + }, + { + "epoch": 0.04, + "learning_rate": 1.9633077290283386e-05, + "loss": 3.3488, + "step": 5100 + }, + { + "epoch": 0.04, + "learning_rate": 1.962584443576502e-05, + "loss": 3.3415, + "step": 5200 + }, + { + "epoch": 0.04, + "learning_rate": 1.9618611581246655e-05, + "loss": 3.3334, + "step": 5300 + }, + { + "epoch": 0.04, + "learning_rate": 1.9611378726728292e-05, + "loss": 3.3279, + "step": 5400 + }, + { + "epoch": 0.04, + "learning_rate": 1.9604145872209928e-05, + "loss": 3.3107, + "step": 5500 + }, + { + "epoch": 0.04, + "learning_rate": 1.9596913017691565e-05, + "loss": 3.3096, + "step": 5600 + }, + { + "epoch": 0.04, + "learning_rate": 1.9589752491718383e-05, + "loss": 3.301, + "step": 5700 + }, + { + "epoch": 0.04, + "learning_rate": 1.958251963720002e-05, + "loss": 3.3033, + "step": 5800 + }, + { + "epoch": 0.04, + "learning_rate": 1.9575286782681656e-05, + "loss": 3.2967, + "step": 5900 + }, + { + "epoch": 0.04, + "learning_rate": 1.9568053928163292e-05, + "loss": 3.2828, + "step": 6000 + }, + { + "epoch": 0.04, + "eval_accuracy": 0.40260530558050767, + "eval_loss": 3.299899101257324, + "eval_runtime": 28.3308, + "eval_samples_per_second": 228.832, + "eval_steps_per_second": 2.4, + "step": 6000 + }, + { + "epoch": 0.04, + "learning_rate": 1.9560821073644925e-05, + "loss": 3.2859, + "step": 6100 + }, + { + "epoch": 0.04, + "learning_rate": 1.9553588219126562e-05, + "loss": 3.2764, + "step": 6200 + }, + { + "epoch": 0.05, + "learning_rate": 1.9546427693153383e-05, + "loss": 3.2684, + "step": 6300 + }, + { + "epoch": 0.05, + "learning_rate": 1.953919483863502e-05, + "loss": 3.2635, + "step": 6400 + }, + { + "epoch": 0.05, + "learning_rate": 1.9531961984116653e-05, + "loss": 3.2525, + "step": 6500 + }, + { + "epoch": 0.05, + "learning_rate": 1.952472912959829e-05, + "loss": 3.2538, + "step": 6600 + }, + { + "epoch": 0.05, + "learning_rate": 1.9517496275079926e-05, + "loss": 3.2469, + "step": 6700 + }, + { + "epoch": 0.05, + "learning_rate": 1.9510263420561563e-05, + "loss": 3.2353, + "step": 6800 + }, + { + "epoch": 0.05, + "learning_rate": 1.9503030566043196e-05, + "loss": 3.2268, + "step": 6900 + }, + { + "epoch": 0.05, + "learning_rate": 1.9495797711524832e-05, + "loss": 3.2235, + "step": 7000 + }, + { + "epoch": 0.05, + "eval_accuracy": 0.4080512229571761, + "eval_loss": 3.245333433151245, + "eval_runtime": 30.054, + "eval_samples_per_second": 215.712, + "eval_steps_per_second": 2.263, + "step": 7000 + }, + { + "epoch": 0.05, + "learning_rate": 1.9488564857006465e-05, + "loss": 3.2305, + "step": 7100 + }, + { + "epoch": 0.05, + "learning_rate": 1.9481332002488102e-05, + "loss": 3.2131, + "step": 7200 + }, + { + "epoch": 0.05, + "learning_rate": 1.947409914796974e-05, + "loss": 3.2181, + "step": 7300 + }, + { + "epoch": 0.05, + "learning_rate": 1.9466866293451375e-05, + "loss": 3.213, + "step": 7400 + }, + { + "epoch": 0.05, + "learning_rate": 1.945963343893301e-05, + "loss": 3.206, + "step": 7500 + }, + { + "epoch": 0.05, + "learning_rate": 1.9452400584414645e-05, + "loss": 3.1944, + "step": 7600 + }, + { + "epoch": 0.06, + "learning_rate": 1.944516772989628e-05, + "loss": 3.194, + "step": 7700 + }, + { + "epoch": 0.06, + "learning_rate": 1.9437934875377918e-05, + "loss": 3.19, + "step": 7800 + }, + { + "epoch": 0.06, + "learning_rate": 1.9430702020859554e-05, + "loss": 3.1975, + "step": 7900 + }, + { + "epoch": 0.06, + "learning_rate": 1.942346916634119e-05, + "loss": 3.1898, + "step": 8000 + }, + { + "epoch": 0.06, + "eval_accuracy": 0.41249965974358277, + "eval_loss": 3.20282244682312, + "eval_runtime": 31.4355, + "eval_samples_per_second": 206.232, + "eval_steps_per_second": 2.163, + "step": 8000 + }, + { + "epoch": 0.06, + "learning_rate": 1.9416236311822824e-05, + "loss": 3.1871, + "step": 8100 + }, + { + "epoch": 0.06, + "learning_rate": 1.9409075785849645e-05, + "loss": 3.1755, + "step": 8200 + }, + { + "epoch": 0.06, + "learning_rate": 1.940184293133128e-05, + "loss": 3.1774, + "step": 8300 + }, + { + "epoch": 0.06, + "learning_rate": 1.9394610076812915e-05, + "loss": 3.1726, + "step": 8400 + }, + { + "epoch": 0.06, + "learning_rate": 1.938737722229455e-05, + "loss": 3.173, + "step": 8500 + }, + { + "epoch": 0.06, + "learning_rate": 1.9380144367776188e-05, + "loss": 3.1738, + "step": 8600 + }, + { + "epoch": 0.06, + "learning_rate": 1.9372911513257824e-05, + "loss": 3.1624, + "step": 8700 + }, + { + "epoch": 0.06, + "learning_rate": 1.936567865873946e-05, + "loss": 3.1588, + "step": 8800 + }, + { + "epoch": 0.06, + "learning_rate": 1.9358445804221094e-05, + "loss": 3.1592, + "step": 8900 + }, + { + "epoch": 0.07, + "learning_rate": 1.935121294970273e-05, + "loss": 3.1552, + "step": 9000 + }, + { + "epoch": 0.07, + "eval_accuracy": 0.4160389313831348, + "eval_loss": 3.1682770252227783, + "eval_runtime": 28.3625, + "eval_samples_per_second": 228.576, + "eval_steps_per_second": 2.398, + "step": 9000 + }, + { + "epoch": 0.07, + "learning_rate": 1.9343980095184367e-05, + "loss": 3.1564, + "step": 9100 + }, + { + "epoch": 0.07, + "learning_rate": 1.9336747240666004e-05, + "loss": 3.1467, + "step": 9200 + }, + { + "epoch": 0.07, + "learning_rate": 1.932951438614764e-05, + "loss": 3.1519, + "step": 9300 + }, + { + "epoch": 0.07, + "learning_rate": 1.9322281531629273e-05, + "loss": 3.1442, + "step": 9400 + }, + { + "epoch": 0.07, + "learning_rate": 1.931504867711091e-05, + "loss": 3.1372, + "step": 9500 + }, + { + "epoch": 0.07, + "learning_rate": 1.9307815822592546e-05, + "loss": 3.1327, + "step": 9600 + }, + { + "epoch": 0.07, + "learning_rate": 1.9300582968074183e-05, + "loss": 3.1327, + "step": 9700 + }, + { + "epoch": 0.07, + "learning_rate": 1.929335011355582e-05, + "loss": 3.1293, + "step": 9800 + }, + { + "epoch": 0.07, + "learning_rate": 1.9286117259037452e-05, + "loss": 3.1369, + "step": 9900 + }, + { + "epoch": 0.07, + "learning_rate": 1.927888440451909e-05, + "loss": 3.1068, + "step": 10000 + }, + { + "epoch": 0.07, + "eval_accuracy": 0.41901262124470334, + "eval_loss": 3.139714002609253, + "eval_runtime": 28.3677, + "eval_samples_per_second": 228.535, + "eval_steps_per_second": 2.397, + "step": 10000 + }, + { + "epoch": 0.07, + "learning_rate": 1.9271651550000725e-05, + "loss": 3.1264, + "step": 10100 + }, + { + "epoch": 0.07, + "learning_rate": 1.9264418695482362e-05, + "loss": 3.1248, + "step": 10200 + }, + { + "epoch": 0.07, + "learning_rate": 1.9257185840963995e-05, + "loss": 3.1291, + "step": 10300 + }, + { + "epoch": 0.08, + "learning_rate": 1.924995298644563e-05, + "loss": 3.1076, + "step": 10400 + }, + { + "epoch": 0.08, + "learning_rate": 1.9242792460472453e-05, + "loss": 3.107, + "step": 10500 + }, + { + "epoch": 0.08, + "learning_rate": 1.923555960595409e-05, + "loss": 3.1117, + "step": 10600 + }, + { + "epoch": 0.08, + "learning_rate": 1.9228326751435723e-05, + "loss": 3.1053, + "step": 10700 + }, + { + "epoch": 0.08, + "learning_rate": 1.922109389691736e-05, + "loss": 3.1084, + "step": 10800 + }, + { + "epoch": 0.08, + "learning_rate": 1.9213861042398996e-05, + "loss": 3.1009, + "step": 10900 + }, + { + "epoch": 0.08, + "learning_rate": 1.9206628187880632e-05, + "loss": 3.1019, + "step": 11000 + }, + { + "epoch": 0.08, + "eval_accuracy": 0.4217092667701046, + "eval_loss": 3.115234136581421, + "eval_runtime": 28.1789, + "eval_samples_per_second": 230.066, + "eval_steps_per_second": 2.413, + "step": 11000 + }, + { + "epoch": 0.08, + "learning_rate": 1.919939533336227e-05, + "loss": 3.1015, + "step": 11100 + }, + { + "epoch": 0.08, + "learning_rate": 1.9192162478843902e-05, + "loss": 3.1001, + "step": 11200 + }, + { + "epoch": 0.08, + "learning_rate": 1.9184929624325538e-05, + "loss": 3.0938, + "step": 11300 + }, + { + "epoch": 0.08, + "learning_rate": 1.9177696769807175e-05, + "loss": 3.1016, + "step": 11400 + }, + { + "epoch": 0.08, + "learning_rate": 1.9170536243833993e-05, + "loss": 3.0905, + "step": 11500 + }, + { + "epoch": 0.08, + "learning_rate": 1.916330338931563e-05, + "loss": 3.0883, + "step": 11600 + }, + { + "epoch": 0.08, + "learning_rate": 1.9156070534797266e-05, + "loss": 3.0899, + "step": 11700 + }, + { + "epoch": 0.09, + "learning_rate": 1.91488376802789e-05, + "loss": 3.0803, + "step": 11800 + }, + { + "epoch": 0.09, + "learning_rate": 1.9141604825760535e-05, + "loss": 3.0806, + "step": 11900 + }, + { + "epoch": 0.09, + "learning_rate": 1.9134371971242172e-05, + "loss": 3.0849, + "step": 12000 + }, + { + "epoch": 0.09, + "eval_accuracy": 0.423874809834469, + "eval_loss": 3.094170331954956, + "eval_runtime": 28.7606, + "eval_samples_per_second": 225.413, + "eval_steps_per_second": 2.364, + "step": 12000 + }, + { + "epoch": 0.09, + "learning_rate": 1.912713911672381e-05, + "loss": 3.0781, + "step": 12100 + }, + { + "epoch": 0.09, + "learning_rate": 1.911990626220544e-05, + "loss": 3.0784, + "step": 12200 + }, + { + "epoch": 0.09, + "learning_rate": 1.9112673407687078e-05, + "loss": 3.0682, + "step": 12300 + }, + { + "epoch": 0.09, + "learning_rate": 1.9105440553168715e-05, + "loss": 3.0766, + "step": 12400 + }, + { + "epoch": 0.09, + "learning_rate": 1.909820769865035e-05, + "loss": 3.0708, + "step": 12500 + }, + { + "epoch": 0.09, + "learning_rate": 1.9090974844131988e-05, + "loss": 3.06, + "step": 12600 + }, + { + "epoch": 0.09, + "learning_rate": 1.908374198961362e-05, + "loss": 3.0599, + "step": 12700 + }, + { + "epoch": 0.09, + "learning_rate": 1.9076509135095257e-05, + "loss": 3.0593, + "step": 12800 + }, + { + "epoch": 0.09, + "learning_rate": 1.9069276280576894e-05, + "loss": 3.0675, + "step": 12900 + }, + { + "epoch": 0.09, + "learning_rate": 1.906204342605853e-05, + "loss": 3.0561, + "step": 13000 + }, + { + "epoch": 0.09, + "eval_accuracy": 0.42562236679339327, + "eval_loss": 3.076077938079834, + "eval_runtime": 28.8743, + "eval_samples_per_second": 224.525, + "eval_steps_per_second": 2.355, + "step": 13000 + }, + { + "epoch": 0.09, + "learning_rate": 1.9054810571540167e-05, + "loss": 3.0535, + "step": 13100 + }, + { + "epoch": 0.1, + "learning_rate": 1.90475777170218e-05, + "loss": 3.0541, + "step": 13200 + }, + { + "epoch": 0.1, + "learning_rate": 1.9040344862503437e-05, + "loss": 3.0628, + "step": 13300 + }, + { + "epoch": 0.1, + "learning_rate": 1.9033112007985073e-05, + "loss": 3.0601, + "step": 13400 + }, + { + "epoch": 0.1, + "learning_rate": 1.902587915346671e-05, + "loss": 3.0525, + "step": 13500 + }, + { + "epoch": 0.1, + "learning_rate": 1.9018718627493528e-05, + "loss": 3.0553, + "step": 13600 + }, + { + "epoch": 0.1, + "learning_rate": 1.9011485772975164e-05, + "loss": 3.0457, + "step": 13700 + }, + { + "epoch": 0.1, + "learning_rate": 1.90042529184568e-05, + "loss": 3.0466, + "step": 13800 + }, + { + "epoch": 0.1, + "learning_rate": 1.8997020063938437e-05, + "loss": 3.0506, + "step": 13900 + }, + { + "epoch": 0.1, + "learning_rate": 1.898978720942007e-05, + "loss": 3.0429, + "step": 14000 + }, + { + "epoch": 0.1, + "eval_accuracy": 0.42772923452891876, + "eval_loss": 3.0595009326934814, + "eval_runtime": 28.2676, + "eval_samples_per_second": 229.344, + "eval_steps_per_second": 2.406, + "step": 14000 + }, + { + "epoch": 0.1, + "learning_rate": 1.8982554354901707e-05, + "loss": 3.046, + "step": 14100 + }, + { + "epoch": 0.1, + "learning_rate": 1.8975321500383343e-05, + "loss": 3.0338, + "step": 14200 + }, + { + "epoch": 0.1, + "learning_rate": 1.896808864586498e-05, + "loss": 3.0346, + "step": 14300 + }, + { + "epoch": 0.1, + "learning_rate": 1.8960855791346616e-05, + "loss": 3.0413, + "step": 14400 + }, + { + "epoch": 0.1, + "learning_rate": 1.895362293682825e-05, + "loss": 3.0441, + "step": 14500 + }, + { + "epoch": 0.11, + "learning_rate": 1.8946390082309886e-05, + "loss": 3.0364, + "step": 14600 + }, + { + "epoch": 0.11, + "learning_rate": 1.8939157227791522e-05, + "loss": 3.0317, + "step": 14700 + }, + { + "epoch": 0.11, + "learning_rate": 1.893192437327316e-05, + "loss": 3.0346, + "step": 14800 + }, + { + "epoch": 0.11, + "learning_rate": 1.8924691518754795e-05, + "loss": 3.0259, + "step": 14900 + }, + { + "epoch": 0.11, + "learning_rate": 1.891745866423643e-05, + "loss": 3.035, + "step": 15000 + }, + { + "epoch": 0.11, + "eval_accuracy": 0.4292789890906231, + "eval_loss": 3.045100450515747, + "eval_runtime": 37.5227, + "eval_samples_per_second": 172.775, + "eval_steps_per_second": 1.812, + "step": 15000 + }, + { + "epoch": 0.11, + "learning_rate": 1.8910298138263247e-05, + "loss": 3.0315, + "step": 15100 + }, + { + "epoch": 0.11, + "learning_rate": 1.8903065283744883e-05, + "loss": 3.0256, + "step": 15200 + }, + { + "epoch": 0.11, + "learning_rate": 1.889583242922652e-05, + "loss": 3.0319, + "step": 15300 + }, + { + "epoch": 0.11, + "learning_rate": 1.8888599574708156e-05, + "loss": 3.0208, + "step": 15400 + }, + { + "epoch": 0.11, + "learning_rate": 1.888136672018979e-05, + "loss": 3.0232, + "step": 15500 + }, + { + "epoch": 0.11, + "learning_rate": 1.887420619421661e-05, + "loss": 3.0272, + "step": 15600 + }, + { + "epoch": 0.11, + "learning_rate": 1.8866973339698247e-05, + "loss": 3.019, + "step": 15700 + }, + { + "epoch": 0.11, + "learning_rate": 1.8859740485179884e-05, + "loss": 3.0133, + "step": 15800 + }, + { + "epoch": 0.12, + "learning_rate": 1.8852507630661517e-05, + "loss": 3.0151, + "step": 15900 + }, + { + "epoch": 0.12, + "learning_rate": 1.8845274776143153e-05, + "loss": 3.0077, + "step": 16000 + }, + { + "epoch": 0.12, + "eval_accuracy": 0.4305855737328095, + "eval_loss": 3.0321857929229736, + "eval_runtime": 28.0011, + "eval_samples_per_second": 231.526, + "eval_steps_per_second": 2.428, + "step": 16000 + }, + { + "epoch": 0.12, + "learning_rate": 1.883804192162479e-05, + "loss": 3.0229, + "step": 16100 + }, + { + "epoch": 0.12, + "learning_rate": 1.8830809067106426e-05, + "loss": 3.0126, + "step": 16200 + }, + { + "epoch": 0.12, + "learning_rate": 1.8823576212588063e-05, + "loss": 3.0175, + "step": 16300 + }, + { + "epoch": 0.12, + "learning_rate": 1.8816343358069696e-05, + "loss": 3.0128, + "step": 16400 + }, + { + "epoch": 0.12, + "learning_rate": 1.8809110503551333e-05, + "loss": 3.0058, + "step": 16500 + }, + { + "epoch": 0.12, + "learning_rate": 1.880187764903297e-05, + "loss": 3.0173, + "step": 16600 + }, + { + "epoch": 0.12, + "learning_rate": 1.8794644794514606e-05, + "loss": 3.0078, + "step": 16700 + }, + { + "epoch": 0.12, + "learning_rate": 1.8787411939996242e-05, + "loss": 2.9971, + "step": 16800 + }, + { + "epoch": 0.12, + "learning_rate": 1.8780179085477875e-05, + "loss": 3.0027, + "step": 16900 + }, + { + "epoch": 0.12, + "learning_rate": 1.8772946230959512e-05, + "loss": 3.0008, + "step": 17000 + }, + { + "epoch": 0.12, + "eval_accuracy": 0.4320149531353495, + "eval_loss": 3.0199708938598633, + "eval_runtime": 27.8606, + "eval_samples_per_second": 232.694, + "eval_steps_per_second": 2.441, + "step": 17000 + }, + { + "epoch": 0.12, + "learning_rate": 1.8765713376441148e-05, + "loss": 3.0051, + "step": 17100 + }, + { + "epoch": 0.12, + "learning_rate": 1.8758480521922785e-05, + "loss": 3.0007, + "step": 17200 + }, + { + "epoch": 0.13, + "learning_rate": 1.8751319995949603e-05, + "loss": 3.0039, + "step": 17300 + }, + { + "epoch": 0.13, + "learning_rate": 1.874408714143124e-05, + "loss": 3.0022, + "step": 17400 + }, + { + "epoch": 0.13, + "learning_rate": 1.8736854286912876e-05, + "loss": 3.0052, + "step": 17500 + }, + { + "epoch": 0.13, + "learning_rate": 1.8729621432394512e-05, + "loss": 3.0004, + "step": 17600 + }, + { + "epoch": 0.13, + "learning_rate": 1.8722388577876146e-05, + "loss": 2.9994, + "step": 17700 + }, + { + "epoch": 0.13, + "learning_rate": 1.8715155723357782e-05, + "loss": 2.9951, + "step": 17800 + }, + { + "epoch": 0.13, + "learning_rate": 1.870792286883942e-05, + "loss": 2.9933, + "step": 17900 + }, + { + "epoch": 0.13, + "learning_rate": 1.8700690014321055e-05, + "loss": 2.9952, + "step": 18000 + }, + { + "epoch": 0.13, + "eval_accuracy": 0.4330487277434497, + "eval_loss": 3.0093255043029785, + "eval_runtime": 28.2096, + "eval_samples_per_second": 229.815, + "eval_steps_per_second": 2.411, + "step": 18000 + }, + { + "epoch": 0.13, + "learning_rate": 1.8693457159802688e-05, + "loss": 2.993, + "step": 18100 + }, + { + "epoch": 0.13, + "learning_rate": 1.8686224305284325e-05, + "loss": 2.9969, + "step": 18200 + }, + { + "epoch": 0.13, + "learning_rate": 1.867899145076596e-05, + "loss": 2.995, + "step": 18300 + }, + { + "epoch": 0.13, + "learning_rate": 1.8671758596247594e-05, + "loss": 2.9944, + "step": 18400 + }, + { + "epoch": 0.13, + "learning_rate": 1.866452574172923e-05, + "loss": 2.9843, + "step": 18500 + }, + { + "epoch": 0.13, + "learning_rate": 1.8657292887210867e-05, + "loss": 2.993, + "step": 18600 + }, + { + "epoch": 0.14, + "learning_rate": 1.8650060032692504e-05, + "loss": 2.9936, + "step": 18700 + }, + { + "epoch": 0.14, + "learning_rate": 1.864282717817414e-05, + "loss": 2.9957, + "step": 18800 + }, + { + "epoch": 0.14, + "learning_rate": 1.8635594323655774e-05, + "loss": 2.9793, + "step": 18900 + }, + { + "epoch": 0.14, + "learning_rate": 1.862836146913741e-05, + "loss": 2.9825, + "step": 19000 + }, + { + "epoch": 0.14, + "eval_accuracy": 0.4340897611551176, + "eval_loss": 2.999600410461426, + "eval_runtime": 28.9603, + "eval_samples_per_second": 223.858, + "eval_steps_per_second": 2.348, + "step": 19000 + }, + { + "epoch": 0.14, + "learning_rate": 1.8621128614619047e-05, + "loss": 2.9911, + "step": 19100 + }, + { + "epoch": 0.14, + "learning_rate": 1.8613895760100683e-05, + "loss": 2.9847, + "step": 19200 + }, + { + "epoch": 0.14, + "learning_rate": 1.8606662905582316e-05, + "loss": 2.979, + "step": 19300 + }, + { + "epoch": 0.14, + "learning_rate": 1.8599430051063953e-05, + "loss": 2.9858, + "step": 19400 + }, + { + "epoch": 0.14, + "learning_rate": 1.859219719654559e-05, + "loss": 2.9766, + "step": 19500 + }, + { + "epoch": 0.14, + "learning_rate": 1.8584964342027226e-05, + "loss": 2.9735, + "step": 19600 + }, + { + "epoch": 0.14, + "learning_rate": 1.8577731487508862e-05, + "loss": 2.9777, + "step": 19700 + }, + { + "epoch": 0.14, + "learning_rate": 1.8570498632990495e-05, + "loss": 2.9714, + "step": 19800 + }, + { + "epoch": 0.14, + "learning_rate": 1.8563265778472132e-05, + "loss": 2.9762, + "step": 19900 + }, + { + "epoch": 0.14, + "learning_rate": 1.855603292395377e-05, + "loss": 2.9781, + "step": 20000 + }, + { + "epoch": 0.14, + "eval_accuracy": 0.4350987348510282, + "eval_loss": 2.990344285964966, + "eval_runtime": 28.2496, + "eval_samples_per_second": 229.49, + "eval_steps_per_second": 2.407, + "step": 20000 + }, + { + "epoch": 0.15, + "learning_rate": 1.8548800069435405e-05, + "loss": 2.9686, + "step": 20100 + }, + { + "epoch": 0.15, + "learning_rate": 1.854156721491704e-05, + "loss": 2.9724, + "step": 20200 + }, + { + "epoch": 0.15, + "learning_rate": 1.8534334360398675e-05, + "loss": 2.9771, + "step": 20300 + }, + { + "epoch": 0.15, + "learning_rate": 1.852710150588031e-05, + "loss": 2.9698, + "step": 20400 + }, + { + "epoch": 0.15, + "learning_rate": 1.8519868651361948e-05, + "loss": 2.9693, + "step": 20500 + }, + { + "epoch": 0.15, + "learning_rate": 1.8512635796843584e-05, + "loss": 2.9712, + "step": 20600 + }, + { + "epoch": 0.15, + "learning_rate": 1.850540294232522e-05, + "loss": 2.9724, + "step": 20700 + }, + { + "epoch": 0.15, + "learning_rate": 1.849824241635204e-05, + "loss": 2.9726, + "step": 20800 + }, + { + "epoch": 0.15, + "learning_rate": 1.8491009561833675e-05, + "loss": 2.9656, + "step": 20900 + }, + { + "epoch": 0.15, + "learning_rate": 1.848377670731531e-05, + "loss": 2.957, + "step": 21000 + }, + { + "epoch": 0.15, + "eval_accuracy": 0.435996406892234, + "eval_loss": 2.9821181297302246, + "eval_runtime": 27.9904, + "eval_samples_per_second": 231.615, + "eval_steps_per_second": 2.429, + "step": 21000 + }, + { + "epoch": 0.15, + "learning_rate": 1.8476543852796948e-05, + "loss": 2.9718, + "step": 21100 + }, + { + "epoch": 0.15, + "learning_rate": 1.846931099827858e-05, + "loss": 2.9685, + "step": 21200 + }, + { + "epoch": 0.15, + "learning_rate": 1.8462078143760218e-05, + "loss": 2.9582, + "step": 21300 + }, + { + "epoch": 0.15, + "learning_rate": 1.8454845289241854e-05, + "loss": 2.9644, + "step": 21400 + }, + { + "epoch": 0.16, + "learning_rate": 1.844761243472349e-05, + "loss": 2.9678, + "step": 21500 + }, + { + "epoch": 0.16, + "learning_rate": 1.8440379580205124e-05, + "loss": 2.9632, + "step": 21600 + }, + { + "epoch": 0.16, + "learning_rate": 1.843314672568676e-05, + "loss": 2.9642, + "step": 21700 + }, + { + "epoch": 0.16, + "learning_rate": 1.8425913871168397e-05, + "loss": 2.9633, + "step": 21800 + }, + { + "epoch": 0.16, + "learning_rate": 1.841875334519522e-05, + "loss": 2.9495, + "step": 21900 + }, + { + "epoch": 0.16, + "learning_rate": 1.841152049067685e-05, + "loss": 2.9676, + "step": 22000 + }, + { + "epoch": 0.16, + "eval_accuracy": 0.4368305644022224, + "eval_loss": 2.973825454711914, + "eval_runtime": 30.0107, + "eval_samples_per_second": 216.023, + "eval_steps_per_second": 2.266, + "step": 22000 + }, + { + "epoch": 0.16, + "learning_rate": 1.8404287636158488e-05, + "loss": 2.9593, + "step": 22100 + }, + { + "epoch": 0.16, + "learning_rate": 1.8397054781640125e-05, + "loss": 2.9548, + "step": 22200 + }, + { + "epoch": 0.16, + "learning_rate": 1.838982192712176e-05, + "loss": 2.9656, + "step": 22300 + }, + { + "epoch": 0.16, + "learning_rate": 1.8382589072603398e-05, + "loss": 2.957, + "step": 22400 + }, + { + "epoch": 0.16, + "learning_rate": 1.837535621808503e-05, + "loss": 2.9557, + "step": 22500 + }, + { + "epoch": 0.16, + "learning_rate": 1.8368123363566667e-05, + "loss": 2.9577, + "step": 22600 + }, + { + "epoch": 0.16, + "learning_rate": 1.8360962837593485e-05, + "loss": 2.9519, + "step": 22700 + }, + { + "epoch": 0.16, + "learning_rate": 1.8353729983075122e-05, + "loss": 2.955, + "step": 22800 + }, + { + "epoch": 0.17, + "learning_rate": 1.834649712855676e-05, + "loss": 2.9548, + "step": 22900 + }, + { + "epoch": 0.17, + "learning_rate": 1.8339336602583576e-05, + "loss": 2.9513, + "step": 23000 + }, + { + "epoch": 0.17, + "eval_accuracy": 0.43764657490329156, + "eval_loss": 2.9663443565368652, + "eval_runtime": 28.3956, + "eval_samples_per_second": 228.31, + "eval_steps_per_second": 2.395, + "step": 23000 + }, + { + "epoch": 0.17, + "learning_rate": 1.8332103748065213e-05, + "loss": 2.9514, + "step": 23100 + }, + { + "epoch": 0.17, + "learning_rate": 1.832487089354685e-05, + "loss": 2.9546, + "step": 23200 + }, + { + "epoch": 0.17, + "learning_rate": 1.8317638039028486e-05, + "loss": 2.9449, + "step": 23300 + }, + { + "epoch": 0.17, + "learning_rate": 1.831040518451012e-05, + "loss": 2.9515, + "step": 23400 + }, + { + "epoch": 0.17, + "learning_rate": 1.8303172329991756e-05, + "loss": 2.946, + "step": 23500 + }, + { + "epoch": 0.17, + "learning_rate": 1.8295939475473392e-05, + "loss": 2.9443, + "step": 23600 + }, + { + "epoch": 0.17, + "learning_rate": 1.828870662095503e-05, + "loss": 2.9437, + "step": 23700 + }, + { + "epoch": 0.17, + "learning_rate": 1.8281473766436665e-05, + "loss": 2.9395, + "step": 23800 + }, + { + "epoch": 0.17, + "learning_rate": 1.8274240911918298e-05, + "loss": 2.9422, + "step": 23900 + }, + { + "epoch": 0.17, + "learning_rate": 1.8267008057399935e-05, + "loss": 2.9475, + "step": 24000 + }, + { + "epoch": 0.17, + "eval_accuracy": 0.4384904108180369, + "eval_loss": 2.959416389465332, + "eval_runtime": 30.8733, + "eval_samples_per_second": 209.988, + "eval_steps_per_second": 2.203, + "step": 24000 + }, + { + "epoch": 0.17, + "learning_rate": 1.825977520288157e-05, + "loss": 2.9432, + "step": 24100 + }, + { + "epoch": 0.18, + "learning_rate": 1.8252542348363208e-05, + "loss": 2.9413, + "step": 24200 + }, + { + "epoch": 0.18, + "learning_rate": 1.8245309493844844e-05, + "loss": 2.9378, + "step": 24300 + }, + { + "epoch": 0.18, + "learning_rate": 1.8238076639326477e-05, + "loss": 2.9429, + "step": 24400 + }, + { + "epoch": 0.18, + "learning_rate": 1.8230843784808114e-05, + "loss": 2.9398, + "step": 24500 + }, + { + "epoch": 0.18, + "learning_rate": 1.822361093028975e-05, + "loss": 2.9462, + "step": 24600 + }, + { + "epoch": 0.18, + "learning_rate": 1.8216378075771387e-05, + "loss": 2.9334, + "step": 24700 + }, + { + "epoch": 0.18, + "learning_rate": 1.820914522125302e-05, + "loss": 2.944, + "step": 24800 + }, + { + "epoch": 0.18, + "learning_rate": 1.8201912366734657e-05, + "loss": 2.9331, + "step": 24900 + }, + { + "epoch": 0.18, + "learning_rate": 1.8194679512216293e-05, + "loss": 2.9406, + "step": 25000 + }, + { + "epoch": 0.18, + "eval_accuracy": 0.43914672764061663, + "eval_loss": 2.953129291534424, + "eval_runtime": 29.0217, + "eval_samples_per_second": 223.385, + "eval_steps_per_second": 2.343, + "step": 25000 + }, + { + "epoch": 0.18, + "learning_rate": 1.818744665769793e-05, + "loss": 2.9408, + "step": 25100 + }, + { + "epoch": 0.18, + "learning_rate": 1.8180213803179566e-05, + "loss": 2.9313, + "step": 25200 + }, + { + "epoch": 0.18, + "learning_rate": 1.81729809486612e-05, + "loss": 2.9436, + "step": 25300 + }, + { + "epoch": 0.18, + "learning_rate": 1.8165748094142836e-05, + "loss": 2.9374, + "step": 25400 + }, + { + "epoch": 0.18, + "learning_rate": 1.8158515239624472e-05, + "loss": 2.9326, + "step": 25500 + }, + { + "epoch": 0.19, + "learning_rate": 1.815128238510611e-05, + "loss": 2.942, + "step": 25600 + }, + { + "epoch": 0.19, + "learning_rate": 1.8144049530587745e-05, + "loss": 2.9281, + "step": 25700 + }, + { + "epoch": 0.19, + "learning_rate": 1.813681667606938e-05, + "loss": 2.9341, + "step": 25800 + }, + { + "epoch": 0.19, + "learning_rate": 1.8129583821551015e-05, + "loss": 2.9369, + "step": 25900 + }, + { + "epoch": 0.19, + "learning_rate": 1.812235096703265e-05, + "loss": 2.9387, + "step": 26000 + }, + { + "epoch": 0.19, + "eval_accuracy": 0.4397600965420875, + "eval_loss": 2.9472999572753906, + "eval_runtime": 28.7579, + "eval_samples_per_second": 225.434, + "eval_steps_per_second": 2.365, + "step": 26000 + }, + { + "epoch": 0.19, + "learning_rate": 1.8115118112514288e-05, + "loss": 2.9313, + "step": 26100 + }, + { + "epoch": 0.19, + "learning_rate": 1.8107957586541106e-05, + "loss": 2.9328, + "step": 26200 + }, + { + "epoch": 0.19, + "learning_rate": 1.810072473202274e-05, + "loss": 2.9346, + "step": 26300 + }, + { + "epoch": 0.19, + "learning_rate": 1.8093491877504376e-05, + "loss": 2.9297, + "step": 26400 + }, + { + "epoch": 0.19, + "learning_rate": 1.8086259022986012e-05, + "loss": 2.9283, + "step": 26500 + }, + { + "epoch": 0.19, + "learning_rate": 1.807902616846765e-05, + "loss": 2.9278, + "step": 26600 + }, + { + "epoch": 0.19, + "learning_rate": 1.8071793313949285e-05, + "loss": 2.9337, + "step": 26700 + }, + { + "epoch": 0.19, + "learning_rate": 1.806456045943092e-05, + "loss": 2.9267, + "step": 26800 + }, + { + "epoch": 0.19, + "learning_rate": 1.8057327604912555e-05, + "loss": 2.9238, + "step": 26900 + }, + { + "epoch": 0.2, + "learning_rate": 1.8050167078939376e-05, + "loss": 2.9353, + "step": 27000 + }, + { + "epoch": 0.2, + "eval_accuracy": 0.4403087411117463, + "eval_loss": 2.941570520401001, + "eval_runtime": 29.8384, + "eval_samples_per_second": 217.271, + "eval_steps_per_second": 2.279, + "step": 27000 + }, + { + "epoch": 0.2, + "learning_rate": 1.8042934224421013e-05, + "loss": 2.9234, + "step": 27100 + }, + { + "epoch": 0.2, + "learning_rate": 1.8035701369902646e-05, + "loss": 2.9276, + "step": 27200 + }, + { + "epoch": 0.2, + "learning_rate": 1.8028468515384282e-05, + "loss": 2.9249, + "step": 27300 + }, + { + "epoch": 0.2, + "learning_rate": 1.802123566086592e-05, + "loss": 2.9259, + "step": 27400 + }, + { + "epoch": 0.2, + "learning_rate": 1.8014002806347555e-05, + "loss": 2.9235, + "step": 27500 + }, + { + "epoch": 0.2, + "learning_rate": 1.8006769951829192e-05, + "loss": 2.9182, + "step": 27600 + }, + { + "epoch": 0.2, + "learning_rate": 1.7999537097310825e-05, + "loss": 2.9273, + "step": 27700 + }, + { + "epoch": 0.2, + "learning_rate": 1.799230424279246e-05, + "loss": 2.925, + "step": 27800 + }, + { + "epoch": 0.2, + "learning_rate": 1.7985071388274098e-05, + "loss": 2.9196, + "step": 27900 + }, + { + "epoch": 0.2, + "learning_rate": 1.7977838533755735e-05, + "loss": 2.9208, + "step": 28000 + }, + { + "epoch": 0.2, + "eval_accuracy": 0.44107756938962533, + "eval_loss": 2.936343193054199, + "eval_runtime": 29.069, + "eval_samples_per_second": 223.021, + "eval_steps_per_second": 2.339, + "step": 28000 + }, + { + "epoch": 0.2, + "learning_rate": 1.797060567923737e-05, + "loss": 2.907, + "step": 28100 + }, + { + "epoch": 0.2, + "learning_rate": 1.7963372824719004e-05, + "loss": 2.9215, + "step": 28200 + }, + { + "epoch": 0.2, + "learning_rate": 1.795613997020064e-05, + "loss": 2.9223, + "step": 28300 + }, + { + "epoch": 0.21, + "learning_rate": 1.7948907115682277e-05, + "loss": 2.9122, + "step": 28400 + }, + { + "epoch": 0.21, + "learning_rate": 1.7941674261163914e-05, + "loss": 2.9166, + "step": 28500 + }, + { + "epoch": 0.21, + "learning_rate": 1.7934441406645547e-05, + "loss": 2.9289, + "step": 28600 + }, + { + "epoch": 0.21, + "learning_rate": 1.7927208552127183e-05, + "loss": 2.914, + "step": 28700 + }, + { + "epoch": 0.21, + "learning_rate": 1.791997569760882e-05, + "loss": 2.9175, + "step": 28800 + }, + { + "epoch": 0.21, + "learning_rate": 1.7912742843090456e-05, + "loss": 2.9142, + "step": 28900 + }, + { + "epoch": 0.21, + "learning_rate": 1.7905509988572093e-05, + "loss": 2.9142, + "step": 29000 + }, + { + "epoch": 0.21, + "eval_accuracy": 0.4415330593134987, + "eval_loss": 2.9310333728790283, + "eval_runtime": 30.502, + "eval_samples_per_second": 212.543, + "eval_steps_per_second": 2.229, + "step": 29000 + }, + { + "epoch": 0.21, + "learning_rate": 1.789834946259891e-05, + "loss": 2.9196, + "step": 29100 + }, + { + "epoch": 0.21, + "learning_rate": 1.7891116608080548e-05, + "loss": 2.9166, + "step": 29200 + }, + { + "epoch": 0.21, + "learning_rate": 1.788388375356218e-05, + "loss": 2.917, + "step": 29300 + }, + { + "epoch": 0.21, + "learning_rate": 1.7876650899043817e-05, + "loss": 2.9178, + "step": 29400 + }, + { + "epoch": 0.21, + "learning_rate": 1.7869418044525454e-05, + "loss": 2.9148, + "step": 29500 + }, + { + "epoch": 0.21, + "learning_rate": 1.786218519000709e-05, + "loss": 2.9086, + "step": 29600 + }, + { + "epoch": 0.21, + "learning_rate": 1.7855024664033908e-05, + "loss": 2.9154, + "step": 29700 + }, + { + "epoch": 0.22, + "learning_rate": 1.7847791809515545e-05, + "loss": 2.9156, + "step": 29800 + }, + { + "epoch": 0.22, + "learning_rate": 1.784055895499718e-05, + "loss": 2.9178, + "step": 29900 + }, + { + "epoch": 0.22, + "learning_rate": 1.7833326100478814e-05, + "loss": 2.9167, + "step": 30000 + }, + { + "epoch": 0.22, + "eval_accuracy": 0.44185305157077487, + "eval_loss": 2.9265494346618652, + "eval_runtime": 28.0744, + "eval_samples_per_second": 230.922, + "eval_steps_per_second": 2.422, + "step": 30000 + }, + { + "epoch": 0.22, + "learning_rate": 1.782609324596045e-05, + "loss": 2.905, + "step": 30100 + }, + { + "epoch": 0.22, + "learning_rate": 1.7818860391442087e-05, + "loss": 2.9161, + "step": 30200 + }, + { + "epoch": 0.22, + "learning_rate": 1.7811627536923724e-05, + "loss": 2.9094, + "step": 30300 + }, + { + "epoch": 0.22, + "learning_rate": 1.780439468240536e-05, + "loss": 2.9145, + "step": 30400 + }, + { + "epoch": 0.22, + "learning_rate": 1.7797161827886994e-05, + "loss": 2.9115, + "step": 30500 + }, + { + "epoch": 0.22, + "learning_rate": 1.778992897336863e-05, + "loss": 2.9093, + "step": 30600 + }, + { + "epoch": 0.22, + "learning_rate": 1.7782696118850267e-05, + "loss": 2.9111, + "step": 30700 + }, + { + "epoch": 0.22, + "learning_rate": 1.7775463264331903e-05, + "loss": 2.9052, + "step": 30800 + }, + { + "epoch": 0.22, + "learning_rate": 1.776823040981354e-05, + "loss": 2.9072, + "step": 30900 + }, + { + "epoch": 0.22, + "learning_rate": 1.7760997555295173e-05, + "loss": 2.9069, + "step": 31000 + }, + { + "epoch": 0.22, + "eval_accuracy": 0.4425190467981115, + "eval_loss": 2.921447992324829, + "eval_runtime": 27.8624, + "eval_samples_per_second": 232.679, + "eval_steps_per_second": 2.441, + "step": 31000 + }, + { + "epoch": 0.22, + "learning_rate": 1.7753837029321994e-05, + "loss": 2.9063, + "step": 31100 + }, + { + "epoch": 0.23, + "learning_rate": 1.7746676503348816e-05, + "loss": 2.9101, + "step": 31200 + }, + { + "epoch": 0.23, + "learning_rate": 1.773944364883045e-05, + "loss": 2.8999, + "step": 31300 + }, + { + "epoch": 0.23, + "learning_rate": 1.7732210794312085e-05, + "loss": 2.9026, + "step": 31400 + }, + { + "epoch": 0.23, + "learning_rate": 1.772497793979372e-05, + "loss": 2.9066, + "step": 31500 + }, + { + "epoch": 0.23, + "learning_rate": 1.7717745085275355e-05, + "loss": 2.9032, + "step": 31600 + }, + { + "epoch": 0.23, + "learning_rate": 1.771051223075699e-05, + "loss": 2.9003, + "step": 31700 + }, + { + "epoch": 0.23, + "learning_rate": 1.7703279376238628e-05, + "loss": 2.9049, + "step": 31800 + }, + { + "epoch": 0.23, + "learning_rate": 1.769604652172026e-05, + "loss": 2.8992, + "step": 31900 + }, + { + "epoch": 0.23, + "learning_rate": 1.7688813667201898e-05, + "loss": 2.9067, + "step": 32000 + }, + { + "epoch": 0.23, + "eval_accuracy": 0.44299268373090406, + "eval_loss": 2.9168407917022705, + "eval_runtime": 29.0945, + "eval_samples_per_second": 222.825, + "eval_steps_per_second": 2.337, + "step": 32000 + }, + { + "epoch": 0.23, + "learning_rate": 1.7681580812683534e-05, + "loss": 2.9007, + "step": 32100 + }, + { + "epoch": 0.23, + "learning_rate": 1.767434795816517e-05, + "loss": 2.8995, + "step": 32200 + }, + { + "epoch": 0.23, + "learning_rate": 1.7667115103646807e-05, + "loss": 2.9044, + "step": 32300 + }, + { + "epoch": 0.23, + "learning_rate": 1.765988224912844e-05, + "loss": 2.8931, + "step": 32400 + }, + { + "epoch": 0.24, + "learning_rate": 1.7652649394610077e-05, + "loss": 2.902, + "step": 32500 + }, + { + "epoch": 0.24, + "learning_rate": 1.7645416540091713e-05, + "loss": 2.9019, + "step": 32600 + }, + { + "epoch": 0.24, + "learning_rate": 1.763818368557335e-05, + "loss": 2.9064, + "step": 32700 + }, + { + "epoch": 0.24, + "learning_rate": 1.7630950831054986e-05, + "loss": 2.9004, + "step": 32800 + }, + { + "epoch": 0.24, + "learning_rate": 1.7623790305081804e-05, + "loss": 2.8917, + "step": 32900 + }, + { + "epoch": 0.24, + "learning_rate": 1.761655745056344e-05, + "loss": 2.8978, + "step": 33000 + }, + { + "epoch": 0.24, + "eval_accuracy": 0.4434288168452635, + "eval_loss": 2.91280198097229, + "eval_runtime": 27.8037, + "eval_samples_per_second": 233.17, + "eval_steps_per_second": 2.446, + "step": 33000 + }, + { + "epoch": 0.24, + "learning_rate": 1.7609324596045077e-05, + "loss": 2.9001, + "step": 33100 + }, + { + "epoch": 0.24, + "learning_rate": 1.7602091741526714e-05, + "loss": 2.898, + "step": 33200 + }, + { + "epoch": 0.24, + "learning_rate": 1.7594858887008347e-05, + "loss": 2.8938, + "step": 33300 + }, + { + "epoch": 0.24, + "learning_rate": 1.7587626032489984e-05, + "loss": 2.8943, + "step": 33400 + }, + { + "epoch": 0.24, + "learning_rate": 1.758039317797162e-05, + "loss": 2.8966, + "step": 33500 + }, + { + "epoch": 0.24, + "learning_rate": 1.7573160323453257e-05, + "loss": 2.9033, + "step": 33600 + }, + { + "epoch": 0.24, + "learning_rate": 1.756592746893489e-05, + "loss": 2.8971, + "step": 33700 + }, + { + "epoch": 0.24, + "learning_rate": 1.7558694614416526e-05, + "loss": 2.8919, + "step": 33800 + }, + { + "epoch": 0.25, + "learning_rate": 1.7551461759898163e-05, + "loss": 2.8973, + "step": 33900 + }, + { + "epoch": 0.25, + "learning_rate": 1.75442289053798e-05, + "loss": 2.8982, + "step": 34000 + }, + { + "epoch": 0.25, + "eval_accuracy": 0.44378510312037817, + "eval_loss": 2.9087538719177246, + "eval_runtime": 30.238, + "eval_samples_per_second": 214.399, + "eval_steps_per_second": 2.249, + "step": 34000 + }, + { + "epoch": 0.25, + "learning_rate": 1.7536996050861436e-05, + "loss": 2.8893, + "step": 34100 + }, + { + "epoch": 0.25, + "learning_rate": 1.752976319634307e-05, + "loss": 2.8867, + "step": 34200 + }, + { + "epoch": 0.25, + "learning_rate": 1.7522530341824705e-05, + "loss": 2.8991, + "step": 34300 + }, + { + "epoch": 0.25, + "learning_rate": 1.7515297487306342e-05, + "loss": 2.8888, + "step": 34400 + }, + { + "epoch": 0.25, + "learning_rate": 1.750806463278798e-05, + "loss": 2.895, + "step": 34500 + }, + { + "epoch": 0.25, + "learning_rate": 1.7500831778269615e-05, + "loss": 2.8962, + "step": 34600 + }, + { + "epoch": 0.25, + "learning_rate": 1.7493598923751248e-05, + "loss": 2.8883, + "step": 34700 + }, + { + "epoch": 0.25, + "learning_rate": 1.7486366069232885e-05, + "loss": 2.8861, + "step": 34800 + }, + { + "epoch": 0.25, + "learning_rate": 1.747913321471452e-05, + "loss": 2.8939, + "step": 34900 + }, + { + "epoch": 0.25, + "learning_rate": 1.7471900360196158e-05, + "loss": 2.8856, + "step": 35000 + }, + { + "epoch": 0.25, + "eval_accuracy": 0.44437488091025396, + "eval_loss": 2.904993772506714, + "eval_runtime": 29.3345, + "eval_samples_per_second": 221.002, + "eval_steps_per_second": 2.318, + "step": 35000 + }, + { + "epoch": 0.25, + "learning_rate": 1.7464667505677794e-05, + "loss": 2.8966, + "step": 35100 + }, + { + "epoch": 0.25, + "learning_rate": 1.7457434651159427e-05, + "loss": 2.8854, + "step": 35200 + }, + { + "epoch": 0.26, + "learning_rate": 1.7450201796641064e-05, + "loss": 2.8906, + "step": 35300 + }, + { + "epoch": 0.26, + "learning_rate": 1.7443041270667885e-05, + "loss": 2.8887, + "step": 35400 + }, + { + "epoch": 0.26, + "learning_rate": 1.743580841614952e-05, + "loss": 2.8843, + "step": 35500 + }, + { + "epoch": 0.26, + "learning_rate": 1.7428575561631155e-05, + "loss": 2.8908, + "step": 35600 + }, + { + "epoch": 0.26, + "learning_rate": 1.742134270711279e-05, + "loss": 2.8883, + "step": 35700 + }, + { + "epoch": 0.26, + "learning_rate": 1.7414109852594428e-05, + "loss": 2.8848, + "step": 35800 + }, + { + "epoch": 0.26, + "learning_rate": 1.740687699807606e-05, + "loss": 2.876, + "step": 35900 + }, + { + "epoch": 0.26, + "learning_rate": 1.7399644143557697e-05, + "loss": 2.8981, + "step": 36000 + }, + { + "epoch": 0.26, + "eval_accuracy": 0.44447287475841796, + "eval_loss": 2.901261806488037, + "eval_runtime": 28.6761, + "eval_samples_per_second": 226.077, + "eval_steps_per_second": 2.371, + "step": 36000 + }, + { + "epoch": 0.26, + "learning_rate": 1.7392411289039334e-05, + "loss": 2.8881, + "step": 36100 + }, + { + "epoch": 0.26, + "learning_rate": 1.7385178434520967e-05, + "loss": 2.8854, + "step": 36200 + }, + { + "epoch": 0.26, + "learning_rate": 1.7377945580002604e-05, + "loss": 2.8843, + "step": 36300 + }, + { + "epoch": 0.26, + "learning_rate": 1.7370785054029425e-05, + "loss": 2.8885, + "step": 36400 + }, + { + "epoch": 0.26, + "learning_rate": 1.736355219951106e-05, + "loss": 2.8869, + "step": 36500 + }, + { + "epoch": 0.26, + "learning_rate": 1.7356319344992695e-05, + "loss": 2.8813, + "step": 36600 + }, + { + "epoch": 0.27, + "learning_rate": 1.734908649047433e-05, + "loss": 2.884, + "step": 36700 + }, + { + "epoch": 0.27, + "learning_rate": 1.7341853635955968e-05, + "loss": 2.8834, + "step": 36800 + }, + { + "epoch": 0.27, + "learning_rate": 1.7334620781437604e-05, + "loss": 2.8773, + "step": 36900 + }, + { + "epoch": 0.27, + "learning_rate": 1.732738792691924e-05, + "loss": 2.8813, + "step": 37000 + }, + { + "epoch": 0.27, + "eval_accuracy": 0.44499187921350863, + "eval_loss": 2.8976523876190186, + "eval_runtime": 27.9421, + "eval_samples_per_second": 232.015, + "eval_steps_per_second": 2.434, + "step": 37000 + }, + { + "epoch": 0.27, + "learning_rate": 1.7320155072400874e-05, + "loss": 2.8865, + "step": 37100 + }, + { + "epoch": 0.27, + "learning_rate": 1.731292221788251e-05, + "loss": 2.881, + "step": 37200 + }, + { + "epoch": 0.27, + "learning_rate": 1.7305689363364147e-05, + "loss": 2.8781, + "step": 37300 + }, + { + "epoch": 0.27, + "learning_rate": 1.7298456508845783e-05, + "loss": 2.884, + "step": 37400 + }, + { + "epoch": 0.27, + "learning_rate": 1.7291223654327417e-05, + "loss": 2.8802, + "step": 37500 + }, + { + "epoch": 0.27, + "learning_rate": 1.7283990799809053e-05, + "loss": 2.8802, + "step": 37600 + }, + { + "epoch": 0.27, + "learning_rate": 1.727675794529069e-05, + "loss": 2.8685, + "step": 37700 + }, + { + "epoch": 0.27, + "learning_rate": 1.7269525090772326e-05, + "loss": 2.8745, + "step": 37800 + }, + { + "epoch": 0.27, + "learning_rate": 1.7262292236253963e-05, + "loss": 2.8799, + "step": 37900 + }, + { + "epoch": 0.27, + "learning_rate": 1.7255059381735596e-05, + "loss": 2.8765, + "step": 38000 + }, + { + "epoch": 0.27, + "eval_accuracy": 0.44532699397821757, + "eval_loss": 2.8943746089935303, + "eval_runtime": 28.0195, + "eval_samples_per_second": 231.375, + "eval_steps_per_second": 2.427, + "step": 38000 + }, + { + "epoch": 0.28, + "learning_rate": 1.7247826527217232e-05, + "loss": 2.8834, + "step": 38100 + }, + { + "epoch": 0.28, + "learning_rate": 1.724059367269887e-05, + "loss": 2.8782, + "step": 38200 + }, + { + "epoch": 0.28, + "learning_rate": 1.7233360818180505e-05, + "loss": 2.8855, + "step": 38300 + }, + { + "epoch": 0.28, + "learning_rate": 1.7226127963662142e-05, + "loss": 2.8681, + "step": 38400 + }, + { + "epoch": 0.28, + "learning_rate": 1.7218895109143775e-05, + "loss": 2.8832, + "step": 38500 + }, + { + "epoch": 0.28, + "learning_rate": 1.721166225462541e-05, + "loss": 2.8764, + "step": 38600 + }, + { + "epoch": 0.28, + "learning_rate": 1.7204429400107048e-05, + "loss": 2.8771, + "step": 38700 + }, + { + "epoch": 0.28, + "learning_rate": 1.7197196545588684e-05, + "loss": 2.869, + "step": 38800 + }, + { + "epoch": 0.28, + "learning_rate": 1.7190036019615502e-05, + "loss": 2.8749, + "step": 38900 + }, + { + "epoch": 0.28, + "learning_rate": 1.718280316509714e-05, + "loss": 2.879, + "step": 39000 + }, + { + "epoch": 0.28, + "eval_accuracy": 0.4457794594006043, + "eval_loss": 2.891030788421631, + "eval_runtime": 33.3451, + "eval_samples_per_second": 194.421, + "eval_steps_per_second": 2.039, + "step": 39000 + }, + { + "epoch": 0.28, + "learning_rate": 1.7175570310578776e-05, + "loss": 2.8757, + "step": 39100 + }, + { + "epoch": 0.28, + "learning_rate": 1.7168409784605594e-05, + "loss": 2.8721, + "step": 39200 + }, + { + "epoch": 0.28, + "learning_rate": 1.716117693008723e-05, + "loss": 2.8751, + "step": 39300 + }, + { + "epoch": 0.28, + "learning_rate": 1.7153944075568863e-05, + "loss": 2.8813, + "step": 39400 + }, + { + "epoch": 0.29, + "learning_rate": 1.71467112210505e-05, + "loss": 2.8738, + "step": 39500 + }, + { + "epoch": 0.29, + "learning_rate": 1.7139478366532136e-05, + "loss": 2.8761, + "step": 39600 + }, + { + "epoch": 0.29, + "learning_rate": 1.7132245512013773e-05, + "loss": 2.877, + "step": 39700 + }, + { + "epoch": 0.29, + "learning_rate": 1.712501265749541e-05, + "loss": 2.8673, + "step": 39800 + }, + { + "epoch": 0.29, + "learning_rate": 1.7117779802977042e-05, + "loss": 2.8789, + "step": 39900 + }, + { + "epoch": 0.29, + "learning_rate": 1.711054694845868e-05, + "loss": 2.8738, + "step": 40000 + }, + { + "epoch": 0.29, + "eval_accuracy": 0.4462434179286399, + "eval_loss": 2.8878371715545654, + "eval_runtime": 30.9298, + "eval_samples_per_second": 209.604, + "eval_steps_per_second": 2.199, + "step": 40000 + }, + { + "epoch": 0.29, + "learning_rate": 1.7103314093940315e-05, + "loss": 2.8666, + "step": 40100 + }, + { + "epoch": 0.29, + "learning_rate": 1.7096081239421952e-05, + "loss": 2.8741, + "step": 40200 + }, + { + "epoch": 0.29, + "learning_rate": 1.708884838490359e-05, + "loss": 2.8752, + "step": 40300 + }, + { + "epoch": 0.29, + "learning_rate": 1.708161553038522e-05, + "loss": 2.8725, + "step": 40400 + }, + { + "epoch": 0.29, + "learning_rate": 1.7074382675866858e-05, + "loss": 2.8711, + "step": 40500 + }, + { + "epoch": 0.29, + "learning_rate": 1.7067149821348495e-05, + "loss": 2.8709, + "step": 40600 + }, + { + "epoch": 0.29, + "learning_rate": 1.705991696683013e-05, + "loss": 2.8677, + "step": 40700 + }, + { + "epoch": 0.3, + "learning_rate": 1.7052684112311764e-05, + "loss": 2.8678, + "step": 40800 + }, + { + "epoch": 0.3, + "learning_rate": 1.70454512577934e-05, + "loss": 2.8671, + "step": 40900 + }, + { + "epoch": 0.3, + "learning_rate": 1.7038218403275037e-05, + "loss": 2.8671, + "step": 41000 + }, + { + "epoch": 0.3, + "eval_accuracy": 0.4465404239746184, + "eval_loss": 2.8851165771484375, + "eval_runtime": 28.3107, + "eval_samples_per_second": 228.994, + "eval_steps_per_second": 2.402, + "step": 41000 + }, + { + "epoch": 0.3, + "learning_rate": 1.7030985548756674e-05, + "loss": 2.8682, + "step": 41100 + }, + { + "epoch": 0.3, + "learning_rate": 1.7023825022783492e-05, + "loss": 2.8793, + "step": 41200 + }, + { + "epoch": 0.3, + "learning_rate": 1.701659216826513e-05, + "loss": 2.864, + "step": 41300 + }, + { + "epoch": 0.3, + "learning_rate": 1.7009359313746765e-05, + "loss": 2.8711, + "step": 41400 + }, + { + "epoch": 0.3, + "learning_rate": 1.70021264592284e-05, + "loss": 2.8738, + "step": 41500 + }, + { + "epoch": 0.3, + "learning_rate": 1.6994893604710038e-05, + "loss": 2.862, + "step": 41600 + }, + { + "epoch": 0.3, + "learning_rate": 1.698766075019167e-05, + "loss": 2.8652, + "step": 41700 + }, + { + "epoch": 0.3, + "learning_rate": 1.6980427895673307e-05, + "loss": 2.8758, + "step": 41800 + }, + { + "epoch": 0.3, + "learning_rate": 1.6973195041154944e-05, + "loss": 2.8695, + "step": 41900 + }, + { + "epoch": 0.3, + "learning_rate": 1.696596218663658e-05, + "loss": 2.866, + "step": 42000 + }, + { + "epoch": 0.3, + "eval_accuracy": 0.44681746831078567, + "eval_loss": 2.882030963897705, + "eval_runtime": 28.8345, + "eval_samples_per_second": 224.835, + "eval_steps_per_second": 2.358, + "step": 42000 + }, + { + "epoch": 0.3, + "learning_rate": 1.6958729332118217e-05, + "loss": 2.8696, + "step": 42100 + }, + { + "epoch": 0.31, + "learning_rate": 1.695149647759985e-05, + "loss": 2.8766, + "step": 42200 + }, + { + "epoch": 0.31, + "learning_rate": 1.6944263623081487e-05, + "loss": 2.8706, + "step": 42300 + }, + { + "epoch": 0.31, + "learning_rate": 1.6937030768563123e-05, + "loss": 2.8673, + "step": 42400 + }, + { + "epoch": 0.31, + "learning_rate": 1.692979791404476e-05, + "loss": 2.864, + "step": 42500 + }, + { + "epoch": 0.31, + "learning_rate": 1.6922565059526396e-05, + "loss": 2.8708, + "step": 42600 + }, + { + "epoch": 0.31, + "learning_rate": 1.691533220500803e-05, + "loss": 2.8642, + "step": 42700 + }, + { + "epoch": 0.31, + "learning_rate": 1.6908099350489666e-05, + "loss": 2.8601, + "step": 42800 + }, + { + "epoch": 0.31, + "learning_rate": 1.6900938824516484e-05, + "loss": 2.8662, + "step": 42900 + }, + { + "epoch": 0.31, + "learning_rate": 1.689370596999812e-05, + "loss": 2.8561, + "step": 43000 + }, + { + "epoch": 0.31, + "eval_accuracy": 0.4472874758417944, + "eval_loss": 2.879122018814087, + "eval_runtime": 29.1494, + "eval_samples_per_second": 222.406, + "eval_steps_per_second": 2.333, + "step": 43000 + }, + { + "epoch": 0.31, + "learning_rate": 1.6886473115479757e-05, + "loss": 2.8746, + "step": 43100 + }, + { + "epoch": 0.31, + "learning_rate": 1.687924026096139e-05, + "loss": 2.8657, + "step": 43200 + }, + { + "epoch": 0.31, + "learning_rate": 1.6872007406443027e-05, + "loss": 2.8655, + "step": 43300 + }, + { + "epoch": 0.31, + "learning_rate": 1.6864774551924663e-05, + "loss": 2.8716, + "step": 43400 + }, + { + "epoch": 0.31, + "learning_rate": 1.68575416974063e-05, + "loss": 2.8701, + "step": 43500 + }, + { + "epoch": 0.32, + "learning_rate": 1.6850308842887936e-05, + "loss": 2.8669, + "step": 43600 + }, + { + "epoch": 0.32, + "learning_rate": 1.684307598836957e-05, + "loss": 2.8615, + "step": 43700 + }, + { + "epoch": 0.32, + "learning_rate": 1.6835843133851206e-05, + "loss": 2.8664, + "step": 43800 + }, + { + "epoch": 0.32, + "learning_rate": 1.6828610279332842e-05, + "loss": 2.8594, + "step": 43900 + }, + { + "epoch": 0.32, + "learning_rate": 1.6821449753359664e-05, + "loss": 2.8601, + "step": 44000 + }, + { + "epoch": 0.32, + "eval_accuracy": 0.4477096962493157, + "eval_loss": 2.8765430450439453, + "eval_runtime": 29.4041, + "eval_samples_per_second": 220.48, + "eval_steps_per_second": 2.313, + "step": 44000 + }, + { + "epoch": 0.32, + "learning_rate": 1.6814216898841297e-05, + "loss": 2.8605, + "step": 44100 + }, + { + "epoch": 0.32, + "learning_rate": 1.6806984044322933e-05, + "loss": 2.8665, + "step": 44200 + }, + { + "epoch": 0.32, + "learning_rate": 1.679975118980457e-05, + "loss": 2.8647, + "step": 44300 + }, + { + "epoch": 0.32, + "learning_rate": 1.6792518335286206e-05, + "loss": 2.8585, + "step": 44400 + }, + { + "epoch": 0.32, + "learning_rate": 1.678528548076784e-05, + "loss": 2.8644, + "step": 44500 + }, + { + "epoch": 0.32, + "learning_rate": 1.6778052626249476e-05, + "loss": 2.8634, + "step": 44600 + }, + { + "epoch": 0.32, + "learning_rate": 1.6770819771731113e-05, + "loss": 2.8644, + "step": 44700 + }, + { + "epoch": 0.32, + "learning_rate": 1.676358691721275e-05, + "loss": 2.8555, + "step": 44800 + }, + { + "epoch": 0.32, + "learning_rate": 1.6756354062694386e-05, + "loss": 2.8591, + "step": 44900 + }, + { + "epoch": 0.33, + "learning_rate": 1.6749193536721204e-05, + "loss": 2.8518, + "step": 45000 + }, + { + "epoch": 0.33, + "eval_accuracy": 0.44793955836229293, + "eval_loss": 2.8740601539611816, + "eval_runtime": 29.2958, + "eval_samples_per_second": 221.294, + "eval_steps_per_second": 2.321, + "step": 45000 + }, + { + "epoch": 0.33, + "learning_rate": 1.674196068220284e-05, + "loss": 2.8613, + "step": 45100 + }, + { + "epoch": 0.33, + "learning_rate": 1.6734727827684477e-05, + "loss": 2.8672, + "step": 45200 + }, + { + "epoch": 0.33, + "learning_rate": 1.6727494973166113e-05, + "loss": 2.8603, + "step": 45300 + }, + { + "epoch": 0.33, + "learning_rate": 1.6720262118647746e-05, + "loss": 2.8581, + "step": 45400 + }, + { + "epoch": 0.33, + "learning_rate": 1.6713029264129383e-05, + "loss": 2.8484, + "step": 45500 + }, + { + "epoch": 0.33, + "learning_rate": 1.670579640961102e-05, + "loss": 2.8483, + "step": 45600 + }, + { + "epoch": 0.33, + "learning_rate": 1.6698563555092656e-05, + "loss": 2.8518, + "step": 45700 + }, + { + "epoch": 0.33, + "learning_rate": 1.6691330700574292e-05, + "loss": 2.8528, + "step": 45800 + }, + { + "epoch": 0.33, + "learning_rate": 1.6684097846055925e-05, + "loss": 2.8618, + "step": 45900 + }, + { + "epoch": 0.33, + "learning_rate": 1.6676864991537562e-05, + "loss": 2.8577, + "step": 46000 + }, + { + "epoch": 0.33, + "eval_accuracy": 0.4482964495377049, + "eval_loss": 2.8713486194610596, + "eval_runtime": 27.9428, + "eval_samples_per_second": 232.01, + "eval_steps_per_second": 2.434, + "step": 46000 + }, + { + "epoch": 0.33, + "learning_rate": 1.66696321370192e-05, + "loss": 2.8578, + "step": 46100 + }, + { + "epoch": 0.33, + "learning_rate": 1.6662399282500835e-05, + "loss": 2.8581, + "step": 46200 + }, + { + "epoch": 0.33, + "learning_rate": 1.665516642798247e-05, + "loss": 2.8532, + "step": 46300 + }, + { + "epoch": 0.34, + "learning_rate": 1.6647933573464105e-05, + "loss": 2.8543, + "step": 46400 + }, + { + "epoch": 0.34, + "learning_rate": 1.664070071894574e-05, + "loss": 2.865, + "step": 46500 + }, + { + "epoch": 0.34, + "learning_rate": 1.6633467864427378e-05, + "loss": 2.8621, + "step": 46600 + }, + { + "epoch": 0.34, + "learning_rate": 1.6626235009909014e-05, + "loss": 2.8531, + "step": 46700 + }, + { + "epoch": 0.34, + "learning_rate": 1.6619002155390647e-05, + "loss": 2.8576, + "step": 46800 + }, + { + "epoch": 0.34, + "learning_rate": 1.6611769300872284e-05, + "loss": 2.8607, + "step": 46900 + }, + { + "epoch": 0.34, + "learning_rate": 1.660453644635392e-05, + "loss": 2.8588, + "step": 47000 + }, + { + "epoch": 0.34, + "eval_accuracy": 0.44838053067903083, + "eval_loss": 2.869096040725708, + "eval_runtime": 27.7995, + "eval_samples_per_second": 233.206, + "eval_steps_per_second": 2.446, + "step": 47000 + }, + { + "epoch": 0.34, + "learning_rate": 1.6597448248925923e-05, + "loss": 2.8548, + "step": 47100 + }, + { + "epoch": 0.34, + "learning_rate": 1.659021539440756e-05, + "loss": 2.8561, + "step": 47200 + }, + { + "epoch": 0.34, + "learning_rate": 1.6582982539889193e-05, + "loss": 2.8538, + "step": 47300 + }, + { + "epoch": 0.34, + "learning_rate": 1.657574968537083e-05, + "loss": 2.851, + "step": 47400 + }, + { + "epoch": 0.34, + "learning_rate": 1.6568516830852466e-05, + "loss": 2.8456, + "step": 47500 + }, + { + "epoch": 0.34, + "learning_rate": 1.6561283976334102e-05, + "loss": 2.8511, + "step": 47600 + }, + { + "epoch": 0.35, + "learning_rate": 1.655405112181574e-05, + "loss": 2.8498, + "step": 47700 + }, + { + "epoch": 0.35, + "learning_rate": 1.6546818267297372e-05, + "loss": 2.8539, + "step": 47800 + }, + { + "epoch": 0.35, + "learning_rate": 1.653958541277901e-05, + "loss": 2.8587, + "step": 47900 + }, + { + "epoch": 0.35, + "learning_rate": 1.6532352558260645e-05, + "loss": 2.8584, + "step": 48000 + }, + { + "epoch": 0.35, + "eval_accuracy": 0.44867330242292813, + "eval_loss": 2.8666162490844727, + "eval_runtime": 29.6889, + "eval_samples_per_second": 218.364, + "eval_steps_per_second": 2.29, + "step": 48000 + }, + { + "epoch": 0.35, + "learning_rate": 1.652511970374228e-05, + "loss": 2.8576, + "step": 48100 + }, + { + "epoch": 0.35, + "learning_rate": 1.6517886849223915e-05, + "loss": 2.8559, + "step": 48200 + }, + { + "epoch": 0.35, + "learning_rate": 1.651065399470555e-05, + "loss": 2.8476, + "step": 48300 + }, + { + "epoch": 0.35, + "learning_rate": 1.6503421140187188e-05, + "loss": 2.8528, + "step": 48400 + }, + { + "epoch": 0.35, + "learning_rate": 1.6496188285668824e-05, + "loss": 2.8514, + "step": 48500 + }, + { + "epoch": 0.35, + "learning_rate": 1.648895543115046e-05, + "loss": 2.8468, + "step": 48600 + }, + { + "epoch": 0.35, + "learning_rate": 1.6481722576632094e-05, + "loss": 2.8479, + "step": 48700 + }, + { + "epoch": 0.35, + "learning_rate": 1.647448972211373e-05, + "loss": 2.858, + "step": 48800 + }, + { + "epoch": 0.35, + "learning_rate": 1.6467256867595367e-05, + "loss": 2.8567, + "step": 48900 + }, + { + "epoch": 0.35, + "learning_rate": 1.6460024013077003e-05, + "loss": 2.8527, + "step": 49000 + }, + { + "epoch": 0.35, + "eval_accuracy": 0.448782789376741, + "eval_loss": 2.864643096923828, + "eval_runtime": 27.8655, + "eval_samples_per_second": 232.653, + "eval_steps_per_second": 2.44, + "step": 49000 + }, + { + "epoch": 0.36, + "learning_rate": 1.645279115855864e-05, + "loss": 2.8534, + "step": 49100 + }, + { + "epoch": 0.36, + "learning_rate": 1.6445558304040273e-05, + "loss": 2.8563, + "step": 49200 + }, + { + "epoch": 0.36, + "learning_rate": 1.643832544952191e-05, + "loss": 2.8536, + "step": 49300 + }, + { + "epoch": 0.36, + "learning_rate": 1.6431092595003546e-05, + "loss": 2.8378, + "step": 49400 + }, + { + "epoch": 0.36, + "learning_rate": 1.6423932069030364e-05, + "loss": 2.8455, + "step": 49500 + }, + { + "epoch": 0.36, + "learning_rate": 1.6416699214512e-05, + "loss": 2.8519, + "step": 49600 + }, + { + "epoch": 0.36, + "learning_rate": 1.6409466359993634e-05, + "loss": 2.8505, + "step": 49700 + }, + { + "epoch": 0.36, + "learning_rate": 1.640223350547527e-05, + "loss": 2.8437, + "step": 49800 + }, + { + "epoch": 0.36, + "learning_rate": 1.6395000650956907e-05, + "loss": 2.8428, + "step": 49900 + }, + { + "epoch": 0.36, + "learning_rate": 1.6387767796438543e-05, + "loss": 2.8425, + "step": 50000 + }, + { + "epoch": 0.36, + "eval_accuracy": 0.44904108180369173, + "eval_loss": 2.8624136447906494, + "eval_runtime": 32.7107, + "eval_samples_per_second": 198.192, + "eval_steps_per_second": 2.079, + "step": 50000 + }, + { + "epoch": 0.36, + "learning_rate": 1.638053494192018e-05, + "loss": 2.8406, + "step": 50100 + }, + { + "epoch": 0.36, + "learning_rate": 1.6373302087401813e-05, + "loss": 2.8552, + "step": 50200 + }, + { + "epoch": 0.36, + "learning_rate": 1.636606923288345e-05, + "loss": 2.8473, + "step": 50300 + }, + { + "epoch": 0.36, + "learning_rate": 1.6358836378365086e-05, + "loss": 2.8458, + "step": 50400 + }, + { + "epoch": 0.37, + "learning_rate": 1.6351603523846723e-05, + "loss": 2.8471, + "step": 50500 + }, + { + "epoch": 0.37, + "learning_rate": 1.634437066932836e-05, + "loss": 2.8518, + "step": 50600 + }, + { + "epoch": 0.37, + "learning_rate": 1.6337137814809992e-05, + "loss": 2.848, + "step": 50700 + }, + { + "epoch": 0.37, + "learning_rate": 1.632990496029163e-05, + "loss": 2.8491, + "step": 50800 + }, + { + "epoch": 0.37, + "learning_rate": 1.6322672105773265e-05, + "loss": 2.8433, + "step": 50900 + }, + { + "epoch": 0.37, + "learning_rate": 1.6315439251254902e-05, + "loss": 2.8457, + "step": 51000 + }, + { + "epoch": 0.37, + "eval_accuracy": 0.4494481797037803, + "eval_loss": 2.8601181507110596, + "eval_runtime": 31.3807, + "eval_samples_per_second": 206.592, + "eval_steps_per_second": 2.167, + "step": 51000 + }, + { + "epoch": 0.37, + "learning_rate": 1.6308206396736538e-05, + "loss": 2.8436, + "step": 51100 + }, + { + "epoch": 0.37, + "learning_rate": 1.630097354221817e-05, + "loss": 2.8474, + "step": 51200 + }, + { + "epoch": 0.37, + "learning_rate": 1.6293740687699808e-05, + "loss": 2.8361, + "step": 51300 + }, + { + "epoch": 0.37, + "learning_rate": 1.6286507833181444e-05, + "loss": 2.842, + "step": 51400 + }, + { + "epoch": 0.37, + "learning_rate": 1.627927497866308e-05, + "loss": 2.8527, + "step": 51500 + }, + { + "epoch": 0.37, + "learning_rate": 1.62721144526899e-05, + "loss": 2.8376, + "step": 51600 + }, + { + "epoch": 0.37, + "learning_rate": 1.6264881598171535e-05, + "loss": 2.8461, + "step": 51700 + }, + { + "epoch": 0.37, + "learning_rate": 1.6257721072198357e-05, + "loss": 2.836, + "step": 51800 + }, + { + "epoch": 0.38, + "learning_rate": 1.625048821767999e-05, + "loss": 2.8444, + "step": 51900 + }, + { + "epoch": 0.38, + "learning_rate": 1.6243255363161627e-05, + "loss": 2.849, + "step": 52000 + }, + { + "epoch": 0.38, + "eval_accuracy": 0.4496465870012975, + "eval_loss": 2.8579957485198975, + "eval_runtime": 30.949, + "eval_samples_per_second": 209.474, + "eval_steps_per_second": 2.197, + "step": 52000 + }, + { + "epoch": 0.38, + "learning_rate": 1.6236022508643263e-05, + "loss": 2.8402, + "step": 52100 + }, + { + "epoch": 0.38, + "learning_rate": 1.62287896541249e-05, + "loss": 2.8411, + "step": 52200 + }, + { + "epoch": 0.38, + "learning_rate": 1.6221556799606536e-05, + "loss": 2.8423, + "step": 52300 + }, + { + "epoch": 0.38, + "learning_rate": 1.621432394508817e-05, + "loss": 2.838, + "step": 52400 + }, + { + "epoch": 0.38, + "learning_rate": 1.6207091090569806e-05, + "loss": 2.8415, + "step": 52500 + }, + { + "epoch": 0.38, + "learning_rate": 1.6199858236051442e-05, + "loss": 2.8418, + "step": 52600 + }, + { + "epoch": 0.38, + "learning_rate": 1.619262538153308e-05, + "loss": 2.8366, + "step": 52700 + }, + { + "epoch": 0.38, + "learning_rate": 1.6185392527014715e-05, + "loss": 2.8443, + "step": 52800 + }, + { + "epoch": 0.38, + "learning_rate": 1.617815967249635e-05, + "loss": 2.8395, + "step": 52900 + }, + { + "epoch": 0.38, + "learning_rate": 1.6170926817977985e-05, + "loss": 2.8431, + "step": 53000 + }, + { + "epoch": 0.38, + "eval_accuracy": 0.44991455783300516, + "eval_loss": 2.856027841567993, + "eval_runtime": 31.4773, + "eval_samples_per_second": 205.958, + "eval_steps_per_second": 2.16, + "step": 53000 + }, + { + "epoch": 0.38, + "learning_rate": 1.616369396345962e-05, + "loss": 2.8451, + "step": 53100 + }, + { + "epoch": 0.38, + "learning_rate": 1.6156461108941258e-05, + "loss": 2.8367, + "step": 53200 + }, + { + "epoch": 0.39, + "learning_rate": 1.6149300582968076e-05, + "loss": 2.8403, + "step": 53300 + }, + { + "epoch": 0.39, + "learning_rate": 1.614206772844971e-05, + "loss": 2.8385, + "step": 53400 + }, + { + "epoch": 0.39, + "learning_rate": 1.6134834873931346e-05, + "loss": 2.8323, + "step": 53500 + }, + { + "epoch": 0.39, + "learning_rate": 1.6127602019412982e-05, + "loss": 2.8408, + "step": 53600 + }, + { + "epoch": 0.39, + "learning_rate": 1.612036916489462e-05, + "loss": 2.8449, + "step": 53700 + }, + { + "epoch": 0.39, + "learning_rate": 1.6113136310376255e-05, + "loss": 2.8424, + "step": 53800 + }, + { + "epoch": 0.39, + "learning_rate": 1.6105903455857888e-05, + "loss": 2.8384, + "step": 53900 + }, + { + "epoch": 0.39, + "learning_rate": 1.609874292988471e-05, + "loss": 2.8463, + "step": 54000 + }, + { + "epoch": 0.39, + "eval_accuracy": 0.45014865424806355, + "eval_loss": 2.8539552688598633, + "eval_runtime": 31.4595, + "eval_samples_per_second": 206.075, + "eval_steps_per_second": 2.162, + "step": 54000 + }, + { + "epoch": 0.39, + "learning_rate": 1.6091510075366346e-05, + "loss": 2.8336, + "step": 54100 + }, + { + "epoch": 0.39, + "learning_rate": 1.6084277220847983e-05, + "loss": 2.8433, + "step": 54200 + }, + { + "epoch": 0.39, + "learning_rate": 1.6077044366329616e-05, + "loss": 2.8454, + "step": 54300 + }, + { + "epoch": 0.39, + "learning_rate": 1.6069811511811252e-05, + "loss": 2.846, + "step": 54400 + }, + { + "epoch": 0.39, + "learning_rate": 1.606257865729289e-05, + "loss": 2.8422, + "step": 54500 + }, + { + "epoch": 0.39, + "learning_rate": 1.6055345802774525e-05, + "loss": 2.8489, + "step": 54600 + }, + { + "epoch": 0.4, + "learning_rate": 1.6048112948256162e-05, + "loss": 2.8397, + "step": 54700 + }, + { + "epoch": 0.4, + "learning_rate": 1.6040880093737795e-05, + "loss": 2.8381, + "step": 54800 + }, + { + "epoch": 0.4, + "learning_rate": 1.603364723921943e-05, + "loss": 2.8319, + "step": 54900 + }, + { + "epoch": 0.4, + "learning_rate": 1.6026414384701068e-05, + "loss": 2.8437, + "step": 55000 + }, + { + "epoch": 0.4, + "eval_accuracy": 0.45043900639077167, + "eval_loss": 2.8520514965057373, + "eval_runtime": 28.1189, + "eval_samples_per_second": 230.557, + "eval_steps_per_second": 2.418, + "step": 55000 + }, + { + "epoch": 0.4, + "learning_rate": 1.6019181530182705e-05, + "loss": 2.8415, + "step": 55100 + }, + { + "epoch": 0.4, + "learning_rate": 1.601194867566434e-05, + "loss": 2.8417, + "step": 55200 + }, + { + "epoch": 0.4, + "learning_rate": 1.6004715821145974e-05, + "loss": 2.8385, + "step": 55300 + }, + { + "epoch": 0.4, + "learning_rate": 1.599748296662761e-05, + "loss": 2.8361, + "step": 55400 + }, + { + "epoch": 0.4, + "learning_rate": 1.5990250112109247e-05, + "loss": 2.8356, + "step": 55500 + }, + { + "epoch": 0.4, + "learning_rate": 1.5983017257590884e-05, + "loss": 2.8419, + "step": 55600 + }, + { + "epoch": 0.4, + "learning_rate": 1.5975784403072517e-05, + "loss": 2.8403, + "step": 55700 + }, + { + "epoch": 0.4, + "learning_rate": 1.5968551548554153e-05, + "loss": 2.8414, + "step": 55800 + }, + { + "epoch": 0.4, + "learning_rate": 1.596131869403579e-05, + "loss": 2.8342, + "step": 55900 + }, + { + "epoch": 0.41, + "learning_rate": 1.5954085839517426e-05, + "loss": 2.845, + "step": 56000 + }, + { + "epoch": 0.41, + "eval_accuracy": 0.45049465721812404, + "eval_loss": 2.850494146347046, + "eval_runtime": 27.8769, + "eval_samples_per_second": 232.558, + "eval_steps_per_second": 2.439, + "step": 56000 + }, + { + "epoch": 0.41, + "learning_rate": 1.5946852984999063e-05, + "loss": 2.8348, + "step": 56100 + }, + { + "epoch": 0.41, + "learning_rate": 1.5939620130480696e-05, + "loss": 2.8362, + "step": 56200 + }, + { + "epoch": 0.41, + "learning_rate": 1.5932387275962333e-05, + "loss": 2.8346, + "step": 56300 + }, + { + "epoch": 0.41, + "learning_rate": 1.592515442144397e-05, + "loss": 2.8384, + "step": 56400 + }, + { + "epoch": 0.41, + "learning_rate": 1.5917921566925606e-05, + "loss": 2.8455, + "step": 56500 + }, + { + "epoch": 0.41, + "learning_rate": 1.5910761040952424e-05, + "loss": 2.8325, + "step": 56600 + }, + { + "epoch": 0.41, + "learning_rate": 1.590352818643406e-05, + "loss": 2.8399, + "step": 56700 + }, + { + "epoch": 0.41, + "learning_rate": 1.5896295331915693e-05, + "loss": 2.8294, + "step": 56800 + }, + { + "epoch": 0.41, + "learning_rate": 1.588906247739733e-05, + "loss": 2.8307, + "step": 56900 + }, + { + "epoch": 0.41, + "learning_rate": 1.5881829622878966e-05, + "loss": 2.8218, + "step": 57000 + }, + { + "epoch": 0.41, + "eval_accuracy": 0.4507807750587509, + "eval_loss": 2.848620653152466, + "eval_runtime": 29.8126, + "eval_samples_per_second": 217.458, + "eval_steps_per_second": 2.281, + "step": 57000 + }, + { + "epoch": 0.41, + "learning_rate": 1.5874596768360603e-05, + "loss": 2.8282, + "step": 57100 + }, + { + "epoch": 0.41, + "learning_rate": 1.5867363913842236e-05, + "loss": 2.8378, + "step": 57200 + }, + { + "epoch": 0.41, + "learning_rate": 1.5860131059323872e-05, + "loss": 2.8355, + "step": 57300 + }, + { + "epoch": 0.42, + "learning_rate": 1.585289820480551e-05, + "loss": 2.8298, + "step": 57400 + }, + { + "epoch": 0.42, + "learning_rate": 1.5845665350287145e-05, + "loss": 2.8378, + "step": 57500 + }, + { + "epoch": 0.42, + "learning_rate": 1.5838432495768782e-05, + "loss": 2.8199, + "step": 57600 + }, + { + "epoch": 0.42, + "learning_rate": 1.58312719697956e-05, + "loss": 2.8338, + "step": 57700 + }, + { + "epoch": 0.42, + "learning_rate": 1.5824039115277237e-05, + "loss": 2.8349, + "step": 57800 + }, + { + "epoch": 0.42, + "learning_rate": 1.5816806260758873e-05, + "loss": 2.829, + "step": 57900 + }, + { + "epoch": 0.42, + "learning_rate": 1.580957340624051e-05, + "loss": 2.8366, + "step": 58000 + }, + { + "epoch": 0.42, + "eval_accuracy": 0.4509053845199965, + "eval_loss": 2.8470458984375, + "eval_runtime": 29.7851, + "eval_samples_per_second": 217.659, + "eval_steps_per_second": 2.283, + "step": 58000 + }, + { + "epoch": 0.42, + "learning_rate": 1.5802340551722143e-05, + "loss": 2.8331, + "step": 58100 + }, + { + "epoch": 0.42, + "learning_rate": 1.579510769720378e-05, + "loss": 2.8348, + "step": 58200 + }, + { + "epoch": 0.42, + "learning_rate": 1.5787874842685416e-05, + "loss": 2.8353, + "step": 58300 + }, + { + "epoch": 0.42, + "learning_rate": 1.5780641988167052e-05, + "loss": 2.8369, + "step": 58400 + }, + { + "epoch": 0.42, + "learning_rate": 1.577340913364869e-05, + "loss": 2.8315, + "step": 58500 + }, + { + "epoch": 0.42, + "learning_rate": 1.5766176279130322e-05, + "loss": 2.8344, + "step": 58600 + }, + { + "epoch": 0.42, + "learning_rate": 1.5759015753157143e-05, + "loss": 2.8307, + "step": 58700 + }, + { + "epoch": 0.43, + "learning_rate": 1.575178289863878e-05, + "loss": 2.8218, + "step": 58800 + }, + { + "epoch": 0.43, + "learning_rate": 1.5744550044120413e-05, + "loss": 2.8299, + "step": 58900 + }, + { + "epoch": 0.43, + "learning_rate": 1.573731718960205e-05, + "loss": 2.8339, + "step": 59000 + }, + { + "epoch": 0.43, + "eval_accuracy": 0.45116549164783915, + "eval_loss": 2.8453407287597656, + "eval_runtime": 29.6273, + "eval_samples_per_second": 218.818, + "eval_steps_per_second": 2.295, + "step": 59000 + }, + { + "epoch": 0.43, + "learning_rate": 1.5730084335083686e-05, + "loss": 2.8327, + "step": 59100 + }, + { + "epoch": 0.43, + "learning_rate": 1.5722851480565323e-05, + "loss": 2.8305, + "step": 59200 + }, + { + "epoch": 0.43, + "learning_rate": 1.571561862604696e-05, + "loss": 2.8327, + "step": 59300 + }, + { + "epoch": 0.43, + "learning_rate": 1.5708385771528592e-05, + "loss": 2.8264, + "step": 59400 + }, + { + "epoch": 0.43, + "learning_rate": 1.570115291701023e-05, + "loss": 2.8368, + "step": 59500 + }, + { + "epoch": 0.43, + "learning_rate": 1.5693920062491865e-05, + "loss": 2.8295, + "step": 59600 + }, + { + "epoch": 0.43, + "learning_rate": 1.5686687207973502e-05, + "loss": 2.8248, + "step": 59700 + }, + { + "epoch": 0.43, + "learning_rate": 1.5679454353455138e-05, + "loss": 2.826, + "step": 59800 + }, + { + "epoch": 0.43, + "learning_rate": 1.567222149893677e-05, + "loss": 2.8306, + "step": 59900 + }, + { + "epoch": 0.43, + "learning_rate": 1.5664988644418408e-05, + "loss": 2.8338, + "step": 60000 + }, + { + "epoch": 0.43, + "eval_accuracy": 0.4511189143249464, + "eval_loss": 2.843701124191284, + "eval_runtime": 30.181, + "eval_samples_per_second": 214.804, + "eval_steps_per_second": 2.253, + "step": 60000 + }, + { + "epoch": 0.43, + "learning_rate": 1.5657755789900044e-05, + "loss": 2.8309, + "step": 60100 + }, + { + "epoch": 0.44, + "learning_rate": 1.5650595263926862e-05, + "loss": 2.8288, + "step": 60200 + }, + { + "epoch": 0.44, + "learning_rate": 1.56433624094085e-05, + "loss": 2.8238, + "step": 60300 + }, + { + "epoch": 0.44, + "learning_rate": 1.5636129554890132e-05, + "loss": 2.8257, + "step": 60400 + }, + { + "epoch": 0.44, + "learning_rate": 1.562889670037177e-05, + "loss": 2.8334, + "step": 60500 + }, + { + "epoch": 0.44, + "learning_rate": 1.5621663845853405e-05, + "loss": 2.8232, + "step": 60600 + }, + { + "epoch": 0.44, + "learning_rate": 1.561443099133504e-05, + "loss": 2.8356, + "step": 60700 + }, + { + "epoch": 0.44, + "learning_rate": 1.5607198136816678e-05, + "loss": 2.8199, + "step": 60800 + }, + { + "epoch": 0.44, + "learning_rate": 1.559996528229831e-05, + "loss": 2.8215, + "step": 60900 + }, + { + "epoch": 0.44, + "learning_rate": 1.5592732427779948e-05, + "loss": 2.8237, + "step": 61000 + }, + { + "epoch": 0.44, + "eval_accuracy": 0.4512713491998681, + "eval_loss": 2.842045783996582, + "eval_runtime": 33.0993, + "eval_samples_per_second": 195.865, + "eval_steps_per_second": 2.054, + "step": 61000 + }, + { + "epoch": 0.44, + "learning_rate": 1.5585499573261584e-05, + "loss": 2.8305, + "step": 61100 + }, + { + "epoch": 0.44, + "learning_rate": 1.5578339047288406e-05, + "loss": 2.823, + "step": 61200 + }, + { + "epoch": 0.44, + "learning_rate": 1.557110619277004e-05, + "loss": 2.839, + "step": 61300 + }, + { + "epoch": 0.44, + "learning_rate": 1.5563873338251675e-05, + "loss": 2.8265, + "step": 61400 + }, + { + "epoch": 0.44, + "learning_rate": 1.5556640483733312e-05, + "loss": 2.8256, + "step": 61500 + }, + { + "epoch": 0.45, + "learning_rate": 1.554940762921495e-05, + "loss": 2.8295, + "step": 61600 + }, + { + "epoch": 0.45, + "learning_rate": 1.5542174774696585e-05, + "loss": 2.8207, + "step": 61700 + }, + { + "epoch": 0.45, + "learning_rate": 1.5534941920178218e-05, + "loss": 2.8201, + "step": 61800 + }, + { + "epoch": 0.45, + "learning_rate": 1.5527709065659855e-05, + "loss": 2.8283, + "step": 61900 + }, + { + "epoch": 0.45, + "learning_rate": 1.552047621114149e-05, + "loss": 2.8334, + "step": 62000 + }, + { + "epoch": 0.45, + "eval_accuracy": 0.45149939661195343, + "eval_loss": 2.840452194213867, + "eval_runtime": 29.9094, + "eval_samples_per_second": 216.755, + "eval_steps_per_second": 2.274, + "step": 62000 + }, + { + "epoch": 0.45, + "learning_rate": 1.5513243356623128e-05, + "loss": 2.8248, + "step": 62100 + }, + { + "epoch": 0.45, + "learning_rate": 1.5506010502104764e-05, + "loss": 2.8386, + "step": 62200 + }, + { + "epoch": 0.45, + "learning_rate": 1.5498777647586397e-05, + "loss": 2.8242, + "step": 62300 + }, + { + "epoch": 0.45, + "learning_rate": 1.5491544793068034e-05, + "loss": 2.8268, + "step": 62400 + }, + { + "epoch": 0.45, + "learning_rate": 1.548431193854967e-05, + "loss": 2.8286, + "step": 62500 + }, + { + "epoch": 0.45, + "learning_rate": 1.5477079084031307e-05, + "loss": 2.8274, + "step": 62600 + }, + { + "epoch": 0.45, + "learning_rate": 1.546984622951294e-05, + "loss": 2.8292, + "step": 62700 + }, + { + "epoch": 0.45, + "learning_rate": 1.5462613374994576e-05, + "loss": 2.8183, + "step": 62800 + }, + { + "epoch": 0.45, + "learning_rate": 1.5455380520476213e-05, + "loss": 2.8279, + "step": 62900 + }, + { + "epoch": 0.46, + "learning_rate": 1.544814766595785e-05, + "loss": 2.8229, + "step": 63000 + }, + { + "epoch": 0.46, + "eval_accuracy": 0.4518381407784462, + "eval_loss": 2.8387556076049805, + "eval_runtime": 29.7722, + "eval_samples_per_second": 217.753, + "eval_steps_per_second": 2.284, + "step": 63000 + }, + { + "epoch": 0.46, + "learning_rate": 1.5440914811439486e-05, + "loss": 2.8227, + "step": 63100 + }, + { + "epoch": 0.46, + "learning_rate": 1.543368195692112e-05, + "loss": 2.8249, + "step": 63200 + }, + { + "epoch": 0.46, + "learning_rate": 1.5426449102402756e-05, + "loss": 2.8246, + "step": 63300 + }, + { + "epoch": 0.46, + "learning_rate": 1.5419288576429574e-05, + "loss": 2.8265, + "step": 63400 + }, + { + "epoch": 0.46, + "learning_rate": 1.541205572191121e-05, + "loss": 2.824, + "step": 63500 + }, + { + "epoch": 0.46, + "learning_rate": 1.5404822867392847e-05, + "loss": 2.8188, + "step": 63600 + }, + { + "epoch": 0.46, + "learning_rate": 1.5397590012874483e-05, + "loss": 2.8234, + "step": 63700 + }, + { + "epoch": 0.46, + "learning_rate": 1.5390357158356116e-05, + "loss": 2.8216, + "step": 63800 + }, + { + "epoch": 0.46, + "learning_rate": 1.5383124303837753e-05, + "loss": 2.8256, + "step": 63900 + }, + { + "epoch": 0.46, + "learning_rate": 1.537589144931939e-05, + "loss": 2.8214, + "step": 64000 + }, + { + "epoch": 0.46, + "eval_accuracy": 0.45194218362958327, + "eval_loss": 2.837294340133667, + "eval_runtime": 33.0659, + "eval_samples_per_second": 196.063, + "eval_steps_per_second": 2.057, + "step": 64000 + }, + { + "epoch": 0.46, + "learning_rate": 1.5368658594801026e-05, + "loss": 2.8143, + "step": 64100 + }, + { + "epoch": 0.46, + "learning_rate": 1.536142574028266e-05, + "loss": 2.8199, + "step": 64200 + }, + { + "epoch": 0.47, + "learning_rate": 1.5354192885764295e-05, + "loss": 2.8217, + "step": 64300 + }, + { + "epoch": 0.47, + "learning_rate": 1.5346960031245932e-05, + "loss": 2.8202, + "step": 64400 + }, + { + "epoch": 0.47, + "learning_rate": 1.533972717672757e-05, + "loss": 2.8211, + "step": 64500 + }, + { + "epoch": 0.47, + "learning_rate": 1.5332566650754386e-05, + "loss": 2.8251, + "step": 64600 + }, + { + "epoch": 0.47, + "learning_rate": 1.5325333796236023e-05, + "loss": 2.8261, + "step": 64700 + }, + { + "epoch": 0.47, + "learning_rate": 1.531810094171766e-05, + "loss": 2.8217, + "step": 64800 + }, + { + "epoch": 0.47, + "learning_rate": 1.5310868087199296e-05, + "loss": 2.8227, + "step": 64900 + }, + { + "epoch": 0.47, + "learning_rate": 1.5303635232680933e-05, + "loss": 2.8245, + "step": 65000 + }, + { + "epoch": 0.47, + "eval_accuracy": 0.45223193087199404, + "eval_loss": 2.835636854171753, + "eval_runtime": 29.5456, + "eval_samples_per_second": 219.424, + "eval_steps_per_second": 2.302, + "step": 65000 + }, + { + "epoch": 0.47, + "learning_rate": 1.5296402378162566e-05, + "loss": 2.8268, + "step": 65100 + }, + { + "epoch": 0.47, + "learning_rate": 1.5289169523644202e-05, + "loss": 2.8265, + "step": 65200 + }, + { + "epoch": 0.47, + "learning_rate": 1.528193666912584e-05, + "loss": 2.8221, + "step": 65300 + }, + { + "epoch": 0.47, + "learning_rate": 1.5274703814607475e-05, + "loss": 2.8193, + "step": 65400 + }, + { + "epoch": 0.47, + "learning_rate": 1.5267470960089112e-05, + "loss": 2.8281, + "step": 65500 + }, + { + "epoch": 0.47, + "learning_rate": 1.526031043411593e-05, + "loss": 2.8179, + "step": 65600 + }, + { + "epoch": 0.48, + "learning_rate": 1.5253077579597566e-05, + "loss": 2.8206, + "step": 65700 + }, + { + "epoch": 0.48, + "learning_rate": 1.5245844725079201e-05, + "loss": 2.8187, + "step": 65800 + }, + { + "epoch": 0.48, + "learning_rate": 1.5238611870560838e-05, + "loss": 2.8146, + "step": 65900 + }, + { + "epoch": 0.48, + "learning_rate": 1.5231379016042474e-05, + "loss": 2.822, + "step": 66000 + }, + { + "epoch": 0.48, + "eval_accuracy": 0.4523559354329423, + "eval_loss": 2.834322929382324, + "eval_runtime": 27.8576, + "eval_samples_per_second": 232.719, + "eval_steps_per_second": 2.441, + "step": 66000 + }, + { + "epoch": 0.48, + "learning_rate": 1.5224146161524109e-05, + "loss": 2.8221, + "step": 66100 + }, + { + "epoch": 0.48, + "learning_rate": 1.5216913307005745e-05, + "loss": 2.8157, + "step": 66200 + }, + { + "epoch": 0.48, + "learning_rate": 1.520968045248738e-05, + "loss": 2.8233, + "step": 66300 + }, + { + "epoch": 0.48, + "learning_rate": 1.5202447597969017e-05, + "loss": 2.8273, + "step": 66400 + }, + { + "epoch": 0.48, + "learning_rate": 1.5195214743450653e-05, + "loss": 2.8149, + "step": 66500 + }, + { + "epoch": 0.48, + "learning_rate": 1.5187981888932288e-05, + "loss": 2.8215, + "step": 66600 + }, + { + "epoch": 0.48, + "learning_rate": 1.5180821362959106e-05, + "loss": 2.822, + "step": 66700 + }, + { + "epoch": 0.48, + "learning_rate": 1.5173588508440743e-05, + "loss": 2.8222, + "step": 66800 + }, + { + "epoch": 0.48, + "learning_rate": 1.5166355653922378e-05, + "loss": 2.8245, + "step": 66900 + }, + { + "epoch": 0.48, + "learning_rate": 1.5159122799404014e-05, + "loss": 2.8139, + "step": 67000 + }, + { + "epoch": 0.48, + "eval_accuracy": 0.45257007013818945, + "eval_loss": 2.833111524581909, + "eval_runtime": 29.3281, + "eval_samples_per_second": 221.051, + "eval_steps_per_second": 2.319, + "step": 67000 + }, + { + "epoch": 0.49, + "learning_rate": 1.5151889944885649e-05, + "loss": 2.8134, + "step": 67100 + }, + { + "epoch": 0.49, + "learning_rate": 1.5144657090367285e-05, + "loss": 2.812, + "step": 67200 + }, + { + "epoch": 0.49, + "learning_rate": 1.513742423584892e-05, + "loss": 2.8225, + "step": 67300 + }, + { + "epoch": 0.49, + "learning_rate": 1.5130191381330557e-05, + "loss": 2.8177, + "step": 67400 + }, + { + "epoch": 0.49, + "learning_rate": 1.5122958526812193e-05, + "loss": 2.8167, + "step": 67500 + }, + { + "epoch": 0.49, + "learning_rate": 1.5115725672293828e-05, + "loss": 2.8189, + "step": 67600 + }, + { + "epoch": 0.49, + "learning_rate": 1.5108565146320648e-05, + "loss": 2.8239, + "step": 67700 + }, + { + "epoch": 0.49, + "learning_rate": 1.5101332291802284e-05, + "loss": 2.8216, + "step": 67800 + }, + { + "epoch": 0.49, + "learning_rate": 1.509409943728392e-05, + "loss": 2.8193, + "step": 67900 + }, + { + "epoch": 0.49, + "learning_rate": 1.5086866582765556e-05, + "loss": 2.8201, + "step": 68000 + }, + { + "epoch": 0.49, + "eval_accuracy": 0.45263963367237997, + "eval_loss": 2.8317487239837646, + "eval_runtime": 30.249, + "eval_samples_per_second": 214.321, + "eval_steps_per_second": 2.248, + "step": 68000 + }, + { + "epoch": 0.49, + "learning_rate": 1.5079633728247192e-05, + "loss": 2.8189, + "step": 68100 + }, + { + "epoch": 0.49, + "learning_rate": 1.5072400873728827e-05, + "loss": 2.8271, + "step": 68200 + }, + { + "epoch": 0.49, + "learning_rate": 1.5065168019210463e-05, + "loss": 2.8229, + "step": 68300 + }, + { + "epoch": 0.49, + "learning_rate": 1.5057935164692098e-05, + "loss": 2.8144, + "step": 68400 + }, + { + "epoch": 0.5, + "learning_rate": 1.5050702310173735e-05, + "loss": 2.8177, + "step": 68500 + }, + { + "epoch": 0.5, + "learning_rate": 1.5043469455655371e-05, + "loss": 2.8075, + "step": 68600 + }, + { + "epoch": 0.5, + "learning_rate": 1.5036236601137006e-05, + "loss": 2.818, + "step": 68700 + }, + { + "epoch": 0.5, + "learning_rate": 1.5029003746618643e-05, + "loss": 2.8183, + "step": 68800 + }, + { + "epoch": 0.5, + "learning_rate": 1.5021915549190644e-05, + "loss": 2.8202, + "step": 68900 + }, + { + "epoch": 0.5, + "learning_rate": 1.501468269467228e-05, + "loss": 2.8132, + "step": 69000 + }, + { + "epoch": 0.5, + "eval_accuracy": 0.4527346030190574, + "eval_loss": 2.830460548400879, + "eval_runtime": 31.7655, + "eval_samples_per_second": 204.09, + "eval_steps_per_second": 2.141, + "step": 69000 + }, + { + "epoch": 0.5, + "learning_rate": 1.5007449840153915e-05, + "loss": 2.8269, + "step": 69100 + }, + { + "epoch": 0.5, + "learning_rate": 1.5000216985635552e-05, + "loss": 2.8226, + "step": 69200 + }, + { + "epoch": 0.5, + "learning_rate": 1.4992984131117188e-05, + "loss": 2.8165, + "step": 69300 + }, + { + "epoch": 0.5, + "learning_rate": 1.4985751276598823e-05, + "loss": 2.8167, + "step": 69400 + }, + { + "epoch": 0.5, + "learning_rate": 1.497851842208046e-05, + "loss": 2.8201, + "step": 69500 + }, + { + "epoch": 0.5, + "learning_rate": 1.4971285567562094e-05, + "loss": 2.814, + "step": 69600 + }, + { + "epoch": 0.5, + "learning_rate": 1.4964052713043731e-05, + "loss": 2.811, + "step": 69700 + }, + { + "epoch": 0.5, + "learning_rate": 1.4956819858525366e-05, + "loss": 2.8127, + "step": 69800 + }, + { + "epoch": 0.51, + "learning_rate": 1.4949587004007002e-05, + "loss": 2.8146, + "step": 69900 + }, + { + "epoch": 0.51, + "learning_rate": 1.4942354149488639e-05, + "loss": 2.8138, + "step": 70000 + }, + { + "epoch": 0.51, + "eval_accuracy": 0.45299531504719737, + "eval_loss": 2.8289763927459717, + "eval_runtime": 32.3608, + "eval_samples_per_second": 200.335, + "eval_steps_per_second": 2.101, + "step": 70000 + }, + { + "epoch": 0.51, + "learning_rate": 1.4935121294970274e-05, + "loss": 2.8153, + "step": 70100 + }, + { + "epoch": 0.51, + "learning_rate": 1.492788844045191e-05, + "loss": 2.8128, + "step": 70200 + }, + { + "epoch": 0.51, + "learning_rate": 1.4920655585933545e-05, + "loss": 2.8177, + "step": 70300 + }, + { + "epoch": 0.51, + "learning_rate": 1.4913422731415181e-05, + "loss": 2.8185, + "step": 70400 + }, + { + "epoch": 0.51, + "learning_rate": 1.4906189876896818e-05, + "loss": 2.8144, + "step": 70500 + }, + { + "epoch": 0.51, + "learning_rate": 1.4898957022378453e-05, + "loss": 2.8157, + "step": 70600 + }, + { + "epoch": 0.51, + "learning_rate": 1.489172416786009e-05, + "loss": 2.8074, + "step": 70700 + }, + { + "epoch": 0.51, + "learning_rate": 1.4884491313341724e-05, + "loss": 2.8124, + "step": 70800 + }, + { + "epoch": 0.51, + "learning_rate": 1.487725845882336e-05, + "loss": 2.8143, + "step": 70900 + }, + { + "epoch": 0.51, + "learning_rate": 1.4870025604304995e-05, + "loss": 2.8171, + "step": 71000 + }, + { + "epoch": 0.51, + "eval_accuracy": 0.45300983265433276, + "eval_loss": 2.8278744220733643, + "eval_runtime": 29.4032, + "eval_samples_per_second": 220.486, + "eval_steps_per_second": 2.313, + "step": 71000 + }, + { + "epoch": 0.51, + "learning_rate": 1.4862792749786632e-05, + "loss": 2.8229, + "step": 71100 + }, + { + "epoch": 0.51, + "learning_rate": 1.4855559895268268e-05, + "loss": 2.8153, + "step": 71200 + }, + { + "epoch": 0.52, + "learning_rate": 1.4848327040749903e-05, + "loss": 2.8163, + "step": 71300 + }, + { + "epoch": 0.52, + "learning_rate": 1.484109418623154e-05, + "loss": 2.8162, + "step": 71400 + }, + { + "epoch": 0.52, + "learning_rate": 1.4833861331713175e-05, + "loss": 2.8081, + "step": 71500 + }, + { + "epoch": 0.52, + "learning_rate": 1.4826628477194811e-05, + "loss": 2.8158, + "step": 71600 + }, + { + "epoch": 0.52, + "learning_rate": 1.4819467951221631e-05, + "loss": 2.8137, + "step": 71700 + }, + { + "epoch": 0.52, + "learning_rate": 1.4812235096703267e-05, + "loss": 2.8065, + "step": 71800 + }, + { + "epoch": 0.52, + "learning_rate": 1.4805002242184902e-05, + "loss": 2.8151, + "step": 71900 + }, + { + "epoch": 0.52, + "learning_rate": 1.4797769387666539e-05, + "loss": 2.8123, + "step": 72000 + }, + { + "epoch": 0.52, + "eval_accuracy": 0.4531562185262814, + "eval_loss": 2.8267478942871094, + "eval_runtime": 28.1731, + "eval_samples_per_second": 230.113, + "eval_steps_per_second": 2.414, + "step": 72000 + }, + { + "epoch": 0.52, + "learning_rate": 1.4790536533148174e-05, + "loss": 2.8118, + "step": 72100 + }, + { + "epoch": 0.52, + "learning_rate": 1.478330367862981e-05, + "loss": 2.8177, + "step": 72200 + }, + { + "epoch": 0.52, + "learning_rate": 1.4776070824111447e-05, + "loss": 2.8059, + "step": 72300 + }, + { + "epoch": 0.52, + "learning_rate": 1.4768837969593081e-05, + "loss": 2.8214, + "step": 72400 + }, + { + "epoch": 0.52, + "learning_rate": 1.4761605115074718e-05, + "loss": 2.8068, + "step": 72500 + }, + { + "epoch": 0.53, + "learning_rate": 1.4754372260556353e-05, + "loss": 2.8163, + "step": 72600 + }, + { + "epoch": 0.53, + "learning_rate": 1.474713940603799e-05, + "loss": 2.8139, + "step": 72700 + }, + { + "epoch": 0.53, + "learning_rate": 1.4739978880064807e-05, + "loss": 2.8139, + "step": 72800 + }, + { + "epoch": 0.53, + "learning_rate": 1.4732746025546442e-05, + "loss": 2.8151, + "step": 72900 + }, + { + "epoch": 0.53, + "learning_rate": 1.4725513171028079e-05, + "loss": 2.8118, + "step": 73000 + }, + { + "epoch": 0.53, + "eval_accuracy": 0.45336127972706897, + "eval_loss": 2.825540781021118, + "eval_runtime": 30.4285, + "eval_samples_per_second": 213.057, + "eval_steps_per_second": 2.235, + "step": 73000 + }, + { + "epoch": 0.53, + "learning_rate": 1.4718280316509715e-05, + "loss": 2.8174, + "step": 73100 + }, + { + "epoch": 0.53, + "learning_rate": 1.471104746199135e-05, + "loss": 2.8049, + "step": 73200 + }, + { + "epoch": 0.53, + "learning_rate": 1.4703814607472986e-05, + "loss": 2.8129, + "step": 73300 + }, + { + "epoch": 0.53, + "learning_rate": 1.4696581752954621e-05, + "loss": 2.8127, + "step": 73400 + }, + { + "epoch": 0.53, + "learning_rate": 1.4689348898436258e-05, + "loss": 2.8125, + "step": 73500 + }, + { + "epoch": 0.53, + "learning_rate": 1.4682116043917893e-05, + "loss": 2.8173, + "step": 73600 + }, + { + "epoch": 0.53, + "learning_rate": 1.4674883189399529e-05, + "loss": 2.8136, + "step": 73700 + }, + { + "epoch": 0.53, + "learning_rate": 1.4667722663426349e-05, + "loss": 2.8116, + "step": 73800 + }, + { + "epoch": 0.53, + "learning_rate": 1.4660489808907985e-05, + "loss": 2.804, + "step": 73900 + }, + { + "epoch": 0.54, + "learning_rate": 1.465325695438962e-05, + "loss": 2.8183, + "step": 74000 + }, + { + "epoch": 0.54, + "eval_accuracy": 0.45355484782220773, + "eval_loss": 2.8243465423583984, + "eval_runtime": 30.6984, + "eval_samples_per_second": 211.184, + "eval_steps_per_second": 2.215, + "step": 74000 + }, + { + "epoch": 0.54, + "learning_rate": 1.4646024099871257e-05, + "loss": 2.8043, + "step": 74100 + }, + { + "epoch": 0.54, + "learning_rate": 1.4638791245352893e-05, + "loss": 2.8099, + "step": 74200 + }, + { + "epoch": 0.54, + "learning_rate": 1.4631558390834528e-05, + "loss": 2.8101, + "step": 74300 + }, + { + "epoch": 0.54, + "learning_rate": 1.4624325536316165e-05, + "loss": 2.8051, + "step": 74400 + }, + { + "epoch": 0.54, + "learning_rate": 1.46170926817978e-05, + "loss": 2.8176, + "step": 74500 + }, + { + "epoch": 0.54, + "learning_rate": 1.4609859827279436e-05, + "loss": 2.826, + "step": 74600 + }, + { + "epoch": 0.54, + "learning_rate": 1.460262697276107e-05, + "loss": 2.7999, + "step": 74700 + }, + { + "epoch": 0.54, + "learning_rate": 1.4595394118242707e-05, + "loss": 2.8013, + "step": 74800 + }, + { + "epoch": 0.54, + "learning_rate": 1.4588161263724344e-05, + "loss": 2.8069, + "step": 74900 + }, + { + "epoch": 0.54, + "learning_rate": 1.4580928409205979e-05, + "loss": 2.8052, + "step": 75000 + }, + { + "epoch": 0.54, + "eval_accuracy": 0.4536056594471816, + "eval_loss": 2.8232624530792236, + "eval_runtime": 33.2026, + "eval_samples_per_second": 195.256, + "eval_steps_per_second": 2.048, + "step": 75000 + }, + { + "epoch": 0.54, + "learning_rate": 1.4573695554687615e-05, + "loss": 2.8113, + "step": 75100 + }, + { + "epoch": 0.54, + "learning_rate": 1.456646270016925e-05, + "loss": 2.809, + "step": 75200 + }, + { + "epoch": 0.54, + "learning_rate": 1.4559229845650886e-05, + "loss": 2.8048, + "step": 75300 + }, + { + "epoch": 0.55, + "learning_rate": 1.4551996991132521e-05, + "loss": 2.8071, + "step": 75400 + }, + { + "epoch": 0.55, + "learning_rate": 1.454483646515934e-05, + "loss": 2.8094, + "step": 75500 + }, + { + "epoch": 0.55, + "learning_rate": 1.4537603610640976e-05, + "loss": 2.8038, + "step": 75600 + }, + { + "epoch": 0.55, + "learning_rate": 1.4530370756122612e-05, + "loss": 2.815, + "step": 75700 + }, + { + "epoch": 0.55, + "learning_rate": 1.4523137901604247e-05, + "loss": 2.8076, + "step": 75800 + }, + { + "epoch": 0.55, + "learning_rate": 1.4515905047085884e-05, + "loss": 2.8066, + "step": 75900 + }, + { + "epoch": 0.55, + "learning_rate": 1.4508672192567518e-05, + "loss": 2.8101, + "step": 76000 + }, + { + "epoch": 0.55, + "eval_accuracy": 0.4537810805334011, + "eval_loss": 2.8219878673553467, + "eval_runtime": 29.3462, + "eval_samples_per_second": 220.914, + "eval_steps_per_second": 2.317, + "step": 76000 + }, + { + "epoch": 0.55, + "learning_rate": 1.4501439338049155e-05, + "loss": 2.8143, + "step": 76100 + }, + { + "epoch": 0.55, + "learning_rate": 1.449420648353079e-05, + "loss": 2.8062, + "step": 76200 + }, + { + "epoch": 0.55, + "learning_rate": 1.4486973629012426e-05, + "loss": 2.8041, + "step": 76300 + }, + { + "epoch": 0.55, + "learning_rate": 1.4479740774494063e-05, + "loss": 2.8018, + "step": 76400 + }, + { + "epoch": 0.55, + "learning_rate": 1.4472507919975698e-05, + "loss": 2.8006, + "step": 76500 + }, + { + "epoch": 0.55, + "learning_rate": 1.4465275065457334e-05, + "loss": 2.7967, + "step": 76600 + }, + { + "epoch": 0.55, + "learning_rate": 1.4458042210938969e-05, + "loss": 2.8028, + "step": 76700 + }, + { + "epoch": 0.56, + "learning_rate": 1.4450809356420605e-05, + "loss": 2.8051, + "step": 76800 + }, + { + "epoch": 0.56, + "learning_rate": 1.444357650190224e-05, + "loss": 2.8104, + "step": 76900 + }, + { + "epoch": 0.56, + "learning_rate": 1.4436343647383877e-05, + "loss": 2.8021, + "step": 77000 + }, + { + "epoch": 0.56, + "eval_accuracy": 0.45396497022378285, + "eval_loss": 2.8208632469177246, + "eval_runtime": 29.8785, + "eval_samples_per_second": 216.979, + "eval_steps_per_second": 2.276, + "step": 77000 + }, + { + "epoch": 0.56, + "learning_rate": 1.4429110792865513e-05, + "loss": 2.7955, + "step": 77100 + }, + { + "epoch": 0.56, + "learning_rate": 1.4421877938347148e-05, + "loss": 2.7983, + "step": 77200 + }, + { + "epoch": 0.56, + "learning_rate": 1.4414645083828785e-05, + "loss": 2.8088, + "step": 77300 + }, + { + "epoch": 0.56, + "learning_rate": 1.440741222931042e-05, + "loss": 2.8086, + "step": 77400 + }, + { + "epoch": 0.56, + "learning_rate": 1.4400179374792056e-05, + "loss": 2.8189, + "step": 77500 + }, + { + "epoch": 0.56, + "learning_rate": 1.4393018848818876e-05, + "loss": 2.8047, + "step": 77600 + }, + { + "epoch": 0.56, + "learning_rate": 1.4385785994300512e-05, + "loss": 2.8058, + "step": 77700 + }, + { + "epoch": 0.56, + "learning_rate": 1.4378553139782147e-05, + "loss": 2.8104, + "step": 77800 + }, + { + "epoch": 0.56, + "learning_rate": 1.4371320285263784e-05, + "loss": 2.8071, + "step": 77900 + }, + { + "epoch": 0.56, + "learning_rate": 1.4364159759290603e-05, + "loss": 2.8076, + "step": 78000 + }, + { + "epoch": 0.56, + "eval_accuracy": 0.4540236455526218, + "eval_loss": 2.819603204727173, + "eval_runtime": 28.0095, + "eval_samples_per_second": 231.457, + "eval_steps_per_second": 2.428, + "step": 78000 + }, + { + "epoch": 0.56, + "learning_rate": 1.435692690477224e-05, + "loss": 2.7967, + "step": 78100 + }, + { + "epoch": 0.57, + "learning_rate": 1.4349694050253875e-05, + "loss": 2.8074, + "step": 78200 + }, + { + "epoch": 0.57, + "learning_rate": 1.4342461195735511e-05, + "loss": 2.8124, + "step": 78300 + }, + { + "epoch": 0.57, + "learning_rate": 1.4335228341217146e-05, + "loss": 2.8052, + "step": 78400 + }, + { + "epoch": 0.57, + "learning_rate": 1.4327995486698782e-05, + "loss": 2.8116, + "step": 78500 + }, + { + "epoch": 0.57, + "learning_rate": 1.4320762632180419e-05, + "loss": 2.8076, + "step": 78600 + }, + { + "epoch": 0.57, + "learning_rate": 1.4313529777662054e-05, + "loss": 2.8153, + "step": 78700 + }, + { + "epoch": 0.57, + "learning_rate": 1.430629692314369e-05, + "loss": 2.7979, + "step": 78800 + }, + { + "epoch": 0.57, + "learning_rate": 1.4299064068625325e-05, + "loss": 2.8034, + "step": 78900 + }, + { + "epoch": 0.57, + "learning_rate": 1.4291903542652143e-05, + "loss": 2.7937, + "step": 79000 + }, + { + "epoch": 0.57, + "eval_accuracy": 0.45417608042754354, + "eval_loss": 2.8189663887023926, + "eval_runtime": 29.6646, + "eval_samples_per_second": 218.543, + "eval_steps_per_second": 2.292, + "step": 79000 + }, + { + "epoch": 0.57, + "learning_rate": 1.428467068813378e-05, + "loss": 2.7999, + "step": 79100 + }, + { + "epoch": 0.57, + "learning_rate": 1.4277437833615415e-05, + "loss": 2.7967, + "step": 79200 + }, + { + "epoch": 0.57, + "learning_rate": 1.4270204979097051e-05, + "loss": 2.8032, + "step": 79300 + }, + { + "epoch": 0.57, + "learning_rate": 1.4262972124578688e-05, + "loss": 2.8101, + "step": 79400 + }, + { + "epoch": 0.58, + "learning_rate": 1.4255739270060322e-05, + "loss": 2.8052, + "step": 79500 + }, + { + "epoch": 0.58, + "learning_rate": 1.4248506415541959e-05, + "loss": 2.7995, + "step": 79600 + }, + { + "epoch": 0.58, + "learning_rate": 1.4241273561023594e-05, + "loss": 2.8048, + "step": 79700 + }, + { + "epoch": 0.58, + "learning_rate": 1.423404070650523e-05, + "loss": 2.8033, + "step": 79800 + }, + { + "epoch": 0.58, + "learning_rate": 1.4226807851986865e-05, + "loss": 2.8033, + "step": 79900 + }, + { + "epoch": 0.58, + "learning_rate": 1.4219574997468502e-05, + "loss": 2.8057, + "step": 80000 + }, + { + "epoch": 0.58, + "eval_accuracy": 0.4541452305123808, + "eval_loss": 2.8179192543029785, + "eval_runtime": 27.9926, + "eval_samples_per_second": 231.597, + "eval_steps_per_second": 2.429, + "step": 80000 + }, + { + "epoch": 0.58, + "learning_rate": 1.4212342142950138e-05, + "loss": 2.8101, + "step": 80100 + }, + { + "epoch": 0.58, + "learning_rate": 1.4205109288431773e-05, + "loss": 2.8088, + "step": 80200 + }, + { + "epoch": 0.58, + "learning_rate": 1.419787643391341e-05, + "loss": 2.8033, + "step": 80300 + }, + { + "epoch": 0.58, + "learning_rate": 1.4190643579395044e-05, + "loss": 2.8072, + "step": 80400 + }, + { + "epoch": 0.58, + "learning_rate": 1.418341072487668e-05, + "loss": 2.8053, + "step": 80500 + }, + { + "epoch": 0.58, + "learning_rate": 1.4176177870358316e-05, + "loss": 2.8044, + "step": 80600 + }, + { + "epoch": 0.58, + "learning_rate": 1.4168945015839952e-05, + "loss": 2.8059, + "step": 80700 + }, + { + "epoch": 0.58, + "learning_rate": 1.4161712161321589e-05, + "loss": 2.7949, + "step": 80800 + }, + { + "epoch": 0.59, + "learning_rate": 1.4154479306803223e-05, + "loss": 2.8008, + "step": 80900 + }, + { + "epoch": 0.59, + "learning_rate": 1.414724645228486e-05, + "loss": 2.8082, + "step": 81000 + }, + { + "epoch": 0.59, + "eval_accuracy": 0.45446219826817047, + "eval_loss": 2.8168437480926514, + "eval_runtime": 30.4701, + "eval_samples_per_second": 212.766, + "eval_steps_per_second": 2.232, + "step": 81000 + }, + { + "epoch": 0.59, + "learning_rate": 1.4140013597766495e-05, + "loss": 2.803, + "step": 81100 + }, + { + "epoch": 0.59, + "learning_rate": 1.4132780743248131e-05, + "loss": 2.806, + "step": 81200 + }, + { + "epoch": 0.59, + "learning_rate": 1.4125620217274951e-05, + "loss": 2.8034, + "step": 81300 + }, + { + "epoch": 0.59, + "learning_rate": 1.4118387362756588e-05, + "loss": 2.7982, + "step": 81400 + }, + { + "epoch": 0.59, + "learning_rate": 1.4111154508238222e-05, + "loss": 2.7955, + "step": 81500 + }, + { + "epoch": 0.59, + "learning_rate": 1.4103921653719859e-05, + "loss": 2.8114, + "step": 81600 + }, + { + "epoch": 0.59, + "learning_rate": 1.4096688799201494e-05, + "loss": 2.8103, + "step": 81700 + }, + { + "epoch": 0.59, + "learning_rate": 1.408945594468313e-05, + "loss": 2.7982, + "step": 81800 + }, + { + "epoch": 0.59, + "learning_rate": 1.4082223090164767e-05, + "loss": 2.8026, + "step": 81900 + }, + { + "epoch": 0.59, + "learning_rate": 1.4074990235646402e-05, + "loss": 2.7986, + "step": 82000 + }, + { + "epoch": 0.59, + "eval_accuracy": 0.45461100374130836, + "eval_loss": 2.815699577331543, + "eval_runtime": 31.1592, + "eval_samples_per_second": 208.061, + "eval_steps_per_second": 2.182, + "step": 82000 + }, + { + "epoch": 0.59, + "learning_rate": 1.4067757381128038e-05, + "loss": 2.8006, + "step": 82100 + }, + { + "epoch": 0.59, + "learning_rate": 1.4060524526609673e-05, + "loss": 2.8056, + "step": 82200 + }, + { + "epoch": 0.6, + "learning_rate": 1.405329167209131e-05, + "loss": 2.7992, + "step": 82300 + }, + { + "epoch": 0.6, + "learning_rate": 1.4046058817572946e-05, + "loss": 2.8123, + "step": 82400 + }, + { + "epoch": 0.6, + "learning_rate": 1.403882596305458e-05, + "loss": 2.7977, + "step": 82500 + }, + { + "epoch": 0.6, + "learning_rate": 1.4031593108536217e-05, + "loss": 2.808, + "step": 82600 + }, + { + "epoch": 0.6, + "learning_rate": 1.4024360254017852e-05, + "loss": 2.7964, + "step": 82700 + }, + { + "epoch": 0.6, + "learning_rate": 1.4017127399499489e-05, + "loss": 2.7978, + "step": 82800 + }, + { + "epoch": 0.6, + "learning_rate": 1.4009894544981123e-05, + "loss": 2.7935, + "step": 82900 + }, + { + "epoch": 0.6, + "learning_rate": 1.400266169046276e-05, + "loss": 2.8062, + "step": 83000 + }, + { + "epoch": 0.6, + "eval_accuracy": 0.45453962550622595, + "eval_loss": 2.814990520477295, + "eval_runtime": 29.5244, + "eval_samples_per_second": 219.581, + "eval_steps_per_second": 2.303, + "step": 83000 + }, + { + "epoch": 0.6, + "learning_rate": 1.3995428835944396e-05, + "loss": 2.7966, + "step": 83100 + }, + { + "epoch": 0.6, + "learning_rate": 1.3988195981426031e-05, + "loss": 2.803, + "step": 83200 + }, + { + "epoch": 0.6, + "learning_rate": 1.3981107783998034e-05, + "loss": 2.799, + "step": 83300 + }, + { + "epoch": 0.6, + "learning_rate": 1.3973874929479669e-05, + "loss": 2.801, + "step": 83400 + }, + { + "epoch": 0.6, + "learning_rate": 1.3966642074961306e-05, + "loss": 2.7902, + "step": 83500 + }, + { + "epoch": 0.6, + "learning_rate": 1.395940922044294e-05, + "loss": 2.7999, + "step": 83600 + }, + { + "epoch": 0.61, + "learning_rate": 1.3952176365924577e-05, + "loss": 2.8011, + "step": 83700 + }, + { + "epoch": 0.61, + "learning_rate": 1.3944943511406213e-05, + "loss": 2.8004, + "step": 83800 + }, + { + "epoch": 0.61, + "learning_rate": 1.3937710656887848e-05, + "loss": 2.7973, + "step": 83900 + }, + { + "epoch": 0.61, + "learning_rate": 1.3930477802369485e-05, + "loss": 2.7981, + "step": 84000 + }, + { + "epoch": 0.61, + "eval_accuracy": 0.4545880175300106, + "eval_loss": 2.813809871673584, + "eval_runtime": 28.1508, + "eval_samples_per_second": 230.295, + "eval_steps_per_second": 2.416, + "step": 84000 + }, + { + "epoch": 0.61, + "learning_rate": 1.392324494785112e-05, + "loss": 2.7926, + "step": 84100 + }, + { + "epoch": 0.61, + "learning_rate": 1.3916012093332756e-05, + "loss": 2.7982, + "step": 84200 + }, + { + "epoch": 0.61, + "learning_rate": 1.390877923881439e-05, + "loss": 2.8091, + "step": 84300 + }, + { + "epoch": 0.61, + "learning_rate": 1.3901618712841212e-05, + "loss": 2.8055, + "step": 84400 + }, + { + "epoch": 0.61, + "learning_rate": 1.389445818686803e-05, + "loss": 2.7951, + "step": 84500 + }, + { + "epoch": 0.61, + "learning_rate": 1.3887225332349665e-05, + "loss": 2.8024, + "step": 84600 + }, + { + "epoch": 0.61, + "learning_rate": 1.3879992477831302e-05, + "loss": 2.7985, + "step": 84700 + }, + { + "epoch": 0.61, + "learning_rate": 1.3872759623312936e-05, + "loss": 2.7949, + "step": 84800 + }, + { + "epoch": 0.61, + "learning_rate": 1.3865526768794573e-05, + "loss": 2.7992, + "step": 84900 + }, + { + "epoch": 0.61, + "learning_rate": 1.3858293914276208e-05, + "loss": 2.8041, + "step": 85000 + }, + { + "epoch": 0.61, + "eval_accuracy": 0.4546454830582549, + "eval_loss": 2.8130455017089844, + "eval_runtime": 29.7879, + "eval_samples_per_second": 217.639, + "eval_steps_per_second": 2.283, + "step": 85000 + }, + { + "epoch": 0.62, + "learning_rate": 1.3851061059757844e-05, + "loss": 2.8013, + "step": 85100 + }, + { + "epoch": 0.62, + "learning_rate": 1.384382820523948e-05, + "loss": 2.8052, + "step": 85200 + }, + { + "epoch": 0.62, + "learning_rate": 1.3836595350721116e-05, + "loss": 2.801, + "step": 85300 + }, + { + "epoch": 0.62, + "learning_rate": 1.3829362496202752e-05, + "loss": 2.8045, + "step": 85400 + }, + { + "epoch": 0.62, + "learning_rate": 1.3822129641684387e-05, + "loss": 2.8031, + "step": 85500 + }, + { + "epoch": 0.62, + "learning_rate": 1.3814896787166023e-05, + "loss": 2.7966, + "step": 85600 + }, + { + "epoch": 0.62, + "learning_rate": 1.3807663932647658e-05, + "loss": 2.7934, + "step": 85700 + }, + { + "epoch": 0.62, + "learning_rate": 1.3800431078129295e-05, + "loss": 2.7978, + "step": 85800 + }, + { + "epoch": 0.62, + "learning_rate": 1.3793198223610931e-05, + "loss": 2.7973, + "step": 85900 + }, + { + "epoch": 0.62, + "learning_rate": 1.3785965369092566e-05, + "loss": 2.7978, + "step": 86000 + }, + { + "epoch": 0.62, + "eval_accuracy": 0.4548626422649887, + "eval_loss": 2.8118443489074707, + "eval_runtime": 29.2336, + "eval_samples_per_second": 221.765, + "eval_steps_per_second": 2.326, + "step": 86000 + }, + { + "epoch": 0.62, + "learning_rate": 1.3778732514574203e-05, + "loss": 2.798, + "step": 86100 + }, + { + "epoch": 0.62, + "learning_rate": 1.3771499660055837e-05, + "loss": 2.7967, + "step": 86200 + }, + { + "epoch": 0.62, + "learning_rate": 1.3764266805537474e-05, + "loss": 2.8004, + "step": 86300 + }, + { + "epoch": 0.62, + "learning_rate": 1.375703395101911e-05, + "loss": 2.7957, + "step": 86400 + }, + { + "epoch": 0.63, + "learning_rate": 1.3749801096500745e-05, + "loss": 2.7932, + "step": 86500 + }, + { + "epoch": 0.63, + "learning_rate": 1.3742568241982382e-05, + "loss": 2.8002, + "step": 86600 + }, + { + "epoch": 0.63, + "learning_rate": 1.3735335387464017e-05, + "loss": 2.8027, + "step": 86700 + }, + { + "epoch": 0.63, + "learning_rate": 1.3728102532945653e-05, + "loss": 2.8001, + "step": 86800 + }, + { + "epoch": 0.63, + "learning_rate": 1.3720869678427288e-05, + "loss": 2.7988, + "step": 86900 + }, + { + "epoch": 0.63, + "learning_rate": 1.3713636823908925e-05, + "loss": 2.8016, + "step": 87000 + }, + { + "epoch": 0.63, + "eval_accuracy": 0.45493644010126033, + "eval_loss": 2.8109002113342285, + "eval_runtime": 29.8642, + "eval_samples_per_second": 217.082, + "eval_steps_per_second": 2.277, + "step": 87000 + }, + { + "epoch": 0.63, + "learning_rate": 1.3706476297935744e-05, + "loss": 2.7946, + "step": 87100 + }, + { + "epoch": 0.63, + "learning_rate": 1.369924344341738e-05, + "loss": 2.794, + "step": 87200 + }, + { + "epoch": 0.63, + "learning_rate": 1.3692010588899016e-05, + "loss": 2.8011, + "step": 87300 + }, + { + "epoch": 0.63, + "learning_rate": 1.3684777734380652e-05, + "loss": 2.7973, + "step": 87400 + }, + { + "epoch": 0.63, + "learning_rate": 1.3677544879862289e-05, + "loss": 2.8076, + "step": 87500 + }, + { + "epoch": 0.63, + "learning_rate": 1.3670312025343923e-05, + "loss": 2.8012, + "step": 87600 + }, + { + "epoch": 0.63, + "learning_rate": 1.366307917082556e-05, + "loss": 2.7916, + "step": 87700 + }, + { + "epoch": 0.64, + "learning_rate": 1.3655846316307195e-05, + "loss": 2.798, + "step": 87800 + }, + { + "epoch": 0.64, + "learning_rate": 1.3648613461788831e-05, + "loss": 2.7971, + "step": 87900 + }, + { + "epoch": 0.64, + "learning_rate": 1.3641380607270466e-05, + "loss": 2.7901, + "step": 88000 + }, + { + "epoch": 0.64, + "eval_accuracy": 0.4551348473987775, + "eval_loss": 2.809919834136963, + "eval_runtime": 30.2675, + "eval_samples_per_second": 214.19, + "eval_steps_per_second": 2.247, + "step": 88000 + }, + { + "epoch": 0.64, + "learning_rate": 1.3634220081297288e-05, + "loss": 2.8022, + "step": 88100 + }, + { + "epoch": 0.64, + "learning_rate": 1.3626987226778922e-05, + "loss": 2.7927, + "step": 88200 + }, + { + "epoch": 0.64, + "learning_rate": 1.3619754372260559e-05, + "loss": 2.7951, + "step": 88300 + }, + { + "epoch": 0.64, + "learning_rate": 1.3612521517742194e-05, + "loss": 2.7995, + "step": 88400 + }, + { + "epoch": 0.64, + "learning_rate": 1.360528866322383e-05, + "loss": 2.7966, + "step": 88500 + }, + { + "epoch": 0.64, + "learning_rate": 1.3598055808705463e-05, + "loss": 2.7882, + "step": 88600 + }, + { + "epoch": 0.64, + "learning_rate": 1.35908229541871e-05, + "loss": 2.7933, + "step": 88700 + }, + { + "epoch": 0.64, + "learning_rate": 1.3583590099668735e-05, + "loss": 2.7965, + "step": 88800 + }, + { + "epoch": 0.64, + "learning_rate": 1.3576357245150371e-05, + "loss": 2.7948, + "step": 88900 + }, + { + "epoch": 0.64, + "learning_rate": 1.3569124390632008e-05, + "loss": 2.8075, + "step": 89000 + }, + { + "epoch": 0.64, + "eval_accuracy": 0.4553132929864835, + "eval_loss": 2.809250593185425, + "eval_runtime": 29.5022, + "eval_samples_per_second": 219.746, + "eval_steps_per_second": 2.305, + "step": 89000 + }, + { + "epoch": 0.64, + "learning_rate": 1.3561891536113643e-05, + "loss": 2.7911, + "step": 89100 + }, + { + "epoch": 0.65, + "learning_rate": 1.3554658681595279e-05, + "loss": 2.7966, + "step": 89200 + }, + { + "epoch": 0.65, + "learning_rate": 1.3547425827076914e-05, + "loss": 2.7951, + "step": 89300 + }, + { + "epoch": 0.65, + "learning_rate": 1.354019297255855e-05, + "loss": 2.7992, + "step": 89400 + }, + { + "epoch": 0.65, + "learning_rate": 1.3532960118040185e-05, + "loss": 2.7913, + "step": 89500 + }, + { + "epoch": 0.65, + "learning_rate": 1.3525727263521822e-05, + "loss": 2.7985, + "step": 89600 + }, + { + "epoch": 0.65, + "learning_rate": 1.3518494409003458e-05, + "loss": 2.7968, + "step": 89700 + }, + { + "epoch": 0.65, + "learning_rate": 1.3511261554485093e-05, + "loss": 2.7985, + "step": 89800 + }, + { + "epoch": 0.65, + "learning_rate": 1.350402869996673e-05, + "loss": 2.7937, + "step": 89900 + }, + { + "epoch": 0.65, + "learning_rate": 1.3496795845448364e-05, + "loss": 2.7915, + "step": 90000 + }, + { + "epoch": 0.65, + "eval_accuracy": 0.4552001766308868, + "eval_loss": 2.808422327041626, + "eval_runtime": 29.3832, + "eval_samples_per_second": 220.636, + "eval_steps_per_second": 2.314, + "step": 90000 + }, + { + "epoch": 0.65, + "learning_rate": 1.3489562990930001e-05, + "loss": 2.7934, + "step": 90100 + }, + { + "epoch": 0.65, + "learning_rate": 1.348240246495682e-05, + "loss": 2.7959, + "step": 90200 + }, + { + "epoch": 0.65, + "learning_rate": 1.3475169610438457e-05, + "loss": 2.8034, + "step": 90300 + }, + { + "epoch": 0.65, + "learning_rate": 1.3467936755920092e-05, + "loss": 2.7908, + "step": 90400 + }, + { + "epoch": 0.65, + "learning_rate": 1.3460703901401728e-05, + "loss": 2.7904, + "step": 90500 + }, + { + "epoch": 0.66, + "learning_rate": 1.3453543375428548e-05, + "loss": 2.789, + "step": 90600 + }, + { + "epoch": 0.66, + "learning_rate": 1.3446310520910185e-05, + "loss": 2.7961, + "step": 90700 + }, + { + "epoch": 0.66, + "learning_rate": 1.343907766639182e-05, + "loss": 2.7939, + "step": 90800 + }, + { + "epoch": 0.66, + "learning_rate": 1.3431844811873456e-05, + "loss": 2.7922, + "step": 90900 + }, + { + "epoch": 0.66, + "learning_rate": 1.3424611957355091e-05, + "loss": 2.7916, + "step": 91000 + }, + { + "epoch": 0.66, + "eval_accuracy": 0.4554560494566483, + "eval_loss": 2.807447910308838, + "eval_runtime": 30.8057, + "eval_samples_per_second": 210.448, + "eval_steps_per_second": 2.207, + "step": 91000 + }, + { + "epoch": 0.66, + "learning_rate": 1.3417379102836727e-05, + "loss": 2.7855, + "step": 91100 + }, + { + "epoch": 0.66, + "learning_rate": 1.3410146248318364e-05, + "loss": 2.8014, + "step": 91200 + }, + { + "epoch": 0.66, + "learning_rate": 1.3402913393799999e-05, + "loss": 2.7801, + "step": 91300 + }, + { + "epoch": 0.66, + "learning_rate": 1.3395680539281635e-05, + "loss": 2.7898, + "step": 91400 + }, + { + "epoch": 0.66, + "learning_rate": 1.338844768476327e-05, + "loss": 2.7983, + "step": 91500 + }, + { + "epoch": 0.66, + "learning_rate": 1.3381214830244907e-05, + "loss": 2.7945, + "step": 91600 + }, + { + "epoch": 0.66, + "learning_rate": 1.3373981975726541e-05, + "loss": 2.7854, + "step": 91700 + }, + { + "epoch": 0.66, + "learning_rate": 1.3366749121208178e-05, + "loss": 2.7905, + "step": 91800 + }, + { + "epoch": 0.66, + "learning_rate": 1.3359516266689814e-05, + "loss": 2.7875, + "step": 91900 + }, + { + "epoch": 0.67, + "learning_rate": 1.335228341217145e-05, + "loss": 2.7751, + "step": 92000 + }, + { + "epoch": 0.67, + "eval_accuracy": 0.4554330632453506, + "eval_loss": 2.806763172149658, + "eval_runtime": 32.0762, + "eval_samples_per_second": 202.113, + "eval_steps_per_second": 2.12, + "step": 92000 + }, + { + "epoch": 0.67, + "learning_rate": 1.3345050557653086e-05, + "loss": 2.7943, + "step": 92100 + }, + { + "epoch": 0.67, + "learning_rate": 1.333781770313472e-05, + "loss": 2.7919, + "step": 92200 + }, + { + "epoch": 0.67, + "learning_rate": 1.3330584848616357e-05, + "loss": 2.7911, + "step": 92300 + }, + { + "epoch": 0.67, + "learning_rate": 1.3323351994097994e-05, + "loss": 2.789, + "step": 92400 + }, + { + "epoch": 0.67, + "learning_rate": 1.3316119139579628e-05, + "loss": 2.7876, + "step": 92500 + }, + { + "epoch": 0.67, + "learning_rate": 1.3308886285061265e-05, + "loss": 2.7861, + "step": 92600 + }, + { + "epoch": 0.67, + "learning_rate": 1.33016534305429e-05, + "loss": 2.7807, + "step": 92700 + }, + { + "epoch": 0.67, + "learning_rate": 1.3294420576024536e-05, + "loss": 2.8013, + "step": 92800 + }, + { + "epoch": 0.67, + "learning_rate": 1.3287260050051354e-05, + "loss": 2.7933, + "step": 92900 + }, + { + "epoch": 0.67, + "learning_rate": 1.3280027195532989e-05, + "loss": 2.7896, + "step": 93000 + }, + { + "epoch": 0.67, + "eval_accuracy": 0.45561574313513775, + "eval_loss": 2.8058676719665527, + "eval_runtime": 29.3079, + "eval_samples_per_second": 221.203, + "eval_steps_per_second": 2.32, + "step": 93000 + }, + { + "epoch": 0.67, + "learning_rate": 1.3272794341014626e-05, + "loss": 2.7916, + "step": 93100 + }, + { + "epoch": 0.67, + "learning_rate": 1.326556148649626e-05, + "loss": 2.7959, + "step": 93200 + }, + { + "epoch": 0.67, + "learning_rate": 1.3258328631977897e-05, + "loss": 2.7946, + "step": 93300 + }, + { + "epoch": 0.68, + "learning_rate": 1.3251095777459533e-05, + "loss": 2.789, + "step": 93400 + }, + { + "epoch": 0.68, + "learning_rate": 1.3243862922941168e-05, + "loss": 2.7914, + "step": 93500 + }, + { + "epoch": 0.68, + "learning_rate": 1.3236630068422805e-05, + "loss": 2.7956, + "step": 93600 + }, + { + "epoch": 0.68, + "learning_rate": 1.322939721390444e-05, + "loss": 2.7945, + "step": 93700 + }, + { + "epoch": 0.68, + "learning_rate": 1.3222164359386076e-05, + "loss": 2.7877, + "step": 93800 + }, + { + "epoch": 0.68, + "learning_rate": 1.3214931504867713e-05, + "loss": 2.7865, + "step": 93900 + }, + { + "epoch": 0.68, + "learning_rate": 1.3207698650349347e-05, + "loss": 2.7886, + "step": 94000 + }, + { + "epoch": 0.68, + "eval_accuracy": 0.45565748125565203, + "eval_loss": 2.8051185607910156, + "eval_runtime": 29.7995, + "eval_samples_per_second": 217.554, + "eval_steps_per_second": 2.282, + "step": 94000 + }, + { + "epoch": 0.68, + "learning_rate": 1.3200538124376167e-05, + "loss": 2.7942, + "step": 94100 + }, + { + "epoch": 0.68, + "learning_rate": 1.3193305269857804e-05, + "loss": 2.7838, + "step": 94200 + }, + { + "epoch": 0.68, + "learning_rate": 1.3186072415339439e-05, + "loss": 2.7905, + "step": 94300 + }, + { + "epoch": 0.68, + "learning_rate": 1.3178839560821075e-05, + "loss": 2.7868, + "step": 94400 + }, + { + "epoch": 0.68, + "learning_rate": 1.3171606706302712e-05, + "loss": 2.7851, + "step": 94500 + }, + { + "epoch": 0.68, + "learning_rate": 1.3164373851784346e-05, + "loss": 2.7934, + "step": 94600 + }, + { + "epoch": 0.68, + "learning_rate": 1.3157140997265983e-05, + "loss": 2.789, + "step": 94700 + }, + { + "epoch": 0.69, + "learning_rate": 1.3149908142747618e-05, + "loss": 2.7892, + "step": 94800 + }, + { + "epoch": 0.69, + "learning_rate": 1.3142675288229254e-05, + "loss": 2.7828, + "step": 94900 + }, + { + "epoch": 0.69, + "learning_rate": 1.3135442433710889e-05, + "loss": 2.7909, + "step": 95000 + }, + { + "epoch": 0.69, + "eval_accuracy": 0.45570647817973403, + "eval_loss": 2.804401397705078, + "eval_runtime": 30.1768, + "eval_samples_per_second": 214.834, + "eval_steps_per_second": 2.253, + "step": 95000 + }, + { + "epoch": 0.69, + "learning_rate": 1.3128209579192526e-05, + "loss": 2.7872, + "step": 95100 + }, + { + "epoch": 0.69, + "learning_rate": 1.3121049053219344e-05, + "loss": 2.7878, + "step": 95200 + }, + { + "epoch": 0.69, + "learning_rate": 1.3113888527246163e-05, + "loss": 2.7852, + "step": 95300 + }, + { + "epoch": 0.69, + "learning_rate": 1.31066556727278e-05, + "loss": 2.7896, + "step": 95400 + }, + { + "epoch": 0.69, + "learning_rate": 1.3099422818209435e-05, + "loss": 2.7886, + "step": 95500 + }, + { + "epoch": 0.69, + "learning_rate": 1.3092189963691071e-05, + "loss": 2.7876, + "step": 95600 + }, + { + "epoch": 0.69, + "learning_rate": 1.3084957109172706e-05, + "loss": 2.7866, + "step": 95700 + }, + { + "epoch": 0.69, + "learning_rate": 1.3077724254654343e-05, + "loss": 2.791, + "step": 95800 + }, + { + "epoch": 0.69, + "learning_rate": 1.3070491400135979e-05, + "loss": 2.7915, + "step": 95900 + }, + { + "epoch": 0.69, + "learning_rate": 1.3063258545617614e-05, + "loss": 2.7926, + "step": 96000 + }, + { + "epoch": 0.69, + "eval_accuracy": 0.45580084262611414, + "eval_loss": 2.803481101989746, + "eval_runtime": 30.9308, + "eval_samples_per_second": 209.597, + "eval_steps_per_second": 2.198, + "step": 96000 + }, + { + "epoch": 0.7, + "learning_rate": 1.305602569109925e-05, + "loss": 2.7844, + "step": 96100 + }, + { + "epoch": 0.7, + "learning_rate": 1.3048792836580885e-05, + "loss": 2.7857, + "step": 96200 + }, + { + "epoch": 0.7, + "learning_rate": 1.3041559982062522e-05, + "loss": 2.7882, + "step": 96300 + }, + { + "epoch": 0.7, + "learning_rate": 1.3034327127544158e-05, + "loss": 2.7929, + "step": 96400 + }, + { + "epoch": 0.7, + "learning_rate": 1.3027094273025793e-05, + "loss": 2.7979, + "step": 96500 + }, + { + "epoch": 0.7, + "learning_rate": 1.301986141850743e-05, + "loss": 2.798, + "step": 96600 + }, + { + "epoch": 0.7, + "learning_rate": 1.3012628563989064e-05, + "loss": 2.7847, + "step": 96700 + }, + { + "epoch": 0.7, + "learning_rate": 1.3005395709470701e-05, + "loss": 2.7885, + "step": 96800 + }, + { + "epoch": 0.7, + "learning_rate": 1.2998162854952336e-05, + "loss": 2.7781, + "step": 96900 + }, + { + "epoch": 0.7, + "learning_rate": 1.2990930000433972e-05, + "loss": 2.7931, + "step": 97000 + }, + { + "epoch": 0.7, + "eval_accuracy": 0.45598533721679324, + "eval_loss": 2.802797317504883, + "eval_runtime": 30.7179, + "eval_samples_per_second": 211.05, + "eval_steps_per_second": 2.214, + "step": 97000 + }, + { + "epoch": 0.7, + "learning_rate": 1.2983769474460792e-05, + "loss": 2.7851, + "step": 97100 + }, + { + "epoch": 0.7, + "learning_rate": 1.2976536619942428e-05, + "loss": 2.7835, + "step": 97200 + }, + { + "epoch": 0.7, + "learning_rate": 1.2969303765424063e-05, + "loss": 2.7948, + "step": 97300 + }, + { + "epoch": 0.7, + "learning_rate": 1.29620709109057e-05, + "loss": 2.7872, + "step": 97400 + }, + { + "epoch": 0.71, + "learning_rate": 1.2954838056387336e-05, + "loss": 2.7882, + "step": 97500 + }, + { + "epoch": 0.71, + "learning_rate": 1.2947605201868971e-05, + "loss": 2.792, + "step": 97600 + }, + { + "epoch": 0.71, + "learning_rate": 1.2940372347350608e-05, + "loss": 2.7907, + "step": 97700 + }, + { + "epoch": 0.71, + "learning_rate": 1.2933139492832242e-05, + "loss": 2.7916, + "step": 97800 + }, + { + "epoch": 0.71, + "learning_rate": 1.2925906638313879e-05, + "loss": 2.7805, + "step": 97900 + }, + { + "epoch": 0.71, + "learning_rate": 1.2918673783795514e-05, + "loss": 2.7838, + "step": 98000 + }, + { + "epoch": 0.71, + "eval_accuracy": 0.4561528945991477, + "eval_loss": 2.802030563354492, + "eval_runtime": 29.961, + "eval_samples_per_second": 216.381, + "eval_steps_per_second": 2.27, + "step": 98000 + }, + { + "epoch": 0.71, + "learning_rate": 1.291144092927715e-05, + "loss": 2.7927, + "step": 98100 + }, + { + "epoch": 0.71, + "learning_rate": 1.2904208074758787e-05, + "loss": 2.7861, + "step": 98200 + }, + { + "epoch": 0.71, + "learning_rate": 1.2896975220240422e-05, + "loss": 2.7933, + "step": 98300 + }, + { + "epoch": 0.71, + "learning_rate": 1.2889742365722058e-05, + "loss": 2.785, + "step": 98400 + }, + { + "epoch": 0.71, + "learning_rate": 1.2882509511203693e-05, + "loss": 2.7827, + "step": 98500 + }, + { + "epoch": 0.71, + "learning_rate": 1.287527665668533e-05, + "loss": 2.7861, + "step": 98600 + }, + { + "epoch": 0.71, + "learning_rate": 1.2868043802166964e-05, + "loss": 2.7901, + "step": 98700 + }, + { + "epoch": 0.71, + "learning_rate": 1.28608109476486e-05, + "loss": 2.7939, + "step": 98800 + }, + { + "epoch": 0.72, + "learning_rate": 1.2853578093130237e-05, + "loss": 2.7834, + "step": 98900 + }, + { + "epoch": 0.72, + "learning_rate": 1.2846489895702239e-05, + "loss": 2.779, + "step": 99000 + }, + { + "epoch": 0.72, + "eval_accuracy": 0.45607909676287606, + "eval_loss": 2.8013815879821777, + "eval_runtime": 29.8613, + "eval_samples_per_second": 217.104, + "eval_steps_per_second": 2.277, + "step": 99000 + }, + { + "epoch": 0.72, + "learning_rate": 1.2839257041183875e-05, + "loss": 2.7808, + "step": 99100 + }, + { + "epoch": 0.72, + "learning_rate": 1.283202418666551e-05, + "loss": 2.7838, + "step": 99200 + }, + { + "epoch": 0.72, + "learning_rate": 1.2824791332147146e-05, + "loss": 2.7794, + "step": 99300 + }, + { + "epoch": 0.72, + "learning_rate": 1.2817558477628781e-05, + "loss": 2.7863, + "step": 99400 + }, + { + "epoch": 0.72, + "learning_rate": 1.2810325623110418e-05, + "loss": 2.7833, + "step": 99500 + }, + { + "epoch": 0.72, + "learning_rate": 1.2803092768592054e-05, + "loss": 2.7833, + "step": 99600 + }, + { + "epoch": 0.72, + "learning_rate": 1.2795859914073689e-05, + "loss": 2.7824, + "step": 99700 + }, + { + "epoch": 0.72, + "learning_rate": 1.2788627059555326e-05, + "loss": 2.792, + "step": 99800 + }, + { + "epoch": 0.72, + "learning_rate": 1.278139420503696e-05, + "loss": 2.7919, + "step": 99900 + }, + { + "epoch": 0.72, + "learning_rate": 1.2774161350518597e-05, + "loss": 2.7922, + "step": 100000 + }, + { + "epoch": 0.72, + "eval_accuracy": 0.4561583387018235, + "eval_loss": 2.8006463050842285, + "eval_runtime": 28.183, + "eval_samples_per_second": 230.032, + "eval_steps_per_second": 2.413, + "step": 100000 + }, + { + "epoch": 0.72, + "learning_rate": 1.2766928496000233e-05, + "loss": 2.7826, + "step": 100100 + }, + { + "epoch": 0.72, + "learning_rate": 1.2759695641481868e-05, + "loss": 2.788, + "step": 100200 + }, + { + "epoch": 0.73, + "learning_rate": 1.2752462786963505e-05, + "loss": 2.7929, + "step": 100300 + }, + { + "epoch": 0.73, + "learning_rate": 1.274522993244514e-05, + "loss": 2.7889, + "step": 100400 + }, + { + "epoch": 0.73, + "learning_rate": 1.2737997077926776e-05, + "loss": 2.7805, + "step": 100500 + }, + { + "epoch": 0.73, + "learning_rate": 1.2730764223408411e-05, + "loss": 2.7857, + "step": 100600 + }, + { + "epoch": 0.73, + "learning_rate": 1.2723531368890047e-05, + "loss": 2.7963, + "step": 100700 + }, + { + "epoch": 0.73, + "learning_rate": 1.2716298514371684e-05, + "loss": 2.7759, + "step": 100800 + }, + { + "epoch": 0.73, + "learning_rate": 1.2709065659853319e-05, + "loss": 2.784, + "step": 100900 + }, + { + "epoch": 0.73, + "learning_rate": 1.2701832805334955e-05, + "loss": 2.7786, + "step": 101000 + }, + { + "epoch": 0.73, + "eval_accuracy": 0.4561758808104454, + "eval_loss": 2.7999138832092285, + "eval_runtime": 29.8581, + "eval_samples_per_second": 217.127, + "eval_steps_per_second": 2.277, + "step": 101000 + }, + { + "epoch": 0.73, + "learning_rate": 1.269459995081659e-05, + "loss": 2.7861, + "step": 101100 + }, + { + "epoch": 0.73, + "learning_rate": 1.2687367096298227e-05, + "loss": 2.7875, + "step": 101200 + }, + { + "epoch": 0.73, + "learning_rate": 1.2680134241779861e-05, + "loss": 2.7815, + "step": 101300 + }, + { + "epoch": 0.73, + "learning_rate": 1.2672901387261498e-05, + "loss": 2.7838, + "step": 101400 + }, + { + "epoch": 0.73, + "learning_rate": 1.2665668532743135e-05, + "loss": 2.7861, + "step": 101500 + }, + { + "epoch": 0.73, + "learning_rate": 1.265843567822477e-05, + "loss": 2.7867, + "step": 101600 + }, + { + "epoch": 0.74, + "learning_rate": 1.2651202823706406e-05, + "loss": 2.7878, + "step": 101700 + }, + { + "epoch": 0.74, + "learning_rate": 1.264396996918804e-05, + "loss": 2.7861, + "step": 101800 + }, + { + "epoch": 0.74, + "learning_rate": 1.2636809443214859e-05, + "loss": 2.7844, + "step": 101900 + }, + { + "epoch": 0.74, + "learning_rate": 1.2629576588696495e-05, + "loss": 2.7791, + "step": 102000 + }, + { + "epoch": 0.74, + "eval_accuracy": 0.45630411967347484, + "eval_loss": 2.7991721630096436, + "eval_runtime": 29.6469, + "eval_samples_per_second": 218.674, + "eval_steps_per_second": 2.294, + "step": 102000 + }, + { + "epoch": 0.74, + "learning_rate": 1.262234373417813e-05, + "loss": 2.7853, + "step": 102100 + }, + { + "epoch": 0.74, + "learning_rate": 1.2615110879659767e-05, + "loss": 2.7833, + "step": 102200 + }, + { + "epoch": 0.74, + "learning_rate": 1.2607878025141403e-05, + "loss": 2.7832, + "step": 102300 + }, + { + "epoch": 0.74, + "learning_rate": 1.2600645170623038e-05, + "loss": 2.778, + "step": 102400 + }, + { + "epoch": 0.74, + "learning_rate": 1.2593412316104674e-05, + "loss": 2.7849, + "step": 102500 + }, + { + "epoch": 0.74, + "learning_rate": 1.258617946158631e-05, + "loss": 2.7798, + "step": 102600 + }, + { + "epoch": 0.74, + "learning_rate": 1.2578946607067946e-05, + "loss": 2.7834, + "step": 102700 + }, + { + "epoch": 0.74, + "learning_rate": 1.257171375254958e-05, + "loss": 2.7792, + "step": 102800 + }, + { + "epoch": 0.74, + "learning_rate": 1.2564480898031217e-05, + "loss": 2.7889, + "step": 102900 + }, + { + "epoch": 0.74, + "learning_rate": 1.2557248043512854e-05, + "loss": 2.7908, + "step": 103000 + }, + { + "epoch": 0.74, + "eval_accuracy": 0.4565454748921009, + "eval_loss": 2.798401117324829, + "eval_runtime": 29.5641, + "eval_samples_per_second": 219.287, + "eval_steps_per_second": 2.3, + "step": 103000 + }, + { + "epoch": 0.75, + "learning_rate": 1.2550087517539673e-05, + "loss": 2.7769, + "step": 103100 + }, + { + "epoch": 0.75, + "learning_rate": 1.2542854663021308e-05, + "loss": 2.7866, + "step": 103200 + }, + { + "epoch": 0.75, + "learning_rate": 1.2535621808502945e-05, + "loss": 2.7823, + "step": 103300 + }, + { + "epoch": 0.75, + "learning_rate": 1.2528388953984581e-05, + "loss": 2.785, + "step": 103400 + }, + { + "epoch": 0.75, + "learning_rate": 1.2521156099466216e-05, + "loss": 2.7782, + "step": 103500 + }, + { + "epoch": 0.75, + "learning_rate": 1.2513923244947853e-05, + "loss": 2.7795, + "step": 103600 + }, + { + "epoch": 0.75, + "learning_rate": 1.2506690390429487e-05, + "loss": 2.7857, + "step": 103700 + }, + { + "epoch": 0.75, + "learning_rate": 1.2499457535911124e-05, + "loss": 2.7851, + "step": 103800 + }, + { + "epoch": 0.75, + "learning_rate": 1.2492224681392759e-05, + "loss": 2.7816, + "step": 103900 + }, + { + "epoch": 0.75, + "learning_rate": 1.2484991826874395e-05, + "loss": 2.7872, + "step": 104000 + }, + { + "epoch": 0.75, + "eval_accuracy": 0.45662834623283216, + "eval_loss": 2.7977957725524902, + "eval_runtime": 28.0059, + "eval_samples_per_second": 231.487, + "eval_steps_per_second": 2.428, + "step": 104000 + }, + { + "epoch": 0.75, + "learning_rate": 1.2477758972356032e-05, + "loss": 2.7802, + "step": 104100 + }, + { + "epoch": 0.75, + "learning_rate": 1.2470526117837667e-05, + "loss": 2.7833, + "step": 104200 + }, + { + "epoch": 0.75, + "learning_rate": 1.2463365591864486e-05, + "loss": 2.779, + "step": 104300 + }, + { + "epoch": 0.76, + "learning_rate": 1.2456132737346123e-05, + "loss": 2.7811, + "step": 104400 + }, + { + "epoch": 0.76, + "learning_rate": 1.244889988282776e-05, + "loss": 2.7838, + "step": 104500 + }, + { + "epoch": 0.76, + "learning_rate": 1.2441667028309394e-05, + "loss": 2.7762, + "step": 104600 + }, + { + "epoch": 0.76, + "learning_rate": 1.243443417379103e-05, + "loss": 2.781, + "step": 104700 + }, + { + "epoch": 0.76, + "learning_rate": 1.2427201319272665e-05, + "loss": 2.7898, + "step": 104800 + }, + { + "epoch": 0.76, + "learning_rate": 1.2419968464754302e-05, + "loss": 2.7823, + "step": 104900 + }, + { + "epoch": 0.76, + "learning_rate": 1.2412735610235937e-05, + "loss": 2.7763, + "step": 105000 + }, + { + "epoch": 0.76, + "eval_accuracy": 0.45665980104829224, + "eval_loss": 2.7971575260162354, + "eval_runtime": 29.269, + "eval_samples_per_second": 221.497, + "eval_steps_per_second": 2.323, + "step": 105000 + }, + { + "epoch": 0.76, + "learning_rate": 1.2405502755717573e-05, + "loss": 2.7825, + "step": 105100 + }, + { + "epoch": 0.76, + "learning_rate": 1.239826990119921e-05, + "loss": 2.7826, + "step": 105200 + }, + { + "epoch": 0.76, + "learning_rate": 1.2391037046680845e-05, + "loss": 2.7811, + "step": 105300 + }, + { + "epoch": 0.76, + "learning_rate": 1.2383876520707663e-05, + "loss": 2.7769, + "step": 105400 + }, + { + "epoch": 0.76, + "learning_rate": 1.23766436661893e-05, + "loss": 2.7875, + "step": 105500 + }, + { + "epoch": 0.76, + "learning_rate": 1.2369410811670934e-05, + "loss": 2.7801, + "step": 105600 + }, + { + "epoch": 0.76, + "learning_rate": 1.236217795715257e-05, + "loss": 2.7875, + "step": 105700 + }, + { + "epoch": 0.77, + "learning_rate": 1.2354945102634205e-05, + "loss": 2.7838, + "step": 105800 + }, + { + "epoch": 0.77, + "learning_rate": 1.2347712248115842e-05, + "loss": 2.7865, + "step": 105900 + }, + { + "epoch": 0.77, + "learning_rate": 1.2340479393597478e-05, + "loss": 2.7785, + "step": 106000 + }, + { + "epoch": 0.77, + "eval_accuracy": 0.4568334074336198, + "eval_loss": 2.7966232299804688, + "eval_runtime": 29.6697, + "eval_samples_per_second": 218.506, + "eval_steps_per_second": 2.292, + "step": 106000 + }, + { + "epoch": 0.77, + "learning_rate": 1.2333246539079113e-05, + "loss": 2.7809, + "step": 106100 + }, + { + "epoch": 0.77, + "learning_rate": 1.232601368456075e-05, + "loss": 2.7837, + "step": 106200 + }, + { + "epoch": 0.77, + "learning_rate": 1.2318780830042385e-05, + "loss": 2.7868, + "step": 106300 + }, + { + "epoch": 0.77, + "learning_rate": 1.2311547975524021e-05, + "loss": 2.781, + "step": 106400 + }, + { + "epoch": 0.77, + "learning_rate": 1.2304315121005656e-05, + "loss": 2.781, + "step": 106500 + }, + { + "epoch": 0.77, + "learning_rate": 1.2297082266487292e-05, + "loss": 2.7859, + "step": 106600 + }, + { + "epoch": 0.77, + "learning_rate": 1.2289849411968929e-05, + "loss": 2.7813, + "step": 106700 + }, + { + "epoch": 0.77, + "learning_rate": 1.2282616557450564e-05, + "loss": 2.7857, + "step": 106800 + }, + { + "epoch": 0.77, + "learning_rate": 1.22753837029322e-05, + "loss": 2.7835, + "step": 106900 + }, + { + "epoch": 0.77, + "learning_rate": 1.2268150848413835e-05, + "loss": 2.7861, + "step": 107000 + }, + { + "epoch": 0.77, + "eval_accuracy": 0.4568231241285655, + "eval_loss": 2.795985698699951, + "eval_runtime": 29.6433, + "eval_samples_per_second": 218.7, + "eval_steps_per_second": 2.294, + "step": 107000 + }, + { + "epoch": 0.77, + "learning_rate": 1.2260917993895472e-05, + "loss": 2.7802, + "step": 107100 + }, + { + "epoch": 0.78, + "learning_rate": 1.2253685139377108e-05, + "loss": 2.7901, + "step": 107200 + }, + { + "epoch": 0.78, + "learning_rate": 1.2246452284858743e-05, + "loss": 2.7804, + "step": 107300 + }, + { + "epoch": 0.78, + "learning_rate": 1.223921943034038e-05, + "loss": 2.7784, + "step": 107400 + }, + { + "epoch": 0.78, + "learning_rate": 1.2231986575822014e-05, + "loss": 2.7824, + "step": 107500 + }, + { + "epoch": 0.78, + "learning_rate": 1.2224826049848834e-05, + "loss": 2.7756, + "step": 107600 + }, + { + "epoch": 0.78, + "learning_rate": 1.221759319533047e-05, + "loss": 2.7849, + "step": 107700 + }, + { + "epoch": 0.78, + "learning_rate": 1.2210360340812107e-05, + "loss": 2.7808, + "step": 107800 + }, + { + "epoch": 0.78, + "learning_rate": 1.2203127486293742e-05, + "loss": 2.7741, + "step": 107900 + }, + { + "epoch": 0.78, + "learning_rate": 1.2195894631775378e-05, + "loss": 2.784, + "step": 108000 + }, + { + "epoch": 0.78, + "eval_accuracy": 0.4569779786046765, + "eval_loss": 2.795300245285034, + "eval_runtime": 29.2976, + "eval_samples_per_second": 221.281, + "eval_steps_per_second": 2.321, + "step": 108000 + }, + { + "epoch": 0.78, + "learning_rate": 1.2188661777257013e-05, + "loss": 2.781, + "step": 108100 + }, + { + "epoch": 0.78, + "learning_rate": 1.218142892273865e-05, + "loss": 2.7924, + "step": 108200 + }, + { + "epoch": 0.78, + "learning_rate": 1.2174196068220286e-05, + "loss": 2.7781, + "step": 108300 + }, + { + "epoch": 0.78, + "learning_rate": 1.2166963213701921e-05, + "loss": 2.7808, + "step": 108400 + }, + { + "epoch": 0.78, + "learning_rate": 1.2159730359183557e-05, + "loss": 2.7725, + "step": 108500 + }, + { + "epoch": 0.79, + "learning_rate": 1.2152497504665192e-05, + "loss": 2.7791, + "step": 108600 + }, + { + "epoch": 0.79, + "learning_rate": 1.2145264650146829e-05, + "loss": 2.79, + "step": 108700 + }, + { + "epoch": 0.79, + "learning_rate": 1.2138031795628464e-05, + "loss": 2.7776, + "step": 108800 + }, + { + "epoch": 0.79, + "learning_rate": 1.21307989411101e-05, + "loss": 2.7821, + "step": 108900 + }, + { + "epoch": 0.79, + "learning_rate": 1.2123638415136918e-05, + "loss": 2.7804, + "step": 109000 + }, + { + "epoch": 0.79, + "eval_accuracy": 0.457063269546597, + "eval_loss": 2.794382333755493, + "eval_runtime": 29.8656, + "eval_samples_per_second": 217.073, + "eval_steps_per_second": 2.277, + "step": 109000 + }, + { + "epoch": 0.79, + "learning_rate": 1.2116405560618553e-05, + "loss": 2.7838, + "step": 109100 + }, + { + "epoch": 0.79, + "learning_rate": 1.210917270610019e-05, + "loss": 2.7806, + "step": 109200 + }, + { + "epoch": 0.79, + "learning_rate": 1.2101939851581826e-05, + "loss": 2.7839, + "step": 109300 + }, + { + "epoch": 0.79, + "learning_rate": 1.2094706997063461e-05, + "loss": 2.7805, + "step": 109400 + }, + { + "epoch": 0.79, + "learning_rate": 1.2087474142545097e-05, + "loss": 2.7782, + "step": 109500 + }, + { + "epoch": 0.79, + "learning_rate": 1.2080241288026732e-05, + "loss": 2.7788, + "step": 109600 + }, + { + "epoch": 0.79, + "learning_rate": 1.2073008433508369e-05, + "loss": 2.7809, + "step": 109700 + }, + { + "epoch": 0.79, + "learning_rate": 1.2065775578990005e-05, + "loss": 2.7753, + "step": 109800 + }, + { + "epoch": 0.79, + "learning_rate": 1.205854272447164e-05, + "loss": 2.7894, + "step": 109900 + }, + { + "epoch": 0.8, + "learning_rate": 1.2051309869953277e-05, + "loss": 2.7828, + "step": 110000 + }, + { + "epoch": 0.8, + "eval_accuracy": 0.45704633233827235, + "eval_loss": 2.793990135192871, + "eval_runtime": 30.316, + "eval_samples_per_second": 213.848, + "eval_steps_per_second": 2.243, + "step": 110000 + }, + { + "epoch": 0.8, + "learning_rate": 1.2044077015434911e-05, + "loss": 2.7702, + "step": 110100 + }, + { + "epoch": 0.8, + "learning_rate": 1.2036844160916548e-05, + "loss": 2.7875, + "step": 110200 + }, + { + "epoch": 0.8, + "learning_rate": 1.2029611306398183e-05, + "loss": 2.7813, + "step": 110300 + }, + { + "epoch": 0.8, + "learning_rate": 1.2022450780425004e-05, + "loss": 2.7843, + "step": 110400 + }, + { + "epoch": 0.8, + "learning_rate": 1.2015217925906639e-05, + "loss": 2.7784, + "step": 110500 + }, + { + "epoch": 0.8, + "learning_rate": 1.2007985071388275e-05, + "loss": 2.7802, + "step": 110600 + }, + { + "epoch": 0.8, + "learning_rate": 1.200075221686991e-05, + "loss": 2.7725, + "step": 110700 + }, + { + "epoch": 0.8, + "learning_rate": 1.1993519362351547e-05, + "loss": 2.7745, + "step": 110800 + }, + { + "epoch": 0.8, + "learning_rate": 1.1986286507833183e-05, + "loss": 2.7734, + "step": 110900 + }, + { + "epoch": 0.8, + "learning_rate": 1.1979125981860003e-05, + "loss": 2.7761, + "step": 111000 + }, + { + "epoch": 0.8, + "eval_accuracy": 0.4570904900599759, + "eval_loss": 2.793330192565918, + "eval_runtime": 30.099, + "eval_samples_per_second": 215.389, + "eval_steps_per_second": 2.259, + "step": 111000 + }, + { + "epoch": 0.8, + "learning_rate": 1.1971893127341638e-05, + "loss": 2.7721, + "step": 111100 + }, + { + "epoch": 0.8, + "learning_rate": 1.1964660272823274e-05, + "loss": 2.7846, + "step": 111200 + }, + { + "epoch": 0.81, + "learning_rate": 1.195742741830491e-05, + "loss": 2.7753, + "step": 111300 + }, + { + "epoch": 0.81, + "learning_rate": 1.1950194563786546e-05, + "loss": 2.767, + "step": 111400 + }, + { + "epoch": 0.81, + "learning_rate": 1.1942961709268182e-05, + "loss": 2.7864, + "step": 111500 + }, + { + "epoch": 0.81, + "learning_rate": 1.1935728854749817e-05, + "loss": 2.7781, + "step": 111600 + }, + { + "epoch": 0.81, + "learning_rate": 1.1928496000231454e-05, + "loss": 2.7768, + "step": 111700 + }, + { + "epoch": 0.81, + "learning_rate": 1.1921263145713088e-05, + "loss": 2.7838, + "step": 111800 + }, + { + "epoch": 0.81, + "learning_rate": 1.1914030291194725e-05, + "loss": 2.7771, + "step": 111900 + }, + { + "epoch": 0.81, + "learning_rate": 1.1906797436676361e-05, + "loss": 2.7797, + "step": 112000 + }, + { + "epoch": 0.81, + "eval_accuracy": 0.45710379786651667, + "eval_loss": 2.7928030490875244, + "eval_runtime": 30.824, + "eval_samples_per_second": 210.323, + "eval_steps_per_second": 2.206, + "step": 112000 + }, + { + "epoch": 0.81, + "learning_rate": 1.1899564582157996e-05, + "loss": 2.7739, + "step": 112100 + }, + { + "epoch": 0.81, + "learning_rate": 1.1892331727639633e-05, + "loss": 2.7837, + "step": 112200 + }, + { + "epoch": 0.81, + "learning_rate": 1.1885098873121268e-05, + "loss": 2.7712, + "step": 112300 + }, + { + "epoch": 0.81, + "learning_rate": 1.1877866018602904e-05, + "loss": 2.7802, + "step": 112400 + }, + { + "epoch": 0.81, + "learning_rate": 1.1870633164084539e-05, + "loss": 2.7717, + "step": 112500 + }, + { + "epoch": 0.81, + "learning_rate": 1.1863400309566175e-05, + "loss": 2.7827, + "step": 112600 + }, + { + "epoch": 0.82, + "learning_rate": 1.1856167455047812e-05, + "loss": 2.7758, + "step": 112700 + }, + { + "epoch": 0.82, + "learning_rate": 1.1848934600529447e-05, + "loss": 2.7769, + "step": 112800 + }, + { + "epoch": 0.82, + "learning_rate": 1.1841701746011083e-05, + "loss": 2.7712, + "step": 112900 + }, + { + "epoch": 0.82, + "learning_rate": 1.1834468891492716e-05, + "loss": 2.7792, + "step": 113000 + }, + { + "epoch": 0.82, + "eval_accuracy": 0.4573149080702773, + "eval_loss": 2.792189598083496, + "eval_runtime": 27.9605, + "eval_samples_per_second": 231.863, + "eval_steps_per_second": 2.432, + "step": 113000 + }, + { + "epoch": 0.82, + "learning_rate": 1.1827236036974353e-05, + "loss": 2.7799, + "step": 113100 + }, + { + "epoch": 0.82, + "learning_rate": 1.1820075511001173e-05, + "loss": 2.7849, + "step": 113200 + }, + { + "epoch": 0.82, + "learning_rate": 1.1812842656482807e-05, + "loss": 2.7789, + "step": 113300 + }, + { + "epoch": 0.82, + "learning_rate": 1.1805609801964444e-05, + "loss": 2.7732, + "step": 113400 + }, + { + "epoch": 0.82, + "learning_rate": 1.179837694744608e-05, + "loss": 2.7791, + "step": 113500 + }, + { + "epoch": 0.82, + "learning_rate": 1.1791144092927715e-05, + "loss": 2.7844, + "step": 113600 + }, + { + "epoch": 0.82, + "learning_rate": 1.1783911238409352e-05, + "loss": 2.7713, + "step": 113700 + }, + { + "epoch": 0.82, + "learning_rate": 1.1776678383890987e-05, + "loss": 2.7754, + "step": 113800 + }, + { + "epoch": 0.82, + "learning_rate": 1.1769445529372623e-05, + "loss": 2.7714, + "step": 113900 + }, + { + "epoch": 0.82, + "learning_rate": 1.1762212674854258e-05, + "loss": 2.7819, + "step": 114000 + }, + { + "epoch": 0.82, + "eval_accuracy": 0.4572997855628446, + "eval_loss": 2.791494846343994, + "eval_runtime": 28.1324, + "eval_samples_per_second": 230.446, + "eval_steps_per_second": 2.417, + "step": 114000 + }, + { + "epoch": 0.83, + "learning_rate": 1.1754979820335894e-05, + "loss": 2.7814, + "step": 114100 + }, + { + "epoch": 0.83, + "learning_rate": 1.1747746965817531e-05, + "loss": 2.7755, + "step": 114200 + }, + { + "epoch": 0.83, + "learning_rate": 1.1740514111299166e-05, + "loss": 2.7741, + "step": 114300 + }, + { + "epoch": 0.83, + "learning_rate": 1.1733281256780802e-05, + "loss": 2.772, + "step": 114400 + }, + { + "epoch": 0.83, + "learning_rate": 1.1726120730807622e-05, + "loss": 2.7795, + "step": 114500 + }, + { + "epoch": 0.83, + "learning_rate": 1.1718887876289257e-05, + "loss": 2.7728, + "step": 114600 + }, + { + "epoch": 0.83, + "learning_rate": 1.1711655021770893e-05, + "loss": 2.7714, + "step": 114700 + }, + { + "epoch": 0.83, + "learning_rate": 1.170442216725253e-05, + "loss": 2.7771, + "step": 114800 + }, + { + "epoch": 0.83, + "learning_rate": 1.1697189312734165e-05, + "loss": 2.779, + "step": 114900 + }, + { + "epoch": 0.83, + "learning_rate": 1.1689956458215801e-05, + "loss": 2.7837, + "step": 115000 + }, + { + "epoch": 0.83, + "eval_accuracy": 0.4572991806625473, + "eval_loss": 2.7909815311431885, + "eval_runtime": 29.3759, + "eval_samples_per_second": 220.691, + "eval_steps_per_second": 2.315, + "step": 115000 + }, + { + "epoch": 0.83, + "learning_rate": 1.168279593224262e-05, + "loss": 2.7764, + "step": 115100 + }, + { + "epoch": 0.83, + "learning_rate": 1.1675563077724254e-05, + "loss": 2.7767, + "step": 115200 + }, + { + "epoch": 0.83, + "learning_rate": 1.166833022320589e-05, + "loss": 2.7741, + "step": 115300 + }, + { + "epoch": 0.83, + "learning_rate": 1.1661097368687525e-05, + "loss": 2.7706, + "step": 115400 + }, + { + "epoch": 0.84, + "learning_rate": 1.1653864514169162e-05, + "loss": 2.7744, + "step": 115500 + }, + { + "epoch": 0.84, + "learning_rate": 1.1646631659650798e-05, + "loss": 2.7759, + "step": 115600 + }, + { + "epoch": 0.84, + "learning_rate": 1.1639398805132433e-05, + "loss": 2.77, + "step": 115700 + }, + { + "epoch": 0.84, + "learning_rate": 1.163216595061407e-05, + "loss": 2.7758, + "step": 115800 + }, + { + "epoch": 0.84, + "learning_rate": 1.1624933096095705e-05, + "loss": 2.7732, + "step": 115900 + }, + { + "epoch": 0.84, + "learning_rate": 1.1617700241577341e-05, + "loss": 2.781, + "step": 116000 + }, + { + "epoch": 0.84, + "eval_accuracy": 0.45746008414163136, + "eval_loss": 2.790616989135742, + "eval_runtime": 30.033, + "eval_samples_per_second": 215.862, + "eval_steps_per_second": 2.264, + "step": 116000 + }, + { + "epoch": 0.84, + "learning_rate": 1.1610467387058976e-05, + "loss": 2.776, + "step": 116100 + }, + { + "epoch": 0.84, + "learning_rate": 1.1603234532540612e-05, + "loss": 2.7762, + "step": 116200 + }, + { + "epoch": 0.84, + "learning_rate": 1.1596001678022249e-05, + "loss": 2.7787, + "step": 116300 + }, + { + "epoch": 0.84, + "learning_rate": 1.1588768823503884e-05, + "loss": 2.7725, + "step": 116400 + }, + { + "epoch": 0.84, + "learning_rate": 1.1581608297530704e-05, + "loss": 2.7759, + "step": 116500 + }, + { + "epoch": 0.84, + "learning_rate": 1.157437544301234e-05, + "loss": 2.774, + "step": 116600 + }, + { + "epoch": 0.84, + "learning_rate": 1.1567142588493977e-05, + "loss": 2.7737, + "step": 116700 + }, + { + "epoch": 0.84, + "learning_rate": 1.1559909733975611e-05, + "loss": 2.7801, + "step": 116800 + }, + { + "epoch": 0.85, + "learning_rate": 1.1552676879457248e-05, + "loss": 2.7774, + "step": 116900 + }, + { + "epoch": 0.85, + "learning_rate": 1.1545444024938883e-05, + "loss": 2.7765, + "step": 117000 + }, + { + "epoch": 0.85, + "eval_accuracy": 0.45765970123974314, + "eval_loss": 2.7898108959198, + "eval_runtime": 30.1629, + "eval_samples_per_second": 214.933, + "eval_steps_per_second": 2.254, + "step": 117000 + }, + { + "epoch": 0.85, + "learning_rate": 1.1538283498965704e-05, + "loss": 2.7819, + "step": 117100 + }, + { + "epoch": 0.85, + "learning_rate": 1.1531050644447339e-05, + "loss": 2.7817, + "step": 117200 + }, + { + "epoch": 0.85, + "learning_rate": 1.1523817789928975e-05, + "loss": 2.7694, + "step": 117300 + }, + { + "epoch": 0.85, + "learning_rate": 1.151658493541061e-05, + "loss": 2.7779, + "step": 117400 + }, + { + "epoch": 0.85, + "learning_rate": 1.1509352080892247e-05, + "loss": 2.7796, + "step": 117500 + }, + { + "epoch": 0.85, + "learning_rate": 1.1502119226373882e-05, + "loss": 2.7798, + "step": 117600 + }, + { + "epoch": 0.85, + "learning_rate": 1.1494886371855518e-05, + "loss": 2.7729, + "step": 117700 + }, + { + "epoch": 0.85, + "learning_rate": 1.1487653517337155e-05, + "loss": 2.7779, + "step": 117800 + }, + { + "epoch": 0.85, + "learning_rate": 1.148042066281879e-05, + "loss": 2.7795, + "step": 117900 + }, + { + "epoch": 0.85, + "learning_rate": 1.1473187808300426e-05, + "loss": 2.7778, + "step": 118000 + }, + { + "epoch": 0.85, + "eval_accuracy": 0.4575302525761191, + "eval_loss": 2.789475202560425, + "eval_runtime": 31.2717, + "eval_samples_per_second": 207.312, + "eval_steps_per_second": 2.174, + "step": 118000 + }, + { + "epoch": 0.85, + "learning_rate": 1.146595495378206e-05, + "loss": 2.7761, + "step": 118100 + }, + { + "epoch": 0.85, + "learning_rate": 1.1458722099263697e-05, + "loss": 2.7809, + "step": 118200 + }, + { + "epoch": 0.86, + "learning_rate": 1.1451489244745332e-05, + "loss": 2.7705, + "step": 118300 + }, + { + "epoch": 0.86, + "learning_rate": 1.1444256390226969e-05, + "loss": 2.773, + "step": 118400 + }, + { + "epoch": 0.86, + "learning_rate": 1.1437023535708605e-05, + "loss": 2.7688, + "step": 118500 + }, + { + "epoch": 0.86, + "learning_rate": 1.142979068119024e-05, + "loss": 2.7681, + "step": 118600 + }, + { + "epoch": 0.86, + "learning_rate": 1.1422557826671877e-05, + "loss": 2.7745, + "step": 118700 + }, + { + "epoch": 0.86, + "learning_rate": 1.1415324972153511e-05, + "loss": 2.7753, + "step": 118800 + }, + { + "epoch": 0.86, + "learning_rate": 1.1408092117635148e-05, + "loss": 2.7714, + "step": 118900 + }, + { + "epoch": 0.86, + "learning_rate": 1.1400859263116784e-05, + "loss": 2.776, + "step": 119000 + }, + { + "epoch": 0.86, + "eval_accuracy": 0.4576566767382566, + "eval_loss": 2.7886581420898438, + "eval_runtime": 29.9514, + "eval_samples_per_second": 216.451, + "eval_steps_per_second": 2.27, + "step": 119000 + }, + { + "epoch": 0.86, + "learning_rate": 1.139362640859842e-05, + "loss": 2.7766, + "step": 119100 + }, + { + "epoch": 0.86, + "learning_rate": 1.1386465882625237e-05, + "loss": 2.7718, + "step": 119200 + }, + { + "epoch": 0.86, + "learning_rate": 1.1379233028106874e-05, + "loss": 2.779, + "step": 119300 + }, + { + "epoch": 0.86, + "learning_rate": 1.1372000173588509e-05, + "loss": 2.7753, + "step": 119400 + }, + { + "epoch": 0.86, + "learning_rate": 1.1364767319070145e-05, + "loss": 2.7763, + "step": 119500 + }, + { + "epoch": 0.87, + "learning_rate": 1.135753446455178e-05, + "loss": 2.7798, + "step": 119600 + }, + { + "epoch": 0.87, + "learning_rate": 1.1350301610033416e-05, + "loss": 2.775, + "step": 119700 + }, + { + "epoch": 0.87, + "learning_rate": 1.1343068755515051e-05, + "loss": 2.7736, + "step": 119800 + }, + { + "epoch": 0.87, + "learning_rate": 1.1335908229541873e-05, + "loss": 2.7801, + "step": 119900 + }, + { + "epoch": 0.87, + "learning_rate": 1.1328675375023507e-05, + "loss": 2.7719, + "step": 120000 + }, + { + "epoch": 0.87, + "eval_accuracy": 0.45784359093012494, + "eval_loss": 2.788266658782959, + "eval_runtime": 31.0222, + "eval_samples_per_second": 208.979, + "eval_steps_per_second": 2.192, + "step": 120000 + }, + { + "epoch": 0.87, + "learning_rate": 1.1321442520505144e-05, + "loss": 2.771, + "step": 120100 + }, + { + "epoch": 0.87, + "learning_rate": 1.1314209665986779e-05, + "loss": 2.7762, + "step": 120200 + }, + { + "epoch": 0.87, + "learning_rate": 1.1306976811468415e-05, + "loss": 2.7741, + "step": 120300 + }, + { + "epoch": 0.87, + "learning_rate": 1.1299743956950052e-05, + "loss": 2.773, + "step": 120400 + }, + { + "epoch": 0.87, + "learning_rate": 1.1292511102431687e-05, + "loss": 2.7681, + "step": 120500 + }, + { + "epoch": 0.87, + "learning_rate": 1.1285278247913323e-05, + "loss": 2.7713, + "step": 120600 + }, + { + "epoch": 0.87, + "learning_rate": 1.1278045393394958e-05, + "loss": 2.7704, + "step": 120700 + }, + { + "epoch": 0.87, + "learning_rate": 1.1270812538876595e-05, + "loss": 2.7758, + "step": 120800 + }, + { + "epoch": 0.87, + "learning_rate": 1.126357968435823e-05, + "loss": 2.7696, + "step": 120900 + }, + { + "epoch": 0.88, + "learning_rate": 1.1256346829839866e-05, + "loss": 2.7759, + "step": 121000 + }, + { + "epoch": 0.88, + "eval_accuracy": 0.45785326933488185, + "eval_loss": 2.787775993347168, + "eval_runtime": 29.7592, + "eval_samples_per_second": 217.849, + "eval_steps_per_second": 2.285, + "step": 121000 + }, + { + "epoch": 0.88, + "learning_rate": 1.1249113975321502e-05, + "loss": 2.7682, + "step": 121100 + }, + { + "epoch": 0.88, + "learning_rate": 1.1241881120803137e-05, + "loss": 2.7752, + "step": 121200 + }, + { + "epoch": 0.88, + "learning_rate": 1.1234648266284774e-05, + "loss": 2.7708, + "step": 121300 + }, + { + "epoch": 0.88, + "learning_rate": 1.1227415411766409e-05, + "loss": 2.7718, + "step": 121400 + }, + { + "epoch": 0.88, + "learning_rate": 1.1220182557248045e-05, + "loss": 2.7701, + "step": 121500 + }, + { + "epoch": 0.88, + "learning_rate": 1.1212949702729682e-05, + "loss": 2.7732, + "step": 121600 + }, + { + "epoch": 0.88, + "learning_rate": 1.1205716848211316e-05, + "loss": 2.7709, + "step": 121700 + }, + { + "epoch": 0.88, + "learning_rate": 1.1198483993692953e-05, + "loss": 2.7735, + "step": 121800 + }, + { + "epoch": 0.88, + "learning_rate": 1.1191251139174588e-05, + "loss": 2.7724, + "step": 121900 + }, + { + "epoch": 0.88, + "learning_rate": 1.1184018284656224e-05, + "loss": 2.7654, + "step": 122000 + }, + { + "epoch": 0.88, + "eval_accuracy": 0.45778854500306987, + "eval_loss": 2.787409782409668, + "eval_runtime": 30.1905, + "eval_samples_per_second": 214.736, + "eval_steps_per_second": 2.252, + "step": 122000 + }, + { + "epoch": 0.88, + "learning_rate": 1.1176785430137859e-05, + "loss": 2.7705, + "step": 122100 + }, + { + "epoch": 0.88, + "learning_rate": 1.1169552575619496e-05, + "loss": 2.7753, + "step": 122200 + }, + { + "epoch": 0.88, + "learning_rate": 1.1162392049646314e-05, + "loss": 2.776, + "step": 122300 + }, + { + "epoch": 0.89, + "learning_rate": 1.1155159195127948e-05, + "loss": 2.7663, + "step": 122400 + }, + { + "epoch": 0.89, + "learning_rate": 1.1147926340609585e-05, + "loss": 2.7699, + "step": 122500 + }, + { + "epoch": 0.89, + "learning_rate": 1.1140693486091221e-05, + "loss": 2.7761, + "step": 122600 + }, + { + "epoch": 0.89, + "learning_rate": 1.1133460631572856e-05, + "loss": 2.7632, + "step": 122700 + }, + { + "epoch": 0.89, + "learning_rate": 1.1126227777054493e-05, + "loss": 2.7688, + "step": 122800 + }, + { + "epoch": 0.89, + "learning_rate": 1.1118994922536128e-05, + "loss": 2.7719, + "step": 122900 + }, + { + "epoch": 0.89, + "learning_rate": 1.1111762068017764e-05, + "loss": 2.7661, + "step": 123000 + }, + { + "epoch": 0.89, + "eval_accuracy": 0.45800570420980363, + "eval_loss": 2.7868072986602783, + "eval_runtime": 29.8221, + "eval_samples_per_second": 217.389, + "eval_steps_per_second": 2.28, + "step": 123000 + }, + { + "epoch": 0.89, + "learning_rate": 1.11045292134994e-05, + "loss": 2.7701, + "step": 123100 + }, + { + "epoch": 0.89, + "learning_rate": 1.1097296358981035e-05, + "loss": 2.763, + "step": 123200 + }, + { + "epoch": 0.89, + "learning_rate": 1.1090063504462672e-05, + "loss": 2.7767, + "step": 123300 + }, + { + "epoch": 0.89, + "learning_rate": 1.1082830649944307e-05, + "loss": 2.7696, + "step": 123400 + }, + { + "epoch": 0.89, + "learning_rate": 1.1075597795425943e-05, + "loss": 2.7704, + "step": 123500 + }, + { + "epoch": 0.89, + "learning_rate": 1.1068364940907578e-05, + "loss": 2.7772, + "step": 123600 + }, + { + "epoch": 0.89, + "learning_rate": 1.1061132086389215e-05, + "loss": 2.7761, + "step": 123700 + }, + { + "epoch": 0.9, + "learning_rate": 1.1053899231870851e-05, + "loss": 2.7743, + "step": 123800 + }, + { + "epoch": 0.9, + "learning_rate": 1.1046666377352486e-05, + "loss": 2.7656, + "step": 123900 + }, + { + "epoch": 0.9, + "learning_rate": 1.1039433522834122e-05, + "loss": 2.7718, + "step": 124000 + }, + { + "epoch": 0.9, + "eval_accuracy": 0.4579833228988032, + "eval_loss": 2.786105155944824, + "eval_runtime": 29.402, + "eval_samples_per_second": 220.495, + "eval_steps_per_second": 2.313, + "step": 124000 + }, + { + "epoch": 0.9, + "learning_rate": 1.1032200668315757e-05, + "loss": 2.7757, + "step": 124100 + }, + { + "epoch": 0.9, + "learning_rate": 1.1024967813797394e-05, + "loss": 2.7759, + "step": 124200 + }, + { + "epoch": 0.9, + "learning_rate": 1.101773495927903e-05, + "loss": 2.7791, + "step": 124300 + }, + { + "epoch": 0.9, + "learning_rate": 1.1010502104760665e-05, + "loss": 2.7698, + "step": 124400 + }, + { + "epoch": 0.9, + "learning_rate": 1.1003269250242302e-05, + "loss": 2.7707, + "step": 124500 + }, + { + "epoch": 0.9, + "learning_rate": 1.0996036395723936e-05, + "loss": 2.7732, + "step": 124600 + }, + { + "epoch": 0.9, + "learning_rate": 1.0988803541205573e-05, + "loss": 2.7705, + "step": 124700 + }, + { + "epoch": 0.9, + "learning_rate": 1.0981570686687208e-05, + "loss": 2.7693, + "step": 124800 + }, + { + "epoch": 0.9, + "learning_rate": 1.0974337832168844e-05, + "loss": 2.7693, + "step": 124900 + }, + { + "epoch": 0.9, + "learning_rate": 1.0967177306195664e-05, + "loss": 2.7775, + "step": 125000 + }, + { + "epoch": 0.9, + "eval_accuracy": 0.4579536827842351, + "eval_loss": 2.785790205001831, + "eval_runtime": 30.0386, + "eval_samples_per_second": 215.823, + "eval_steps_per_second": 2.264, + "step": 125000 + }, + { + "epoch": 0.9, + "learning_rate": 1.09599444516773e-05, + "loss": 2.7759, + "step": 125100 + }, + { + "epoch": 0.91, + "learning_rate": 1.0952711597158935e-05, + "loss": 2.7729, + "step": 125200 + }, + { + "epoch": 0.91, + "learning_rate": 1.0945478742640572e-05, + "loss": 2.7698, + "step": 125300 + }, + { + "epoch": 0.91, + "learning_rate": 1.0938245888122208e-05, + "loss": 2.7665, + "step": 125400 + }, + { + "epoch": 0.91, + "learning_rate": 1.0931013033603843e-05, + "loss": 2.7668, + "step": 125500 + }, + { + "epoch": 0.91, + "learning_rate": 1.092378017908548e-05, + "loss": 2.771, + "step": 125600 + }, + { + "epoch": 0.91, + "learning_rate": 1.0916547324567115e-05, + "loss": 2.7749, + "step": 125700 + }, + { + "epoch": 0.91, + "learning_rate": 1.0909386798593934e-05, + "loss": 2.7676, + "step": 125800 + }, + { + "epoch": 0.91, + "learning_rate": 1.090215394407557e-05, + "loss": 2.7716, + "step": 125900 + }, + { + "epoch": 0.91, + "learning_rate": 1.0894921089557207e-05, + "loss": 2.7835, + "step": 126000 + }, + { + "epoch": 0.91, + "eval_accuracy": 0.45800933361158747, + "eval_loss": 2.7854855060577393, + "eval_runtime": 27.8858, + "eval_samples_per_second": 232.484, + "eval_steps_per_second": 2.439, + "step": 126000 + }, + { + "epoch": 0.91, + "learning_rate": 1.0887688235038842e-05, + "loss": 2.7658, + "step": 126100 + }, + { + "epoch": 0.91, + "learning_rate": 1.0880455380520475e-05, + "loss": 2.7706, + "step": 126200 + }, + { + "epoch": 0.91, + "learning_rate": 1.0873222526002112e-05, + "loss": 2.7689, + "step": 126300 + }, + { + "epoch": 0.91, + "learning_rate": 1.0865989671483748e-05, + "loss": 2.768, + "step": 126400 + }, + { + "epoch": 0.91, + "learning_rate": 1.0858756816965383e-05, + "loss": 2.7656, + "step": 126500 + }, + { + "epoch": 0.92, + "learning_rate": 1.085152396244702e-05, + "loss": 2.769, + "step": 126600 + }, + { + "epoch": 0.92, + "learning_rate": 1.0844291107928654e-05, + "loss": 2.7722, + "step": 126700 + }, + { + "epoch": 0.92, + "learning_rate": 1.0837058253410291e-05, + "loss": 2.7725, + "step": 126800 + }, + { + "epoch": 0.92, + "learning_rate": 1.0829825398891927e-05, + "loss": 2.7739, + "step": 126900 + }, + { + "epoch": 0.92, + "learning_rate": 1.0822592544373562e-05, + "loss": 2.768, + "step": 127000 + }, + { + "epoch": 0.92, + "eval_accuracy": 0.4580704285416156, + "eval_loss": 2.784817695617676, + "eval_runtime": 30.4636, + "eval_samples_per_second": 212.811, + "eval_steps_per_second": 2.232, + "step": 127000 + }, + { + "epoch": 0.92, + "learning_rate": 1.0815359689855199e-05, + "loss": 2.7736, + "step": 127100 + }, + { + "epoch": 0.92, + "learning_rate": 1.0808126835336834e-05, + "loss": 2.7665, + "step": 127200 + }, + { + "epoch": 0.92, + "learning_rate": 1.0800966309363653e-05, + "loss": 2.7702, + "step": 127300 + }, + { + "epoch": 0.92, + "learning_rate": 1.079373345484529e-05, + "loss": 2.7739, + "step": 127400 + }, + { + "epoch": 0.92, + "learning_rate": 1.0786500600326926e-05, + "loss": 2.7685, + "step": 127500 + }, + { + "epoch": 0.92, + "learning_rate": 1.0779267745808561e-05, + "loss": 2.7725, + "step": 127600 + }, + { + "epoch": 0.92, + "learning_rate": 1.0772034891290198e-05, + "loss": 2.7645, + "step": 127700 + }, + { + "epoch": 0.92, + "learning_rate": 1.0764802036771833e-05, + "loss": 2.7751, + "step": 127800 + }, + { + "epoch": 0.93, + "learning_rate": 1.0757569182253469e-05, + "loss": 2.7822, + "step": 127900 + }, + { + "epoch": 0.93, + "learning_rate": 1.0750336327735106e-05, + "loss": 2.7701, + "step": 128000 + }, + { + "epoch": 0.93, + "eval_accuracy": 0.45819806250434775, + "eval_loss": 2.7843172550201416, + "eval_runtime": 29.9391, + "eval_samples_per_second": 216.54, + "eval_steps_per_second": 2.271, + "step": 128000 + }, + { + "epoch": 0.93, + "learning_rate": 1.074310347321674e-05, + "loss": 2.7667, + "step": 128100 + }, + { + "epoch": 0.93, + "learning_rate": 1.0735870618698377e-05, + "loss": 2.7741, + "step": 128200 + }, + { + "epoch": 0.93, + "learning_rate": 1.0728637764180012e-05, + "loss": 2.767, + "step": 128300 + }, + { + "epoch": 0.93, + "learning_rate": 1.0721404909661648e-05, + "loss": 2.7672, + "step": 128400 + }, + { + "epoch": 0.93, + "learning_rate": 1.0714172055143283e-05, + "loss": 2.7694, + "step": 128500 + }, + { + "epoch": 0.93, + "learning_rate": 1.070693920062492e-05, + "loss": 2.775, + "step": 128600 + }, + { + "epoch": 0.93, + "learning_rate": 1.0699706346106556e-05, + "loss": 2.7747, + "step": 128700 + }, + { + "epoch": 0.93, + "learning_rate": 1.0692473491588191e-05, + "loss": 2.7689, + "step": 128800 + }, + { + "epoch": 0.93, + "learning_rate": 1.0685240637069827e-05, + "loss": 2.7631, + "step": 128900 + }, + { + "epoch": 0.93, + "learning_rate": 1.0678007782551462e-05, + "loss": 2.7682, + "step": 129000 + }, + { + "epoch": 0.93, + "eval_accuracy": 0.45829424165161975, + "eval_loss": 2.783777952194214, + "eval_runtime": 29.7932, + "eval_samples_per_second": 217.6, + "eval_steps_per_second": 2.282, + "step": 129000 + }, + { + "epoch": 0.93, + "learning_rate": 1.0670774928033099e-05, + "loss": 2.7683, + "step": 129100 + }, + { + "epoch": 0.93, + "learning_rate": 1.0663542073514734e-05, + "loss": 2.773, + "step": 129200 + }, + { + "epoch": 0.94, + "learning_rate": 1.065630921899637e-05, + "loss": 2.769, + "step": 129300 + }, + { + "epoch": 0.94, + "learning_rate": 1.0649076364478007e-05, + "loss": 2.7738, + "step": 129400 + }, + { + "epoch": 0.94, + "learning_rate": 1.0641915838504826e-05, + "loss": 2.7609, + "step": 129500 + }, + { + "epoch": 0.94, + "learning_rate": 1.0634682983986461e-05, + "loss": 2.7699, + "step": 129600 + }, + { + "epoch": 0.94, + "learning_rate": 1.062752245801328e-05, + "loss": 2.7711, + "step": 129700 + }, + { + "epoch": 0.94, + "learning_rate": 1.0620289603494916e-05, + "loss": 2.7743, + "step": 129800 + }, + { + "epoch": 0.94, + "learning_rate": 1.061305674897655e-05, + "loss": 2.768, + "step": 129900 + }, + { + "epoch": 0.94, + "learning_rate": 1.0605823894458187e-05, + "loss": 2.7595, + "step": 130000 + }, + { + "epoch": 0.94, + "eval_accuracy": 0.458337794473026, + "eval_loss": 2.7834300994873047, + "eval_runtime": 28.4215, + "eval_samples_per_second": 228.102, + "eval_steps_per_second": 2.393, + "step": 130000 + }, + { + "epoch": 0.94, + "learning_rate": 1.0598591039939824e-05, + "loss": 2.7653, + "step": 130100 + }, + { + "epoch": 0.94, + "learning_rate": 1.0591358185421458e-05, + "loss": 2.7622, + "step": 130200 + }, + { + "epoch": 0.94, + "learning_rate": 1.0584125330903095e-05, + "loss": 2.7737, + "step": 130300 + }, + { + "epoch": 0.94, + "learning_rate": 1.057689247638473e-05, + "loss": 2.7744, + "step": 130400 + }, + { + "epoch": 0.94, + "learning_rate": 1.0569659621866366e-05, + "loss": 2.7739, + "step": 130500 + }, + { + "epoch": 0.94, + "learning_rate": 1.0562426767348001e-05, + "loss": 2.7748, + "step": 130600 + }, + { + "epoch": 0.95, + "learning_rate": 1.0555193912829638e-05, + "loss": 2.7696, + "step": 130700 + }, + { + "epoch": 0.95, + "learning_rate": 1.0547961058311274e-05, + "loss": 2.7628, + "step": 130800 + }, + { + "epoch": 0.95, + "learning_rate": 1.0540728203792909e-05, + "loss": 2.7739, + "step": 130900 + }, + { + "epoch": 0.95, + "learning_rate": 1.0533495349274545e-05, + "loss": 2.7627, + "step": 131000 + }, + { + "epoch": 0.95, + "eval_accuracy": 0.45831420336143097, + "eval_loss": 2.783060312271118, + "eval_runtime": 31.194, + "eval_samples_per_second": 207.829, + "eval_steps_per_second": 2.18, + "step": 131000 + }, + { + "epoch": 0.95, + "learning_rate": 1.052626249475618e-05, + "loss": 2.7654, + "step": 131100 + }, + { + "epoch": 0.95, + "learning_rate": 1.0519029640237817e-05, + "loss": 2.7662, + "step": 131200 + }, + { + "epoch": 0.95, + "learning_rate": 1.0511796785719453e-05, + "loss": 2.7627, + "step": 131300 + }, + { + "epoch": 0.95, + "learning_rate": 1.0504636259746273e-05, + "loss": 2.7735, + "step": 131400 + }, + { + "epoch": 0.95, + "learning_rate": 1.0497403405227908e-05, + "loss": 2.7787, + "step": 131500 + }, + { + "epoch": 0.95, + "learning_rate": 1.0490170550709544e-05, + "loss": 2.7708, + "step": 131600 + }, + { + "epoch": 0.95, + "learning_rate": 1.0482937696191179e-05, + "loss": 2.7678, + "step": 131700 + }, + { + "epoch": 0.95, + "learning_rate": 1.0475704841672816e-05, + "loss": 2.7681, + "step": 131800 + }, + { + "epoch": 0.95, + "learning_rate": 1.0468471987154452e-05, + "loss": 2.7792, + "step": 131900 + }, + { + "epoch": 0.95, + "learning_rate": 1.0461239132636087e-05, + "loss": 2.7716, + "step": 132000 + }, + { + "epoch": 0.95, + "eval_accuracy": 0.45840554330632455, + "eval_loss": 2.7826600074768066, + "eval_runtime": 29.7072, + "eval_samples_per_second": 218.23, + "eval_steps_per_second": 2.289, + "step": 132000 + }, + { + "epoch": 0.96, + "learning_rate": 1.0454006278117723e-05, + "loss": 2.7721, + "step": 132100 + }, + { + "epoch": 0.96, + "learning_rate": 1.0446773423599358e-05, + "loss": 2.7637, + "step": 132200 + }, + { + "epoch": 0.96, + "learning_rate": 1.0439540569080995e-05, + "loss": 2.7743, + "step": 132300 + }, + { + "epoch": 0.96, + "learning_rate": 1.0432307714562631e-05, + "loss": 2.7649, + "step": 132400 + }, + { + "epoch": 0.96, + "learning_rate": 1.0425074860044266e-05, + "loss": 2.7672, + "step": 132500 + }, + { + "epoch": 0.96, + "learning_rate": 1.0417842005525903e-05, + "loss": 2.7683, + "step": 132600 + }, + { + "epoch": 0.96, + "learning_rate": 1.0410609151007537e-05, + "loss": 2.7668, + "step": 132700 + }, + { + "epoch": 0.96, + "learning_rate": 1.0403376296489174e-05, + "loss": 2.7582, + "step": 132800 + }, + { + "epoch": 0.96, + "learning_rate": 1.0396143441970809e-05, + "loss": 2.7663, + "step": 132900 + }, + { + "epoch": 0.96, + "learning_rate": 1.0388910587452445e-05, + "loss": 2.7719, + "step": 133000 + }, + { + "epoch": 0.96, + "eval_accuracy": 0.45850656165597503, + "eval_loss": 2.782144069671631, + "eval_runtime": 29.573, + "eval_samples_per_second": 219.22, + "eval_steps_per_second": 2.299, + "step": 133000 + }, + { + "epoch": 0.96, + "learning_rate": 1.0381677732934082e-05, + "loss": 2.77, + "step": 133100 + }, + { + "epoch": 0.96, + "learning_rate": 1.0374444878415717e-05, + "loss": 2.7684, + "step": 133200 + }, + { + "epoch": 0.96, + "learning_rate": 1.0367212023897353e-05, + "loss": 2.7562, + "step": 133300 + }, + { + "epoch": 0.96, + "learning_rate": 1.0359979169378988e-05, + "loss": 2.7682, + "step": 133400 + }, + { + "epoch": 0.97, + "learning_rate": 1.0352746314860625e-05, + "loss": 2.7686, + "step": 133500 + }, + { + "epoch": 0.97, + "learning_rate": 1.0345513460342261e-05, + "loss": 2.7624, + "step": 133600 + }, + { + "epoch": 0.97, + "learning_rate": 1.0338352934369077e-05, + "loss": 2.7643, + "step": 133700 + }, + { + "epoch": 0.97, + "learning_rate": 1.0331120079850714e-05, + "loss": 2.7725, + "step": 133800 + }, + { + "epoch": 0.97, + "learning_rate": 1.0323959553877534e-05, + "loss": 2.7617, + "step": 133900 + }, + { + "epoch": 0.97, + "learning_rate": 1.031672669935917e-05, + "loss": 2.7723, + "step": 134000 + }, + { + "epoch": 0.97, + "eval_accuracy": 0.45827972404448436, + "eval_loss": 2.781625747680664, + "eval_runtime": 30.6595, + "eval_samples_per_second": 211.452, + "eval_steps_per_second": 2.218, + "step": 134000 + }, + { + "epoch": 0.97, + "learning_rate": 1.0309493844840805e-05, + "loss": 2.7677, + "step": 134100 + }, + { + "epoch": 0.97, + "learning_rate": 1.0302260990322441e-05, + "loss": 2.7655, + "step": 134200 + }, + { + "epoch": 0.97, + "learning_rate": 1.0295028135804076e-05, + "loss": 2.7718, + "step": 134300 + }, + { + "epoch": 0.97, + "learning_rate": 1.0287795281285713e-05, + "loss": 2.7667, + "step": 134400 + }, + { + "epoch": 0.97, + "learning_rate": 1.028056242676735e-05, + "loss": 2.7639, + "step": 134500 + }, + { + "epoch": 0.97, + "learning_rate": 1.0273329572248984e-05, + "loss": 2.771, + "step": 134600 + }, + { + "epoch": 0.97, + "learning_rate": 1.026609671773062e-05, + "loss": 2.7588, + "step": 134700 + }, + { + "epoch": 0.97, + "learning_rate": 1.0258863863212255e-05, + "loss": 2.7687, + "step": 134800 + }, + { + "epoch": 0.98, + "learning_rate": 1.0251631008693892e-05, + "loss": 2.7606, + "step": 134900 + }, + { + "epoch": 0.98, + "learning_rate": 1.0244398154175529e-05, + "loss": 2.7736, + "step": 135000 + }, + { + "epoch": 0.98, + "eval_accuracy": 0.45850595675567773, + "eval_loss": 2.7812275886535645, + "eval_runtime": 31.0326, + "eval_samples_per_second": 208.91, + "eval_steps_per_second": 2.191, + "step": 135000 + }, + { + "epoch": 0.98, + "learning_rate": 1.0237165299657163e-05, + "loss": 2.768, + "step": 135100 + }, + { + "epoch": 0.98, + "learning_rate": 1.02299324451388e-05, + "loss": 2.7736, + "step": 135200 + }, + { + "epoch": 0.98, + "learning_rate": 1.0222699590620435e-05, + "loss": 2.7712, + "step": 135300 + }, + { + "epoch": 0.98, + "learning_rate": 1.0215466736102071e-05, + "loss": 2.7636, + "step": 135400 + }, + { + "epoch": 0.98, + "learning_rate": 1.0208306210128891e-05, + "loss": 2.7671, + "step": 135500 + }, + { + "epoch": 0.98, + "learning_rate": 1.0201073355610527e-05, + "loss": 2.7746, + "step": 135600 + }, + { + "epoch": 0.98, + "learning_rate": 1.0193840501092162e-05, + "loss": 2.7717, + "step": 135700 + }, + { + "epoch": 0.98, + "learning_rate": 1.0186607646573799e-05, + "loss": 2.7739, + "step": 135800 + }, + { + "epoch": 0.98, + "learning_rate": 1.0179374792055434e-05, + "loss": 2.766, + "step": 135900 + }, + { + "epoch": 0.98, + "learning_rate": 1.017214193753707e-05, + "loss": 2.7646, + "step": 136000 + }, + { + "epoch": 0.98, + "eval_accuracy": 0.4585743104892736, + "eval_loss": 2.7808570861816406, + "eval_runtime": 30.7384, + "eval_samples_per_second": 210.909, + "eval_steps_per_second": 2.212, + "step": 136000 + }, + { + "epoch": 0.98, + "learning_rate": 1.0164909083018707e-05, + "loss": 2.7681, + "step": 136100 + }, + { + "epoch": 0.99, + "learning_rate": 1.0157676228500341e-05, + "loss": 2.766, + "step": 136200 + }, + { + "epoch": 0.99, + "learning_rate": 1.0150443373981978e-05, + "loss": 2.7689, + "step": 136300 + }, + { + "epoch": 0.99, + "learning_rate": 1.0143210519463613e-05, + "loss": 2.7692, + "step": 136400 + }, + { + "epoch": 0.99, + "learning_rate": 1.013597766494525e-05, + "loss": 2.766, + "step": 136500 + }, + { + "epoch": 0.99, + "learning_rate": 1.0128744810426884e-05, + "loss": 2.7685, + "step": 136600 + }, + { + "epoch": 0.99, + "learning_rate": 1.012151195590852e-05, + "loss": 2.7625, + "step": 136700 + }, + { + "epoch": 0.99, + "learning_rate": 1.0114351429935339e-05, + "loss": 2.7596, + "step": 136800 + }, + { + "epoch": 0.99, + "learning_rate": 1.0107118575416973e-05, + "loss": 2.7644, + "step": 136900 + }, + { + "epoch": 0.99, + "learning_rate": 1.009988572089861e-05, + "loss": 2.76, + "step": 137000 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.458596691800274, + "eval_loss": 2.7804572582244873, + "eval_runtime": 29.4041, + "eval_samples_per_second": 220.48, + "eval_steps_per_second": 2.313, + "step": 137000 + }, + { + "epoch": 0.99, + "learning_rate": 1.0092652866380247e-05, + "loss": 2.7618, + "step": 137100 + }, + { + "epoch": 0.99, + "learning_rate": 1.0085420011861881e-05, + "loss": 2.7663, + "step": 137200 + }, + { + "epoch": 0.99, + "learning_rate": 1.0078187157343518e-05, + "loss": 2.7684, + "step": 137300 + }, + { + "epoch": 0.99, + "learning_rate": 1.0071026631370338e-05, + "loss": 2.7632, + "step": 137400 + }, + { + "epoch": 0.99, + "learning_rate": 1.0063793776851974e-05, + "loss": 2.7688, + "step": 137500 + }, + { + "epoch": 1.0, + "learning_rate": 1.0056560922333609e-05, + "loss": 2.7616, + "step": 137600 + }, + { + "epoch": 1.0, + "learning_rate": 1.0049328067815245e-05, + "loss": 2.7634, + "step": 137700 + }, + { + "epoch": 1.0, + "learning_rate": 1.004209521329688e-05, + "loss": 2.7608, + "step": 137800 + }, + { + "epoch": 1.0, + "learning_rate": 1.0034862358778517e-05, + "loss": 2.7697, + "step": 137900 + }, + { + "epoch": 1.0, + "learning_rate": 1.0027629504260152e-05, + "loss": 2.7659, + "step": 138000 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.4585610026827328, + "eval_loss": 2.780273914337158, + "eval_runtime": 29.5606, + "eval_samples_per_second": 219.313, + "eval_steps_per_second": 2.3, + "step": 138000 + }, + { + "epoch": 1.0, + "learning_rate": 1.0020396649741788e-05, + "loss": 2.7704, + "step": 138100 + }, + { + "epoch": 1.0, + "learning_rate": 1.0013163795223425e-05, + "loss": 2.7639, + "step": 138200 + }, + { + "epoch": 1.0, + "learning_rate": 1.000593094070506e-05, + "loss": 2.7586, + "step": 138300 + }, + { + "epoch": 1.0, + "learning_rate": 9.998698086186696e-06, + "loss": 2.7646, + "step": 138400 + }, + { + "epoch": 1.0, + "learning_rate": 9.99146523166833e-06, + "loss": 2.7588, + "step": 138500 + }, + { + "epoch": 1.0, + "learning_rate": 9.984232377149967e-06, + "loss": 2.7584, + "step": 138600 + }, + { + "epoch": 1.0, + "learning_rate": 9.976999522631604e-06, + "loss": 2.7627, + "step": 138700 + }, + { + "epoch": 1.0, + "learning_rate": 9.969766668113239e-06, + "loss": 2.7523, + "step": 138800 + }, + { + "epoch": 1.0, + "learning_rate": 9.962533813594875e-06, + "loss": 2.7636, + "step": 138900 + }, + { + "epoch": 1.01, + "learning_rate": 9.95530095907651e-06, + "loss": 2.7604, + "step": 139000 + }, + { + "epoch": 1.01, + "eval_accuracy": 0.45872069636122226, + "eval_loss": 2.7798776626586914, + "eval_runtime": 29.5234, + "eval_samples_per_second": 219.589, + "eval_steps_per_second": 2.303, + "step": 139000 + }, + { + "epoch": 1.01, + "learning_rate": 9.948068104558146e-06, + "loss": 2.7533, + "step": 139100 + }, + { + "epoch": 1.01, + "learning_rate": 9.940835250039781e-06, + "loss": 2.7478, + "step": 139200 + }, + { + "epoch": 1.01, + "learning_rate": 9.933602395521418e-06, + "loss": 2.762, + "step": 139300 + }, + { + "epoch": 1.01, + "learning_rate": 9.926369541003054e-06, + "loss": 2.7616, + "step": 139400 + }, + { + "epoch": 1.01, + "learning_rate": 9.919136686484689e-06, + "loss": 2.766, + "step": 139500 + }, + { + "epoch": 1.01, + "learning_rate": 9.911903831966324e-06, + "loss": 2.762, + "step": 139600 + }, + { + "epoch": 1.01, + "learning_rate": 9.90467097744796e-06, + "loss": 2.7599, + "step": 139700 + }, + { + "epoch": 1.01, + "learning_rate": 9.897438122929595e-06, + "loss": 2.7588, + "step": 139800 + }, + { + "epoch": 1.01, + "learning_rate": 9.890205268411232e-06, + "loss": 2.752, + "step": 139900 + }, + { + "epoch": 1.01, + "learning_rate": 9.882972413892867e-06, + "loss": 2.7597, + "step": 140000 + }, + { + "epoch": 1.01, + "eval_accuracy": 0.4586741190383295, + "eval_loss": 2.7793562412261963, + "eval_runtime": 32.292, + "eval_samples_per_second": 200.762, + "eval_steps_per_second": 2.106, + "step": 140000 + }, + { + "epoch": 1.01, + "learning_rate": 9.875739559374503e-06, + "loss": 2.7523, + "step": 140100 + }, + { + "epoch": 1.01, + "learning_rate": 9.86850670485614e-06, + "loss": 2.7645, + "step": 140200 + }, + { + "epoch": 1.01, + "learning_rate": 9.861273850337774e-06, + "loss": 2.7598, + "step": 140300 + }, + { + "epoch": 1.02, + "learning_rate": 9.854040995819411e-06, + "loss": 2.7589, + "step": 140400 + }, + { + "epoch": 1.02, + "learning_rate": 9.846808141301046e-06, + "loss": 2.755, + "step": 140500 + }, + { + "epoch": 1.02, + "learning_rate": 9.839575286782682e-06, + "loss": 2.7631, + "step": 140600 + }, + { + "epoch": 1.02, + "learning_rate": 9.832342432264317e-06, + "loss": 2.7571, + "step": 140700 + }, + { + "epoch": 1.02, + "learning_rate": 9.825109577745954e-06, + "loss": 2.7596, + "step": 140800 + }, + { + "epoch": 1.02, + "learning_rate": 9.817949051772773e-06, + "loss": 2.7562, + "step": 140900 + }, + { + "epoch": 1.02, + "learning_rate": 9.81071619725441e-06, + "loss": 2.7551, + "step": 141000 + }, + { + "epoch": 1.02, + "eval_accuracy": 0.45877937169006117, + "eval_loss": 2.7791290283203125, + "eval_runtime": 29.4298, + "eval_samples_per_second": 220.287, + "eval_steps_per_second": 2.311, + "step": 141000 + }, + { + "epoch": 1.02, + "learning_rate": 9.803483342736045e-06, + "loss": 2.753, + "step": 141100 + }, + { + "epoch": 1.02, + "learning_rate": 9.796250488217681e-06, + "loss": 2.7499, + "step": 141200 + }, + { + "epoch": 1.02, + "learning_rate": 9.789017633699318e-06, + "loss": 2.7595, + "step": 141300 + }, + { + "epoch": 1.02, + "learning_rate": 9.781784779180953e-06, + "loss": 2.7612, + "step": 141400 + }, + { + "epoch": 1.02, + "learning_rate": 9.774551924662589e-06, + "loss": 2.7634, + "step": 141500 + }, + { + "epoch": 1.02, + "learning_rate": 9.767319070144224e-06, + "loss": 2.749, + "step": 141600 + }, + { + "epoch": 1.02, + "learning_rate": 9.76008621562586e-06, + "loss": 2.7567, + "step": 141700 + }, + { + "epoch": 1.03, + "learning_rate": 9.752853361107495e-06, + "loss": 2.7546, + "step": 141800 + }, + { + "epoch": 1.03, + "learning_rate": 9.74562050658913e-06, + "loss": 2.7588, + "step": 141900 + }, + { + "epoch": 1.03, + "learning_rate": 9.738387652070767e-06, + "loss": 2.7619, + "step": 142000 + }, + { + "epoch": 1.03, + "eval_accuracy": 0.4588090118046293, + "eval_loss": 2.7787861824035645, + "eval_runtime": 29.6893, + "eval_samples_per_second": 218.361, + "eval_steps_per_second": 2.29, + "step": 142000 + }, + { + "epoch": 1.03, + "learning_rate": 9.731227126097586e-06, + "loss": 2.7533, + "step": 142100 + }, + { + "epoch": 1.03, + "learning_rate": 9.723994271579223e-06, + "loss": 2.7554, + "step": 142200 + }, + { + "epoch": 1.03, + "learning_rate": 9.716761417060858e-06, + "loss": 2.7627, + "step": 142300 + }, + { + "epoch": 1.03, + "learning_rate": 9.709528562542494e-06, + "loss": 2.7635, + "step": 142400 + }, + { + "epoch": 1.03, + "learning_rate": 9.702368036569312e-06, + "loss": 2.757, + "step": 142500 + }, + { + "epoch": 1.03, + "learning_rate": 9.695135182050949e-06, + "loss": 2.7527, + "step": 142600 + }, + { + "epoch": 1.03, + "learning_rate": 9.687902327532585e-06, + "loss": 2.761, + "step": 142700 + }, + { + "epoch": 1.03, + "learning_rate": 9.68066947301422e-06, + "loss": 2.7562, + "step": 142800 + }, + { + "epoch": 1.03, + "learning_rate": 9.673436618495857e-06, + "loss": 2.7565, + "step": 142900 + }, + { + "epoch": 1.03, + "learning_rate": 9.666203763977491e-06, + "loss": 2.7658, + "step": 143000 + }, + { + "epoch": 1.03, + "eval_accuracy": 0.45889793214833363, + "eval_loss": 2.778470754623413, + "eval_runtime": 31.4699, + "eval_samples_per_second": 206.006, + "eval_steps_per_second": 2.161, + "step": 143000 + }, + { + "epoch": 1.04, + "learning_rate": 9.658970909459128e-06, + "loss": 2.7582, + "step": 143100 + }, + { + "epoch": 1.04, + "learning_rate": 9.651738054940764e-06, + "loss": 2.7627, + "step": 143200 + }, + { + "epoch": 1.04, + "learning_rate": 9.6445052004224e-06, + "loss": 2.7568, + "step": 143300 + }, + { + "epoch": 1.04, + "learning_rate": 9.637272345904036e-06, + "loss": 2.7529, + "step": 143400 + }, + { + "epoch": 1.04, + "learning_rate": 9.63003949138567e-06, + "loss": 2.7641, + "step": 143500 + }, + { + "epoch": 1.04, + "learning_rate": 9.622806636867307e-06, + "loss": 2.7539, + "step": 143600 + }, + { + "epoch": 1.04, + "learning_rate": 9.615573782348942e-06, + "loss": 2.7585, + "step": 143700 + }, + { + "epoch": 1.04, + "learning_rate": 9.608340927830578e-06, + "loss": 2.7628, + "step": 143800 + }, + { + "epoch": 1.04, + "learning_rate": 9.601108073312215e-06, + "loss": 2.763, + "step": 143900 + }, + { + "epoch": 1.04, + "learning_rate": 9.59387521879385e-06, + "loss": 2.751, + "step": 144000 + }, + { + "epoch": 1.04, + "eval_accuracy": 0.45885679892811665, + "eval_loss": 2.778137683868408, + "eval_runtime": 32.5333, + "eval_samples_per_second": 199.273, + "eval_steps_per_second": 2.09, + "step": 144000 + }, + { + "epoch": 1.04, + "learning_rate": 9.586642364275486e-06, + "loss": 2.7642, + "step": 144100 + }, + { + "epoch": 1.04, + "learning_rate": 9.579409509757121e-06, + "loss": 2.7635, + "step": 144200 + }, + { + "epoch": 1.04, + "learning_rate": 9.572176655238758e-06, + "loss": 2.7698, + "step": 144300 + }, + { + "epoch": 1.04, + "learning_rate": 9.564943800720392e-06, + "loss": 2.7617, + "step": 144400 + }, + { + "epoch": 1.05, + "learning_rate": 9.557710946202029e-06, + "loss": 2.7541, + "step": 144500 + }, + { + "epoch": 1.05, + "learning_rate": 9.550478091683665e-06, + "loss": 2.7584, + "step": 144600 + }, + { + "epoch": 1.05, + "learning_rate": 9.5432452371653e-06, + "loss": 2.7594, + "step": 144700 + }, + { + "epoch": 1.05, + "learning_rate": 9.536012382646937e-06, + "loss": 2.7506, + "step": 144800 + }, + { + "epoch": 1.05, + "learning_rate": 9.528779528128572e-06, + "loss": 2.7573, + "step": 144900 + }, + { + "epoch": 1.05, + "learning_rate": 9.521546673610208e-06, + "loss": 2.7589, + "step": 145000 + }, + { + "epoch": 1.05, + "eval_accuracy": 0.45895902707836184, + "eval_loss": 2.7777721881866455, + "eval_runtime": 29.5823, + "eval_samples_per_second": 219.151, + "eval_steps_per_second": 2.299, + "step": 145000 + }, + { + "epoch": 1.05, + "learning_rate": 9.514313819091845e-06, + "loss": 2.7514, + "step": 145100 + }, + { + "epoch": 1.05, + "learning_rate": 9.50708096457348e-06, + "loss": 2.7523, + "step": 145200 + }, + { + "epoch": 1.05, + "learning_rate": 9.499848110055116e-06, + "loss": 2.7626, + "step": 145300 + }, + { + "epoch": 1.05, + "learning_rate": 9.49261525553675e-06, + "loss": 2.7541, + "step": 145400 + }, + { + "epoch": 1.05, + "learning_rate": 9.485382401018387e-06, + "loss": 2.7624, + "step": 145500 + }, + { + "epoch": 1.05, + "learning_rate": 9.478221875045205e-06, + "loss": 2.7604, + "step": 145600 + }, + { + "epoch": 1.05, + "learning_rate": 9.470989020526842e-06, + "loss": 2.7576, + "step": 145700 + }, + { + "epoch": 1.05, + "learning_rate": 9.463828494553662e-06, + "loss": 2.7547, + "step": 145800 + }, + { + "epoch": 1.06, + "learning_rate": 9.456595640035298e-06, + "loss": 2.7526, + "step": 145900 + }, + { + "epoch": 1.06, + "learning_rate": 9.449362785516933e-06, + "loss": 2.7459, + "step": 146000 + }, + { + "epoch": 1.06, + "eval_accuracy": 0.4589529780753887, + "eval_loss": 2.777561902999878, + "eval_runtime": 29.3866, + "eval_samples_per_second": 220.611, + "eval_steps_per_second": 2.314, + "step": 146000 + }, + { + "epoch": 1.06, + "learning_rate": 9.442129930998568e-06, + "loss": 2.7579, + "step": 146100 + }, + { + "epoch": 1.06, + "learning_rate": 9.434897076480204e-06, + "loss": 2.7543, + "step": 146200 + }, + { + "epoch": 1.06, + "learning_rate": 9.427664221961839e-06, + "loss": 2.7608, + "step": 146300 + }, + { + "epoch": 1.06, + "learning_rate": 9.420431367443476e-06, + "loss": 2.7595, + "step": 146400 + }, + { + "epoch": 1.06, + "learning_rate": 9.413198512925112e-06, + "loss": 2.7531, + "step": 146500 + }, + { + "epoch": 1.06, + "learning_rate": 9.405965658406747e-06, + "loss": 2.7576, + "step": 146600 + }, + { + "epoch": 1.06, + "learning_rate": 9.398732803888383e-06, + "loss": 2.7546, + "step": 146700 + }, + { + "epoch": 1.06, + "learning_rate": 9.391499949370018e-06, + "loss": 2.7465, + "step": 146800 + }, + { + "epoch": 1.06, + "learning_rate": 9.384267094851655e-06, + "loss": 2.7597, + "step": 146900 + }, + { + "epoch": 1.06, + "learning_rate": 9.37703424033329e-06, + "loss": 2.7646, + "step": 147000 + }, + { + "epoch": 1.06, + "eval_accuracy": 0.4591423118684463, + "eval_loss": 2.7770681381225586, + "eval_runtime": 29.9251, + "eval_samples_per_second": 216.641, + "eval_steps_per_second": 2.272, + "step": 147000 + }, + { + "epoch": 1.06, + "learning_rate": 9.369801385814926e-06, + "loss": 2.7594, + "step": 147100 + }, + { + "epoch": 1.06, + "learning_rate": 9.362568531296563e-06, + "loss": 2.7502, + "step": 147200 + }, + { + "epoch": 1.07, + "learning_rate": 9.355335676778197e-06, + "loss": 2.7557, + "step": 147300 + }, + { + "epoch": 1.07, + "learning_rate": 9.348102822259834e-06, + "loss": 2.7589, + "step": 147400 + }, + { + "epoch": 1.07, + "learning_rate": 9.340942296286654e-06, + "loss": 2.7468, + "step": 147500 + }, + { + "epoch": 1.07, + "learning_rate": 9.33370944176829e-06, + "loss": 2.7484, + "step": 147600 + }, + { + "epoch": 1.07, + "learning_rate": 9.326476587249925e-06, + "loss": 2.7559, + "step": 147700 + }, + { + "epoch": 1.07, + "learning_rate": 9.319243732731561e-06, + "loss": 2.7502, + "step": 147800 + }, + { + "epoch": 1.07, + "learning_rate": 9.312010878213196e-06, + "loss": 2.7512, + "step": 147900 + }, + { + "epoch": 1.07, + "learning_rate": 9.304778023694833e-06, + "loss": 2.7529, + "step": 148000 + }, + { + "epoch": 1.07, + "eval_accuracy": 0.4589487437733076, + "eval_loss": 2.7767789363861084, + "eval_runtime": 29.4704, + "eval_samples_per_second": 219.984, + "eval_steps_per_second": 2.307, + "step": 148000 + }, + { + "epoch": 1.07, + "learning_rate": 9.297545169176468e-06, + "loss": 2.7583, + "step": 148100 + }, + { + "epoch": 1.07, + "learning_rate": 9.290312314658104e-06, + "loss": 2.7579, + "step": 148200 + }, + { + "epoch": 1.07, + "learning_rate": 9.283151788684924e-06, + "loss": 2.7561, + "step": 148300 + }, + { + "epoch": 1.07, + "learning_rate": 9.275918934166559e-06, + "loss": 2.7596, + "step": 148400 + }, + { + "epoch": 1.07, + "learning_rate": 9.268686079648195e-06, + "loss": 2.7601, + "step": 148500 + }, + { + "epoch": 1.07, + "learning_rate": 9.26145322512983e-06, + "loss": 2.7478, + "step": 148600 + }, + { + "epoch": 1.08, + "learning_rate": 9.254220370611467e-06, + "loss": 2.7583, + "step": 148700 + }, + { + "epoch": 1.08, + "learning_rate": 9.246987516093101e-06, + "loss": 2.7545, + "step": 148800 + }, + { + "epoch": 1.08, + "learning_rate": 9.239754661574738e-06, + "loss": 2.7507, + "step": 148900 + }, + { + "epoch": 1.08, + "learning_rate": 9.232521807056374e-06, + "loss": 2.7573, + "step": 149000 + }, + { + "epoch": 1.08, + "eval_accuracy": 0.4591822352880686, + "eval_loss": 2.7764034271240234, + "eval_runtime": 29.9267, + "eval_samples_per_second": 216.63, + "eval_steps_per_second": 2.272, + "step": 149000 + }, + { + "epoch": 1.08, + "learning_rate": 9.22528895253801e-06, + "loss": 2.7514, + "step": 149100 + }, + { + "epoch": 1.08, + "learning_rate": 9.218056098019646e-06, + "loss": 2.7569, + "step": 149200 + }, + { + "epoch": 1.08, + "learning_rate": 9.21082324350128e-06, + "loss": 2.757, + "step": 149300 + }, + { + "epoch": 1.08, + "learning_rate": 9.203590388982917e-06, + "loss": 2.7477, + "step": 149400 + }, + { + "epoch": 1.08, + "learning_rate": 9.196357534464554e-06, + "loss": 2.7616, + "step": 149500 + }, + { + "epoch": 1.08, + "learning_rate": 9.189124679946188e-06, + "loss": 2.7647, + "step": 149600 + }, + { + "epoch": 1.08, + "learning_rate": 9.181891825427825e-06, + "loss": 2.7509, + "step": 149700 + }, + { + "epoch": 1.08, + "learning_rate": 9.174731299454643e-06, + "loss": 2.7546, + "step": 149800 + }, + { + "epoch": 1.08, + "learning_rate": 9.16749844493628e-06, + "loss": 2.7492, + "step": 149900 + }, + { + "epoch": 1.08, + "learning_rate": 9.160265590417914e-06, + "loss": 2.754, + "step": 150000 + }, + { + "epoch": 1.08, + "eval_accuracy": 0.459124164859527, + "eval_loss": 2.7761712074279785, + "eval_runtime": 30.2068, + "eval_samples_per_second": 214.621, + "eval_steps_per_second": 2.251, + "step": 150000 + }, + { + "epoch": 1.09, + "learning_rate": 9.15303273589955e-06, + "loss": 2.7537, + "step": 150100 + }, + { + "epoch": 1.09, + "learning_rate": 9.145799881381187e-06, + "loss": 2.753, + "step": 150200 + }, + { + "epoch": 1.09, + "learning_rate": 9.138567026862822e-06, + "loss": 2.7594, + "step": 150300 + }, + { + "epoch": 1.09, + "learning_rate": 9.131334172344459e-06, + "loss": 2.7595, + "step": 150400 + }, + { + "epoch": 1.09, + "learning_rate": 9.124101317826093e-06, + "loss": 2.7532, + "step": 150500 + }, + { + "epoch": 1.09, + "learning_rate": 9.11686846330773e-06, + "loss": 2.7571, + "step": 150600 + }, + { + "epoch": 1.09, + "learning_rate": 9.109635608789365e-06, + "loss": 2.7499, + "step": 150700 + }, + { + "epoch": 1.09, + "learning_rate": 9.102475082816185e-06, + "loss": 2.7486, + "step": 150800 + }, + { + "epoch": 1.09, + "learning_rate": 9.095242228297821e-06, + "loss": 2.7537, + "step": 150900 + }, + { + "epoch": 1.09, + "learning_rate": 9.088009373779456e-06, + "loss": 2.7553, + "step": 151000 + }, + { + "epoch": 1.09, + "eval_accuracy": 0.45908545124049926, + "eval_loss": 2.7759199142456055, + "eval_runtime": 31.1913, + "eval_samples_per_second": 207.846, + "eval_steps_per_second": 2.18, + "step": 151000 + }, + { + "epoch": 1.09, + "learning_rate": 9.080776519261092e-06, + "loss": 2.7495, + "step": 151100 + }, + { + "epoch": 1.09, + "learning_rate": 9.073543664742727e-06, + "loss": 2.7562, + "step": 151200 + }, + { + "epoch": 1.09, + "learning_rate": 9.066310810224364e-06, + "loss": 2.7567, + "step": 151300 + }, + { + "epoch": 1.1, + "learning_rate": 9.059077955705999e-06, + "loss": 2.7532, + "step": 151400 + }, + { + "epoch": 1.1, + "learning_rate": 9.051845101187635e-06, + "loss": 2.7485, + "step": 151500 + }, + { + "epoch": 1.1, + "learning_rate": 9.044612246669272e-06, + "loss": 2.7502, + "step": 151600 + }, + { + "epoch": 1.1, + "learning_rate": 9.037379392150906e-06, + "loss": 2.756, + "step": 151700 + }, + { + "epoch": 1.1, + "learning_rate": 9.030146537632543e-06, + "loss": 2.7559, + "step": 151800 + }, + { + "epoch": 1.1, + "learning_rate": 9.022913683114178e-06, + "loss": 2.753, + "step": 151900 + }, + { + "epoch": 1.1, + "learning_rate": 9.015680828595814e-06, + "loss": 2.7485, + "step": 152000 + }, + { + "epoch": 1.1, + "eval_accuracy": 0.4592844634383138, + "eval_loss": 2.7755496501922607, + "eval_runtime": 34.9091, + "eval_samples_per_second": 185.711, + "eval_steps_per_second": 1.948, + "step": 152000 + }, + { + "epoch": 1.1, + "learning_rate": 9.00844797407745e-06, + "loss": 2.7569, + "step": 152100 + }, + { + "epoch": 1.1, + "learning_rate": 9.001215119559086e-06, + "loss": 2.7532, + "step": 152200 + }, + { + "epoch": 1.1, + "learning_rate": 8.993982265040722e-06, + "loss": 2.7619, + "step": 152300 + }, + { + "epoch": 1.1, + "learning_rate": 8.986749410522357e-06, + "loss": 2.754, + "step": 152400 + }, + { + "epoch": 1.1, + "learning_rate": 8.979516556003993e-06, + "loss": 2.7528, + "step": 152500 + }, + { + "epoch": 1.1, + "learning_rate": 8.972283701485628e-06, + "loss": 2.761, + "step": 152600 + }, + { + "epoch": 1.1, + "learning_rate": 8.965050846967265e-06, + "loss": 2.7578, + "step": 152700 + }, + { + "epoch": 1.11, + "learning_rate": 8.957817992448901e-06, + "loss": 2.7531, + "step": 152800 + }, + { + "epoch": 1.11, + "learning_rate": 8.950585137930536e-06, + "loss": 2.7593, + "step": 152900 + }, + { + "epoch": 1.11, + "learning_rate": 8.943352283412173e-06, + "loss": 2.7558, + "step": 153000 + }, + { + "epoch": 1.11, + "eval_accuracy": 0.45928869774039494, + "eval_loss": 2.7751994132995605, + "eval_runtime": 29.596, + "eval_samples_per_second": 219.05, + "eval_steps_per_second": 2.298, + "step": 153000 + }, + { + "epoch": 1.11, + "learning_rate": 8.936119428893807e-06, + "loss": 2.7529, + "step": 153100 + }, + { + "epoch": 1.11, + "learning_rate": 8.928886574375444e-06, + "loss": 2.7601, + "step": 153200 + }, + { + "epoch": 1.11, + "learning_rate": 8.92165371985708e-06, + "loss": 2.766, + "step": 153300 + }, + { + "epoch": 1.11, + "learning_rate": 8.914420865338715e-06, + "loss": 2.7564, + "step": 153400 + }, + { + "epoch": 1.11, + "learning_rate": 8.907188010820352e-06, + "loss": 2.7583, + "step": 153500 + }, + { + "epoch": 1.11, + "learning_rate": 8.899955156301987e-06, + "loss": 2.7546, + "step": 153600 + }, + { + "epoch": 1.11, + "learning_rate": 8.892722301783623e-06, + "loss": 2.752, + "step": 153700 + }, + { + "epoch": 1.11, + "learning_rate": 8.885489447265258e-06, + "loss": 2.7557, + "step": 153800 + }, + { + "epoch": 1.11, + "learning_rate": 8.878256592746894e-06, + "loss": 2.7539, + "step": 153900 + }, + { + "epoch": 1.11, + "learning_rate": 8.871023738228531e-06, + "loss": 2.7563, + "step": 154000 + }, + { + "epoch": 1.11, + "eval_accuracy": 0.459253613523151, + "eval_loss": 2.774827003479004, + "eval_runtime": 31.2857, + "eval_samples_per_second": 207.22, + "eval_steps_per_second": 2.174, + "step": 154000 + }, + { + "epoch": 1.11, + "learning_rate": 8.863790883710166e-06, + "loss": 2.7591, + "step": 154100 + }, + { + "epoch": 1.12, + "learning_rate": 8.856558029191802e-06, + "loss": 2.7548, + "step": 154200 + }, + { + "epoch": 1.12, + "learning_rate": 8.84939750321862e-06, + "loss": 2.7506, + "step": 154300 + }, + { + "epoch": 1.12, + "learning_rate": 8.842164648700257e-06, + "loss": 2.7549, + "step": 154400 + }, + { + "epoch": 1.12, + "learning_rate": 8.834931794181892e-06, + "loss": 2.7589, + "step": 154500 + }, + { + "epoch": 1.12, + "learning_rate": 8.827698939663528e-06, + "loss": 2.7581, + "step": 154600 + }, + { + "epoch": 1.12, + "learning_rate": 8.820466085145165e-06, + "loss": 2.7543, + "step": 154700 + }, + { + "epoch": 1.12, + "learning_rate": 8.8132332306268e-06, + "loss": 2.7567, + "step": 154800 + }, + { + "epoch": 1.12, + "learning_rate": 8.806000376108436e-06, + "loss": 2.7518, + "step": 154900 + }, + { + "epoch": 1.12, + "learning_rate": 8.798767521590071e-06, + "loss": 2.7557, + "step": 155000 + }, + { + "epoch": 1.12, + "eval_accuracy": 0.4593570514739908, + "eval_loss": 2.774669647216797, + "eval_runtime": 29.7619, + "eval_samples_per_second": 217.829, + "eval_steps_per_second": 2.285, + "step": 155000 + }, + { + "epoch": 1.12, + "learning_rate": 8.791534667071707e-06, + "loss": 2.7495, + "step": 155100 + }, + { + "epoch": 1.12, + "learning_rate": 8.784301812553342e-06, + "loss": 2.7528, + "step": 155200 + }, + { + "epoch": 1.12, + "learning_rate": 8.777068958034979e-06, + "loss": 2.7503, + "step": 155300 + }, + { + "epoch": 1.12, + "learning_rate": 8.769836103516615e-06, + "loss": 2.7503, + "step": 155400 + }, + { + "epoch": 1.12, + "learning_rate": 8.762675577543433e-06, + "loss": 2.7492, + "step": 155500 + }, + { + "epoch": 1.13, + "learning_rate": 8.75544272302507e-06, + "loss": 2.7544, + "step": 155600 + }, + { + "epoch": 1.13, + "learning_rate": 8.748209868506705e-06, + "loss": 2.7547, + "step": 155700 + }, + { + "epoch": 1.13, + "learning_rate": 8.740977013988341e-06, + "loss": 2.7446, + "step": 155800 + }, + { + "epoch": 1.13, + "learning_rate": 8.733744159469978e-06, + "loss": 2.753, + "step": 155900 + }, + { + "epoch": 1.13, + "learning_rate": 8.726511304951612e-06, + "loss": 2.7593, + "step": 156000 + }, + { + "epoch": 1.13, + "eval_accuracy": 0.4591931234934202, + "eval_loss": 2.7744040489196777, + "eval_runtime": 29.5223, + "eval_samples_per_second": 219.597, + "eval_steps_per_second": 2.303, + "step": 156000 + }, + { + "epoch": 1.13, + "learning_rate": 8.719278450433249e-06, + "loss": 2.758, + "step": 156100 + }, + { + "epoch": 1.13, + "learning_rate": 8.712045595914884e-06, + "loss": 2.761, + "step": 156200 + }, + { + "epoch": 1.13, + "learning_rate": 8.70481274139652e-06, + "loss": 2.756, + "step": 156300 + }, + { + "epoch": 1.13, + "learning_rate": 8.697579886878155e-06, + "loss": 2.7484, + "step": 156400 + }, + { + "epoch": 1.13, + "learning_rate": 8.690347032359792e-06, + "loss": 2.7491, + "step": 156500 + }, + { + "epoch": 1.13, + "learning_rate": 8.683114177841428e-06, + "loss": 2.7551, + "step": 156600 + }, + { + "epoch": 1.13, + "learning_rate": 8.675881323323063e-06, + "loss": 2.7515, + "step": 156700 + }, + { + "epoch": 1.13, + "learning_rate": 8.6686484688047e-06, + "loss": 2.7541, + "step": 156800 + }, + { + "epoch": 1.13, + "learning_rate": 8.66148794283152e-06, + "loss": 2.7546, + "step": 156900 + }, + { + "epoch": 1.14, + "learning_rate": 8.654255088313154e-06, + "loss": 2.752, + "step": 157000 + }, + { + "epoch": 1.14, + "eval_accuracy": 0.4592584527255295, + "eval_loss": 2.774146795272827, + "eval_runtime": 30.0141, + "eval_samples_per_second": 215.999, + "eval_steps_per_second": 2.266, + "step": 157000 + }, + { + "epoch": 1.14, + "learning_rate": 8.64702223379479e-06, + "loss": 2.7488, + "step": 157100 + }, + { + "epoch": 1.14, + "learning_rate": 8.639789379276427e-06, + "loss": 2.7539, + "step": 157200 + }, + { + "epoch": 1.14, + "learning_rate": 8.632556524758062e-06, + "loss": 2.7552, + "step": 157300 + }, + { + "epoch": 1.14, + "learning_rate": 8.625323670239697e-06, + "loss": 2.7566, + "step": 157400 + }, + { + "epoch": 1.14, + "learning_rate": 8.618090815721333e-06, + "loss": 2.7454, + "step": 157500 + }, + { + "epoch": 1.14, + "learning_rate": 8.610930289748153e-06, + "loss": 2.7527, + "step": 157600 + }, + { + "epoch": 1.14, + "learning_rate": 8.60369743522979e-06, + "loss": 2.7558, + "step": 157700 + }, + { + "epoch": 1.14, + "learning_rate": 8.596464580711424e-06, + "loss": 2.7579, + "step": 157800 + }, + { + "epoch": 1.14, + "learning_rate": 8.58923172619306e-06, + "loss": 2.7495, + "step": 157900 + }, + { + "epoch": 1.14, + "learning_rate": 8.581998871674696e-06, + "loss": 2.748, + "step": 158000 + }, + { + "epoch": 1.14, + "eval_accuracy": 0.4593262015588281, + "eval_loss": 2.773747205734253, + "eval_runtime": 30.6235, + "eval_samples_per_second": 211.7, + "eval_steps_per_second": 2.221, + "step": 158000 + }, + { + "epoch": 1.14, + "learning_rate": 8.574766017156332e-06, + "loss": 2.7465, + "step": 158100 + }, + { + "epoch": 1.14, + "learning_rate": 8.567533162637967e-06, + "loss": 2.7549, + "step": 158200 + }, + { + "epoch": 1.14, + "learning_rate": 8.560300308119603e-06, + "loss": 2.7489, + "step": 158300 + }, + { + "epoch": 1.15, + "learning_rate": 8.55306745360124e-06, + "loss": 2.7508, + "step": 158400 + }, + { + "epoch": 1.15, + "learning_rate": 8.545834599082875e-06, + "loss": 2.7512, + "step": 158500 + }, + { + "epoch": 1.15, + "learning_rate": 8.538601744564511e-06, + "loss": 2.7481, + "step": 158600 + }, + { + "epoch": 1.15, + "learning_rate": 8.531368890046146e-06, + "loss": 2.7517, + "step": 158700 + }, + { + "epoch": 1.15, + "learning_rate": 8.524136035527783e-06, + "loss": 2.7504, + "step": 158800 + }, + { + "epoch": 1.15, + "learning_rate": 8.516903181009417e-06, + "loss": 2.7538, + "step": 158900 + }, + { + "epoch": 1.15, + "learning_rate": 8.509670326491054e-06, + "loss": 2.7549, + "step": 159000 + }, + { + "epoch": 1.15, + "eval_accuracy": 0.4593860866882616, + "eval_loss": 2.773451566696167, + "eval_runtime": 27.8905, + "eval_samples_per_second": 232.445, + "eval_steps_per_second": 2.438, + "step": 159000 + }, + { + "epoch": 1.15, + "learning_rate": 8.50243747197269e-06, + "loss": 2.7499, + "step": 159100 + }, + { + "epoch": 1.15, + "learning_rate": 8.495204617454325e-06, + "loss": 2.7457, + "step": 159200 + }, + { + "epoch": 1.15, + "learning_rate": 8.487971762935962e-06, + "loss": 2.7566, + "step": 159300 + }, + { + "epoch": 1.15, + "learning_rate": 8.480738908417597e-06, + "loss": 2.7566, + "step": 159400 + }, + { + "epoch": 1.15, + "learning_rate": 8.473506053899233e-06, + "loss": 2.7539, + "step": 159500 + }, + { + "epoch": 1.15, + "learning_rate": 8.46627319938087e-06, + "loss": 2.7471, + "step": 159600 + }, + { + "epoch": 1.16, + "learning_rate": 8.459040344862503e-06, + "loss": 2.7586, + "step": 159700 + }, + { + "epoch": 1.16, + "learning_rate": 8.45180749034414e-06, + "loss": 2.7493, + "step": 159800 + }, + { + "epoch": 1.16, + "learning_rate": 8.444574635825776e-06, + "loss": 2.7554, + "step": 159900 + }, + { + "epoch": 1.16, + "learning_rate": 8.437414109852596e-06, + "loss": 2.7455, + "step": 160000 + }, + { + "epoch": 1.16, + "eval_accuracy": 0.45956574207656226, + "eval_loss": 2.7732744216918945, + "eval_runtime": 30.512, + "eval_samples_per_second": 212.474, + "eval_steps_per_second": 2.229, + "step": 160000 + }, + { + "epoch": 1.16, + "learning_rate": 8.43018125533423e-06, + "loss": 2.7551, + "step": 160100 + }, + { + "epoch": 1.16, + "learning_rate": 8.422948400815867e-06, + "loss": 2.7513, + "step": 160200 + }, + { + "epoch": 1.16, + "learning_rate": 8.415715546297503e-06, + "loss": 2.7594, + "step": 160300 + }, + { + "epoch": 1.16, + "learning_rate": 8.408555020324321e-06, + "loss": 2.7598, + "step": 160400 + }, + { + "epoch": 1.16, + "learning_rate": 8.401322165805958e-06, + "loss": 2.7547, + "step": 160500 + }, + { + "epoch": 1.16, + "learning_rate": 8.394089311287593e-06, + "loss": 2.7433, + "step": 160600 + }, + { + "epoch": 1.16, + "learning_rate": 8.38685645676923e-06, + "loss": 2.7512, + "step": 160700 + }, + { + "epoch": 1.16, + "learning_rate": 8.379623602250864e-06, + "loss": 2.7544, + "step": 160800 + }, + { + "epoch": 1.16, + "learning_rate": 8.3723907477325e-06, + "loss": 2.7475, + "step": 160900 + }, + { + "epoch": 1.16, + "learning_rate": 8.365157893214137e-06, + "loss": 2.7582, + "step": 161000 + }, + { + "epoch": 1.16, + "eval_accuracy": 0.45939636999331585, + "eval_loss": 2.7731149196624756, + "eval_runtime": 31.8214, + "eval_samples_per_second": 203.731, + "eval_steps_per_second": 2.137, + "step": 161000 + }, + { + "epoch": 1.17, + "learning_rate": 8.357925038695772e-06, + "loss": 2.747, + "step": 161100 + }, + { + "epoch": 1.17, + "learning_rate": 8.350692184177408e-06, + "loss": 2.7507, + "step": 161200 + }, + { + "epoch": 1.17, + "learning_rate": 8.343459329659043e-06, + "loss": 2.7441, + "step": 161300 + }, + { + "epoch": 1.17, + "learning_rate": 8.33622647514068e-06, + "loss": 2.7502, + "step": 161400 + }, + { + "epoch": 1.17, + "learning_rate": 8.328993620622315e-06, + "loss": 2.7447, + "step": 161500 + }, + { + "epoch": 1.17, + "learning_rate": 8.321760766103951e-06, + "loss": 2.7473, + "step": 161600 + }, + { + "epoch": 1.17, + "learning_rate": 8.314527911585588e-06, + "loss": 2.7443, + "step": 161700 + }, + { + "epoch": 1.17, + "learning_rate": 8.307295057067222e-06, + "loss": 2.7603, + "step": 161800 + }, + { + "epoch": 1.17, + "learning_rate": 8.300062202548859e-06, + "loss": 2.7555, + "step": 161900 + }, + { + "epoch": 1.17, + "learning_rate": 8.292829348030494e-06, + "loss": 2.7532, + "step": 162000 + }, + { + "epoch": 1.17, + "eval_accuracy": 0.45951795495307485, + "eval_loss": 2.7727766036987305, + "eval_runtime": 30.0745, + "eval_samples_per_second": 215.565, + "eval_steps_per_second": 2.261, + "step": 162000 + }, + { + "epoch": 1.17, + "learning_rate": 8.28559649351213e-06, + "loss": 2.7477, + "step": 162100 + }, + { + "epoch": 1.17, + "learning_rate": 8.278435967538948e-06, + "loss": 2.7539, + "step": 162200 + }, + { + "epoch": 1.17, + "learning_rate": 8.271203113020585e-06, + "loss": 2.7502, + "step": 162300 + }, + { + "epoch": 1.17, + "learning_rate": 8.263970258502221e-06, + "loss": 2.752, + "step": 162400 + }, + { + "epoch": 1.18, + "learning_rate": 8.256737403983856e-06, + "loss": 2.7551, + "step": 162500 + }, + { + "epoch": 1.18, + "learning_rate": 8.249504549465493e-06, + "loss": 2.7514, + "step": 162600 + }, + { + "epoch": 1.18, + "learning_rate": 8.242271694947128e-06, + "loss": 2.7475, + "step": 162700 + }, + { + "epoch": 1.18, + "learning_rate": 8.235038840428764e-06, + "loss": 2.751, + "step": 162800 + }, + { + "epoch": 1.18, + "learning_rate": 8.2278059859104e-06, + "loss": 2.7475, + "step": 162900 + }, + { + "epoch": 1.18, + "learning_rate": 8.220573131392035e-06, + "loss": 2.7496, + "step": 163000 + }, + { + "epoch": 1.18, + "eval_accuracy": 0.4595270284575345, + "eval_loss": 2.772428512573242, + "eval_runtime": 30.2915, + "eval_samples_per_second": 214.021, + "eval_steps_per_second": 2.245, + "step": 163000 + }, + { + "epoch": 1.18, + "learning_rate": 8.213340276873672e-06, + "loss": 2.7493, + "step": 163100 + }, + { + "epoch": 1.18, + "learning_rate": 8.206107422355307e-06, + "loss": 2.7641, + "step": 163200 + }, + { + "epoch": 1.18, + "learning_rate": 8.198874567836943e-06, + "loss": 2.7511, + "step": 163300 + }, + { + "epoch": 1.18, + "learning_rate": 8.191641713318578e-06, + "loss": 2.7539, + "step": 163400 + }, + { + "epoch": 1.18, + "learning_rate": 8.184408858800215e-06, + "loss": 2.7525, + "step": 163500 + }, + { + "epoch": 1.18, + "learning_rate": 8.177176004281851e-06, + "loss": 2.7518, + "step": 163600 + }, + { + "epoch": 1.18, + "learning_rate": 8.169943149763486e-06, + "loss": 2.7566, + "step": 163700 + }, + { + "epoch": 1.18, + "learning_rate": 8.162782623790306e-06, + "loss": 2.752, + "step": 163800 + }, + { + "epoch": 1.19, + "learning_rate": 8.155549769271942e-06, + "loss": 2.7538, + "step": 163900 + }, + { + "epoch": 1.19, + "learning_rate": 8.148316914753577e-06, + "loss": 2.75, + "step": 164000 + }, + { + "epoch": 1.19, + "eval_accuracy": 0.45964437911521233, + "eval_loss": 2.7721121311187744, + "eval_runtime": 29.4536, + "eval_samples_per_second": 220.109, + "eval_steps_per_second": 2.309, + "step": 164000 + }, + { + "epoch": 1.19, + "learning_rate": 8.141084060235212e-06, + "loss": 2.7486, + "step": 164100 + }, + { + "epoch": 1.19, + "learning_rate": 8.133851205716848e-06, + "loss": 2.7547, + "step": 164200 + }, + { + "epoch": 1.19, + "learning_rate": 8.126618351198485e-06, + "loss": 2.749, + "step": 164300 + }, + { + "epoch": 1.19, + "learning_rate": 8.11938549668012e-06, + "loss": 2.7498, + "step": 164400 + }, + { + "epoch": 1.19, + "learning_rate": 8.11222497070694e-06, + "loss": 2.7537, + "step": 164500 + }, + { + "epoch": 1.19, + "learning_rate": 8.104992116188576e-06, + "loss": 2.754, + "step": 164600 + }, + { + "epoch": 1.19, + "learning_rate": 8.097759261670212e-06, + "loss": 2.7577, + "step": 164700 + }, + { + "epoch": 1.19, + "learning_rate": 8.090526407151847e-06, + "loss": 2.7514, + "step": 164800 + }, + { + "epoch": 1.19, + "learning_rate": 8.083293552633484e-06, + "loss": 2.755, + "step": 164900 + }, + { + "epoch": 1.19, + "learning_rate": 8.076060698115119e-06, + "loss": 2.7517, + "step": 165000 + }, + { + "epoch": 1.19, + "eval_accuracy": 0.45970123974315935, + "eval_loss": 2.7717862129211426, + "eval_runtime": 29.3512, + "eval_samples_per_second": 220.877, + "eval_steps_per_second": 2.317, + "step": 165000 + }, + { + "epoch": 1.19, + "learning_rate": 8.068827843596755e-06, + "loss": 2.7543, + "step": 165100 + }, + { + "epoch": 1.19, + "learning_rate": 8.06159498907839e-06, + "loss": 2.744, + "step": 165200 + }, + { + "epoch": 1.2, + "learning_rate": 8.054362134560026e-06, + "loss": 2.7539, + "step": 165300 + }, + { + "epoch": 1.2, + "learning_rate": 8.047129280041663e-06, + "loss": 2.7509, + "step": 165400 + }, + { + "epoch": 1.2, + "learning_rate": 8.039896425523298e-06, + "loss": 2.7535, + "step": 165500 + }, + { + "epoch": 1.2, + "learning_rate": 8.032663571004934e-06, + "loss": 2.7485, + "step": 165600 + }, + { + "epoch": 1.2, + "learning_rate": 8.025430716486569e-06, + "loss": 2.7493, + "step": 165700 + }, + { + "epoch": 1.2, + "learning_rate": 8.018197861968206e-06, + "loss": 2.7477, + "step": 165800 + }, + { + "epoch": 1.2, + "learning_rate": 8.010965007449842e-06, + "loss": 2.7504, + "step": 165900 + }, + { + "epoch": 1.2, + "learning_rate": 8.003732152931477e-06, + "loss": 2.7522, + "step": 166000 + }, + { + "epoch": 1.2, + "eval_accuracy": 0.4596595016226451, + "eval_loss": 2.7715861797332764, + "eval_runtime": 29.3689, + "eval_samples_per_second": 220.744, + "eval_steps_per_second": 2.315, + "step": 166000 + }, + { + "epoch": 1.2, + "learning_rate": 7.996499298413113e-06, + "loss": 2.7494, + "step": 166100 + }, + { + "epoch": 1.2, + "learning_rate": 7.989266443894748e-06, + "loss": 2.7555, + "step": 166200 + }, + { + "epoch": 1.2, + "learning_rate": 7.982033589376383e-06, + "loss": 2.7561, + "step": 166300 + }, + { + "epoch": 1.2, + "learning_rate": 7.974873063403203e-06, + "loss": 2.7441, + "step": 166400 + }, + { + "epoch": 1.2, + "learning_rate": 7.96764020888484e-06, + "loss": 2.7574, + "step": 166500 + }, + { + "epoch": 1.2, + "learning_rate": 7.960407354366476e-06, + "loss": 2.7528, + "step": 166600 + }, + { + "epoch": 1.21, + "learning_rate": 7.95317449984811e-06, + "loss": 2.7582, + "step": 166700 + }, + { + "epoch": 1.21, + "learning_rate": 7.945941645329747e-06, + "loss": 2.7543, + "step": 166800 + }, + { + "epoch": 1.21, + "learning_rate": 7.938708790811382e-06, + "loss": 2.7501, + "step": 166900 + }, + { + "epoch": 1.21, + "learning_rate": 7.931475936293019e-06, + "loss": 2.7514, + "step": 167000 + }, + { + "epoch": 1.21, + "eval_accuracy": 0.45986395792313534, + "eval_loss": 2.771327495574951, + "eval_runtime": 32.3724, + "eval_samples_per_second": 200.263, + "eval_steps_per_second": 2.101, + "step": 167000 + }, + { + "epoch": 1.21, + "learning_rate": 7.924243081774653e-06, + "loss": 2.7594, + "step": 167100 + }, + { + "epoch": 1.21, + "learning_rate": 7.91701022725629e-06, + "loss": 2.7553, + "step": 167200 + }, + { + "epoch": 1.21, + "learning_rate": 7.909777372737926e-06, + "loss": 2.7411, + "step": 167300 + }, + { + "epoch": 1.21, + "learning_rate": 7.902544518219561e-06, + "loss": 2.7499, + "step": 167400 + }, + { + "epoch": 1.21, + "learning_rate": 7.895311663701198e-06, + "loss": 2.7544, + "step": 167500 + }, + { + "epoch": 1.21, + "learning_rate": 7.888078809182833e-06, + "loss": 2.7509, + "step": 167600 + }, + { + "epoch": 1.21, + "learning_rate": 7.880845954664469e-06, + "loss": 2.7516, + "step": 167700 + }, + { + "epoch": 1.21, + "learning_rate": 7.873613100146106e-06, + "loss": 2.7502, + "step": 167800 + }, + { + "epoch": 1.21, + "learning_rate": 7.86638024562774e-06, + "loss": 2.7581, + "step": 167900 + }, + { + "epoch": 1.22, + "learning_rate": 7.859147391109377e-06, + "loss": 2.7515, + "step": 168000 + }, + { + "epoch": 1.22, + "eval_accuracy": 0.4597810865824041, + "eval_loss": 2.7710556983947754, + "eval_runtime": 30.9587, + "eval_samples_per_second": 209.408, + "eval_steps_per_second": 2.196, + "step": 168000 + }, + { + "epoch": 1.22, + "learning_rate": 7.851914536591012e-06, + "loss": 2.75, + "step": 168100 + }, + { + "epoch": 1.22, + "learning_rate": 7.844681682072648e-06, + "loss": 2.7496, + "step": 168200 + }, + { + "epoch": 1.22, + "learning_rate": 7.837448827554283e-06, + "loss": 2.747, + "step": 168300 + }, + { + "epoch": 1.22, + "learning_rate": 7.83021597303592e-06, + "loss": 2.7516, + "step": 168400 + }, + { + "epoch": 1.22, + "learning_rate": 7.822983118517556e-06, + "loss": 2.7546, + "step": 168500 + }, + { + "epoch": 1.22, + "learning_rate": 7.81575026399919e-06, + "loss": 2.7462, + "step": 168600 + }, + { + "epoch": 1.22, + "learning_rate": 7.808517409480826e-06, + "loss": 2.7511, + "step": 168700 + }, + { + "epoch": 1.22, + "learning_rate": 7.801284554962462e-06, + "loss": 2.7571, + "step": 168800 + }, + { + "epoch": 1.22, + "learning_rate": 7.794051700444097e-06, + "loss": 2.7533, + "step": 168900 + }, + { + "epoch": 1.22, + "learning_rate": 7.786818845925734e-06, + "loss": 2.7493, + "step": 169000 + }, + { + "epoch": 1.22, + "eval_accuracy": 0.45982221980262106, + "eval_loss": 2.7707936763763428, + "eval_runtime": 30.5041, + "eval_samples_per_second": 212.529, + "eval_steps_per_second": 2.229, + "step": 169000 + }, + { + "epoch": 1.22, + "learning_rate": 7.779585991407368e-06, + "loss": 2.7488, + "step": 169100 + }, + { + "epoch": 1.22, + "learning_rate": 7.772353136889005e-06, + "loss": 2.7433, + "step": 169200 + }, + { + "epoch": 1.22, + "learning_rate": 7.765120282370641e-06, + "loss": 2.7549, + "step": 169300 + }, + { + "epoch": 1.23, + "learning_rate": 7.757887427852276e-06, + "loss": 2.7575, + "step": 169400 + }, + { + "epoch": 1.23, + "learning_rate": 7.750726901879096e-06, + "loss": 2.7478, + "step": 169500 + }, + { + "epoch": 1.23, + "learning_rate": 7.743566375905916e-06, + "loss": 2.7501, + "step": 169600 + }, + { + "epoch": 1.23, + "learning_rate": 7.73633352138755e-06, + "loss": 2.7466, + "step": 169700 + }, + { + "epoch": 1.23, + "learning_rate": 7.729100666869187e-06, + "loss": 2.7534, + "step": 169800 + }, + { + "epoch": 1.23, + "learning_rate": 7.721867812350824e-06, + "loss": 2.7458, + "step": 169900 + }, + { + "epoch": 1.23, + "learning_rate": 7.714634957832458e-06, + "loss": 2.7491, + "step": 170000 + }, + { + "epoch": 1.23, + "eval_accuracy": 0.45979620908983676, + "eval_loss": 2.7705278396606445, + "eval_runtime": 32.0791, + "eval_samples_per_second": 202.094, + "eval_steps_per_second": 2.12, + "step": 170000 + }, + { + "epoch": 1.23, + "learning_rate": 7.707402103314095e-06, + "loss": 2.7529, + "step": 170100 + }, + { + "epoch": 1.23, + "learning_rate": 7.70016924879573e-06, + "loss": 2.7448, + "step": 170200 + }, + { + "epoch": 1.23, + "learning_rate": 7.692936394277366e-06, + "loss": 2.7452, + "step": 170300 + }, + { + "epoch": 1.23, + "learning_rate": 7.685703539759003e-06, + "loss": 2.7512, + "step": 170400 + }, + { + "epoch": 1.23, + "learning_rate": 7.678470685240638e-06, + "loss": 2.7513, + "step": 170500 + }, + { + "epoch": 1.23, + "learning_rate": 7.671237830722274e-06, + "loss": 2.7544, + "step": 170600 + }, + { + "epoch": 1.23, + "learning_rate": 7.664004976203909e-06, + "loss": 2.749, + "step": 170700 + }, + { + "epoch": 1.24, + "learning_rate": 7.656772121685545e-06, + "loss": 2.7389, + "step": 170800 + }, + { + "epoch": 1.24, + "learning_rate": 7.64953926716718e-06, + "loss": 2.7474, + "step": 170900 + }, + { + "epoch": 1.24, + "learning_rate": 7.642306412648817e-06, + "loss": 2.7552, + "step": 171000 + }, + { + "epoch": 1.24, + "eval_accuracy": 0.45987726572967613, + "eval_loss": 2.7704155445098877, + "eval_runtime": 30.2626, + "eval_samples_per_second": 214.224, + "eval_steps_per_second": 2.247, + "step": 171000 + }, + { + "epoch": 1.24, + "learning_rate": 7.635073558130453e-06, + "loss": 2.7507, + "step": 171100 + }, + { + "epoch": 1.24, + "learning_rate": 7.627840703612088e-06, + "loss": 2.7527, + "step": 171200 + }, + { + "epoch": 1.24, + "learning_rate": 7.620607849093724e-06, + "loss": 2.7471, + "step": 171300 + }, + { + "epoch": 1.24, + "learning_rate": 7.61337499457536e-06, + "loss": 2.7481, + "step": 171400 + }, + { + "epoch": 1.24, + "learning_rate": 7.606142140056996e-06, + "loss": 2.7542, + "step": 171500 + }, + { + "epoch": 1.24, + "learning_rate": 7.5989092855386315e-06, + "loss": 2.7582, + "step": 171600 + }, + { + "epoch": 1.24, + "learning_rate": 7.5917487595654504e-06, + "loss": 2.7523, + "step": 171700 + }, + { + "epoch": 1.24, + "learning_rate": 7.58458823359227e-06, + "loss": 2.7543, + "step": 171800 + }, + { + "epoch": 1.24, + "learning_rate": 7.577355379073907e-06, + "loss": 2.7501, + "step": 171900 + }, + { + "epoch": 1.24, + "learning_rate": 7.570122524555542e-06, + "loss": 2.7536, + "step": 172000 + }, + { + "epoch": 1.24, + "eval_accuracy": 0.4599921967861647, + "eval_loss": 2.7700235843658447, + "eval_runtime": 30.9099, + "eval_samples_per_second": 209.739, + "eval_steps_per_second": 2.2, + "step": 172000 + }, + { + "epoch": 1.24, + "learning_rate": 7.562889670037178e-06, + "loss": 2.7438, + "step": 172100 + }, + { + "epoch": 1.25, + "learning_rate": 7.555656815518814e-06, + "loss": 2.7532, + "step": 172200 + }, + { + "epoch": 1.25, + "learning_rate": 7.548423961000449e-06, + "loss": 2.7427, + "step": 172300 + }, + { + "epoch": 1.25, + "learning_rate": 7.541191106482085e-06, + "loss": 2.7507, + "step": 172400 + }, + { + "epoch": 1.25, + "learning_rate": 7.533958251963721e-06, + "loss": 2.7479, + "step": 172500 + }, + { + "epoch": 1.25, + "learning_rate": 7.526725397445357e-06, + "loss": 2.7546, + "step": 172600 + }, + { + "epoch": 1.25, + "learning_rate": 7.519492542926993e-06, + "loss": 2.7616, + "step": 172700 + }, + { + "epoch": 1.25, + "learning_rate": 7.5122596884086285e-06, + "loss": 2.752, + "step": 172800 + }, + { + "epoch": 1.25, + "learning_rate": 7.505026833890263e-06, + "loss": 2.7501, + "step": 172900 + }, + { + "epoch": 1.25, + "learning_rate": 7.497793979371899e-06, + "loss": 2.7485, + "step": 173000 + }, + { + "epoch": 1.25, + "eval_accuracy": 0.4599425949617854, + "eval_loss": 2.769742727279663, + "eval_runtime": 29.9989, + "eval_samples_per_second": 216.108, + "eval_steps_per_second": 2.267, + "step": 173000 + }, + { + "epoch": 1.25, + "learning_rate": 7.490561124853535e-06, + "loss": 2.7499, + "step": 173100 + }, + { + "epoch": 1.25, + "learning_rate": 7.48332827033517e-06, + "loss": 2.7605, + "step": 173200 + }, + { + "epoch": 1.25, + "learning_rate": 7.476095415816806e-06, + "loss": 2.7559, + "step": 173300 + }, + { + "epoch": 1.25, + "learning_rate": 7.4688625612984425e-06, + "loss": 2.7446, + "step": 173400 + }, + { + "epoch": 1.25, + "learning_rate": 7.461629706780078e-06, + "loss": 2.7475, + "step": 173500 + }, + { + "epoch": 1.26, + "learning_rate": 7.454396852261714e-06, + "loss": 2.7498, + "step": 173600 + }, + { + "epoch": 1.26, + "learning_rate": 7.4471639977433495e-06, + "loss": 2.7551, + "step": 173700 + }, + { + "epoch": 1.26, + "learning_rate": 7.439931143224985e-06, + "loss": 2.7502, + "step": 173800 + }, + { + "epoch": 1.26, + "learning_rate": 7.432698288706621e-06, + "loss": 2.7391, + "step": 173900 + }, + { + "epoch": 1.26, + "learning_rate": 7.425465434188257e-06, + "loss": 2.7455, + "step": 174000 + }, + { + "epoch": 1.26, + "eval_accuracy": 0.45988815393502763, + "eval_loss": 2.7696611881256104, + "eval_runtime": 29.7398, + "eval_samples_per_second": 217.991, + "eval_steps_per_second": 2.286, + "step": 174000 + }, + { + "epoch": 1.26, + "learning_rate": 7.418232579669893e-06, + "loss": 2.7492, + "step": 174100 + }, + { + "epoch": 1.26, + "learning_rate": 7.410999725151529e-06, + "loss": 2.7532, + "step": 174200 + }, + { + "epoch": 1.26, + "learning_rate": 7.4038391991783485e-06, + "loss": 2.7515, + "step": 174300 + }, + { + "epoch": 1.26, + "learning_rate": 7.396606344659984e-06, + "loss": 2.746, + "step": 174400 + }, + { + "epoch": 1.26, + "learning_rate": 7.389373490141621e-06, + "loss": 2.7493, + "step": 174500 + }, + { + "epoch": 1.26, + "learning_rate": 7.382140635623256e-06, + "loss": 2.7509, + "step": 174600 + }, + { + "epoch": 1.26, + "learning_rate": 7.374907781104892e-06, + "loss": 2.7518, + "step": 174700 + }, + { + "epoch": 1.26, + "learning_rate": 7.367674926586528e-06, + "loss": 2.748, + "step": 174800 + }, + { + "epoch": 1.27, + "learning_rate": 7.360442072068163e-06, + "loss": 2.7488, + "step": 174900 + }, + { + "epoch": 1.27, + "learning_rate": 7.353209217549799e-06, + "loss": 2.7516, + "step": 175000 + }, + { + "epoch": 1.27, + "eval_accuracy": 0.45990811564483886, + "eval_loss": 2.7693846225738525, + "eval_runtime": 29.4233, + "eval_samples_per_second": 220.336, + "eval_steps_per_second": 2.311, + "step": 175000 + }, + { + "epoch": 1.27, + "learning_rate": 7.346048691576618e-06, + "loss": 2.7507, + "step": 175100 + }, + { + "epoch": 1.27, + "learning_rate": 7.338815837058254e-06, + "loss": 2.7448, + "step": 175200 + }, + { + "epoch": 1.27, + "learning_rate": 7.33158298253989e-06, + "loss": 2.7467, + "step": 175300 + }, + { + "epoch": 1.27, + "learning_rate": 7.324350128021526e-06, + "loss": 2.7441, + "step": 175400 + }, + { + "epoch": 1.27, + "learning_rate": 7.317117273503161e-06, + "loss": 2.7504, + "step": 175500 + }, + { + "epoch": 1.27, + "learning_rate": 7.309884418984797e-06, + "loss": 2.753, + "step": 175600 + }, + { + "epoch": 1.27, + "learning_rate": 7.302651564466433e-06, + "loss": 2.7544, + "step": 175700 + }, + { + "epoch": 1.27, + "learning_rate": 7.295418709948069e-06, + "loss": 2.7518, + "step": 175800 + }, + { + "epoch": 1.27, + "learning_rate": 7.288185855429705e-06, + "loss": 2.7521, + "step": 175900 + }, + { + "epoch": 1.27, + "learning_rate": 7.2809530009113406e-06, + "loss": 2.754, + "step": 176000 + }, + { + "epoch": 1.27, + "eval_accuracy": 0.460049662314409, + "eval_loss": 2.7690155506134033, + "eval_runtime": 29.3683, + "eval_samples_per_second": 220.749, + "eval_steps_per_second": 2.315, + "step": 176000 + }, + { + "epoch": 1.27, + "learning_rate": 7.273720146392976e-06, + "loss": 2.7484, + "step": 176100 + }, + { + "epoch": 1.27, + "learning_rate": 7.266487291874612e-06, + "loss": 2.7408, + "step": 176200 + }, + { + "epoch": 1.28, + "learning_rate": 7.2592544373562476e-06, + "loss": 2.7564, + "step": 176300 + }, + { + "epoch": 1.28, + "learning_rate": 7.2520939113830664e-06, + "loss": 2.7572, + "step": 176400 + }, + { + "epoch": 1.28, + "learning_rate": 7.244861056864703e-06, + "loss": 2.754, + "step": 176500 + }, + { + "epoch": 1.28, + "learning_rate": 7.237628202346339e-06, + "loss": 2.7524, + "step": 176600 + }, + { + "epoch": 1.28, + "learning_rate": 7.230395347827974e-06, + "loss": 2.7537, + "step": 176700 + }, + { + "epoch": 1.28, + "learning_rate": 7.22316249330961e-06, + "loss": 2.7523, + "step": 176800 + }, + { + "epoch": 1.28, + "learning_rate": 7.215929638791246e-06, + "loss": 2.7486, + "step": 176900 + }, + { + "epoch": 1.28, + "learning_rate": 7.208696784272881e-06, + "loss": 2.7489, + "step": 177000 + }, + { + "epoch": 1.28, + "eval_accuracy": 0.4598288737058914, + "eval_loss": 2.76901912689209, + "eval_runtime": 29.8199, + "eval_samples_per_second": 217.405, + "eval_steps_per_second": 2.28, + "step": 177000 + }, + { + "epoch": 1.28, + "learning_rate": 7.201463929754518e-06, + "loss": 2.7491, + "step": 177100 + }, + { + "epoch": 1.28, + "learning_rate": 7.1942310752361535e-06, + "loss": 2.7486, + "step": 177200 + }, + { + "epoch": 1.28, + "learning_rate": 7.186998220717789e-06, + "loss": 2.7605, + "step": 177300 + }, + { + "epoch": 1.28, + "learning_rate": 7.179765366199425e-06, + "loss": 2.7506, + "step": 177400 + }, + { + "epoch": 1.28, + "learning_rate": 7.1725325116810605e-06, + "loss": 2.7473, + "step": 177500 + }, + { + "epoch": 1.28, + "learning_rate": 7.165299657162696e-06, + "loss": 2.7484, + "step": 177600 + }, + { + "epoch": 1.29, + "learning_rate": 7.158066802644333e-06, + "loss": 2.7501, + "step": 177700 + }, + { + "epoch": 1.29, + "learning_rate": 7.150833948125968e-06, + "loss": 2.7492, + "step": 177800 + }, + { + "epoch": 1.29, + "learning_rate": 7.143673422152787e-06, + "loss": 2.7521, + "step": 177900 + }, + { + "epoch": 1.29, + "learning_rate": 7.136440567634423e-06, + "loss": 2.7491, + "step": 178000 + }, + { + "epoch": 1.29, + "eval_accuracy": 0.4601452365613838, + "eval_loss": 2.7686147689819336, + "eval_runtime": 29.3757, + "eval_samples_per_second": 220.693, + "eval_steps_per_second": 2.315, + "step": 178000 + }, + { + "epoch": 1.29, + "learning_rate": 7.1292077131160585e-06, + "loss": 2.7487, + "step": 178100 + }, + { + "epoch": 1.29, + "learning_rate": 7.121974858597694e-06, + "loss": 2.748, + "step": 178200 + }, + { + "epoch": 1.29, + "learning_rate": 7.11474200407933e-06, + "loss": 2.7602, + "step": 178300 + }, + { + "epoch": 1.29, + "learning_rate": 7.107509149560966e-06, + "loss": 2.7475, + "step": 178400 + }, + { + "epoch": 1.29, + "learning_rate": 7.100276295042602e-06, + "loss": 2.7534, + "step": 178500 + }, + { + "epoch": 1.29, + "learning_rate": 7.093043440524238e-06, + "loss": 2.7507, + "step": 178600 + }, + { + "epoch": 1.29, + "learning_rate": 7.085810586005873e-06, + "loss": 2.7415, + "step": 178700 + }, + { + "epoch": 1.29, + "learning_rate": 7.078577731487509e-06, + "loss": 2.7544, + "step": 178800 + }, + { + "epoch": 1.29, + "learning_rate": 7.071344876969145e-06, + "loss": 2.7539, + "step": 178900 + }, + { + "epoch": 1.29, + "learning_rate": 7.064112022450781e-06, + "loss": 2.7432, + "step": 179000 + }, + { + "epoch": 1.29, + "eval_accuracy": 0.46003030550489515, + "eval_loss": 2.768362283706665, + "eval_runtime": 29.3614, + "eval_samples_per_second": 220.8, + "eval_steps_per_second": 2.316, + "step": 179000 + }, + { + "epoch": 1.3, + "learning_rate": 7.056879167932417e-06, + "loss": 2.74, + "step": 179100 + }, + { + "epoch": 1.3, + "learning_rate": 7.0496463134140526e-06, + "loss": 2.7498, + "step": 179200 + }, + { + "epoch": 1.3, + "learning_rate": 7.042413458895688e-06, + "loss": 2.7522, + "step": 179300 + }, + { + "epoch": 1.3, + "learning_rate": 7.035180604377324e-06, + "loss": 2.7575, + "step": 179400 + }, + { + "epoch": 1.3, + "learning_rate": 7.0279477498589596e-06, + "loss": 2.7505, + "step": 179500 + }, + { + "epoch": 1.3, + "learning_rate": 7.020714895340596e-06, + "loss": 2.7489, + "step": 179600 + }, + { + "epoch": 1.3, + "learning_rate": 7.013482040822232e-06, + "loss": 2.7459, + "step": 179700 + }, + { + "epoch": 1.3, + "learning_rate": 7.006321514849051e-06, + "loss": 2.7523, + "step": 179800 + }, + { + "epoch": 1.3, + "learning_rate": 6.999088660330686e-06, + "loss": 2.7475, + "step": 179900 + }, + { + "epoch": 1.3, + "learning_rate": 6.991855805812322e-06, + "loss": 2.7388, + "step": 180000 + }, + { + "epoch": 1.3, + "eval_accuracy": 0.460155519866438, + "eval_loss": 2.768120050430298, + "eval_runtime": 29.4147, + "eval_samples_per_second": 220.4, + "eval_steps_per_second": 2.312, + "step": 180000 + }, + { + "epoch": 1.3, + "learning_rate": 6.984622951293958e-06, + "loss": 2.7413, + "step": 180100 + }, + { + "epoch": 1.3, + "learning_rate": 6.977390096775593e-06, + "loss": 2.743, + "step": 180200 + }, + { + "epoch": 1.3, + "learning_rate": 6.97015724225723e-06, + "loss": 2.7562, + "step": 180300 + }, + { + "epoch": 1.3, + "learning_rate": 6.9629243877388655e-06, + "loss": 2.7498, + "step": 180400 + }, + { + "epoch": 1.31, + "learning_rate": 6.955691533220501e-06, + "loss": 2.7485, + "step": 180500 + }, + { + "epoch": 1.31, + "learning_rate": 6.948458678702137e-06, + "loss": 2.7551, + "step": 180600 + }, + { + "epoch": 1.31, + "learning_rate": 6.9412258241837725e-06, + "loss": 2.7558, + "step": 180700 + }, + { + "epoch": 1.31, + "learning_rate": 6.933992969665408e-06, + "loss": 2.7451, + "step": 180800 + }, + { + "epoch": 1.31, + "learning_rate": 6.926760115147045e-06, + "loss": 2.7498, + "step": 180900 + }, + { + "epoch": 1.31, + "learning_rate": 6.91952726062868e-06, + "loss": 2.7501, + "step": 181000 + }, + { + "epoch": 1.31, + "eval_accuracy": 0.46024988431281816, + "eval_loss": 2.767861843109131, + "eval_runtime": 30.1014, + "eval_samples_per_second": 215.372, + "eval_steps_per_second": 2.259, + "step": 181000 + }, + { + "epoch": 1.31, + "learning_rate": 6.912294406110316e-06, + "loss": 2.7455, + "step": 181100 + }, + { + "epoch": 1.31, + "learning_rate": 6.905061551591952e-06, + "loss": 2.7505, + "step": 181200 + }, + { + "epoch": 1.31, + "learning_rate": 6.897828697073587e-06, + "loss": 2.7485, + "step": 181300 + }, + { + "epoch": 1.31, + "learning_rate": 6.890595842555223e-06, + "loss": 2.7509, + "step": 181400 + }, + { + "epoch": 1.31, + "learning_rate": 6.8833629880368595e-06, + "loss": 2.7526, + "step": 181500 + }, + { + "epoch": 1.31, + "learning_rate": 6.876130133518495e-06, + "loss": 2.7452, + "step": 181600 + }, + { + "epoch": 1.31, + "learning_rate": 6.868897279000131e-06, + "loss": 2.7528, + "step": 181700 + }, + { + "epoch": 1.31, + "learning_rate": 6.8616644244817665e-06, + "loss": 2.7542, + "step": 181800 + }, + { + "epoch": 1.32, + "learning_rate": 6.854431569963402e-06, + "loss": 2.7439, + "step": 181900 + }, + { + "epoch": 1.32, + "learning_rate": 6.847198715445038e-06, + "loss": 2.7526, + "step": 182000 + }, + { + "epoch": 1.32, + "eval_accuracy": 0.4602668215211428, + "eval_loss": 2.767512798309326, + "eval_runtime": 29.7082, + "eval_samples_per_second": 218.222, + "eval_steps_per_second": 2.289, + "step": 182000 + }, + { + "epoch": 1.32, + "learning_rate": 6.8399658609266735e-06, + "loss": 2.7462, + "step": 182100 + }, + { + "epoch": 1.32, + "learning_rate": 6.832877663498677e-06, + "loss": 2.7469, + "step": 182200 + }, + { + "epoch": 1.32, + "learning_rate": 6.825644808980313e-06, + "loss": 2.7407, + "step": 182300 + }, + { + "epoch": 1.32, + "learning_rate": 6.818411954461949e-06, + "loss": 2.751, + "step": 182400 + }, + { + "epoch": 1.32, + "learning_rate": 6.811179099943584e-06, + "loss": 2.7415, + "step": 182500 + }, + { + "epoch": 1.32, + "learning_rate": 6.80394624542522e-06, + "loss": 2.7469, + "step": 182600 + }, + { + "epoch": 1.32, + "learning_rate": 6.7967133909068565e-06, + "loss": 2.7459, + "step": 182700 + }, + { + "epoch": 1.32, + "learning_rate": 6.789480536388492e-06, + "loss": 2.7417, + "step": 182800 + }, + { + "epoch": 1.32, + "learning_rate": 6.782247681870128e-06, + "loss": 2.7458, + "step": 182900 + }, + { + "epoch": 1.32, + "learning_rate": 6.7750148273517635e-06, + "loss": 2.7478, + "step": 183000 + }, + { + "epoch": 1.32, + "eval_accuracy": 0.46032186744819786, + "eval_loss": 2.7674057483673096, + "eval_runtime": 30.0098, + "eval_samples_per_second": 216.029, + "eval_steps_per_second": 2.266, + "step": 183000 + }, + { + "epoch": 1.32, + "learning_rate": 6.767781972833399e-06, + "loss": 2.7386, + "step": 183100 + }, + { + "epoch": 1.33, + "learning_rate": 6.760549118315035e-06, + "loss": 2.7463, + "step": 183200 + }, + { + "epoch": 1.33, + "learning_rate": 6.753316263796671e-06, + "loss": 2.7496, + "step": 183300 + }, + { + "epoch": 1.33, + "learning_rate": 6.74615573782349e-06, + "loss": 2.7441, + "step": 183400 + }, + { + "epoch": 1.33, + "learning_rate": 6.738922883305126e-06, + "loss": 2.752, + "step": 183500 + }, + { + "epoch": 1.33, + "learning_rate": 6.7316900287867616e-06, + "loss": 2.753, + "step": 183600 + }, + { + "epoch": 1.33, + "learning_rate": 6.724457174268397e-06, + "loss": 2.7478, + "step": 183700 + }, + { + "epoch": 1.33, + "learning_rate": 6.717224319750033e-06, + "loss": 2.7465, + "step": 183800 + }, + { + "epoch": 1.33, + "learning_rate": 6.7099914652316686e-06, + "loss": 2.7441, + "step": 183900 + }, + { + "epoch": 1.33, + "learning_rate": 6.702758610713305e-06, + "loss": 2.7491, + "step": 184000 + }, + { + "epoch": 1.33, + "eval_accuracy": 0.46035937126663096, + "eval_loss": 2.767024040222168, + "eval_runtime": 29.3058, + "eval_samples_per_second": 221.219, + "eval_steps_per_second": 2.32, + "step": 184000 + }, + { + "epoch": 1.33, + "learning_rate": 6.695525756194941e-06, + "loss": 2.7492, + "step": 184100 + }, + { + "epoch": 1.33, + "learning_rate": 6.688292901676576e-06, + "loss": 2.7514, + "step": 184200 + }, + { + "epoch": 1.33, + "learning_rate": 6.681060047158212e-06, + "loss": 2.7474, + "step": 184300 + }, + { + "epoch": 1.33, + "learning_rate": 6.673827192639848e-06, + "loss": 2.7496, + "step": 184400 + }, + { + "epoch": 1.33, + "learning_rate": 6.666594338121483e-06, + "loss": 2.7429, + "step": 184500 + }, + { + "epoch": 1.34, + "learning_rate": 6.65936148360312e-06, + "loss": 2.7572, + "step": 184600 + }, + { + "epoch": 1.34, + "learning_rate": 6.652128629084756e-06, + "loss": 2.7417, + "step": 184700 + }, + { + "epoch": 1.34, + "learning_rate": 6.644895774566391e-06, + "loss": 2.7459, + "step": 184800 + }, + { + "epoch": 1.34, + "learning_rate": 6.637662920048027e-06, + "loss": 2.7471, + "step": 184900 + }, + { + "epoch": 1.34, + "learning_rate": 6.630430065529663e-06, + "loss": 2.7505, + "step": 185000 + }, + { + "epoch": 1.34, + "eval_accuracy": 0.46044466220855146, + "eval_loss": 2.7669997215270996, + "eval_runtime": 30.3971, + "eval_samples_per_second": 213.277, + "eval_steps_per_second": 2.237, + "step": 185000 + }, + { + "epoch": 1.34, + "learning_rate": 6.623197211011298e-06, + "loss": 2.7449, + "step": 185100 + }, + { + "epoch": 1.34, + "learning_rate": 6.615964356492934e-06, + "loss": 2.7466, + "step": 185200 + }, + { + "epoch": 1.34, + "learning_rate": 6.6087315019745704e-06, + "loss": 2.7491, + "step": 185300 + }, + { + "epoch": 1.34, + "learning_rate": 6.601498647456206e-06, + "loss": 2.753, + "step": 185400 + }, + { + "epoch": 1.34, + "learning_rate": 6.594265792937842e-06, + "loss": 2.7509, + "step": 185500 + }, + { + "epoch": 1.34, + "learning_rate": 6.5870329384194774e-06, + "loss": 2.7459, + "step": 185600 + }, + { + "epoch": 1.34, + "learning_rate": 6.579800083901113e-06, + "loss": 2.7465, + "step": 185700 + }, + { + "epoch": 1.34, + "learning_rate": 6.572567229382749e-06, + "loss": 2.7477, + "step": 185800 + }, + { + "epoch": 1.34, + "learning_rate": 6.565334374864385e-06, + "loss": 2.7506, + "step": 185900 + }, + { + "epoch": 1.35, + "learning_rate": 6.558101520346021e-06, + "loss": 2.7436, + "step": 186000 + }, + { + "epoch": 1.35, + "eval_accuracy": 0.46046341411776803, + "eval_loss": 2.766613245010376, + "eval_runtime": 30.3071, + "eval_samples_per_second": 213.91, + "eval_steps_per_second": 2.244, + "step": 186000 + }, + { + "epoch": 1.35, + "learning_rate": 6.550868665827657e-06, + "loss": 2.7414, + "step": 186100 + }, + { + "epoch": 1.35, + "learning_rate": 6.543635811309292e-06, + "loss": 2.7517, + "step": 186200 + }, + { + "epoch": 1.35, + "learning_rate": 6.536402956790928e-06, + "loss": 2.7479, + "step": 186300 + }, + { + "epoch": 1.35, + "learning_rate": 6.529170102272563e-06, + "loss": 2.7473, + "step": 186400 + }, + { + "epoch": 1.35, + "learning_rate": 6.5220095762993825e-06, + "loss": 2.7451, + "step": 186500 + }, + { + "epoch": 1.35, + "learning_rate": 6.514776721781019e-06, + "loss": 2.7426, + "step": 186600 + }, + { + "epoch": 1.35, + "learning_rate": 6.507543867262655e-06, + "loss": 2.7463, + "step": 186700 + }, + { + "epoch": 1.35, + "learning_rate": 6.50031101274429e-06, + "loss": 2.7519, + "step": 186800 + }, + { + "epoch": 1.35, + "learning_rate": 6.493078158225926e-06, + "loss": 2.7468, + "step": 186900 + }, + { + "epoch": 1.35, + "learning_rate": 6.485845303707562e-06, + "loss": 2.7389, + "step": 187000 + }, + { + "epoch": 1.35, + "eval_accuracy": 0.4603109792428463, + "eval_loss": 2.7665233612060547, + "eval_runtime": 30.7223, + "eval_samples_per_second": 211.019, + "eval_steps_per_second": 2.213, + "step": 187000 + }, + { + "epoch": 1.35, + "learning_rate": 6.478612449189197e-06, + "loss": 2.7471, + "step": 187100 + }, + { + "epoch": 1.35, + "learning_rate": 6.471379594670834e-06, + "loss": 2.7468, + "step": 187200 + }, + { + "epoch": 1.35, + "learning_rate": 6.4641467401524695e-06, + "loss": 2.7295, + "step": 187300 + }, + { + "epoch": 1.36, + "learning_rate": 6.456913885634105e-06, + "loss": 2.7394, + "step": 187400 + }, + { + "epoch": 1.36, + "learning_rate": 6.449681031115741e-06, + "loss": 2.7495, + "step": 187500 + }, + { + "epoch": 1.36, + "learning_rate": 6.44252050514256e-06, + "loss": 2.7429, + "step": 187600 + }, + { + "epoch": 1.36, + "learning_rate": 6.4352876506241954e-06, + "loss": 2.7461, + "step": 187700 + }, + { + "epoch": 1.36, + "learning_rate": 6.428054796105831e-06, + "loss": 2.7471, + "step": 187800 + }, + { + "epoch": 1.36, + "learning_rate": 6.420894270132652e-06, + "loss": 2.75, + "step": 187900 + }, + { + "epoch": 1.36, + "learning_rate": 6.413661415614287e-06, + "loss": 2.7564, + "step": 188000 + }, + { + "epoch": 1.36, + "eval_accuracy": 0.4603660251699014, + "eval_loss": 2.7662112712860107, + "eval_runtime": 28.077, + "eval_samples_per_second": 230.901, + "eval_steps_per_second": 2.422, + "step": 188000 + }, + { + "epoch": 1.36, + "learning_rate": 6.406428561095923e-06, + "loss": 2.7547, + "step": 188100 + }, + { + "epoch": 1.36, + "learning_rate": 6.399195706577559e-06, + "loss": 2.7463, + "step": 188200 + }, + { + "epoch": 1.36, + "learning_rate": 6.391962852059194e-06, + "loss": 2.7426, + "step": 188300 + }, + { + "epoch": 1.36, + "learning_rate": 6.384729997540829e-06, + "loss": 2.7424, + "step": 188400 + }, + { + "epoch": 1.36, + "learning_rate": 6.377497143022465e-06, + "loss": 2.7434, + "step": 188500 + }, + { + "epoch": 1.36, + "learning_rate": 6.370264288504101e-06, + "loss": 2.7483, + "step": 188600 + }, + { + "epoch": 1.36, + "learning_rate": 6.363031433985737e-06, + "loss": 2.752, + "step": 188700 + }, + { + "epoch": 1.37, + "learning_rate": 6.355798579467373e-06, + "loss": 2.7474, + "step": 188800 + }, + { + "epoch": 1.37, + "learning_rate": 6.348565724949008e-06, + "loss": 2.7408, + "step": 188900 + }, + { + "epoch": 1.37, + "learning_rate": 6.341332870430644e-06, + "loss": 2.7464, + "step": 189000 + }, + { + "epoch": 1.37, + "eval_accuracy": 0.4603968750850641, + "eval_loss": 2.76613712310791, + "eval_runtime": 28.0132, + "eval_samples_per_second": 231.427, + "eval_steps_per_second": 2.427, + "step": 189000 + }, + { + "epoch": 1.37, + "learning_rate": 6.33410001591228e-06, + "loss": 2.7481, + "step": 189100 + }, + { + "epoch": 1.37, + "learning_rate": 6.3269394899391e-06, + "loss": 2.7505, + "step": 189200 + }, + { + "epoch": 1.37, + "learning_rate": 6.319706635420736e-06, + "loss": 2.7506, + "step": 189300 + }, + { + "epoch": 1.37, + "learning_rate": 6.312473780902372e-06, + "loss": 2.7421, + "step": 189400 + }, + { + "epoch": 1.37, + "learning_rate": 6.305240926384007e-06, + "loss": 2.7442, + "step": 189500 + }, + { + "epoch": 1.37, + "learning_rate": 6.298008071865643e-06, + "loss": 2.739, + "step": 189600 + }, + { + "epoch": 1.37, + "learning_rate": 6.2907752173472795e-06, + "loss": 2.7498, + "step": 189700 + }, + { + "epoch": 1.37, + "learning_rate": 6.283542362828915e-06, + "loss": 2.7451, + "step": 189800 + }, + { + "epoch": 1.37, + "learning_rate": 6.276309508310551e-06, + "loss": 2.7447, + "step": 189900 + }, + { + "epoch": 1.37, + "learning_rate": 6.2690766537921865e-06, + "loss": 2.7459, + "step": 190000 + }, + { + "epoch": 1.37, + "eval_accuracy": 0.4604603896162815, + "eval_loss": 2.7658865451812744, + "eval_runtime": 28.2461, + "eval_samples_per_second": 229.518, + "eval_steps_per_second": 2.407, + "step": 190000 + }, + { + "epoch": 1.37, + "learning_rate": 6.261843799273822e-06, + "loss": 2.7398, + "step": 190100 + }, + { + "epoch": 1.38, + "learning_rate": 6.254610944755458e-06, + "loss": 2.7458, + "step": 190200 + }, + { + "epoch": 1.38, + "learning_rate": 6.247378090237094e-06, + "loss": 2.7484, + "step": 190300 + }, + { + "epoch": 1.38, + "learning_rate": 6.24014523571873e-06, + "loss": 2.7425, + "step": 190400 + }, + { + "epoch": 1.38, + "learning_rate": 6.232984709745549e-06, + "loss": 2.7482, + "step": 190500 + }, + { + "epoch": 1.38, + "learning_rate": 6.2257518552271845e-06, + "loss": 2.7479, + "step": 190600 + }, + { + "epoch": 1.38, + "learning_rate": 6.21851900070882e-06, + "loss": 2.7382, + "step": 190700 + }, + { + "epoch": 1.38, + "learning_rate": 6.211286146190456e-06, + "loss": 2.7414, + "step": 190800 + }, + { + "epoch": 1.38, + "learning_rate": 6.2040532916720915e-06, + "loss": 2.7534, + "step": 190900 + }, + { + "epoch": 1.38, + "learning_rate": 6.196820437153728e-06, + "loss": 2.7481, + "step": 191000 + }, + { + "epoch": 1.38, + "eval_accuracy": 0.4605136208424446, + "eval_loss": 2.765713691711426, + "eval_runtime": 28.3745, + "eval_samples_per_second": 228.48, + "eval_steps_per_second": 2.397, + "step": 191000 + }, + { + "epoch": 1.38, + "learning_rate": 6.189587582635364e-06, + "loss": 2.7396, + "step": 191100 + }, + { + "epoch": 1.38, + "learning_rate": 6.182354728116999e-06, + "loss": 2.7452, + "step": 191200 + }, + { + "epoch": 1.38, + "learning_rate": 6.175121873598635e-06, + "loss": 2.7406, + "step": 191300 + }, + { + "epoch": 1.38, + "learning_rate": 6.167889019080271e-06, + "loss": 2.7457, + "step": 191400 + }, + { + "epoch": 1.39, + "learning_rate": 6.160656164561906e-06, + "loss": 2.758, + "step": 191500 + }, + { + "epoch": 1.39, + "learning_rate": 6.153423310043543e-06, + "loss": 2.7454, + "step": 191600 + }, + { + "epoch": 1.39, + "learning_rate": 6.1461904555251785e-06, + "loss": 2.7408, + "step": 191700 + }, + { + "epoch": 1.39, + "learning_rate": 6.138957601006814e-06, + "loss": 2.7432, + "step": 191800 + }, + { + "epoch": 1.39, + "learning_rate": 6.131797075033633e-06, + "loss": 2.7435, + "step": 191900 + }, + { + "epoch": 1.39, + "learning_rate": 6.124564220515269e-06, + "loss": 2.7458, + "step": 192000 + }, + { + "epoch": 1.39, + "eval_accuracy": 0.46044345240795687, + "eval_loss": 2.765500783920288, + "eval_runtime": 32.8692, + "eval_samples_per_second": 197.236, + "eval_steps_per_second": 2.069, + "step": 192000 + }, + { + "epoch": 1.39, + "learning_rate": 6.1173313659969044e-06, + "loss": 2.7541, + "step": 192100 + }, + { + "epoch": 1.39, + "learning_rate": 6.11009851147854e-06, + "loss": 2.743, + "step": 192200 + }, + { + "epoch": 1.39, + "learning_rate": 6.102865656960177e-06, + "loss": 2.752, + "step": 192300 + }, + { + "epoch": 1.39, + "learning_rate": 6.095632802441812e-06, + "loss": 2.7452, + "step": 192400 + }, + { + "epoch": 1.39, + "learning_rate": 6.088399947923448e-06, + "loss": 2.7477, + "step": 192500 + }, + { + "epoch": 1.39, + "learning_rate": 6.081167093405084e-06, + "loss": 2.7538, + "step": 192600 + }, + { + "epoch": 1.39, + "learning_rate": 6.073934238886719e-06, + "loss": 2.7416, + "step": 192700 + }, + { + "epoch": 1.39, + "learning_rate": 6.066701384368355e-06, + "loss": 2.7492, + "step": 192800 + }, + { + "epoch": 1.4, + "learning_rate": 6.0594685298499915e-06, + "loss": 2.7435, + "step": 192900 + }, + { + "epoch": 1.4, + "learning_rate": 6.052235675331627e-06, + "loss": 2.7427, + "step": 193000 + }, + { + "epoch": 1.4, + "eval_accuracy": 0.4605184600448231, + "eval_loss": 2.7652785778045654, + "eval_runtime": 31.9488, + "eval_samples_per_second": 202.919, + "eval_steps_per_second": 2.128, + "step": 193000 + }, + { + "epoch": 1.4, + "learning_rate": 6.045002820813263e-06, + "loss": 2.7376, + "step": 193100 + }, + { + "epoch": 1.4, + "learning_rate": 6.0377699662948985e-06, + "loss": 2.7449, + "step": 193200 + }, + { + "epoch": 1.4, + "learning_rate": 6.030537111776534e-06, + "loss": 2.7361, + "step": 193300 + }, + { + "epoch": 1.4, + "learning_rate": 6.02330425725817e-06, + "loss": 2.747, + "step": 193400 + }, + { + "epoch": 1.4, + "learning_rate": 6.016071402739806e-06, + "loss": 2.748, + "step": 193500 + }, + { + "epoch": 1.4, + "learning_rate": 6.008838548221442e-06, + "loss": 2.7475, + "step": 193600 + }, + { + "epoch": 1.4, + "learning_rate": 6.001605693703078e-06, + "loss": 2.7504, + "step": 193700 + }, + { + "epoch": 1.4, + "learning_rate": 5.994372839184713e-06, + "loss": 2.7381, + "step": 193800 + }, + { + "epoch": 1.4, + "learning_rate": 5.987212313211532e-06, + "loss": 2.7453, + "step": 193900 + }, + { + "epoch": 1.4, + "learning_rate": 5.980051787238352e-06, + "loss": 2.741, + "step": 194000 + }, + { + "epoch": 1.4, + "eval_accuracy": 0.4605601981653374, + "eval_loss": 2.7650601863861084, + "eval_runtime": 29.1266, + "eval_samples_per_second": 222.58, + "eval_steps_per_second": 2.335, + "step": 194000 + }, + { + "epoch": 1.4, + "learning_rate": 5.9728189327199885e-06, + "loss": 2.7422, + "step": 194100 + }, + { + "epoch": 1.4, + "learning_rate": 5.965586078201624e-06, + "loss": 2.7339, + "step": 194200 + }, + { + "epoch": 1.41, + "learning_rate": 5.95835322368326e-06, + "loss": 2.746, + "step": 194300 + }, + { + "epoch": 1.41, + "learning_rate": 5.9511203691648955e-06, + "loss": 2.7438, + "step": 194400 + }, + { + "epoch": 1.41, + "learning_rate": 5.943887514646531e-06, + "loss": 2.749, + "step": 194500 + }, + { + "epoch": 1.41, + "learning_rate": 5.936654660128167e-06, + "loss": 2.7492, + "step": 194600 + }, + { + "epoch": 1.41, + "learning_rate": 5.929494134154986e-06, + "loss": 2.7466, + "step": 194700 + }, + { + "epoch": 1.41, + "learning_rate": 5.922261279636622e-06, + "loss": 2.7385, + "step": 194800 + }, + { + "epoch": 1.41, + "learning_rate": 5.915028425118258e-06, + "loss": 2.7399, + "step": 194900 + }, + { + "epoch": 1.41, + "learning_rate": 5.9077955705998935e-06, + "loss": 2.7488, + "step": 195000 + }, + { + "epoch": 1.41, + "eval_accuracy": 0.4605995166846624, + "eval_loss": 2.7648675441741943, + "eval_runtime": 29.4441, + "eval_samples_per_second": 220.18, + "eval_steps_per_second": 2.309, + "step": 195000 + }, + { + "epoch": 1.41, + "learning_rate": 5.900562716081529e-06, + "loss": 2.7498, + "step": 195100 + }, + { + "epoch": 1.41, + "learning_rate": 5.893329861563165e-06, + "loss": 2.746, + "step": 195200 + }, + { + "epoch": 1.41, + "learning_rate": 5.8860970070448005e-06, + "loss": 2.7481, + "step": 195300 + }, + { + "epoch": 1.41, + "learning_rate": 5.878864152526437e-06, + "loss": 2.7438, + "step": 195400 + }, + { + "epoch": 1.41, + "learning_rate": 5.871631298008073e-06, + "loss": 2.7449, + "step": 195500 + }, + { + "epoch": 1.41, + "learning_rate": 5.864398443489708e-06, + "loss": 2.7426, + "step": 195600 + }, + { + "epoch": 1.42, + "learning_rate": 5.857165588971344e-06, + "loss": 2.744, + "step": 195700 + }, + { + "epoch": 1.42, + "learning_rate": 5.84993273445298e-06, + "loss": 2.7455, + "step": 195800 + }, + { + "epoch": 1.42, + "learning_rate": 5.842699879934615e-06, + "loss": 2.7543, + "step": 195900 + }, + { + "epoch": 1.42, + "learning_rate": 5.835467025416252e-06, + "loss": 2.7353, + "step": 196000 + }, + { + "epoch": 1.42, + "eval_accuracy": 0.4605263237486881, + "eval_loss": 2.7647223472595215, + "eval_runtime": 32.6984, + "eval_samples_per_second": 198.266, + "eval_steps_per_second": 2.08, + "step": 196000 + }, + { + "epoch": 1.42, + "learning_rate": 5.8282341708978876e-06, + "loss": 2.7424, + "step": 196100 + }, + { + "epoch": 1.42, + "learning_rate": 5.821001316379523e-06, + "loss": 2.7506, + "step": 196200 + }, + { + "epoch": 1.42, + "learning_rate": 5.813768461861159e-06, + "loss": 2.7501, + "step": 196300 + }, + { + "epoch": 1.42, + "learning_rate": 5.8065356073427946e-06, + "loss": 2.7455, + "step": 196400 + }, + { + "epoch": 1.42, + "learning_rate": 5.79930275282443e-06, + "loss": 2.7528, + "step": 196500 + }, + { + "epoch": 1.42, + "learning_rate": 5.792069898306067e-06, + "loss": 2.7473, + "step": 196600 + }, + { + "epoch": 1.42, + "learning_rate": 5.784909372332886e-06, + "loss": 2.7343, + "step": 196700 + }, + { + "epoch": 1.42, + "learning_rate": 5.777676517814521e-06, + "loss": 2.7428, + "step": 196800 + }, + { + "epoch": 1.42, + "learning_rate": 5.770443663296157e-06, + "loss": 2.7387, + "step": 196900 + }, + { + "epoch": 1.42, + "learning_rate": 5.763210808777793e-06, + "loss": 2.7503, + "step": 197000 + }, + { + "epoch": 1.42, + "eval_accuracy": 0.46069085662955606, + "eval_loss": 2.7644920349121094, + "eval_runtime": 29.638, + "eval_samples_per_second": 218.74, + "eval_steps_per_second": 2.294, + "step": 197000 + }, + { + "epoch": 1.43, + "learning_rate": 5.755977954259428e-06, + "loss": 2.7531, + "step": 197100 + }, + { + "epoch": 1.43, + "learning_rate": 5.748745099741064e-06, + "loss": 2.7383, + "step": 197200 + }, + { + "epoch": 1.43, + "learning_rate": 5.7415122452227005e-06, + "loss": 2.7487, + "step": 197300 + }, + { + "epoch": 1.43, + "learning_rate": 5.734279390704336e-06, + "loss": 2.7425, + "step": 197400 + }, + { + "epoch": 1.43, + "learning_rate": 5.727046536185972e-06, + "loss": 2.7485, + "step": 197500 + }, + { + "epoch": 1.43, + "learning_rate": 5.7198136816676075e-06, + "loss": 2.7438, + "step": 197600 + }, + { + "epoch": 1.43, + "learning_rate": 5.712653155694426e-06, + "loss": 2.7458, + "step": 197700 + }, + { + "epoch": 1.43, + "learning_rate": 5.705420301176062e-06, + "loss": 2.7499, + "step": 197800 + }, + { + "epoch": 1.43, + "learning_rate": 5.698187446657698e-06, + "loss": 2.7384, + "step": 197900 + }, + { + "epoch": 1.43, + "learning_rate": 5.690954592139334e-06, + "loss": 2.7446, + "step": 198000 + }, + { + "epoch": 1.43, + "eval_accuracy": 0.46069690563252913, + "eval_loss": 2.7643613815307617, + "eval_runtime": 29.4958, + "eval_samples_per_second": 219.794, + "eval_steps_per_second": 2.305, + "step": 198000 + }, + { + "epoch": 1.43, + "learning_rate": 5.68372173762097e-06, + "loss": 2.7482, + "step": 198100 + }, + { + "epoch": 1.43, + "learning_rate": 5.6764888831026055e-06, + "loss": 2.745, + "step": 198200 + }, + { + "epoch": 1.43, + "learning_rate": 5.669256028584241e-06, + "loss": 2.743, + "step": 198300 + }, + { + "epoch": 1.43, + "learning_rate": 5.662023174065877e-06, + "loss": 2.7392, + "step": 198400 + }, + { + "epoch": 1.44, + "learning_rate": 5.6547903195475125e-06, + "loss": 2.7378, + "step": 198500 + }, + { + "epoch": 1.44, + "learning_rate": 5.647557465029149e-06, + "loss": 2.747, + "step": 198600 + }, + { + "epoch": 1.44, + "learning_rate": 5.640324610510785e-06, + "loss": 2.7432, + "step": 198700 + }, + { + "epoch": 1.44, + "learning_rate": 5.6331640845376045e-06, + "loss": 2.7402, + "step": 198800 + }, + { + "epoch": 1.44, + "learning_rate": 5.62593123001924e-06, + "loss": 2.7439, + "step": 198900 + }, + { + "epoch": 1.44, + "learning_rate": 5.618698375500876e-06, + "loss": 2.748, + "step": 199000 + }, + { + "epoch": 1.44, + "eval_accuracy": 0.460747717257503, + "eval_loss": 2.764165163040161, + "eval_runtime": 31.2501, + "eval_samples_per_second": 207.455, + "eval_steps_per_second": 2.176, + "step": 199000 + }, + { + "epoch": 1.44, + "learning_rate": 5.611465520982512e-06, + "loss": 2.7423, + "step": 199100 + }, + { + "epoch": 1.44, + "learning_rate": 5.604232666464146e-06, + "loss": 2.7383, + "step": 199200 + }, + { + "epoch": 1.44, + "learning_rate": 5.596999811945783e-06, + "loss": 2.748, + "step": 199300 + }, + { + "epoch": 1.44, + "learning_rate": 5.5897669574274185e-06, + "loss": 2.7418, + "step": 199400 + }, + { + "epoch": 1.44, + "learning_rate": 5.582534102909054e-06, + "loss": 2.7437, + "step": 199500 + }, + { + "epoch": 1.44, + "learning_rate": 5.57530124839069e-06, + "loss": 2.7444, + "step": 199600 + }, + { + "epoch": 1.44, + "learning_rate": 5.5680683938723255e-06, + "loss": 2.7405, + "step": 199700 + }, + { + "epoch": 1.45, + "learning_rate": 5.560835539353961e-06, + "loss": 2.7555, + "step": 199800 + }, + { + "epoch": 1.45, + "learning_rate": 5.553602684835598e-06, + "loss": 2.7455, + "step": 199900 + }, + { + "epoch": 1.45, + "learning_rate": 5.546369830317233e-06, + "loss": 2.7394, + "step": 200000 + }, + { + "epoch": 1.45, + "eval_accuracy": 0.46070537423669145, + "eval_loss": 2.7640960216522217, + "eval_runtime": 29.999, + "eval_samples_per_second": 216.107, + "eval_steps_per_second": 2.267, + "step": 200000 + }, + { + "epoch": 1.45, + "learning_rate": 5.539136975798869e-06, + "loss": 2.7442, + "step": 200100 + }, + { + "epoch": 1.45, + "learning_rate": 5.531904121280505e-06, + "loss": 2.746, + "step": 200200 + }, + { + "epoch": 1.45, + "learning_rate": 5.524743595307324e-06, + "loss": 2.7457, + "step": 200300 + }, + { + "epoch": 1.45, + "learning_rate": 5.517510740788961e-06, + "loss": 2.738, + "step": 200400 + }, + { + "epoch": 1.45, + "learning_rate": 5.5102778862705966e-06, + "loss": 2.7424, + "step": 200500 + }, + { + "epoch": 1.45, + "learning_rate": 5.503045031752232e-06, + "loss": 2.7345, + "step": 200600 + }, + { + "epoch": 1.45, + "learning_rate": 5.495812177233868e-06, + "loss": 2.7399, + "step": 200700 + }, + { + "epoch": 1.45, + "learning_rate": 5.4885793227155036e-06, + "loss": 2.7494, + "step": 200800 + }, + { + "epoch": 1.45, + "learning_rate": 5.481346468197139e-06, + "loss": 2.7468, + "step": 200900 + }, + { + "epoch": 1.45, + "learning_rate": 5.474113613678776e-06, + "loss": 2.7403, + "step": 201000 + }, + { + "epoch": 1.45, + "eval_accuracy": 0.46072291634531337, + "eval_loss": 2.7638235092163086, + "eval_runtime": 29.4493, + "eval_samples_per_second": 220.141, + "eval_steps_per_second": 2.309, + "step": 201000 + }, + { + "epoch": 1.45, + "learning_rate": 5.466880759160411e-06, + "loss": 2.7505, + "step": 201100 + }, + { + "epoch": 1.46, + "learning_rate": 5.459647904642047e-06, + "loss": 2.7523, + "step": 201200 + }, + { + "epoch": 1.46, + "learning_rate": 5.452415050123683e-06, + "loss": 2.7507, + "step": 201300 + }, + { + "epoch": 1.46, + "learning_rate": 5.445182195605318e-06, + "loss": 2.7374, + "step": 201400 + }, + { + "epoch": 1.46, + "learning_rate": 5.437949341086953e-06, + "loss": 2.7449, + "step": 201500 + }, + { + "epoch": 1.46, + "learning_rate": 5.430788815113773e-06, + "loss": 2.739, + "step": 201600 + }, + { + "epoch": 1.46, + "learning_rate": 5.4235559605954095e-06, + "loss": 2.7527, + "step": 201700 + }, + { + "epoch": 1.46, + "learning_rate": 5.416323106077045e-06, + "loss": 2.7362, + "step": 201800 + }, + { + "epoch": 1.46, + "learning_rate": 5.409090251558681e-06, + "loss": 2.7489, + "step": 201900 + }, + { + "epoch": 1.46, + "learning_rate": 5.4018573970403165e-06, + "loss": 2.7467, + "step": 202000 + }, + { + "epoch": 1.46, + "eval_accuracy": 0.46072291634531337, + "eval_loss": 2.763704299926758, + "eval_runtime": 30.6951, + "eval_samples_per_second": 211.206, + "eval_steps_per_second": 2.215, + "step": 202000 + }, + { + "epoch": 1.46, + "learning_rate": 5.394624542521952e-06, + "loss": 2.7419, + "step": 202100 + }, + { + "epoch": 1.46, + "learning_rate": 5.387391688003588e-06, + "loss": 2.7384, + "step": 202200 + }, + { + "epoch": 1.46, + "learning_rate": 5.380158833485224e-06, + "loss": 2.7425, + "step": 202300 + }, + { + "epoch": 1.46, + "learning_rate": 5.37292597896686e-06, + "loss": 2.7346, + "step": 202400 + }, + { + "epoch": 1.46, + "learning_rate": 5.365693124448496e-06, + "loss": 2.7527, + "step": 202500 + }, + { + "epoch": 1.47, + "learning_rate": 5.358460269930131e-06, + "loss": 2.7396, + "step": 202600 + }, + { + "epoch": 1.47, + "learning_rate": 5.351227415411767e-06, + "loss": 2.7407, + "step": 202700 + }, + { + "epoch": 1.47, + "learning_rate": 5.343994560893403e-06, + "loss": 2.7501, + "step": 202800 + }, + { + "epoch": 1.47, + "learning_rate": 5.3368340349202216e-06, + "loss": 2.7392, + "step": 202900 + }, + { + "epoch": 1.47, + "learning_rate": 5.329601180401858e-06, + "loss": 2.7532, + "step": 203000 + }, + { + "epoch": 1.47, + "eval_accuracy": 0.4608082072872339, + "eval_loss": 2.7634613513946533, + "eval_runtime": 27.9391, + "eval_samples_per_second": 232.04, + "eval_steps_per_second": 2.434, + "step": 203000 + }, + { + "epoch": 1.47, + "learning_rate": 5.322368325883494e-06, + "loss": 2.738, + "step": 203100 + }, + { + "epoch": 1.47, + "learning_rate": 5.315135471365129e-06, + "loss": 2.7442, + "step": 203200 + }, + { + "epoch": 1.47, + "learning_rate": 5.307902616846765e-06, + "loss": 2.7478, + "step": 203300 + }, + { + "epoch": 1.47, + "learning_rate": 5.300669762328401e-06, + "loss": 2.7452, + "step": 203400 + }, + { + "epoch": 1.47, + "learning_rate": 5.293436907810036e-06, + "loss": 2.7407, + "step": 203500 + }, + { + "epoch": 1.47, + "learning_rate": 5.286204053291673e-06, + "loss": 2.7475, + "step": 203600 + }, + { + "epoch": 1.47, + "learning_rate": 5.278971198773309e-06, + "loss": 2.7537, + "step": 203700 + }, + { + "epoch": 1.47, + "learning_rate": 5.271738344254944e-06, + "loss": 2.7459, + "step": 203800 + }, + { + "epoch": 1.47, + "learning_rate": 5.26450548973658e-06, + "loss": 2.7525, + "step": 203900 + }, + { + "epoch": 1.48, + "learning_rate": 5.257344963763399e-06, + "loss": 2.7431, + "step": 204000 + }, + { + "epoch": 1.48, + "eval_accuracy": 0.4608662777157755, + "eval_loss": 2.763364553451538, + "eval_runtime": 32.4224, + "eval_samples_per_second": 199.954, + "eval_steps_per_second": 2.097, + "step": 204000 + }, + { + "epoch": 1.48, + "learning_rate": 5.2501121092450345e-06, + "loss": 2.7486, + "step": 204100 + }, + { + "epoch": 1.48, + "learning_rate": 5.24287925472667e-06, + "loss": 2.7437, + "step": 204200 + }, + { + "epoch": 1.48, + "learning_rate": 5.235646400208307e-06, + "loss": 2.7369, + "step": 204300 + }, + { + "epoch": 1.48, + "learning_rate": 5.228413545689942e-06, + "loss": 2.7487, + "step": 204400 + }, + { + "epoch": 1.48, + "learning_rate": 5.221180691171578e-06, + "loss": 2.7439, + "step": 204500 + }, + { + "epoch": 1.48, + "learning_rate": 5.213947836653214e-06, + "loss": 2.7435, + "step": 204600 + }, + { + "epoch": 1.48, + "learning_rate": 5.206714982134849e-06, + "loss": 2.7452, + "step": 204700 + }, + { + "epoch": 1.48, + "learning_rate": 5.199482127616485e-06, + "loss": 2.7409, + "step": 204800 + }, + { + "epoch": 1.48, + "learning_rate": 5.1922492730981215e-06, + "loss": 2.7448, + "step": 204900 + }, + { + "epoch": 1.48, + "learning_rate": 5.185016418579757e-06, + "loss": 2.7433, + "step": 205000 + }, + { + "epoch": 1.48, + "eval_accuracy": 0.4608082072872339, + "eval_loss": 2.7632086277008057, + "eval_runtime": 29.7969, + "eval_samples_per_second": 217.573, + "eval_steps_per_second": 2.282, + "step": 205000 + }, + { + "epoch": 1.48, + "learning_rate": 5.177855892606577e-06, + "loss": 2.7424, + "step": 205100 + }, + { + "epoch": 1.48, + "learning_rate": 5.1706230380882126e-06, + "loss": 2.7355, + "step": 205200 + }, + { + "epoch": 1.48, + "learning_rate": 5.163390183569848e-06, + "loss": 2.7338, + "step": 205300 + }, + { + "epoch": 1.49, + "learning_rate": 5.156157329051485e-06, + "loss": 2.7346, + "step": 205400 + }, + { + "epoch": 1.49, + "learning_rate": 5.14892447453312e-06, + "loss": 2.7495, + "step": 205500 + }, + { + "epoch": 1.49, + "learning_rate": 5.141691620014756e-06, + "loss": 2.7364, + "step": 205600 + }, + { + "epoch": 1.49, + "learning_rate": 5.134458765496392e-06, + "loss": 2.7424, + "step": 205700 + }, + { + "epoch": 1.49, + "learning_rate": 5.1272259109780266e-06, + "loss": 2.74, + "step": 205800 + }, + { + "epoch": 1.49, + "learning_rate": 5.119993056459662e-06, + "loss": 2.748, + "step": 205900 + }, + { + "epoch": 1.49, + "learning_rate": 5.112760201941298e-06, + "loss": 2.7436, + "step": 206000 + }, + { + "epoch": 1.49, + "eval_accuracy": 0.4609152746398575, + "eval_loss": 2.762951135635376, + "eval_runtime": 29.8903, + "eval_samples_per_second": 216.893, + "eval_steps_per_second": 2.275, + "step": 206000 + }, + { + "epoch": 1.49, + "learning_rate": 5.1055273474229336e-06, + "loss": 2.7419, + "step": 206100 + }, + { + "epoch": 1.49, + "learning_rate": 5.09829449290457e-06, + "loss": 2.7369, + "step": 206200 + }, + { + "epoch": 1.49, + "learning_rate": 5.091061638386206e-06, + "loss": 2.7427, + "step": 206300 + }, + { + "epoch": 1.49, + "learning_rate": 5.0839011124130255e-06, + "loss": 2.747, + "step": 206400 + }, + { + "epoch": 1.49, + "learning_rate": 5.076668257894661e-06, + "loss": 2.7469, + "step": 206500 + }, + { + "epoch": 1.49, + "learning_rate": 5.069435403376297e-06, + "loss": 2.7459, + "step": 206600 + }, + { + "epoch": 1.5, + "learning_rate": 5.062202548857933e-06, + "loss": 2.7446, + "step": 206700 + }, + { + "epoch": 1.5, + "learning_rate": 5.054969694339569e-06, + "loss": 2.7419, + "step": 206800 + }, + { + "epoch": 1.5, + "learning_rate": 5.047736839821205e-06, + "loss": 2.749, + "step": 206900 + }, + { + "epoch": 1.5, + "learning_rate": 5.04050398530284e-06, + "loss": 2.747, + "step": 207000 + }, + { + "epoch": 1.5, + "eval_accuracy": 0.46086809241666743, + "eval_loss": 2.7627713680267334, + "eval_runtime": 29.635, + "eval_samples_per_second": 218.761, + "eval_steps_per_second": 2.295, + "step": 207000 + }, + { + "epoch": 1.5, + "learning_rate": 5.033271130784476e-06, + "loss": 2.7381, + "step": 207100 + }, + { + "epoch": 1.5, + "learning_rate": 5.026038276266112e-06, + "loss": 2.7322, + "step": 207200 + }, + { + "epoch": 1.5, + "learning_rate": 5.018805421747748e-06, + "loss": 2.7446, + "step": 207300 + }, + { + "epoch": 1.5, + "learning_rate": 5.011572567229384e-06, + "loss": 2.7473, + "step": 207400 + }, + { + "epoch": 1.5, + "learning_rate": 5.0043397127110195e-06, + "loss": 2.7449, + "step": 207500 + }, + { + "epoch": 1.5, + "learning_rate": 4.997106858192654e-06, + "loss": 2.7365, + "step": 207600 + }, + { + "epoch": 1.5, + "learning_rate": 4.98987400367429e-06, + "loss": 2.7431, + "step": 207700 + }, + { + "epoch": 1.5, + "learning_rate": 4.98271347770111e-06, + "loss": 2.7405, + "step": 207800 + }, + { + "epoch": 1.5, + "learning_rate": 4.975480623182745e-06, + "loss": 2.7495, + "step": 207900 + }, + { + "epoch": 1.5, + "learning_rate": 4.968247768664382e-06, + "loss": 2.7395, + "step": 208000 + }, + { + "epoch": 1.5, + "eval_accuracy": 0.4608783757217217, + "eval_loss": 2.7625892162323, + "eval_runtime": 28.0543, + "eval_samples_per_second": 231.088, + "eval_steps_per_second": 2.424, + "step": 208000 + }, + { + "epoch": 1.51, + "learning_rate": 4.961014914146018e-06, + "loss": 2.746, + "step": 208100 + }, + { + "epoch": 1.51, + "learning_rate": 4.953782059627653e-06, + "loss": 2.7362, + "step": 208200 + }, + { + "epoch": 1.51, + "learning_rate": 4.946549205109289e-06, + "loss": 2.7453, + "step": 208300 + }, + { + "epoch": 1.51, + "learning_rate": 4.939316350590925e-06, + "loss": 2.7409, + "step": 208400 + }, + { + "epoch": 1.51, + "learning_rate": 4.93208349607256e-06, + "loss": 2.7459, + "step": 208500 + }, + { + "epoch": 1.51, + "learning_rate": 4.924850641554196e-06, + "loss": 2.7428, + "step": 208600 + }, + { + "epoch": 1.51, + "learning_rate": 4.9176177870358324e-06, + "loss": 2.7507, + "step": 208700 + }, + { + "epoch": 1.51, + "learning_rate": 4.910384932517468e-06, + "loss": 2.7423, + "step": 208800 + }, + { + "epoch": 1.51, + "learning_rate": 4.903152077999104e-06, + "loss": 2.7494, + "step": 208900 + }, + { + "epoch": 1.51, + "learning_rate": 4.8959192234807394e-06, + "loss": 2.7443, + "step": 209000 + }, + { + "epoch": 1.51, + "eval_accuracy": 0.4609176942410467, + "eval_loss": 2.7624387741088867, + "eval_runtime": 30.0098, + "eval_samples_per_second": 216.029, + "eval_steps_per_second": 2.266, + "step": 209000 + }, + { + "epoch": 1.51, + "learning_rate": 4.888686368962375e-06, + "loss": 2.7409, + "step": 209100 + }, + { + "epoch": 1.51, + "learning_rate": 4.881525842989194e-06, + "loss": 2.7504, + "step": 209200 + }, + { + "epoch": 1.51, + "learning_rate": 4.8742929884708305e-06, + "loss": 2.7428, + "step": 209300 + }, + { + "epoch": 1.51, + "learning_rate": 4.867060133952466e-06, + "loss": 2.7458, + "step": 209400 + }, + { + "epoch": 1.52, + "learning_rate": 4.859827279434102e-06, + "loss": 2.7465, + "step": 209500 + }, + { + "epoch": 1.52, + "learning_rate": 4.8525944249157375e-06, + "loss": 2.7435, + "step": 209600 + }, + { + "epoch": 1.52, + "learning_rate": 4.845361570397373e-06, + "loss": 2.7431, + "step": 209700 + }, + { + "epoch": 1.52, + "learning_rate": 4.838128715879009e-06, + "loss": 2.7376, + "step": 209800 + }, + { + "epoch": 1.52, + "learning_rate": 4.8308958613606445e-06, + "loss": 2.7403, + "step": 209900 + }, + { + "epoch": 1.52, + "learning_rate": 4.823663006842281e-06, + "loss": 2.7395, + "step": 210000 + }, + { + "epoch": 1.52, + "eval_accuracy": 0.46079066517861195, + "eval_loss": 2.762295961380005, + "eval_runtime": 30.222, + "eval_samples_per_second": 214.513, + "eval_steps_per_second": 2.25, + "step": 210000 + }, + { + "epoch": 1.52, + "learning_rate": 4.816430152323917e-06, + "loss": 2.7402, + "step": 210100 + }, + { + "epoch": 1.52, + "learning_rate": 4.809197297805552e-06, + "loss": 2.7445, + "step": 210200 + }, + { + "epoch": 1.52, + "learning_rate": 4.802036771832372e-06, + "loss": 2.7432, + "step": 210300 + }, + { + "epoch": 1.52, + "learning_rate": 4.794803917314008e-06, + "loss": 2.7407, + "step": 210400 + }, + { + "epoch": 1.52, + "learning_rate": 4.7875710627956434e-06, + "loss": 2.7449, + "step": 210500 + }, + { + "epoch": 1.52, + "learning_rate": 4.780338208277279e-06, + "loss": 2.7481, + "step": 210600 + }, + { + "epoch": 1.52, + "learning_rate": 4.773105353758915e-06, + "loss": 2.7456, + "step": 210700 + }, + { + "epoch": 1.52, + "learning_rate": 4.7658724992405504e-06, + "loss": 2.7489, + "step": 210800 + }, + { + "epoch": 1.53, + "learning_rate": 4.75871197326737e-06, + "loss": 2.7345, + "step": 210900 + }, + { + "epoch": 1.53, + "learning_rate": 4.751479118749006e-06, + "loss": 2.7353, + "step": 211000 + }, + { + "epoch": 1.53, + "eval_accuracy": 0.4608324032991262, + "eval_loss": 2.7621333599090576, + "eval_runtime": 31.0021, + "eval_samples_per_second": 209.115, + "eval_steps_per_second": 2.193, + "step": 211000 + }, + { + "epoch": 1.53, + "learning_rate": 4.7442462642306415e-06, + "loss": 2.7442, + "step": 211100 + }, + { + "epoch": 1.53, + "learning_rate": 4.737013409712277e-06, + "loss": 2.7349, + "step": 211200 + }, + { + "epoch": 1.53, + "learning_rate": 4.729780555193913e-06, + "loss": 2.7421, + "step": 211300 + }, + { + "epoch": 1.53, + "learning_rate": 4.7225477006755485e-06, + "loss": 2.7441, + "step": 211400 + }, + { + "epoch": 1.53, + "learning_rate": 4.715314846157185e-06, + "loss": 2.74, + "step": 211500 + }, + { + "epoch": 1.53, + "learning_rate": 4.708081991638821e-06, + "loss": 2.7479, + "step": 211600 + }, + { + "epoch": 1.53, + "learning_rate": 4.700849137120456e-06, + "loss": 2.7426, + "step": 211700 + }, + { + "epoch": 1.53, + "learning_rate": 4.693616282602092e-06, + "loss": 2.7466, + "step": 211800 + }, + { + "epoch": 1.53, + "learning_rate": 4.686383428083728e-06, + "loss": 2.7438, + "step": 211900 + }, + { + "epoch": 1.53, + "learning_rate": 4.679150573565363e-06, + "loss": 2.7401, + "step": 212000 + }, + { + "epoch": 1.53, + "eval_accuracy": 0.46098302347315606, + "eval_loss": 2.7617835998535156, + "eval_runtime": 29.9041, + "eval_samples_per_second": 216.793, + "eval_steps_per_second": 2.274, + "step": 212000 + }, + { + "epoch": 1.53, + "learning_rate": 4.671917719046999e-06, + "loss": 2.7432, + "step": 212100 + }, + { + "epoch": 1.53, + "learning_rate": 4.6646848645286355e-06, + "loss": 2.7462, + "step": 212200 + }, + { + "epoch": 1.54, + "learning_rate": 4.657452010010271e-06, + "loss": 2.7348, + "step": 212300 + }, + { + "epoch": 1.54, + "learning_rate": 4.650219155491907e-06, + "loss": 2.7424, + "step": 212400 + }, + { + "epoch": 1.54, + "learning_rate": 4.6429863009735425e-06, + "loss": 2.7427, + "step": 212500 + }, + { + "epoch": 1.54, + "learning_rate": 4.635753446455178e-06, + "loss": 2.75, + "step": 212600 + }, + { + "epoch": 1.54, + "learning_rate": 4.628520591936814e-06, + "loss": 2.7498, + "step": 212700 + }, + { + "epoch": 1.54, + "learning_rate": 4.62128773741845e-06, + "loss": 2.7432, + "step": 212800 + }, + { + "epoch": 1.54, + "learning_rate": 4.614054882900086e-06, + "loss": 2.7424, + "step": 212900 + }, + { + "epoch": 1.54, + "learning_rate": 4.606894356926905e-06, + "loss": 2.7371, + "step": 213000 + }, + { + "epoch": 1.54, + "eval_accuracy": 0.4609551980594798, + "eval_loss": 2.761749744415283, + "eval_runtime": 30.3654, + "eval_samples_per_second": 213.5, + "eval_steps_per_second": 2.239, + "step": 213000 + }, + { + "epoch": 1.54, + "learning_rate": 4.5996615024085414e-06, + "loss": 2.7503, + "step": 213100 + }, + { + "epoch": 1.54, + "learning_rate": 4.592428647890177e-06, + "loss": 2.7367, + "step": 213200 + }, + { + "epoch": 1.54, + "learning_rate": 4.585195793371813e-06, + "loss": 2.7346, + "step": 213300 + }, + { + "epoch": 1.54, + "learning_rate": 4.577962938853448e-06, + "loss": 2.7493, + "step": 213400 + }, + { + "epoch": 1.54, + "learning_rate": 4.570730084335084e-06, + "loss": 2.748, + "step": 213500 + }, + { + "epoch": 1.54, + "learning_rate": 4.563569558361903e-06, + "loss": 2.7412, + "step": 213600 + }, + { + "epoch": 1.55, + "learning_rate": 4.5563367038435395e-06, + "loss": 2.7452, + "step": 213700 + }, + { + "epoch": 1.55, + "learning_rate": 4.549103849325175e-06, + "loss": 2.7362, + "step": 213800 + }, + { + "epoch": 1.55, + "learning_rate": 4.541870994806811e-06, + "loss": 2.7471, + "step": 213900 + }, + { + "epoch": 1.55, + "learning_rate": 4.5346381402884465e-06, + "loss": 2.7458, + "step": 214000 + }, + { + "epoch": 1.55, + "eval_accuracy": 0.46101931749099456, + "eval_loss": 2.7615652084350586, + "eval_runtime": 31.5152, + "eval_samples_per_second": 205.71, + "eval_steps_per_second": 2.158, + "step": 214000 + }, + { + "epoch": 1.55, + "learning_rate": 4.527405285770082e-06, + "loss": 2.7413, + "step": 214100 + }, + { + "epoch": 1.55, + "learning_rate": 4.520172431251718e-06, + "loss": 2.7408, + "step": 214200 + }, + { + "epoch": 1.55, + "learning_rate": 4.5129395767333535e-06, + "loss": 2.7456, + "step": 214300 + }, + { + "epoch": 1.55, + "learning_rate": 4.50570672221499e-06, + "loss": 2.741, + "step": 214400 + }, + { + "epoch": 1.55, + "learning_rate": 4.498473867696626e-06, + "loss": 2.7465, + "step": 214500 + }, + { + "epoch": 1.55, + "learning_rate": 4.491241013178261e-06, + "loss": 2.7435, + "step": 214600 + }, + { + "epoch": 1.55, + "learning_rate": 4.484008158659897e-06, + "loss": 2.7377, + "step": 214700 + }, + { + "epoch": 1.55, + "learning_rate": 4.476775304141533e-06, + "loss": 2.7381, + "step": 214800 + }, + { + "epoch": 1.55, + "learning_rate": 4.469542449623168e-06, + "loss": 2.7445, + "step": 214900 + }, + { + "epoch": 1.56, + "learning_rate": 4.462309595104805e-06, + "loss": 2.7416, + "step": 215000 + }, + { + "epoch": 1.56, + "eval_accuracy": 0.4611487661546186, + "eval_loss": 2.7614753246307373, + "eval_runtime": 27.7512, + "eval_samples_per_second": 233.612, + "eval_steps_per_second": 2.45, + "step": 215000 + }, + { + "epoch": 1.56, + "learning_rate": 4.4550767405864405e-06, + "loss": 2.7414, + "step": 215100 + }, + { + "epoch": 1.56, + "learning_rate": 4.447843886068076e-06, + "loss": 2.7419, + "step": 215200 + }, + { + "epoch": 1.56, + "learning_rate": 4.440611031549712e-06, + "loss": 2.7466, + "step": 215300 + }, + { + "epoch": 1.56, + "learning_rate": 4.4333781770313475e-06, + "loss": 2.7416, + "step": 215400 + }, + { + "epoch": 1.56, + "learning_rate": 4.426145322512983e-06, + "loss": 2.7512, + "step": 215500 + }, + { + "epoch": 1.56, + "learning_rate": 4.418984796539802e-06, + "loss": 2.7472, + "step": 215600 + }, + { + "epoch": 1.56, + "learning_rate": 4.411751942021439e-06, + "loss": 2.7447, + "step": 215700 + }, + { + "epoch": 1.56, + "learning_rate": 4.404519087503074e-06, + "loss": 2.7415, + "step": 215800 + }, + { + "epoch": 1.56, + "learning_rate": 4.39728623298471e-06, + "loss": 2.7387, + "step": 215900 + }, + { + "epoch": 1.56, + "learning_rate": 4.390053378466346e-06, + "loss": 2.7434, + "step": 216000 + }, + { + "epoch": 1.56, + "eval_accuracy": 0.4610798075207254, + "eval_loss": 2.761385440826416, + "eval_runtime": 27.9023, + "eval_samples_per_second": 232.347, + "eval_steps_per_second": 2.437, + "step": 216000 + }, + { + "epoch": 1.56, + "learning_rate": 4.382820523947981e-06, + "loss": 2.7392, + "step": 216100 + }, + { + "epoch": 1.56, + "learning_rate": 4.375587669429617e-06, + "loss": 2.7394, + "step": 216200 + }, + { + "epoch": 1.56, + "learning_rate": 4.3683548149112535e-06, + "loss": 2.7423, + "step": 216300 + }, + { + "epoch": 1.57, + "learning_rate": 4.361121960392889e-06, + "loss": 2.7389, + "step": 216400 + }, + { + "epoch": 1.57, + "learning_rate": 4.353889105874525e-06, + "loss": 2.7334, + "step": 216500 + }, + { + "epoch": 1.57, + "learning_rate": 4.3466562513561605e-06, + "loss": 2.7384, + "step": 216600 + }, + { + "epoch": 1.57, + "learning_rate": 4.339423396837796e-06, + "loss": 2.7432, + "step": 216700 + }, + { + "epoch": 1.57, + "learning_rate": 4.332190542319432e-06, + "loss": 2.7388, + "step": 216800 + }, + { + "epoch": 1.57, + "learning_rate": 4.324957687801068e-06, + "loss": 2.7417, + "step": 216900 + }, + { + "epoch": 1.57, + "learning_rate": 4.317724833282704e-06, + "loss": 2.7456, + "step": 217000 + }, + { + "epoch": 1.57, + "eval_accuracy": 0.4610507723064546, + "eval_loss": 2.7613608837127686, + "eval_runtime": 32.1832, + "eval_samples_per_second": 201.441, + "eval_steps_per_second": 2.113, + "step": 217000 + }, + { + "epoch": 1.57, + "learning_rate": 4.31049197876434e-06, + "loss": 2.744, + "step": 217100 + }, + { + "epoch": 1.57, + "learning_rate": 4.303259124245975e-06, + "loss": 2.7446, + "step": 217200 + }, + { + "epoch": 1.57, + "learning_rate": 4.296026269727611e-06, + "loss": 2.7351, + "step": 217300 + }, + { + "epoch": 1.57, + "learning_rate": 4.288793415209247e-06, + "loss": 2.7413, + "step": 217400 + }, + { + "epoch": 1.57, + "learning_rate": 4.281560560690883e-06, + "loss": 2.7478, + "step": 217500 + }, + { + "epoch": 1.57, + "learning_rate": 4.274327706172519e-06, + "loss": 2.7409, + "step": 217600 + }, + { + "epoch": 1.57, + "learning_rate": 4.267167180199338e-06, + "loss": 2.731, + "step": 217700 + }, + { + "epoch": 1.58, + "learning_rate": 4.259934325680974e-06, + "loss": 2.7469, + "step": 217800 + }, + { + "epoch": 1.58, + "learning_rate": 4.252701471162609e-06, + "loss": 2.7392, + "step": 217900 + }, + { + "epoch": 1.58, + "learning_rate": 4.245540945189429e-06, + "loss": 2.7499, + "step": 218000 + }, + { + "epoch": 1.58, + "eval_accuracy": 0.46106226541210343, + "eval_loss": 2.7610652446746826, + "eval_runtime": 29.5792, + "eval_samples_per_second": 219.174, + "eval_steps_per_second": 2.299, + "step": 218000 + }, + { + "epoch": 1.58, + "learning_rate": 4.2383080906710645e-06, + "loss": 2.7383, + "step": 218100 + }, + { + "epoch": 1.58, + "learning_rate": 4.2310752361527e-06, + "loss": 2.7398, + "step": 218200 + }, + { + "epoch": 1.58, + "learning_rate": 4.223842381634336e-06, + "loss": 2.7354, + "step": 218300 + }, + { + "epoch": 1.58, + "learning_rate": 4.2166095271159715e-06, + "loss": 2.7482, + "step": 218400 + }, + { + "epoch": 1.58, + "learning_rate": 4.209376672597608e-06, + "loss": 2.7427, + "step": 218500 + }, + { + "epoch": 1.58, + "learning_rate": 4.202143818079244e-06, + "loss": 2.7346, + "step": 218600 + }, + { + "epoch": 1.58, + "learning_rate": 4.194910963560879e-06, + "loss": 2.743, + "step": 218700 + }, + { + "epoch": 1.58, + "learning_rate": 4.187678109042515e-06, + "loss": 2.738, + "step": 218800 + }, + { + "epoch": 1.58, + "learning_rate": 4.180445254524151e-06, + "loss": 2.7347, + "step": 218900 + }, + { + "epoch": 1.58, + "learning_rate": 4.173212400005786e-06, + "loss": 2.744, + "step": 219000 + }, + { + "epoch": 1.58, + "eval_accuracy": 0.46114574165313205, + "eval_loss": 2.760906457901001, + "eval_runtime": 27.8233, + "eval_samples_per_second": 233.006, + "eval_steps_per_second": 2.444, + "step": 219000 + }, + { + "epoch": 1.58, + "learning_rate": 4.165979545487423e-06, + "loss": 2.7415, + "step": 219100 + }, + { + "epoch": 1.59, + "learning_rate": 4.158819019514242e-06, + "loss": 2.7429, + "step": 219200 + }, + { + "epoch": 1.59, + "learning_rate": 4.151586164995877e-06, + "loss": 2.7381, + "step": 219300 + }, + { + "epoch": 1.59, + "learning_rate": 4.144353310477514e-06, + "loss": 2.7389, + "step": 219400 + }, + { + "epoch": 1.59, + "learning_rate": 4.1371204559591496e-06, + "loss": 2.7519, + "step": 219500 + }, + { + "epoch": 1.59, + "learning_rate": 4.129887601440785e-06, + "loss": 2.7505, + "step": 219600 + }, + { + "epoch": 1.59, + "learning_rate": 4.122727075467605e-06, + "loss": 2.7414, + "step": 219700 + }, + { + "epoch": 1.59, + "learning_rate": 4.11549422094924e-06, + "loss": 2.7272, + "step": 219800 + }, + { + "epoch": 1.59, + "learning_rate": 4.1082613664308754e-06, + "loss": 2.7408, + "step": 219900 + }, + { + "epoch": 1.59, + "learning_rate": 4.101028511912511e-06, + "loss": 2.7375, + "step": 220000 + }, + { + "epoch": 1.59, + "eval_accuracy": 0.4611257799433208, + "eval_loss": 2.760807514190674, + "eval_runtime": 29.5284, + "eval_samples_per_second": 219.552, + "eval_steps_per_second": 2.303, + "step": 220000 + }, + { + "epoch": 1.59, + "learning_rate": 4.093795657394148e-06, + "loss": 2.7376, + "step": 220100 + }, + { + "epoch": 1.59, + "learning_rate": 4.086562802875783e-06, + "loss": 2.7406, + "step": 220200 + }, + { + "epoch": 1.59, + "learning_rate": 4.079329948357419e-06, + "loss": 2.7457, + "step": 220300 + }, + { + "epoch": 1.59, + "learning_rate": 4.072097093839055e-06, + "loss": 2.7377, + "step": 220400 + }, + { + "epoch": 1.59, + "learning_rate": 4.06486423932069e-06, + "loss": 2.7417, + "step": 220500 + }, + { + "epoch": 1.6, + "learning_rate": 4.057631384802326e-06, + "loss": 2.7429, + "step": 220600 + }, + { + "epoch": 1.6, + "learning_rate": 4.050470858829146e-06, + "loss": 2.7458, + "step": 220700 + }, + { + "epoch": 1.6, + "learning_rate": 4.043238004310781e-06, + "loss": 2.74, + "step": 220800 + }, + { + "epoch": 1.6, + "learning_rate": 4.036005149792417e-06, + "loss": 2.7355, + "step": 220900 + }, + { + "epoch": 1.6, + "learning_rate": 4.0287722952740535e-06, + "loss": 2.7428, + "step": 221000 + }, + { + "epoch": 1.6, + "eval_accuracy": 0.4611263848436182, + "eval_loss": 2.7606468200683594, + "eval_runtime": 29.1245, + "eval_samples_per_second": 222.596, + "eval_steps_per_second": 2.335, + "step": 221000 + }, + { + "epoch": 1.6, + "learning_rate": 4.021539440755689e-06, + "loss": 2.7413, + "step": 221100 + }, + { + "epoch": 1.6, + "learning_rate": 4.014306586237325e-06, + "loss": 2.7345, + "step": 221200 + }, + { + "epoch": 1.6, + "learning_rate": 4.0070737317189605e-06, + "loss": 2.7442, + "step": 221300 + }, + { + "epoch": 1.6, + "learning_rate": 3.999840877200596e-06, + "loss": 2.7402, + "step": 221400 + }, + { + "epoch": 1.6, + "learning_rate": 3.992608022682232e-06, + "loss": 2.7399, + "step": 221500 + }, + { + "epoch": 1.6, + "learning_rate": 3.985375168163868e-06, + "loss": 2.7461, + "step": 221600 + }, + { + "epoch": 1.6, + "learning_rate": 3.978142313645504e-06, + "loss": 2.7288, + "step": 221700 + }, + { + "epoch": 1.6, + "learning_rate": 3.97090945912714e-06, + "loss": 2.7323, + "step": 221800 + }, + { + "epoch": 1.6, + "learning_rate": 3.963676604608775e-06, + "loss": 2.735, + "step": 221900 + }, + { + "epoch": 1.61, + "learning_rate": 3.956443750090411e-06, + "loss": 2.7442, + "step": 222000 + }, + { + "epoch": 1.61, + "eval_accuracy": 0.4611021888317258, + "eval_loss": 2.7605795860290527, + "eval_runtime": 29.7064, + "eval_samples_per_second": 218.236, + "eval_steps_per_second": 2.289, + "step": 222000 + }, + { + "epoch": 1.61, + "learning_rate": 3.949210895572047e-06, + "loss": 2.7375, + "step": 222100 + }, + { + "epoch": 1.61, + "learning_rate": 3.941978041053682e-06, + "loss": 2.7365, + "step": 222200 + }, + { + "epoch": 1.61, + "learning_rate": 3.934745186535318e-06, + "loss": 2.7442, + "step": 222300 + }, + { + "epoch": 1.61, + "learning_rate": 3.927512332016954e-06, + "loss": 2.7405, + "step": 222400 + }, + { + "epoch": 1.61, + "learning_rate": 3.920279477498589e-06, + "loss": 2.7429, + "step": 222500 + }, + { + "epoch": 1.61, + "learning_rate": 3.913046622980226e-06, + "loss": 2.7361, + "step": 222600 + }, + { + "epoch": 1.61, + "learning_rate": 3.9058137684618616e-06, + "loss": 2.7376, + "step": 222700 + }, + { + "epoch": 1.61, + "learning_rate": 3.8986532424886805e-06, + "loss": 2.7357, + "step": 222800 + }, + { + "epoch": 1.61, + "learning_rate": 3.891420387970317e-06, + "loss": 2.7413, + "step": 222900 + }, + { + "epoch": 1.61, + "learning_rate": 3.884187533451953e-06, + "loss": 2.7395, + "step": 223000 + }, + { + "epoch": 1.61, + "eval_accuracy": 0.46116146906086203, + "eval_loss": 2.7603955268859863, + "eval_runtime": 29.5556, + "eval_samples_per_second": 219.349, + "eval_steps_per_second": 2.301, + "step": 223000 + }, + { + "epoch": 1.61, + "learning_rate": 3.876954678933588e-06, + "loss": 2.7413, + "step": 223100 + }, + { + "epoch": 1.61, + "learning_rate": 3.869721824415224e-06, + "loss": 2.7495, + "step": 223200 + }, + { + "epoch": 1.62, + "learning_rate": 3.86248896989686e-06, + "loss": 2.7346, + "step": 223300 + }, + { + "epoch": 1.62, + "learning_rate": 3.855256115378495e-06, + "loss": 2.7434, + "step": 223400 + }, + { + "epoch": 1.62, + "learning_rate": 3.848023260860132e-06, + "loss": 2.7414, + "step": 223500 + }, + { + "epoch": 1.62, + "learning_rate": 3.8407904063417675e-06, + "loss": 2.7452, + "step": 223600 + }, + { + "epoch": 1.62, + "learning_rate": 3.833557551823403e-06, + "loss": 2.7334, + "step": 223700 + }, + { + "epoch": 1.62, + "learning_rate": 3.826397025850223e-06, + "loss": 2.734, + "step": 223800 + }, + { + "epoch": 1.62, + "learning_rate": 3.8191641713318586e-06, + "loss": 2.7417, + "step": 223900 + }, + { + "epoch": 1.62, + "learning_rate": 3.8119313168134942e-06, + "loss": 2.7445, + "step": 224000 + }, + { + "epoch": 1.62, + "eval_accuracy": 0.4612249835920794, + "eval_loss": 2.7602407932281494, + "eval_runtime": 27.9042, + "eval_samples_per_second": 232.331, + "eval_steps_per_second": 2.437, + "step": 224000 + }, + { + "epoch": 1.62, + "learning_rate": 3.80469846229513e-06, + "loss": 2.7441, + "step": 224100 + }, + { + "epoch": 1.62, + "learning_rate": 3.7974656077767656e-06, + "loss": 2.7424, + "step": 224200 + }, + { + "epoch": 1.62, + "learning_rate": 3.790232753258401e-06, + "loss": 2.7458, + "step": 224300 + }, + { + "epoch": 1.62, + "learning_rate": 3.782999898740037e-06, + "loss": 2.7381, + "step": 224400 + }, + { + "epoch": 1.62, + "learning_rate": 3.7757670442216726e-06, + "loss": 2.7407, + "step": 224500 + }, + { + "epoch": 1.62, + "learning_rate": 3.7685341897033082e-06, + "loss": 2.7407, + "step": 224600 + }, + { + "epoch": 1.63, + "learning_rate": 3.7613013351849443e-06, + "loss": 2.7438, + "step": 224700 + }, + { + "epoch": 1.63, + "learning_rate": 3.75406848066658e-06, + "loss": 2.7364, + "step": 224800 + }, + { + "epoch": 1.63, + "learning_rate": 3.7468356261482156e-06, + "loss": 2.7412, + "step": 224900 + }, + { + "epoch": 1.63, + "learning_rate": 3.7396027716298517e-06, + "loss": 2.7394, + "step": 225000 + }, + { + "epoch": 1.63, + "eval_accuracy": 0.4611070280341043, + "eval_loss": 2.760154962539673, + "eval_runtime": 30.9249, + "eval_samples_per_second": 209.637, + "eval_steps_per_second": 2.199, + "step": 225000 + }, + { + "epoch": 1.63, + "learning_rate": 3.732442245656671e-06, + "loss": 2.744, + "step": 225100 + }, + { + "epoch": 1.63, + "learning_rate": 3.7252093911383067e-06, + "loss": 2.7371, + "step": 225200 + }, + { + "epoch": 1.63, + "learning_rate": 3.717976536619943e-06, + "loss": 2.7374, + "step": 225300 + }, + { + "epoch": 1.63, + "learning_rate": 3.7107436821015785e-06, + "loss": 2.7375, + "step": 225400 + }, + { + "epoch": 1.63, + "learning_rate": 3.703510827583214e-06, + "loss": 2.7368, + "step": 225500 + }, + { + "epoch": 1.63, + "learning_rate": 3.6962779730648502e-06, + "loss": 2.7399, + "step": 225600 + }, + { + "epoch": 1.63, + "learning_rate": 3.689045118546486e-06, + "loss": 2.7354, + "step": 225700 + }, + { + "epoch": 1.63, + "learning_rate": 3.6818122640281216e-06, + "loss": 2.7475, + "step": 225800 + }, + { + "epoch": 1.63, + "learning_rate": 3.6745794095097577e-06, + "loss": 2.749, + "step": 225900 + }, + { + "epoch": 1.63, + "learning_rate": 3.6673465549913933e-06, + "loss": 2.7403, + "step": 226000 + }, + { + "epoch": 1.63, + "eval_accuracy": 0.46118143077067325, + "eval_loss": 2.7599334716796875, + "eval_runtime": 29.7721, + "eval_samples_per_second": 217.754, + "eval_steps_per_second": 2.284, + "step": 226000 + }, + { + "epoch": 1.64, + "learning_rate": 3.6601860290182126e-06, + "loss": 2.7356, + "step": 226100 + }, + { + "epoch": 1.64, + "learning_rate": 3.6529531744998487e-06, + "loss": 2.7327, + "step": 226200 + }, + { + "epoch": 1.64, + "learning_rate": 3.6457203199814844e-06, + "loss": 2.7394, + "step": 226300 + }, + { + "epoch": 1.64, + "learning_rate": 3.6384874654631196e-06, + "loss": 2.7427, + "step": 226400 + }, + { + "epoch": 1.64, + "learning_rate": 3.6312546109447553e-06, + "loss": 2.7417, + "step": 226500 + }, + { + "epoch": 1.64, + "learning_rate": 3.6240217564263914e-06, + "loss": 2.7395, + "step": 226600 + }, + { + "epoch": 1.64, + "learning_rate": 3.616788901908027e-06, + "loss": 2.7454, + "step": 226700 + }, + { + "epoch": 1.64, + "learning_rate": 3.6095560473896627e-06, + "loss": 2.737, + "step": 226800 + }, + { + "epoch": 1.64, + "learning_rate": 3.602323192871299e-06, + "loss": 2.7436, + "step": 226900 + }, + { + "epoch": 1.64, + "learning_rate": 3.5950903383529345e-06, + "loss": 2.738, + "step": 227000 + }, + { + "epoch": 1.64, + "eval_accuracy": 0.46119715817840323, + "eval_loss": 2.7598636150360107, + "eval_runtime": 27.9537, + "eval_samples_per_second": 231.919, + "eval_steps_per_second": 2.433, + "step": 227000 + }, + { + "epoch": 1.64, + "learning_rate": 3.58785748383457e-06, + "loss": 2.7383, + "step": 227100 + }, + { + "epoch": 1.64, + "learning_rate": 3.5806246293162062e-06, + "loss": 2.7407, + "step": 227200 + }, + { + "epoch": 1.64, + "learning_rate": 3.573391774797842e-06, + "loss": 2.7394, + "step": 227300 + }, + { + "epoch": 1.64, + "learning_rate": 3.5661589202794776e-06, + "loss": 2.7449, + "step": 227400 + }, + { + "epoch": 1.65, + "learning_rate": 3.5589260657611137e-06, + "loss": 2.7449, + "step": 227500 + }, + { + "epoch": 1.65, + "learning_rate": 3.5516932112427493e-06, + "loss": 2.7383, + "step": 227600 + }, + { + "epoch": 1.65, + "learning_rate": 3.5445326852695686e-06, + "loss": 2.7364, + "step": 227700 + }, + { + "epoch": 1.65, + "learning_rate": 3.5372998307512047e-06, + "loss": 2.7375, + "step": 227800 + }, + { + "epoch": 1.65, + "learning_rate": 3.5300669762328404e-06, + "loss": 2.7405, + "step": 227900 + }, + { + "epoch": 1.65, + "learning_rate": 3.522834121714476e-06, + "loss": 2.7332, + "step": 228000 + }, + { + "epoch": 1.65, + "eval_accuracy": 0.4612788197185399, + "eval_loss": 2.759690284729004, + "eval_runtime": 32.3658, + "eval_samples_per_second": 200.304, + "eval_steps_per_second": 2.101, + "step": 228000 + }, + { + "epoch": 1.65, + "learning_rate": 3.515601267196112e-06, + "loss": 2.7336, + "step": 228100 + }, + { + "epoch": 1.65, + "learning_rate": 3.508368412677748e-06, + "loss": 2.73, + "step": 228200 + }, + { + "epoch": 1.65, + "learning_rate": 3.5011355581593835e-06, + "loss": 2.7353, + "step": 228300 + }, + { + "epoch": 1.65, + "learning_rate": 3.4939027036410196e-06, + "loss": 2.7394, + "step": 228400 + }, + { + "epoch": 1.65, + "learning_rate": 3.4866698491226552e-06, + "loss": 2.7291, + "step": 228500 + }, + { + "epoch": 1.65, + "learning_rate": 3.479436994604291e-06, + "loss": 2.7395, + "step": 228600 + }, + { + "epoch": 1.65, + "learning_rate": 3.472204140085926e-06, + "loss": 2.7351, + "step": 228700 + }, + { + "epoch": 1.65, + "learning_rate": 3.4649712855675622e-06, + "loss": 2.7361, + "step": 228800 + }, + { + "epoch": 1.66, + "learning_rate": 3.457738431049198e-06, + "loss": 2.7387, + "step": 228900 + }, + { + "epoch": 1.66, + "learning_rate": 3.4505055765308336e-06, + "loss": 2.7388, + "step": 229000 + }, + { + "epoch": 1.66, + "eval_accuracy": 0.46129575692686453, + "eval_loss": 2.759584903717041, + "eval_runtime": 29.3954, + "eval_samples_per_second": 220.545, + "eval_steps_per_second": 2.313, + "step": 229000 + }, + { + "epoch": 1.66, + "learning_rate": 3.4432727220124697e-06, + "loss": 2.7428, + "step": 229100 + }, + { + "epoch": 1.66, + "learning_rate": 3.4360398674941053e-06, + "loss": 2.7331, + "step": 229200 + }, + { + "epoch": 1.66, + "learning_rate": 3.428807012975741e-06, + "loss": 2.7316, + "step": 229300 + }, + { + "epoch": 1.66, + "learning_rate": 3.421574158457377e-06, + "loss": 2.7302, + "step": 229400 + }, + { + "epoch": 1.66, + "learning_rate": 3.4143413039390128e-06, + "loss": 2.7417, + "step": 229500 + }, + { + "epoch": 1.66, + "learning_rate": 3.4071084494206484e-06, + "loss": 2.7427, + "step": 229600 + }, + { + "epoch": 1.66, + "learning_rate": 3.3998755949022845e-06, + "loss": 2.7407, + "step": 229700 + }, + { + "epoch": 1.66, + "learning_rate": 3.39264274038392e-06, + "loss": 2.7359, + "step": 229800 + }, + { + "epoch": 1.66, + "learning_rate": 3.385409885865556e-06, + "loss": 2.7396, + "step": 229900 + }, + { + "epoch": 1.66, + "learning_rate": 3.3782493598923756e-06, + "loss": 2.743, + "step": 230000 + }, + { + "epoch": 1.66, + "eval_accuracy": 0.46134172934946, + "eval_loss": 2.7594590187072754, + "eval_runtime": 29.5124, + "eval_samples_per_second": 219.67, + "eval_steps_per_second": 2.304, + "step": 230000 + }, + { + "epoch": 1.66, + "learning_rate": 3.3710165053740113e-06, + "loss": 2.7326, + "step": 230100 + }, + { + "epoch": 1.66, + "learning_rate": 3.363783650855647e-06, + "loss": 2.7364, + "step": 230200 + }, + { + "epoch": 1.67, + "learning_rate": 3.356550796337283e-06, + "loss": 2.7352, + "step": 230300 + }, + { + "epoch": 1.67, + "learning_rate": 3.3493179418189187e-06, + "loss": 2.7397, + "step": 230400 + }, + { + "epoch": 1.67, + "learning_rate": 3.3420850873005543e-06, + "loss": 2.7393, + "step": 230500 + }, + { + "epoch": 1.67, + "learning_rate": 3.3348522327821904e-06, + "loss": 2.744, + "step": 230600 + }, + { + "epoch": 1.67, + "learning_rate": 3.327619378263826e-06, + "loss": 2.7422, + "step": 230700 + }, + { + "epoch": 1.67, + "learning_rate": 3.3203865237454618e-06, + "loss": 2.7439, + "step": 230800 + }, + { + "epoch": 1.67, + "learning_rate": 3.3132259977722807e-06, + "loss": 2.746, + "step": 230900 + }, + { + "epoch": 1.67, + "learning_rate": 3.3059931432539168e-06, + "loss": 2.7368, + "step": 231000 + }, + { + "epoch": 1.67, + "eval_accuracy": 0.4612715609149722, + "eval_loss": 2.759273052215576, + "eval_runtime": 29.4403, + "eval_samples_per_second": 220.209, + "eval_steps_per_second": 2.31, + "step": 231000 + }, + { + "epoch": 1.67, + "learning_rate": 3.2987602887355524e-06, + "loss": 2.7397, + "step": 231100 + }, + { + "epoch": 1.67, + "learning_rate": 3.291527434217188e-06, + "loss": 2.7324, + "step": 231200 + }, + { + "epoch": 1.67, + "learning_rate": 3.284294579698824e-06, + "loss": 2.7456, + "step": 231300 + }, + { + "epoch": 1.67, + "learning_rate": 3.27706172518046e-06, + "loss": 2.731, + "step": 231400 + }, + { + "epoch": 1.67, + "learning_rate": 3.2698288706620955e-06, + "loss": 2.7319, + "step": 231500 + }, + { + "epoch": 1.68, + "learning_rate": 3.2625960161437316e-06, + "loss": 2.7492, + "step": 231600 + }, + { + "epoch": 1.68, + "learning_rate": 3.2553631616253673e-06, + "loss": 2.741, + "step": 231700 + }, + { + "epoch": 1.68, + "learning_rate": 3.248130307107003e-06, + "loss": 2.7384, + "step": 231800 + }, + { + "epoch": 1.68, + "learning_rate": 3.240897452588639e-06, + "loss": 2.7307, + "step": 231900 + }, + { + "epoch": 1.68, + "learning_rate": 3.2336645980702747e-06, + "loss": 2.7426, + "step": 232000 + }, + { + "epoch": 1.68, + "eval_accuracy": 0.4613774184670012, + "eval_loss": 2.7592265605926514, + "eval_runtime": 31.7025, + "eval_samples_per_second": 204.495, + "eval_steps_per_second": 2.145, + "step": 232000 + }, + { + "epoch": 1.68, + "learning_rate": 3.2264317435519104e-06, + "loss": 2.7339, + "step": 232100 + }, + { + "epoch": 1.68, + "learning_rate": 3.2191988890335464e-06, + "loss": 2.7413, + "step": 232200 + }, + { + "epoch": 1.68, + "learning_rate": 3.211966034515182e-06, + "loss": 2.7413, + "step": 232300 + }, + { + "epoch": 1.68, + "learning_rate": 3.2047331799968178e-06, + "loss": 2.7421, + "step": 232400 + }, + { + "epoch": 1.68, + "learning_rate": 3.197500325478454e-06, + "loss": 2.7387, + "step": 232500 + }, + { + "epoch": 1.68, + "learning_rate": 3.1902674709600895e-06, + "loss": 2.7344, + "step": 232600 + }, + { + "epoch": 1.68, + "learning_rate": 3.183034616441725e-06, + "loss": 2.7488, + "step": 232700 + }, + { + "epoch": 1.68, + "learning_rate": 3.1758017619233613e-06, + "loss": 2.7391, + "step": 232800 + }, + { + "epoch": 1.68, + "learning_rate": 3.168568907404997e-06, + "loss": 2.7362, + "step": 232900 + }, + { + "epoch": 1.69, + "learning_rate": 3.1614083814318163e-06, + "loss": 2.7332, + "step": 233000 + }, + { + "epoch": 1.69, + "eval_accuracy": 0.4614445624000024, + "eval_loss": 2.7591168880462646, + "eval_runtime": 30.2871, + "eval_samples_per_second": 214.052, + "eval_steps_per_second": 2.245, + "step": 233000 + }, + { + "epoch": 1.69, + "learning_rate": 3.1541755269134524e-06, + "loss": 2.7441, + "step": 233100 + }, + { + "epoch": 1.69, + "learning_rate": 3.1469426723950876e-06, + "loss": 2.74, + "step": 233200 + }, + { + "epoch": 1.69, + "learning_rate": 3.1397098178767233e-06, + "loss": 2.7379, + "step": 233300 + }, + { + "epoch": 1.69, + "learning_rate": 3.132476963358359e-06, + "loss": 2.7406, + "step": 233400 + }, + { + "epoch": 1.69, + "learning_rate": 3.1253164373851787e-06, + "loss": 2.7385, + "step": 233500 + }, + { + "epoch": 1.69, + "learning_rate": 3.1180835828668143e-06, + "loss": 2.7332, + "step": 233600 + }, + { + "epoch": 1.69, + "learning_rate": 3.11085072834845e-06, + "loss": 2.7396, + "step": 233700 + }, + { + "epoch": 1.69, + "learning_rate": 3.103617873830086e-06, + "loss": 2.743, + "step": 233800 + }, + { + "epoch": 1.69, + "learning_rate": 3.0963850193117218e-06, + "loss": 2.7363, + "step": 233900 + }, + { + "epoch": 1.69, + "learning_rate": 3.0891521647933574e-06, + "loss": 2.7413, + "step": 234000 + }, + { + "epoch": 1.69, + "eval_accuracy": 0.4613556420562981, + "eval_loss": 2.7590200901031494, + "eval_runtime": 29.9923, + "eval_samples_per_second": 216.155, + "eval_steps_per_second": 2.267, + "step": 234000 + }, + { + "epoch": 1.69, + "learning_rate": 3.0819193102749935e-06, + "loss": 2.7473, + "step": 234100 + }, + { + "epoch": 1.69, + "learning_rate": 3.074686455756629e-06, + "loss": 2.7374, + "step": 234200 + }, + { + "epoch": 1.69, + "learning_rate": 3.067453601238265e-06, + "loss": 2.7358, + "step": 234300 + }, + { + "epoch": 1.7, + "learning_rate": 3.060220746719901e-06, + "loss": 2.7322, + "step": 234400 + }, + { + "epoch": 1.7, + "learning_rate": 3.0529878922015366e-06, + "loss": 2.7412, + "step": 234500 + }, + { + "epoch": 1.7, + "learning_rate": 3.0457550376831723e-06, + "loss": 2.7346, + "step": 234600 + }, + { + "epoch": 1.7, + "learning_rate": 3.0385221831648084e-06, + "loss": 2.7395, + "step": 234700 + }, + { + "epoch": 1.7, + "learning_rate": 3.031289328646444e-06, + "loss": 2.7384, + "step": 234800 + }, + { + "epoch": 1.7, + "learning_rate": 3.0240564741280797e-06, + "loss": 2.7408, + "step": 234900 + }, + { + "epoch": 1.7, + "learning_rate": 3.016823619609716e-06, + "loss": 2.735, + "step": 235000 + }, + { + "epoch": 1.7, + "eval_accuracy": 0.46131692843727035, + "eval_loss": 2.7589235305786133, + "eval_runtime": 29.8027, + "eval_samples_per_second": 217.53, + "eval_steps_per_second": 2.282, + "step": 235000 + }, + { + "epoch": 1.7, + "learning_rate": 3.0095907650913515e-06, + "loss": 2.7375, + "step": 235100 + }, + { + "epoch": 1.7, + "learning_rate": 3.002357910572987e-06, + "loss": 2.7379, + "step": 235200 + }, + { + "epoch": 1.7, + "learning_rate": 2.9951250560546232e-06, + "loss": 2.734, + "step": 235300 + }, + { + "epoch": 1.7, + "learning_rate": 2.987892201536259e-06, + "loss": 2.743, + "step": 235400 + }, + { + "epoch": 1.7, + "learning_rate": 2.980659347017894e-06, + "loss": 2.7419, + "step": 235500 + }, + { + "epoch": 1.7, + "learning_rate": 2.97342649249953e-06, + "loss": 2.743, + "step": 235600 + }, + { + "epoch": 1.7, + "learning_rate": 2.9662659665263495e-06, + "loss": 2.7387, + "step": 235700 + }, + { + "epoch": 1.71, + "learning_rate": 2.959033112007985e-06, + "loss": 2.7431, + "step": 235800 + }, + { + "epoch": 1.71, + "learning_rate": 2.951800257489621e-06, + "loss": 2.7385, + "step": 235900 + }, + { + "epoch": 1.71, + "learning_rate": 2.9446397315164406e-06, + "loss": 2.7393, + "step": 236000 + }, + { + "epoch": 1.71, + "eval_accuracy": 0.46144032809792124, + "eval_loss": 2.7588789463043213, + "eval_runtime": 29.5655, + "eval_samples_per_second": 219.276, + "eval_steps_per_second": 2.3, + "step": 236000 + }, + { + "epoch": 1.71, + "learning_rate": 2.9374068769980763e-06, + "loss": 2.7405, + "step": 236100 + }, + { + "epoch": 1.71, + "learning_rate": 2.930174022479712e-06, + "loss": 2.7432, + "step": 236200 + }, + { + "epoch": 1.71, + "learning_rate": 2.922941167961348e-06, + "loss": 2.7447, + "step": 236300 + }, + { + "epoch": 1.71, + "learning_rate": 2.9157083134429837e-06, + "loss": 2.7412, + "step": 236400 + }, + { + "epoch": 1.71, + "learning_rate": 2.9084754589246194e-06, + "loss": 2.7406, + "step": 236500 + }, + { + "epoch": 1.71, + "learning_rate": 2.9012426044062555e-06, + "loss": 2.7361, + "step": 236600 + }, + { + "epoch": 1.71, + "learning_rate": 2.894009749887891e-06, + "loss": 2.7427, + "step": 236700 + }, + { + "epoch": 1.71, + "learning_rate": 2.8867768953695268e-06, + "loss": 2.732, + "step": 236800 + }, + { + "epoch": 1.71, + "learning_rate": 2.879544040851163e-06, + "loss": 2.7359, + "step": 236900 + }, + { + "epoch": 1.71, + "learning_rate": 2.8723111863327985e-06, + "loss": 2.7382, + "step": 237000 + }, + { + "epoch": 1.71, + "eval_accuracy": 0.4614735976142732, + "eval_loss": 2.7587404251098633, + "eval_runtime": 29.6674, + "eval_samples_per_second": 218.523, + "eval_steps_per_second": 2.292, + "step": 237000 + }, + { + "epoch": 1.71, + "learning_rate": 2.865078331814434e-06, + "loss": 2.7361, + "step": 237100 + }, + { + "epoch": 1.72, + "learning_rate": 2.8578454772960703e-06, + "loss": 2.7353, + "step": 237200 + }, + { + "epoch": 1.72, + "learning_rate": 2.850612622777706e-06, + "loss": 2.7453, + "step": 237300 + }, + { + "epoch": 1.72, + "learning_rate": 2.8433797682593416e-06, + "loss": 2.7412, + "step": 237400 + }, + { + "epoch": 1.72, + "learning_rate": 2.8361469137409773e-06, + "loss": 2.7355, + "step": 237500 + }, + { + "epoch": 1.72, + "learning_rate": 2.828914059222613e-06, + "loss": 2.7308, + "step": 237600 + }, + { + "epoch": 1.72, + "learning_rate": 2.8216812047042486e-06, + "loss": 2.7353, + "step": 237700 + }, + { + "epoch": 1.72, + "learning_rate": 2.8144483501858843e-06, + "loss": 2.7321, + "step": 237800 + }, + { + "epoch": 1.72, + "learning_rate": 2.80721549566752e-06, + "loss": 2.7433, + "step": 237900 + }, + { + "epoch": 1.72, + "learning_rate": 2.8000549696943397e-06, + "loss": 2.7403, + "step": 238000 + }, + { + "epoch": 1.72, + "eval_accuracy": 0.46152561903984174, + "eval_loss": 2.7587077617645264, + "eval_runtime": 29.8439, + "eval_samples_per_second": 217.231, + "eval_steps_per_second": 2.279, + "step": 238000 + }, + { + "epoch": 1.72, + "learning_rate": 2.7928221151759754e-06, + "loss": 2.738, + "step": 238100 + }, + { + "epoch": 1.72, + "learning_rate": 2.7855892606576115e-06, + "loss": 2.7421, + "step": 238200 + }, + { + "epoch": 1.72, + "learning_rate": 2.778356406139247e-06, + "loss": 2.747, + "step": 238300 + }, + { + "epoch": 1.72, + "learning_rate": 2.771123551620883e-06, + "loss": 2.7361, + "step": 238400 + }, + { + "epoch": 1.73, + "learning_rate": 2.763890697102519e-06, + "loss": 2.7439, + "step": 238500 + }, + { + "epoch": 1.73, + "learning_rate": 2.7566578425841545e-06, + "loss": 2.7365, + "step": 238600 + }, + { + "epoch": 1.73, + "learning_rate": 2.7494249880657902e-06, + "loss": 2.744, + "step": 238700 + }, + { + "epoch": 1.73, + "learning_rate": 2.742192133547426e-06, + "loss": 2.7478, + "step": 238800 + }, + { + "epoch": 1.73, + "learning_rate": 2.7350316075742456e-06, + "loss": 2.7332, + "step": 238900 + }, + { + "epoch": 1.73, + "learning_rate": 2.7277987530558813e-06, + "loss": 2.7436, + "step": 239000 + }, + { + "epoch": 1.73, + "eval_accuracy": 0.46152017493716596, + "eval_loss": 2.7586169242858887, + "eval_runtime": 29.5848, + "eval_samples_per_second": 219.132, + "eval_steps_per_second": 2.298, + "step": 239000 + }, + { + "epoch": 1.73, + "learning_rate": 2.7205658985375174e-06, + "loss": 2.7422, + "step": 239100 + }, + { + "epoch": 1.73, + "learning_rate": 2.713333044019153e-06, + "loss": 2.7439, + "step": 239200 + }, + { + "epoch": 1.73, + "learning_rate": 2.7061001895007887e-06, + "loss": 2.7457, + "step": 239300 + }, + { + "epoch": 1.73, + "learning_rate": 2.698867334982425e-06, + "loss": 2.7453, + "step": 239400 + }, + { + "epoch": 1.73, + "learning_rate": 2.6916344804640605e-06, + "loss": 2.7448, + "step": 239500 + }, + { + "epoch": 1.73, + "learning_rate": 2.684401625945696e-06, + "loss": 2.7451, + "step": 239600 + }, + { + "epoch": 1.73, + "learning_rate": 2.677168771427332e-06, + "loss": 2.75, + "step": 239700 + }, + { + "epoch": 1.73, + "learning_rate": 2.669935916908967e-06, + "loss": 2.7402, + "step": 239800 + }, + { + "epoch": 1.74, + "learning_rate": 2.662703062390603e-06, + "loss": 2.7313, + "step": 239900 + }, + { + "epoch": 1.74, + "learning_rate": 2.655470207872239e-06, + "loss": 2.7422, + "step": 240000 + }, + { + "epoch": 1.74, + "eval_accuracy": 0.461479041716949, + "eval_loss": 2.758490562438965, + "eval_runtime": 30.0813, + "eval_samples_per_second": 215.516, + "eval_steps_per_second": 2.261, + "step": 240000 + }, + { + "epoch": 1.74, + "learning_rate": 2.6482373533538745e-06, + "loss": 2.7413, + "step": 240100 + }, + { + "epoch": 1.74, + "learning_rate": 2.6410044988355106e-06, + "loss": 2.7452, + "step": 240200 + }, + { + "epoch": 1.74, + "learning_rate": 2.6337716443171462e-06, + "loss": 2.7438, + "step": 240300 + }, + { + "epoch": 1.74, + "learning_rate": 2.626538789798782e-06, + "loss": 2.7333, + "step": 240400 + }, + { + "epoch": 1.74, + "learning_rate": 2.619305935280418e-06, + "loss": 2.7387, + "step": 240500 + }, + { + "epoch": 1.74, + "learning_rate": 2.6120730807620536e-06, + "loss": 2.7413, + "step": 240600 + }, + { + "epoch": 1.74, + "learning_rate": 2.6048402262436893e-06, + "loss": 2.7352, + "step": 240700 + }, + { + "epoch": 1.74, + "learning_rate": 2.5976073717253254e-06, + "loss": 2.7375, + "step": 240800 + }, + { + "epoch": 1.74, + "learning_rate": 2.590374517206961e-06, + "loss": 2.7369, + "step": 240900 + }, + { + "epoch": 1.74, + "learning_rate": 2.5832139912337804e-06, + "loss": 2.7257, + "step": 241000 + }, + { + "epoch": 1.74, + "eval_accuracy": 0.4614288349922724, + "eval_loss": 2.758410930633545, + "eval_runtime": 30.0568, + "eval_samples_per_second": 215.692, + "eval_steps_per_second": 2.262, + "step": 241000 + }, + { + "epoch": 1.74, + "learning_rate": 2.5759811367154165e-06, + "loss": 2.738, + "step": 241100 + }, + { + "epoch": 1.74, + "learning_rate": 2.568748282197052e-06, + "loss": 2.7356, + "step": 241200 + }, + { + "epoch": 1.75, + "learning_rate": 2.561515427678688e-06, + "loss": 2.7441, + "step": 241300 + }, + { + "epoch": 1.75, + "learning_rate": 2.554282573160324e-06, + "loss": 2.7457, + "step": 241400 + }, + { + "epoch": 1.75, + "learning_rate": 2.5470497186419596e-06, + "loss": 2.7406, + "step": 241500 + }, + { + "epoch": 1.75, + "learning_rate": 2.5398168641235952e-06, + "loss": 2.7382, + "step": 241600 + }, + { + "epoch": 1.75, + "learning_rate": 2.532656338150415e-06, + "loss": 2.742, + "step": 241700 + }, + { + "epoch": 1.75, + "learning_rate": 2.5254234836320506e-06, + "loss": 2.7328, + "step": 241800 + }, + { + "epoch": 1.75, + "learning_rate": 2.518190629113686e-06, + "loss": 2.7437, + "step": 241900 + }, + { + "epoch": 1.75, + "learning_rate": 2.5109577745953215e-06, + "loss": 2.7351, + "step": 242000 + }, + { + "epoch": 1.75, + "eval_accuracy": 0.461543766048761, + "eval_loss": 2.758322238922119, + "eval_runtime": 29.9387, + "eval_samples_per_second": 216.543, + "eval_steps_per_second": 2.271, + "step": 242000 + }, + { + "epoch": 1.75, + "learning_rate": 2.5037249200769576e-06, + "loss": 2.7423, + "step": 242100 + }, + { + "epoch": 1.75, + "learning_rate": 2.4964920655585937e-06, + "loss": 2.7486, + "step": 242200 + }, + { + "epoch": 1.75, + "learning_rate": 2.4892592110402294e-06, + "loss": 2.7439, + "step": 242300 + }, + { + "epoch": 1.75, + "learning_rate": 2.482026356521865e-06, + "loss": 2.7344, + "step": 242400 + }, + { + "epoch": 1.75, + "learning_rate": 2.4747935020035007e-06, + "loss": 2.7396, + "step": 242500 + }, + { + "epoch": 1.75, + "learning_rate": 2.4675606474851364e-06, + "loss": 2.7368, + "step": 242600 + }, + { + "epoch": 1.76, + "learning_rate": 2.4603277929667725e-06, + "loss": 2.7349, + "step": 242700 + }, + { + "epoch": 1.76, + "learning_rate": 2.453094938448408e-06, + "loss": 2.7427, + "step": 242800 + }, + { + "epoch": 1.76, + "learning_rate": 2.445862083930044e-06, + "loss": 2.7458, + "step": 242900 + }, + { + "epoch": 1.76, + "learning_rate": 2.43862922941168e-06, + "loss": 2.7391, + "step": 243000 + }, + { + "epoch": 1.76, + "eval_accuracy": 0.4615431611484637, + "eval_loss": 2.7582499980926514, + "eval_runtime": 29.9454, + "eval_samples_per_second": 216.494, + "eval_steps_per_second": 2.271, + "step": 243000 + }, + { + "epoch": 1.76, + "learning_rate": 2.4313963748933156e-06, + "loss": 2.7428, + "step": 243100 + }, + { + "epoch": 1.76, + "learning_rate": 2.4241635203749512e-06, + "loss": 2.7424, + "step": 243200 + }, + { + "epoch": 1.76, + "learning_rate": 2.4169306658565873e-06, + "loss": 2.7412, + "step": 243300 + }, + { + "epoch": 1.76, + "learning_rate": 2.409697811338223e-06, + "loss": 2.7338, + "step": 243400 + }, + { + "epoch": 1.76, + "learning_rate": 2.4024649568198587e-06, + "loss": 2.7389, + "step": 243500 + }, + { + "epoch": 1.76, + "learning_rate": 2.3952321023014948e-06, + "loss": 2.7363, + "step": 243600 + }, + { + "epoch": 1.76, + "learning_rate": 2.3879992477831304e-06, + "loss": 2.7472, + "step": 243700 + }, + { + "epoch": 1.76, + "learning_rate": 2.3808387218099497e-06, + "loss": 2.7422, + "step": 243800 + }, + { + "epoch": 1.76, + "learning_rate": 2.3736058672915854e-06, + "loss": 2.7432, + "step": 243900 + }, + { + "epoch": 1.76, + "learning_rate": 2.366373012773221e-06, + "loss": 2.7495, + "step": 244000 + }, + { + "epoch": 1.76, + "eval_accuracy": 0.4615492101514368, + "eval_loss": 2.758117437362671, + "eval_runtime": 29.5895, + "eval_samples_per_second": 219.098, + "eval_steps_per_second": 2.298, + "step": 244000 + }, + { + "epoch": 1.77, + "learning_rate": 2.359140158254857e-06, + "loss": 2.7379, + "step": 244100 + }, + { + "epoch": 1.77, + "learning_rate": 2.351907303736493e-06, + "loss": 2.736, + "step": 244200 + }, + { + "epoch": 1.77, + "learning_rate": 2.3446744492181285e-06, + "loss": 2.7374, + "step": 244300 + }, + { + "epoch": 1.77, + "learning_rate": 2.3374415946997646e-06, + "loss": 2.7352, + "step": 244400 + }, + { + "epoch": 1.77, + "learning_rate": 2.3302087401814002e-06, + "loss": 2.7366, + "step": 244500 + }, + { + "epoch": 1.77, + "learning_rate": 2.322975885663036e-06, + "loss": 2.742, + "step": 244600 + }, + { + "epoch": 1.77, + "learning_rate": 2.315743031144672e-06, + "loss": 2.7376, + "step": 244700 + }, + { + "epoch": 1.77, + "learning_rate": 2.3085101766263077e-06, + "loss": 2.7423, + "step": 244800 + }, + { + "epoch": 1.77, + "learning_rate": 2.3012773221079433e-06, + "loss": 2.7357, + "step": 244900 + }, + { + "epoch": 1.77, + "learning_rate": 2.294044467589579e-06, + "loss": 2.7399, + "step": 245000 + }, + { + "epoch": 1.77, + "eval_accuracy": 0.4614409329982186, + "eval_loss": 2.7580342292785645, + "eval_runtime": 31.4458, + "eval_samples_per_second": 206.164, + "eval_steps_per_second": 2.162, + "step": 245000 + }, + { + "epoch": 1.77, + "learning_rate": 2.2868116130712147e-06, + "loss": 2.7369, + "step": 245100 + }, + { + "epoch": 1.77, + "learning_rate": 2.2795787585528508e-06, + "loss": 2.7372, + "step": 245200 + }, + { + "epoch": 1.77, + "learning_rate": 2.2723459040344864e-06, + "loss": 2.7426, + "step": 245300 + }, + { + "epoch": 1.77, + "learning_rate": 2.2651853780613057e-06, + "loss": 2.742, + "step": 245400 + }, + { + "epoch": 1.78, + "learning_rate": 2.257952523542942e-06, + "loss": 2.7461, + "step": 245500 + }, + { + "epoch": 1.78, + "learning_rate": 2.2507196690245775e-06, + "loss": 2.7337, + "step": 245600 + }, + { + "epoch": 1.78, + "learning_rate": 2.243486814506213e-06, + "loss": 2.7397, + "step": 245700 + }, + { + "epoch": 1.78, + "learning_rate": 2.2362539599878493e-06, + "loss": 2.7301, + "step": 245800 + }, + { + "epoch": 1.78, + "learning_rate": 2.2290211054694845e-06, + "loss": 2.7431, + "step": 245900 + }, + { + "epoch": 1.78, + "learning_rate": 2.2217882509511206e-06, + "loss": 2.7435, + "step": 246000 + }, + { + "epoch": 1.78, + "eval_accuracy": 0.4615564689550045, + "eval_loss": 2.757960081100464, + "eval_runtime": 29.7199, + "eval_samples_per_second": 218.136, + "eval_steps_per_second": 2.288, + "step": 246000 + }, + { + "epoch": 1.78, + "learning_rate": 2.2145553964327563e-06, + "loss": 2.739, + "step": 246100 + }, + { + "epoch": 1.78, + "learning_rate": 2.2073948704595756e-06, + "loss": 2.7395, + "step": 246200 + }, + { + "epoch": 1.78, + "learning_rate": 2.2001620159412117e-06, + "loss": 2.7303, + "step": 246300 + }, + { + "epoch": 1.78, + "learning_rate": 2.1929291614228473e-06, + "loss": 2.7363, + "step": 246400 + }, + { + "epoch": 1.78, + "learning_rate": 2.185696306904483e-06, + "loss": 2.7318, + "step": 246500 + }, + { + "epoch": 1.78, + "learning_rate": 2.178463452386119e-06, + "loss": 2.7335, + "step": 246600 + }, + { + "epoch": 1.78, + "learning_rate": 2.1712305978677548e-06, + "loss": 2.7384, + "step": 246700 + }, + { + "epoch": 1.79, + "learning_rate": 2.1639977433493904e-06, + "loss": 2.7412, + "step": 246800 + }, + { + "epoch": 1.79, + "learning_rate": 2.1567648888310265e-06, + "loss": 2.7449, + "step": 246900 + }, + { + "epoch": 1.79, + "learning_rate": 2.1495320343126617e-06, + "loss": 2.7414, + "step": 247000 + }, + { + "epoch": 1.79, + "eval_accuracy": 0.46152924844162563, + "eval_loss": 2.7578506469726562, + "eval_runtime": 30.2105, + "eval_samples_per_second": 214.595, + "eval_steps_per_second": 2.251, + "step": 247000 + }, + { + "epoch": 1.79, + "learning_rate": 2.142299179794298e-06, + "loss": 2.7373, + "step": 247100 + }, + { + "epoch": 1.79, + "learning_rate": 2.1350663252759335e-06, + "loss": 2.7383, + "step": 247200 + }, + { + "epoch": 1.79, + "learning_rate": 2.127833470757569e-06, + "loss": 2.7326, + "step": 247300 + }, + { + "epoch": 1.79, + "learning_rate": 2.1206006162392053e-06, + "loss": 2.7407, + "step": 247400 + }, + { + "epoch": 1.79, + "learning_rate": 2.113367761720841e-06, + "loss": 2.7294, + "step": 247500 + }, + { + "epoch": 1.79, + "learning_rate": 2.1061349072024766e-06, + "loss": 2.7417, + "step": 247600 + }, + { + "epoch": 1.79, + "learning_rate": 2.0989020526841127e-06, + "loss": 2.7314, + "step": 247700 + }, + { + "epoch": 1.79, + "learning_rate": 2.0916691981657484e-06, + "loss": 2.736, + "step": 247800 + }, + { + "epoch": 1.79, + "learning_rate": 2.084436343647384e-06, + "loss": 2.7482, + "step": 247900 + }, + { + "epoch": 1.79, + "learning_rate": 2.07720348912902e-06, + "loss": 2.7478, + "step": 248000 + }, + { + "epoch": 1.79, + "eval_accuracy": 0.4615903433716538, + "eval_loss": 2.7578227519989014, + "eval_runtime": 29.6353, + "eval_samples_per_second": 218.759, + "eval_steps_per_second": 2.295, + "step": 248000 + }, + { + "epoch": 1.79, + "learning_rate": 2.0699706346106558e-06, + "loss": 2.7368, + "step": 248100 + }, + { + "epoch": 1.8, + "learning_rate": 2.0627377800922914e-06, + "loss": 2.735, + "step": 248200 + }, + { + "epoch": 1.8, + "learning_rate": 2.055504925573927e-06, + "loss": 2.7335, + "step": 248300 + }, + { + "epoch": 1.8, + "learning_rate": 2.0482720710555628e-06, + "loss": 2.7429, + "step": 248400 + }, + { + "epoch": 1.8, + "learning_rate": 2.041039216537199e-06, + "loss": 2.7418, + "step": 248500 + }, + { + "epoch": 1.8, + "learning_rate": 2.0338063620188345e-06, + "loss": 2.7387, + "step": 248600 + }, + { + "epoch": 1.8, + "learning_rate": 2.02657350750047e-06, + "loss": 2.7381, + "step": 248700 + }, + { + "epoch": 1.8, + "learning_rate": 2.0193406529821063e-06, + "loss": 2.7433, + "step": 248800 + }, + { + "epoch": 1.8, + "learning_rate": 2.012107798463742e-06, + "loss": 2.7437, + "step": 248900 + }, + { + "epoch": 1.8, + "learning_rate": 2.0048749439453776e-06, + "loss": 2.7299, + "step": 249000 + }, + { + "epoch": 1.8, + "eval_accuracy": 0.4615842943686807, + "eval_loss": 2.7576780319213867, + "eval_runtime": 30.3855, + "eval_samples_per_second": 213.358, + "eval_steps_per_second": 2.238, + "step": 249000 + }, + { + "epoch": 1.8, + "learning_rate": 1.9976420894270133e-06, + "loss": 2.74, + "step": 249100 + }, + { + "epoch": 1.8, + "learning_rate": 1.990481563453833e-06, + "loss": 2.7341, + "step": 249200 + }, + { + "epoch": 1.8, + "learning_rate": 1.9833210374806523e-06, + "loss": 2.7405, + "step": 249300 + }, + { + "epoch": 1.8, + "learning_rate": 1.976088182962288e-06, + "loss": 2.7356, + "step": 249400 + }, + { + "epoch": 1.8, + "learning_rate": 1.9689276569891073e-06, + "loss": 2.7424, + "step": 249500 + }, + { + "epoch": 1.81, + "learning_rate": 1.9616948024707434e-06, + "loss": 2.7359, + "step": 249600 + }, + { + "epoch": 1.81, + "learning_rate": 1.954461947952379e-06, + "loss": 2.7368, + "step": 249700 + }, + { + "epoch": 1.81, + "learning_rate": 1.9472290934340148e-06, + "loss": 2.7339, + "step": 249800 + }, + { + "epoch": 1.81, + "learning_rate": 1.939996238915651e-06, + "loss": 2.7452, + "step": 249900 + }, + { + "epoch": 1.81, + "learning_rate": 1.9327633843972865e-06, + "loss": 2.7401, + "step": 250000 + }, + { + "epoch": 1.81, + "eval_accuracy": 0.4615576787555991, + "eval_loss": 2.7575795650482178, + "eval_runtime": 30.2185, + "eval_samples_per_second": 214.538, + "eval_steps_per_second": 2.25, + "step": 250000 + }, + { + "epoch": 1.81, + "learning_rate": 1.925530529878922e-06, + "loss": 2.7378, + "step": 250100 + }, + { + "epoch": 1.81, + "learning_rate": 1.918297675360558e-06, + "loss": 2.7439, + "step": 250200 + }, + { + "epoch": 1.81, + "learning_rate": 1.9110648208421935e-06, + "loss": 2.7375, + "step": 250300 + }, + { + "epoch": 1.81, + "learning_rate": 1.9038319663238294e-06, + "loss": 2.7377, + "step": 250400 + }, + { + "epoch": 1.81, + "learning_rate": 1.8965991118054653e-06, + "loss": 2.7367, + "step": 250500 + }, + { + "epoch": 1.81, + "learning_rate": 1.8893662572871011e-06, + "loss": 2.7409, + "step": 250600 + }, + { + "epoch": 1.81, + "learning_rate": 1.8821334027687368e-06, + "loss": 2.7352, + "step": 250700 + }, + { + "epoch": 1.81, + "learning_rate": 1.8749005482503727e-06, + "loss": 2.7421, + "step": 250800 + }, + { + "epoch": 1.81, + "learning_rate": 1.8676676937320086e-06, + "loss": 2.7286, + "step": 250900 + }, + { + "epoch": 1.82, + "learning_rate": 1.8604348392136442e-06, + "loss": 2.7395, + "step": 251000 + }, + { + "epoch": 1.82, + "eval_accuracy": 0.4616157491841407, + "eval_loss": 2.757535219192505, + "eval_runtime": 29.6145, + "eval_samples_per_second": 218.913, + "eval_steps_per_second": 2.296, + "step": 251000 + }, + { + "epoch": 1.82, + "learning_rate": 1.8532019846952801e-06, + "loss": 2.734, + "step": 251100 + }, + { + "epoch": 1.82, + "learning_rate": 1.8459691301769158e-06, + "loss": 2.7448, + "step": 251200 + }, + { + "epoch": 1.82, + "learning_rate": 1.8387362756585517e-06, + "loss": 2.7367, + "step": 251300 + }, + { + "epoch": 1.82, + "learning_rate": 1.8315034211401871e-06, + "loss": 2.7354, + "step": 251400 + }, + { + "epoch": 1.82, + "learning_rate": 1.824270566621823e-06, + "loss": 2.7409, + "step": 251500 + }, + { + "epoch": 1.82, + "learning_rate": 1.8171100406486425e-06, + "loss": 2.7331, + "step": 251600 + }, + { + "epoch": 1.82, + "learning_rate": 1.8098771861302784e-06, + "loss": 2.7325, + "step": 251700 + }, + { + "epoch": 1.82, + "learning_rate": 1.802644331611914e-06, + "loss": 2.7439, + "step": 251800 + }, + { + "epoch": 1.82, + "learning_rate": 1.79541147709355e-06, + "loss": 2.7325, + "step": 251900 + }, + { + "epoch": 1.82, + "learning_rate": 1.7881786225751856e-06, + "loss": 2.7399, + "step": 252000 + }, + { + "epoch": 1.82, + "eval_accuracy": 0.46158852867076183, + "eval_loss": 2.757430076599121, + "eval_runtime": 29.3317, + "eval_samples_per_second": 221.024, + "eval_steps_per_second": 2.318, + "step": 252000 + }, + { + "epoch": 1.82, + "learning_rate": 1.7809457680568215e-06, + "loss": 2.744, + "step": 252100 + }, + { + "epoch": 1.82, + "learning_rate": 1.7737129135384574e-06, + "loss": 2.7365, + "step": 252200 + }, + { + "epoch": 1.82, + "learning_rate": 1.766480059020093e-06, + "loss": 2.7367, + "step": 252300 + }, + { + "epoch": 1.83, + "learning_rate": 1.759247204501729e-06, + "loss": 2.7402, + "step": 252400 + }, + { + "epoch": 1.83, + "learning_rate": 1.7520143499833644e-06, + "loss": 2.74, + "step": 252500 + }, + { + "epoch": 1.83, + "learning_rate": 1.7447814954650002e-06, + "loss": 2.7433, + "step": 252600 + }, + { + "epoch": 1.83, + "learning_rate": 1.7376209694918198e-06, + "loss": 2.7307, + "step": 252700 + }, + { + "epoch": 1.83, + "learning_rate": 1.7303881149734556e-06, + "loss": 2.7333, + "step": 252800 + }, + { + "epoch": 1.83, + "learning_rate": 1.7231552604550913e-06, + "loss": 2.7416, + "step": 252900 + }, + { + "epoch": 1.83, + "learning_rate": 1.7159224059367272e-06, + "loss": 2.7413, + "step": 253000 + }, + { + "epoch": 1.83, + "eval_accuracy": 0.4616139344832488, + "eval_loss": 2.757355213165283, + "eval_runtime": 29.4627, + "eval_samples_per_second": 220.041, + "eval_steps_per_second": 2.308, + "step": 253000 + }, + { + "epoch": 1.83, + "learning_rate": 1.7086895514183629e-06, + "loss": 2.7397, + "step": 253100 + }, + { + "epoch": 1.83, + "learning_rate": 1.7014566968999987e-06, + "loss": 2.7499, + "step": 253200 + }, + { + "epoch": 1.83, + "learning_rate": 1.6942238423816346e-06, + "loss": 2.7391, + "step": 253300 + }, + { + "epoch": 1.83, + "learning_rate": 1.6869909878632703e-06, + "loss": 2.7351, + "step": 253400 + }, + { + "epoch": 1.83, + "learning_rate": 1.6797581333449062e-06, + "loss": 2.7421, + "step": 253500 + }, + { + "epoch": 1.83, + "learning_rate": 1.6725252788265416e-06, + "loss": 2.7468, + "step": 253600 + }, + { + "epoch": 1.83, + "learning_rate": 1.6652924243081775e-06, + "loss": 2.7392, + "step": 253700 + }, + { + "epoch": 1.84, + "learning_rate": 1.6580595697898134e-06, + "loss": 2.7259, + "step": 253800 + }, + { + "epoch": 1.84, + "learning_rate": 1.650899043816633e-06, + "loss": 2.7404, + "step": 253900 + }, + { + "epoch": 1.84, + "learning_rate": 1.6436661892982686e-06, + "loss": 2.7294, + "step": 254000 + }, + { + "epoch": 1.84, + "eval_accuracy": 0.4616357108939519, + "eval_loss": 2.757269859313965, + "eval_runtime": 30.4439, + "eval_samples_per_second": 212.949, + "eval_steps_per_second": 2.234, + "step": 254000 + }, + { + "epoch": 1.84, + "learning_rate": 1.6364333347799044e-06, + "loss": 2.74, + "step": 254100 + }, + { + "epoch": 1.84, + "learning_rate": 1.6292004802615401e-06, + "loss": 2.7326, + "step": 254200 + }, + { + "epoch": 1.84, + "learning_rate": 1.621967625743176e-06, + "loss": 2.7434, + "step": 254300 + }, + { + "epoch": 1.84, + "learning_rate": 1.6148070997699953e-06, + "loss": 2.7419, + "step": 254400 + }, + { + "epoch": 1.84, + "learning_rate": 1.607574245251631e-06, + "loss": 2.7295, + "step": 254500 + }, + { + "epoch": 1.84, + "learning_rate": 1.6003413907332668e-06, + "loss": 2.7313, + "step": 254600 + }, + { + "epoch": 1.84, + "learning_rate": 1.5931085362149027e-06, + "loss": 2.7341, + "step": 254700 + }, + { + "epoch": 1.84, + "learning_rate": 1.5858756816965384e-06, + "loss": 2.7482, + "step": 254800 + }, + { + "epoch": 1.84, + "learning_rate": 1.5786428271781743e-06, + "loss": 2.7316, + "step": 254900 + }, + { + "epoch": 1.84, + "learning_rate": 1.5714099726598101e-06, + "loss": 2.7329, + "step": 255000 + }, + { + "epoch": 1.84, + "eval_accuracy": 0.4616484138001954, + "eval_loss": 2.757246732711792, + "eval_runtime": 29.9443, + "eval_samples_per_second": 216.502, + "eval_steps_per_second": 2.271, + "step": 255000 + }, + { + "epoch": 1.85, + "learning_rate": 1.5641771181414458e-06, + "loss": 2.7439, + "step": 255100 + }, + { + "epoch": 1.85, + "learning_rate": 1.5569442636230817e-06, + "loss": 2.7369, + "step": 255200 + }, + { + "epoch": 1.85, + "learning_rate": 1.5497114091047174e-06, + "loss": 2.7376, + "step": 255300 + }, + { + "epoch": 1.85, + "learning_rate": 1.5424785545863532e-06, + "loss": 2.7391, + "step": 255400 + }, + { + "epoch": 1.85, + "learning_rate": 1.5352457000679891e-06, + "loss": 2.7324, + "step": 255500 + }, + { + "epoch": 1.85, + "learning_rate": 1.5280128455496248e-06, + "loss": 2.7475, + "step": 255600 + }, + { + "epoch": 1.85, + "learning_rate": 1.5207799910312604e-06, + "loss": 2.7312, + "step": 255700 + }, + { + "epoch": 1.85, + "learning_rate": 1.5135471365128961e-06, + "loss": 2.7444, + "step": 255800 + }, + { + "epoch": 1.85, + "learning_rate": 1.506314281994532e-06, + "loss": 2.7346, + "step": 255900 + }, + { + "epoch": 1.85, + "learning_rate": 1.4991537560213515e-06, + "loss": 2.7454, + "step": 256000 + }, + { + "epoch": 1.85, + "eval_accuracy": 0.46165385790287117, + "eval_loss": 2.757188081741333, + "eval_runtime": 28.8568, + "eval_samples_per_second": 224.661, + "eval_steps_per_second": 2.356, + "step": 256000 + }, + { + "epoch": 1.85, + "learning_rate": 1.4919209015029872e-06, + "loss": 2.7443, + "step": 256100 + }, + { + "epoch": 1.85, + "learning_rate": 1.484688046984623e-06, + "loss": 2.7341, + "step": 256200 + }, + { + "epoch": 1.85, + "learning_rate": 1.477455192466259e-06, + "loss": 2.7427, + "step": 256300 + }, + { + "epoch": 1.85, + "learning_rate": 1.4702223379478946e-06, + "loss": 2.7386, + "step": 256400 + }, + { + "epoch": 1.86, + "learning_rate": 1.4629894834295305e-06, + "loss": 2.7349, + "step": 256500 + }, + { + "epoch": 1.86, + "learning_rate": 1.4559012860015335e-06, + "loss": 2.7334, + "step": 256600 + }, + { + "epoch": 1.86, + "learning_rate": 1.4486684314831691e-06, + "loss": 2.7423, + "step": 256700 + }, + { + "epoch": 1.86, + "learning_rate": 1.441435576964805e-06, + "loss": 2.7376, + "step": 256800 + }, + { + "epoch": 1.86, + "learning_rate": 1.4342027224464409e-06, + "loss": 2.7353, + "step": 256900 + }, + { + "epoch": 1.86, + "learning_rate": 1.4269698679280765e-06, + "loss": 2.7343, + "step": 257000 + }, + { + "epoch": 1.86, + "eval_accuracy": 0.46170527442814235, + "eval_loss": 2.757138252258301, + "eval_runtime": 30.9623, + "eval_samples_per_second": 209.383, + "eval_steps_per_second": 2.196, + "step": 257000 + }, + { + "epoch": 1.86, + "learning_rate": 1.4197370134097124e-06, + "loss": 2.7396, + "step": 257100 + }, + { + "epoch": 1.86, + "learning_rate": 1.4125041588913483e-06, + "loss": 2.7398, + "step": 257200 + }, + { + "epoch": 1.86, + "learning_rate": 1.405271304372984e-06, + "loss": 2.7293, + "step": 257300 + }, + { + "epoch": 1.86, + "learning_rate": 1.3980384498546199e-06, + "loss": 2.7378, + "step": 257400 + }, + { + "epoch": 1.86, + "learning_rate": 1.3908055953362557e-06, + "loss": 2.7431, + "step": 257500 + }, + { + "epoch": 1.86, + "learning_rate": 1.3835727408178912e-06, + "loss": 2.7375, + "step": 257600 + }, + { + "epoch": 1.86, + "learning_rate": 1.376339886299527e-06, + "loss": 2.7346, + "step": 257700 + }, + { + "epoch": 1.86, + "learning_rate": 1.3691070317811627e-06, + "loss": 2.7308, + "step": 257800 + }, + { + "epoch": 1.87, + "learning_rate": 1.3618741772627986e-06, + "loss": 2.7364, + "step": 257900 + }, + { + "epoch": 1.87, + "learning_rate": 1.3546413227444345e-06, + "loss": 2.7356, + "step": 258000 + }, + { + "epoch": 1.87, + "eval_accuracy": 0.46169559602338545, + "eval_loss": 2.757066488265991, + "eval_runtime": 29.5596, + "eval_samples_per_second": 219.319, + "eval_steps_per_second": 2.3, + "step": 258000 + }, + { + "epoch": 1.87, + "learning_rate": 1.3474084682260701e-06, + "loss": 2.7362, + "step": 258100 + }, + { + "epoch": 1.87, + "learning_rate": 1.340175613707706e-06, + "loss": 2.7308, + "step": 258200 + }, + { + "epoch": 1.87, + "learning_rate": 1.3329427591893417e-06, + "loss": 2.7459, + "step": 258300 + }, + { + "epoch": 1.87, + "learning_rate": 1.3257099046709776e-06, + "loss": 2.7381, + "step": 258400 + }, + { + "epoch": 1.87, + "learning_rate": 1.3184770501526135e-06, + "loss": 2.7388, + "step": 258500 + }, + { + "epoch": 1.87, + "learning_rate": 1.3112441956342491e-06, + "loss": 2.7453, + "step": 258600 + }, + { + "epoch": 1.87, + "learning_rate": 1.304011341115885e-06, + "loss": 2.7413, + "step": 258700 + }, + { + "epoch": 1.87, + "learning_rate": 1.2967784865975209e-06, + "loss": 2.7237, + "step": 258800 + }, + { + "epoch": 1.87, + "learning_rate": 1.2895456320791563e-06, + "loss": 2.7355, + "step": 258900 + }, + { + "epoch": 1.87, + "learning_rate": 1.2823127775607922e-06, + "loss": 2.7462, + "step": 259000 + }, + { + "epoch": 1.87, + "eval_accuracy": 0.4617197920352778, + "eval_loss": 2.7570412158966064, + "eval_runtime": 27.9363, + "eval_samples_per_second": 232.064, + "eval_steps_per_second": 2.434, + "step": 259000 + }, + { + "epoch": 1.87, + "learning_rate": 1.2750799230424279e-06, + "loss": 2.7382, + "step": 259100 + }, + { + "epoch": 1.87, + "learning_rate": 1.2678470685240638e-06, + "loss": 2.7421, + "step": 259200 + }, + { + "epoch": 1.88, + "learning_rate": 1.2606142140056996e-06, + "loss": 2.7328, + "step": 259300 + }, + { + "epoch": 1.88, + "learning_rate": 1.2533813594873353e-06, + "loss": 2.7247, + "step": 259400 + }, + { + "epoch": 1.88, + "learning_rate": 1.2461485049689712e-06, + "loss": 2.735, + "step": 259500 + }, + { + "epoch": 1.88, + "learning_rate": 1.238915650450607e-06, + "loss": 2.737, + "step": 259600 + }, + { + "epoch": 1.88, + "learning_rate": 1.2316827959322427e-06, + "loss": 2.7381, + "step": 259700 + }, + { + "epoch": 1.88, + "learning_rate": 1.224522269959062e-06, + "loss": 2.7417, + "step": 259800 + }, + { + "epoch": 1.88, + "learning_rate": 1.217289415440698e-06, + "loss": 2.745, + "step": 259900 + }, + { + "epoch": 1.88, + "learning_rate": 1.2100565609223338e-06, + "loss": 2.7375, + "step": 260000 + }, + { + "epoch": 1.88, + "eval_accuracy": 0.4617488272495486, + "eval_loss": 2.7569446563720703, + "eval_runtime": 31.5522, + "eval_samples_per_second": 205.469, + "eval_steps_per_second": 2.155, + "step": 260000 + }, + { + "epoch": 1.88, + "learning_rate": 1.2028237064039695e-06, + "loss": 2.7467, + "step": 260100 + }, + { + "epoch": 1.88, + "learning_rate": 1.1955908518856053e-06, + "loss": 2.7409, + "step": 260200 + }, + { + "epoch": 1.88, + "learning_rate": 1.188357997367241e-06, + "loss": 2.7339, + "step": 260300 + }, + { + "epoch": 1.88, + "learning_rate": 1.1811251428488769e-06, + "loss": 2.7397, + "step": 260400 + }, + { + "epoch": 1.88, + "learning_rate": 1.1738922883305125e-06, + "loss": 2.7418, + "step": 260500 + }, + { + "epoch": 1.88, + "learning_rate": 1.1666594338121484e-06, + "loss": 2.7402, + "step": 260600 + }, + { + "epoch": 1.89, + "learning_rate": 1.1594265792937843e-06, + "loss": 2.7448, + "step": 260700 + }, + { + "epoch": 1.89, + "learning_rate": 1.1522660533206036e-06, + "loss": 2.7441, + "step": 260800 + }, + { + "epoch": 1.89, + "learning_rate": 1.1450331988022393e-06, + "loss": 2.7411, + "step": 260900 + }, + { + "epoch": 1.89, + "learning_rate": 1.1378003442838752e-06, + "loss": 2.7368, + "step": 261000 + }, + { + "epoch": 1.89, + "eval_accuracy": 0.46175306155162976, + "eval_loss": 2.7569141387939453, + "eval_runtime": 29.4044, + "eval_samples_per_second": 220.477, + "eval_steps_per_second": 2.313, + "step": 261000 + }, + { + "epoch": 1.89, + "learning_rate": 1.130567489765511e-06, + "loss": 2.7327, + "step": 261100 + }, + { + "epoch": 1.89, + "learning_rate": 1.1233346352471467e-06, + "loss": 2.7408, + "step": 261200 + }, + { + "epoch": 1.89, + "learning_rate": 1.1161017807287824e-06, + "loss": 2.7415, + "step": 261300 + }, + { + "epoch": 1.89, + "learning_rate": 1.1088689262104183e-06, + "loss": 2.7472, + "step": 261400 + }, + { + "epoch": 1.89, + "learning_rate": 1.1016360716920541e-06, + "loss": 2.7347, + "step": 261500 + }, + { + "epoch": 1.89, + "learning_rate": 1.0944032171736898e-06, + "loss": 2.7333, + "step": 261600 + }, + { + "epoch": 1.89, + "learning_rate": 1.0871703626553257e-06, + "loss": 2.7426, + "step": 261700 + }, + { + "epoch": 1.89, + "learning_rate": 1.0799375081369616e-06, + "loss": 2.7343, + "step": 261800 + }, + { + "epoch": 1.89, + "learning_rate": 1.0727046536185972e-06, + "loss": 2.7387, + "step": 261900 + }, + { + "epoch": 1.89, + "learning_rate": 1.0654717991002329e-06, + "loss": 2.7452, + "step": 262000 + }, + { + "epoch": 1.89, + "eval_accuracy": 0.4617373341438997, + "eval_loss": 2.7568695545196533, + "eval_runtime": 29.6699, + "eval_samples_per_second": 218.504, + "eval_steps_per_second": 2.292, + "step": 262000 + }, + { + "epoch": 1.9, + "learning_rate": 1.0582389445818688e-06, + "loss": 2.7431, + "step": 262100 + }, + { + "epoch": 1.9, + "learning_rate": 1.0510060900635046e-06, + "loss": 2.737, + "step": 262200 + }, + { + "epoch": 1.9, + "learning_rate": 1.0437732355451403e-06, + "loss": 2.7351, + "step": 262300 + }, + { + "epoch": 1.9, + "learning_rate": 1.0365403810267762e-06, + "loss": 2.7306, + "step": 262400 + }, + { + "epoch": 1.9, + "learning_rate": 1.0293075265084119e-06, + "loss": 2.7392, + "step": 262500 + }, + { + "epoch": 1.9, + "learning_rate": 1.0220746719900475e-06, + "loss": 2.7406, + "step": 262600 + }, + { + "epoch": 1.9, + "learning_rate": 1.0148418174716834e-06, + "loss": 2.7378, + "step": 262700 + }, + { + "epoch": 1.9, + "learning_rate": 1.0076089629533193e-06, + "loss": 2.7391, + "step": 262800 + }, + { + "epoch": 1.9, + "learning_rate": 1.000376108434955e-06, + "loss": 2.7356, + "step": 262900 + }, + { + "epoch": 1.9, + "learning_rate": 9.931432539165908e-07, + "loss": 2.7394, + "step": 263000 + }, + { + "epoch": 1.9, + "eval_accuracy": 0.46170285482695317, + "eval_loss": 2.7567996978759766, + "eval_runtime": 27.9535, + "eval_samples_per_second": 231.921, + "eval_steps_per_second": 2.433, + "step": 263000 + }, + { + "epoch": 1.9, + "learning_rate": 9.859103993982267e-07, + "loss": 2.7391, + "step": 263100 + }, + { + "epoch": 1.9, + "learning_rate": 9.786775448798624e-07, + "loss": 2.7399, + "step": 263200 + }, + { + "epoch": 1.9, + "learning_rate": 9.71444690361498e-07, + "loss": 2.7326, + "step": 263300 + }, + { + "epoch": 1.91, + "learning_rate": 9.64211835843134e-07, + "loss": 2.7277, + "step": 263400 + }, + { + "epoch": 1.91, + "learning_rate": 9.569789813247698e-07, + "loss": 2.7386, + "step": 263500 + }, + { + "epoch": 1.91, + "learning_rate": 9.497461268064056e-07, + "loss": 2.7339, + "step": 263600 + }, + { + "epoch": 1.91, + "learning_rate": 9.425132722880413e-07, + "loss": 2.7326, + "step": 263700 + }, + { + "epoch": 1.91, + "learning_rate": 9.352804177696771e-07, + "loss": 2.7286, + "step": 263800 + }, + { + "epoch": 1.91, + "learning_rate": 9.280475632513128e-07, + "loss": 2.7334, + "step": 263900 + }, + { + "epoch": 1.91, + "learning_rate": 9.208147087329485e-07, + "loss": 2.7378, + "step": 264000 + }, + { + "epoch": 1.91, + "eval_accuracy": 0.46175245665133247, + "eval_loss": 2.7567875385284424, + "eval_runtime": 29.609, + "eval_samples_per_second": 218.954, + "eval_steps_per_second": 2.297, + "step": 264000 + }, + { + "epoch": 1.91, + "learning_rate": 9.136541827597681e-07, + "loss": 2.7481, + "step": 264100 + }, + { + "epoch": 1.91, + "learning_rate": 9.064213282414038e-07, + "loss": 2.7415, + "step": 264200 + }, + { + "epoch": 1.91, + "learning_rate": 8.991884737230395e-07, + "loss": 2.7386, + "step": 264300 + }, + { + "epoch": 1.91, + "learning_rate": 8.919556192046754e-07, + "loss": 2.7357, + "step": 264400 + }, + { + "epoch": 1.91, + "learning_rate": 8.847950932314948e-07, + "loss": 2.743, + "step": 264500 + }, + { + "epoch": 1.91, + "learning_rate": 8.775622387131307e-07, + "loss": 2.7419, + "step": 264600 + }, + { + "epoch": 1.91, + "learning_rate": 8.703293841947664e-07, + "loss": 2.7376, + "step": 264700 + }, + { + "epoch": 1.92, + "learning_rate": 8.630965296764021e-07, + "loss": 2.7359, + "step": 264800 + }, + { + "epoch": 1.92, + "learning_rate": 8.558636751580379e-07, + "loss": 2.7316, + "step": 264900 + }, + { + "epoch": 1.92, + "learning_rate": 8.486308206396737e-07, + "loss": 2.7446, + "step": 265000 + }, + { + "epoch": 1.92, + "eval_accuracy": 0.4617639497569813, + "eval_loss": 2.7567336559295654, + "eval_runtime": 29.2914, + "eval_samples_per_second": 221.328, + "eval_steps_per_second": 2.322, + "step": 265000 + }, + { + "epoch": 1.92, + "learning_rate": 8.413979661213096e-07, + "loss": 2.7289, + "step": 265100 + }, + { + "epoch": 1.92, + "learning_rate": 8.341651116029453e-07, + "loss": 2.7291, + "step": 265200 + }, + { + "epoch": 1.92, + "learning_rate": 8.269322570845811e-07, + "loss": 2.7372, + "step": 265300 + }, + { + "epoch": 1.92, + "learning_rate": 8.196994025662168e-07, + "loss": 2.7369, + "step": 265400 + }, + { + "epoch": 1.92, + "learning_rate": 8.124665480478526e-07, + "loss": 2.739, + "step": 265500 + }, + { + "epoch": 1.92, + "learning_rate": 8.052336935294884e-07, + "loss": 2.7476, + "step": 265600 + }, + { + "epoch": 1.92, + "learning_rate": 7.980008390111242e-07, + "loss": 2.7415, + "step": 265700 + }, + { + "epoch": 1.92, + "learning_rate": 7.907679844927601e-07, + "loss": 2.738, + "step": 265800 + }, + { + "epoch": 1.92, + "learning_rate": 7.835351299743958e-07, + "loss": 2.7455, + "step": 265900 + }, + { + "epoch": 1.92, + "learning_rate": 7.763022754560315e-07, + "loss": 2.7436, + "step": 266000 + }, + { + "epoch": 1.92, + "eval_accuracy": 0.46179419477184674, + "eval_loss": 2.756711006164551, + "eval_runtime": 31.9975, + "eval_samples_per_second": 202.61, + "eval_steps_per_second": 2.125, + "step": 266000 + }, + { + "epoch": 1.92, + "learning_rate": 7.690694209376673e-07, + "loss": 2.7329, + "step": 266100 + }, + { + "epoch": 1.93, + "learning_rate": 7.61836566419303e-07, + "loss": 2.7423, + "step": 266200 + }, + { + "epoch": 1.93, + "learning_rate": 7.546037119009389e-07, + "loss": 2.7321, + "step": 266300 + }, + { + "epoch": 1.93, + "learning_rate": 7.473708573825747e-07, + "loss": 2.7325, + "step": 266400 + }, + { + "epoch": 1.93, + "learning_rate": 7.401380028642105e-07, + "loss": 2.7361, + "step": 266500 + }, + { + "epoch": 1.93, + "learning_rate": 7.329051483458461e-07, + "loss": 2.7379, + "step": 266600 + }, + { + "epoch": 1.93, + "learning_rate": 7.25672293827482e-07, + "loss": 2.7332, + "step": 266700 + }, + { + "epoch": 1.93, + "learning_rate": 7.184394393091178e-07, + "loss": 2.7375, + "step": 266800 + }, + { + "epoch": 1.93, + "learning_rate": 7.112065847907536e-07, + "loss": 2.7367, + "step": 266900 + }, + { + "epoch": 1.93, + "learning_rate": 7.040460588175731e-07, + "loss": 2.7505, + "step": 267000 + }, + { + "epoch": 1.93, + "eval_accuracy": 0.4617736281617382, + "eval_loss": 2.7566604614257812, + "eval_runtime": 29.751, + "eval_samples_per_second": 217.908, + "eval_steps_per_second": 2.286, + "step": 267000 + }, + { + "epoch": 1.93, + "learning_rate": 6.968132042992088e-07, + "loss": 2.7389, + "step": 267100 + }, + { + "epoch": 1.93, + "learning_rate": 6.895803497808445e-07, + "loss": 2.7352, + "step": 267200 + }, + { + "epoch": 1.93, + "learning_rate": 6.823474952624803e-07, + "loss": 2.7449, + "step": 267300 + }, + { + "epoch": 1.93, + "learning_rate": 6.751146407441162e-07, + "loss": 2.7338, + "step": 267400 + }, + { + "epoch": 1.93, + "learning_rate": 6.67881786225752e-07, + "loss": 2.7355, + "step": 267500 + }, + { + "epoch": 1.94, + "learning_rate": 6.606489317073877e-07, + "loss": 2.7429, + "step": 267600 + }, + { + "epoch": 1.94, + "learning_rate": 6.534160771890234e-07, + "loss": 2.7364, + "step": 267700 + }, + { + "epoch": 1.94, + "learning_rate": 6.461832226706593e-07, + "loss": 2.738, + "step": 267800 + }, + { + "epoch": 1.94, + "learning_rate": 6.38950368152295e-07, + "loss": 2.7408, + "step": 267900 + }, + { + "epoch": 1.94, + "learning_rate": 6.317175136339308e-07, + "loss": 2.7493, + "step": 268000 + }, + { + "epoch": 1.94, + "eval_accuracy": 0.4617833065664952, + "eval_loss": 2.7566213607788086, + "eval_runtime": 28.2536, + "eval_samples_per_second": 229.457, + "eval_steps_per_second": 2.407, + "step": 268000 + }, + { + "epoch": 1.94, + "learning_rate": 6.245569876607502e-07, + "loss": 2.7406, + "step": 268100 + }, + { + "epoch": 1.94, + "learning_rate": 6.17324133142386e-07, + "loss": 2.7364, + "step": 268200 + }, + { + "epoch": 1.94, + "learning_rate": 6.100912786240218e-07, + "loss": 2.7406, + "step": 268300 + }, + { + "epoch": 1.94, + "learning_rate": 6.029307526508412e-07, + "loss": 2.7454, + "step": 268400 + }, + { + "epoch": 1.94, + "learning_rate": 5.95697898132477e-07, + "loss": 2.7426, + "step": 268500 + }, + { + "epoch": 1.94, + "learning_rate": 5.884650436141129e-07, + "loss": 2.7343, + "step": 268600 + }, + { + "epoch": 1.94, + "learning_rate": 5.812321890957485e-07, + "loss": 2.7358, + "step": 268700 + }, + { + "epoch": 1.94, + "learning_rate": 5.739993345773844e-07, + "loss": 2.736, + "step": 268800 + }, + { + "epoch": 1.94, + "learning_rate": 5.667664800590202e-07, + "loss": 2.732, + "step": 268900 + }, + { + "epoch": 1.95, + "learning_rate": 5.595336255406559e-07, + "loss": 2.7391, + "step": 269000 + }, + { + "epoch": 1.95, + "eval_accuracy": 0.4617845163670898, + "eval_loss": 2.7565996646881104, + "eval_runtime": 30.4715, + "eval_samples_per_second": 212.756, + "eval_steps_per_second": 2.232, + "step": 269000 + }, + { + "epoch": 1.95, + "learning_rate": 5.523007710222917e-07, + "loss": 2.733, + "step": 269100 + }, + { + "epoch": 1.95, + "learning_rate": 5.450679165039275e-07, + "loss": 2.7411, + "step": 269200 + }, + { + "epoch": 1.95, + "learning_rate": 5.378350619855633e-07, + "loss": 2.7453, + "step": 269300 + }, + { + "epoch": 1.95, + "learning_rate": 5.30602207467199e-07, + "loss": 2.7358, + "step": 269400 + }, + { + "epoch": 1.95, + "learning_rate": 5.233693529488348e-07, + "loss": 2.7314, + "step": 269500 + }, + { + "epoch": 1.95, + "learning_rate": 5.161364984304707e-07, + "loss": 2.744, + "step": 269600 + }, + { + "epoch": 1.95, + "learning_rate": 5.089036439121064e-07, + "loss": 2.7467, + "step": 269700 + }, + { + "epoch": 1.95, + "learning_rate": 5.016707893937422e-07, + "loss": 2.7395, + "step": 269800 + }, + { + "epoch": 1.95, + "learning_rate": 4.94437934875378e-07, + "loss": 2.7339, + "step": 269900 + }, + { + "epoch": 1.95, + "learning_rate": 4.872774089021974e-07, + "loss": 2.7431, + "step": 270000 + }, + { + "epoch": 1.95, + "eval_accuracy": 0.461747617448954, + "eval_loss": 2.756573438644409, + "eval_runtime": 31.3033, + "eval_samples_per_second": 207.103, + "eval_steps_per_second": 2.172, + "step": 270000 + }, + { + "epoch": 1.95, + "learning_rate": 4.800445543838331e-07, + "loss": 2.727, + "step": 270100 + }, + { + "epoch": 1.95, + "learning_rate": 4.7281169986546897e-07, + "loss": 2.7348, + "step": 270200 + }, + { + "epoch": 1.96, + "learning_rate": 4.6557884534710474e-07, + "loss": 2.7455, + "step": 270300 + }, + { + "epoch": 1.96, + "learning_rate": 4.5834599082874046e-07, + "loss": 2.7347, + "step": 270400 + }, + { + "epoch": 1.96, + "learning_rate": 4.511131363103763e-07, + "loss": 2.7418, + "step": 270500 + }, + { + "epoch": 1.96, + "learning_rate": 4.438802817920121e-07, + "loss": 2.7362, + "step": 270600 + }, + { + "epoch": 1.96, + "learning_rate": 4.3664742727364783e-07, + "loss": 2.7349, + "step": 270700 + }, + { + "epoch": 1.96, + "learning_rate": 4.2941457275528366e-07, + "loss": 2.7409, + "step": 270800 + }, + { + "epoch": 1.96, + "learning_rate": 4.2218171823691943e-07, + "loss": 2.7341, + "step": 270900 + }, + { + "epoch": 1.96, + "learning_rate": 4.1494886371855515e-07, + "loss": 2.7387, + "step": 271000 + }, + { + "epoch": 1.96, + "eval_accuracy": 0.46175306155162976, + "eval_loss": 2.7565271854400635, + "eval_runtime": 28.2552, + "eval_samples_per_second": 229.444, + "eval_steps_per_second": 2.407, + "step": 271000 + }, + { + "epoch": 1.96, + "learning_rate": 4.0771600920019097e-07, + "loss": 2.7284, + "step": 271100 + }, + { + "epoch": 1.96, + "learning_rate": 4.004831546818268e-07, + "loss": 2.7376, + "step": 271200 + }, + { + "epoch": 1.96, + "learning_rate": 3.9325030016346257e-07, + "loss": 2.7438, + "step": 271300 + }, + { + "epoch": 1.96, + "learning_rate": 3.860174456450983e-07, + "loss": 2.73, + "step": 271400 + }, + { + "epoch": 1.96, + "learning_rate": 3.787845911267341e-07, + "loss": 2.7454, + "step": 271500 + }, + { + "epoch": 1.96, + "learning_rate": 3.7155173660836994e-07, + "loss": 2.7397, + "step": 271600 + }, + { + "epoch": 1.97, + "learning_rate": 3.6431888209000566e-07, + "loss": 2.7391, + "step": 271700 + }, + { + "epoch": 1.97, + "learning_rate": 3.570860275716415e-07, + "loss": 2.7378, + "step": 271800 + }, + { + "epoch": 1.97, + "learning_rate": 3.4985317305327726e-07, + "loss": 2.7392, + "step": 271900 + }, + { + "epoch": 1.97, + "learning_rate": 3.42620318534913e-07, + "loss": 2.741, + "step": 272000 + }, + { + "epoch": 1.97, + "eval_accuracy": 0.46179358987154945, + "eval_loss": 2.7564971446990967, + "eval_runtime": 28.8041, + "eval_samples_per_second": 225.072, + "eval_steps_per_second": 2.361, + "step": 272000 + }, + { + "epoch": 1.97, + "learning_rate": 3.354597925617324e-07, + "loss": 2.7359, + "step": 272100 + }, + { + "epoch": 1.97, + "learning_rate": 3.282269380433682e-07, + "loss": 2.7351, + "step": 272200 + }, + { + "epoch": 1.97, + "learning_rate": 3.2099408352500405e-07, + "loss": 2.7398, + "step": 272300 + }, + { + "epoch": 1.97, + "learning_rate": 3.1376122900663977e-07, + "loss": 2.7399, + "step": 272400 + }, + { + "epoch": 1.97, + "learning_rate": 3.0652837448827554e-07, + "loss": 2.7299, + "step": 272500 + }, + { + "epoch": 1.97, + "learning_rate": 2.9929551996991137e-07, + "loss": 2.7412, + "step": 272600 + }, + { + "epoch": 1.97, + "learning_rate": 2.9206266545154714e-07, + "loss": 2.7434, + "step": 272700 + }, + { + "epoch": 1.97, + "learning_rate": 2.848298109331829e-07, + "loss": 2.741, + "step": 272800 + }, + { + "epoch": 1.97, + "learning_rate": 2.775969564148187e-07, + "loss": 2.7356, + "step": 272900 + }, + { + "epoch": 1.97, + "learning_rate": 2.7036410189645445e-07, + "loss": 2.7343, + "step": 273000 + }, + { + "epoch": 1.97, + "eval_accuracy": 0.4617863310679817, + "eval_loss": 2.756471872329712, + "eval_runtime": 29.6952, + "eval_samples_per_second": 218.318, + "eval_steps_per_second": 2.29, + "step": 273000 + }, + { + "epoch": 1.98, + "learning_rate": 2.6313124737809023e-07, + "loss": 2.7316, + "step": 273100 + }, + { + "epoch": 1.98, + "learning_rate": 2.5589839285972605e-07, + "loss": 2.7433, + "step": 273200 + }, + { + "epoch": 1.98, + "learning_rate": 2.486655383413618e-07, + "loss": 2.7338, + "step": 273300 + }, + { + "epoch": 1.98, + "learning_rate": 2.4150501236818125e-07, + "loss": 2.7303, + "step": 273400 + }, + { + "epoch": 1.98, + "learning_rate": 2.3427215784981705e-07, + "loss": 2.7432, + "step": 273500 + }, + { + "epoch": 1.98, + "learning_rate": 2.2703930333145282e-07, + "loss": 2.7416, + "step": 273600 + }, + { + "epoch": 1.98, + "learning_rate": 2.198064488130886e-07, + "loss": 2.739, + "step": 273700 + }, + { + "epoch": 1.98, + "learning_rate": 2.125735942947244e-07, + "loss": 2.7426, + "step": 273800 + }, + { + "epoch": 1.98, + "learning_rate": 2.0534073977636016e-07, + "loss": 2.7369, + "step": 273900 + }, + { + "epoch": 1.98, + "learning_rate": 1.9810788525799593e-07, + "loss": 2.7378, + "step": 274000 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.4617851212673871, + "eval_loss": 2.756432056427002, + "eval_runtime": 29.9933, + "eval_samples_per_second": 216.148, + "eval_steps_per_second": 2.267, + "step": 274000 + }, + { + "epoch": 1.98, + "learning_rate": 1.9087503073963173e-07, + "loss": 2.7462, + "step": 274100 + }, + { + "epoch": 1.98, + "learning_rate": 1.836421762212675e-07, + "loss": 2.7361, + "step": 274200 + }, + { + "epoch": 1.98, + "learning_rate": 1.764093217029033e-07, + "loss": 2.7366, + "step": 274300 + }, + { + "epoch": 1.98, + "learning_rate": 1.6917646718453908e-07, + "loss": 2.7395, + "step": 274400 + }, + { + "epoch": 1.99, + "learning_rate": 1.6194361266617485e-07, + "loss": 2.7322, + "step": 274500 + }, + { + "epoch": 1.99, + "learning_rate": 1.5478308669299427e-07, + "loss": 2.7378, + "step": 274600 + }, + { + "epoch": 1.99, + "learning_rate": 1.4755023217463005e-07, + "loss": 2.7433, + "step": 274700 + }, + { + "epoch": 1.99, + "learning_rate": 1.4031737765626584e-07, + "loss": 2.7401, + "step": 274800 + }, + { + "epoch": 1.99, + "learning_rate": 1.3308452313790162e-07, + "loss": 2.7369, + "step": 274900 + }, + { + "epoch": 1.99, + "learning_rate": 1.2585166861953741e-07, + "loss": 2.737, + "step": 275000 + }, + { + "epoch": 1.99, + "eval_accuracy": 0.46180387317660365, + "eval_loss": 2.756422996520996, + "eval_runtime": 29.6811, + "eval_samples_per_second": 218.422, + "eval_steps_per_second": 2.291, + "step": 275000 + }, + { + "epoch": 1.99, + "learning_rate": 1.1861881410117317e-07, + "loss": 2.7343, + "step": 275100 + }, + { + "epoch": 1.99, + "learning_rate": 1.1138595958280896e-07, + "loss": 2.7331, + "step": 275200 + }, + { + "epoch": 1.99, + "learning_rate": 1.0415310506444474e-07, + "loss": 2.7372, + "step": 275300 + }, + { + "epoch": 1.99, + "learning_rate": 9.692025054608053e-08, + "loss": 2.7342, + "step": 275400 + }, + { + "epoch": 1.99, + "learning_rate": 8.96873960277163e-08, + "loss": 2.7394, + "step": 275500 + }, + { + "epoch": 1.99, + "learning_rate": 8.252687005453573e-08, + "loss": 2.733, + "step": 275600 + }, + { + "epoch": 1.99, + "learning_rate": 7.529401553617151e-08, + "loss": 2.7389, + "step": 275700 + }, + { + "epoch": 1.99, + "learning_rate": 6.80611610178073e-08, + "loss": 2.7343, + "step": 275800 + }, + { + "epoch": 2.0, + "learning_rate": 6.082830649944307e-08, + "loss": 2.7303, + "step": 275900 + }, + { + "epoch": 2.0, + "learning_rate": 5.3595451981078855e-08, + "loss": 2.7397, + "step": 276000 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.4618002437748198, + "eval_loss": 2.756411075592041, + "eval_runtime": 30.3922, + "eval_samples_per_second": 213.311, + "eval_steps_per_second": 2.237, + "step": 276000 + }, + { + "epoch": 2.0, + "learning_rate": 4.6362597462714634e-08, + "loss": 2.7391, + "step": 276100 + }, + { + "epoch": 2.0, + "learning_rate": 3.912974294435042e-08, + "loss": 2.739, + "step": 276200 + }, + { + "epoch": 2.0, + "learning_rate": 3.18968884259862e-08, + "loss": 2.7493, + "step": 276300 + }, + { + "epoch": 2.0, + "learning_rate": 2.4736362452805626e-08, + "loss": 2.733, + "step": 276400 + }, + { + "epoch": 2.0, + "learning_rate": 1.7503507934441408e-08, + "loss": 2.7424, + "step": 276500 + }, + { + "epoch": 2.0, + "step": 276518, + "total_flos": 5.3881880355706765e+20, + "train_loss": 2.8250040690803138, + "train_runtime": 396233.9412, + "train_samples_per_second": 133.99, + "train_steps_per_second": 0.698 + } + ], + "logging_steps": 100, + "max_steps": 276518, + "num_train_epochs": 2, + "save_steps": 20000, + "total_flos": 5.3881880355706765e+20, + "trial_name": null, + "trial_params": null +}