{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9694915254237289, "eval_steps": 74, "global_step": 590, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.003389830508474576, "grad_norm": 0.35930299758911133, "learning_rate": 1.0000000000000002e-06, "loss": 1.4859, "step": 1 }, { "epoch": 0.003389830508474576, "eval_loss": 1.429020643234253, "eval_runtime": 31.2958, "eval_samples_per_second": 9.266, "eval_steps_per_second": 1.182, "step": 1 }, { "epoch": 0.006779661016949152, "grad_norm": 0.34624481201171875, "learning_rate": 2.0000000000000003e-06, "loss": 1.4354, "step": 2 }, { "epoch": 0.010169491525423728, "grad_norm": 0.3303937017917633, "learning_rate": 3e-06, "loss": 1.4894, "step": 3 }, { "epoch": 0.013559322033898305, "grad_norm": 0.31533053517341614, "learning_rate": 4.000000000000001e-06, "loss": 1.4704, "step": 4 }, { "epoch": 0.01694915254237288, "grad_norm": 0.35673025250434875, "learning_rate": 5e-06, "loss": 1.4396, "step": 5 }, { "epoch": 0.020338983050847456, "grad_norm": 0.3174373507499695, "learning_rate": 6e-06, "loss": 1.4272, "step": 6 }, { "epoch": 0.023728813559322035, "grad_norm": 0.3153597414493561, "learning_rate": 7e-06, "loss": 1.4145, "step": 7 }, { "epoch": 0.02711864406779661, "grad_norm": 0.3401546776294708, "learning_rate": 8.000000000000001e-06, "loss": 1.4201, "step": 8 }, { "epoch": 0.030508474576271188, "grad_norm": 0.33904287219047546, "learning_rate": 9e-06, "loss": 1.4631, "step": 9 }, { "epoch": 0.03389830508474576, "grad_norm": 0.35196101665496826, "learning_rate": 1e-05, "loss": 1.4311, "step": 10 }, { "epoch": 0.03728813559322034, "grad_norm": 0.3472827970981598, "learning_rate": 9.999926652940914e-06, "loss": 1.4943, "step": 11 }, { "epoch": 0.04067796610169491, "grad_norm": 0.36120378971099854, "learning_rate": 9.999706613915567e-06, "loss": 1.4617, "step": 12 }, { "epoch": 0.04406779661016949, "grad_norm": 0.36284977197647095, "learning_rate": 9.999339889379647e-06, "loss": 1.48, "step": 13 }, { "epoch": 0.04745762711864407, "grad_norm": 0.33688271045684814, "learning_rate": 9.99882649009242e-06, "loss": 1.3719, "step": 14 }, { "epoch": 0.05084745762711865, "grad_norm": 0.331083744764328, "learning_rate": 9.998166431116421e-06, "loss": 1.3922, "step": 15 }, { "epoch": 0.05423728813559322, "grad_norm": 0.38570094108581543, "learning_rate": 9.997359731816998e-06, "loss": 1.427, "step": 16 }, { "epoch": 0.0576271186440678, "grad_norm": 0.36311087012290955, "learning_rate": 9.996406415861763e-06, "loss": 1.4435, "step": 17 }, { "epoch": 0.061016949152542375, "grad_norm": 0.3820710778236389, "learning_rate": 9.995306511219885e-06, "loss": 1.4298, "step": 18 }, { "epoch": 0.06440677966101695, "grad_norm": 0.39588412642478943, "learning_rate": 9.99406005016127e-06, "loss": 1.39, "step": 19 }, { "epoch": 0.06779661016949153, "grad_norm": 0.3949587047100067, "learning_rate": 9.99266706925562e-06, "loss": 1.4479, "step": 20 }, { "epoch": 0.0711864406779661, "grad_norm": 0.3987651467323303, "learning_rate": 9.991127609371357e-06, "loss": 1.4051, "step": 21 }, { "epoch": 0.07457627118644068, "grad_norm": 0.39174318313598633, "learning_rate": 9.989441715674422e-06, "loss": 1.3698, "step": 22 }, { "epoch": 0.07796610169491526, "grad_norm": 0.43692299723625183, "learning_rate": 9.987609437626955e-06, "loss": 1.4031, "step": 23 }, { "epoch": 0.08135593220338982, "grad_norm": 0.3924993574619293, "learning_rate": 9.985630828985835e-06, "loss": 1.3806, "step": 24 }, { "epoch": 0.0847457627118644, "grad_norm": 0.4775010645389557, "learning_rate": 9.983505947801115e-06, "loss": 1.3136, "step": 25 }, { "epoch": 0.08813559322033898, "grad_norm": 0.3788422644138336, "learning_rate": 9.981234856414306e-06, "loss": 1.3393, "step": 26 }, { "epoch": 0.09152542372881356, "grad_norm": 0.44365671277046204, "learning_rate": 9.978817621456562e-06, "loss": 1.4279, "step": 27 }, { "epoch": 0.09491525423728814, "grad_norm": 0.4439156651496887, "learning_rate": 9.97625431384671e-06, "loss": 1.3132, "step": 28 }, { "epoch": 0.09830508474576272, "grad_norm": 0.4146508276462555, "learning_rate": 9.973545008789182e-06, "loss": 1.3535, "step": 29 }, { "epoch": 0.1016949152542373, "grad_norm": 0.41049906611442566, "learning_rate": 9.970689785771798e-06, "loss": 1.373, "step": 30 }, { "epoch": 0.10508474576271186, "grad_norm": 0.4016999900341034, "learning_rate": 9.967688728563446e-06, "loss": 1.3689, "step": 31 }, { "epoch": 0.10847457627118644, "grad_norm": 0.44828104972839355, "learning_rate": 9.964541925211613e-06, "loss": 1.3401, "step": 32 }, { "epoch": 0.11186440677966102, "grad_norm": 0.4662250876426697, "learning_rate": 9.961249468039806e-06, "loss": 1.3681, "step": 33 }, { "epoch": 0.1152542372881356, "grad_norm": 0.4449179768562317, "learning_rate": 9.957811453644848e-06, "loss": 1.3252, "step": 34 }, { "epoch": 0.11864406779661017, "grad_norm": 0.404920369386673, "learning_rate": 9.954227982894034e-06, "loss": 1.3414, "step": 35 }, { "epoch": 0.12203389830508475, "grad_norm": 0.4138246178627014, "learning_rate": 9.950499160922184e-06, "loss": 1.307, "step": 36 }, { "epoch": 0.12542372881355932, "grad_norm": 0.44694656133651733, "learning_rate": 9.946625097128544e-06, "loss": 1.3156, "step": 37 }, { "epoch": 0.1288135593220339, "grad_norm": 0.4669574499130249, "learning_rate": 9.942605905173593e-06, "loss": 1.2325, "step": 38 }, { "epoch": 0.13220338983050847, "grad_norm": 0.4561479687690735, "learning_rate": 9.938441702975689e-06, "loss": 1.2837, "step": 39 }, { "epoch": 0.13559322033898305, "grad_norm": 0.4158235788345337, "learning_rate": 9.934132612707631e-06, "loss": 1.2915, "step": 40 }, { "epoch": 0.13898305084745763, "grad_norm": 0.40082406997680664, "learning_rate": 9.929678760793057e-06, "loss": 1.2878, "step": 41 }, { "epoch": 0.1423728813559322, "grad_norm": 0.44715750217437744, "learning_rate": 9.925080277902743e-06, "loss": 1.2413, "step": 42 }, { "epoch": 0.14576271186440679, "grad_norm": 0.45030006766319275, "learning_rate": 9.920337298950767e-06, "loss": 1.273, "step": 43 }, { "epoch": 0.14915254237288136, "grad_norm": 0.42045557498931885, "learning_rate": 9.915449963090551e-06, "loss": 1.2451, "step": 44 }, { "epoch": 0.15254237288135594, "grad_norm": 0.42879652976989746, "learning_rate": 9.91041841371078e-06, "loss": 1.228, "step": 45 }, { "epoch": 0.15593220338983052, "grad_norm": 0.43669596314430237, "learning_rate": 9.905242798431196e-06, "loss": 1.2576, "step": 46 }, { "epoch": 0.15932203389830507, "grad_norm": 0.43827345967292786, "learning_rate": 9.899923269098262e-06, "loss": 1.2882, "step": 47 }, { "epoch": 0.16271186440677965, "grad_norm": 0.4514354169368744, "learning_rate": 9.894459981780711e-06, "loss": 1.2197, "step": 48 }, { "epoch": 0.16610169491525423, "grad_norm": 0.41045793890953064, "learning_rate": 9.888853096764963e-06, "loss": 1.2053, "step": 49 }, { "epoch": 0.1694915254237288, "grad_norm": 0.4640413224697113, "learning_rate": 9.883102778550434e-06, "loss": 1.2482, "step": 50 }, { "epoch": 0.17288135593220338, "grad_norm": 0.40715348720550537, "learning_rate": 9.877209195844692e-06, "loss": 1.2361, "step": 51 }, { "epoch": 0.17627118644067796, "grad_norm": 0.475995272397995, "learning_rate": 9.871172521558523e-06, "loss": 1.2005, "step": 52 }, { "epoch": 0.17966101694915254, "grad_norm": 0.362023264169693, "learning_rate": 9.864992932800845e-06, "loss": 1.2113, "step": 53 }, { "epoch": 0.18305084745762712, "grad_norm": 0.3483423888683319, "learning_rate": 9.858670610873528e-06, "loss": 1.2028, "step": 54 }, { "epoch": 0.1864406779661017, "grad_norm": 0.39612239599227905, "learning_rate": 9.852205741266058e-06, "loss": 1.2108, "step": 55 }, { "epoch": 0.18983050847457628, "grad_norm": 0.44254443049430847, "learning_rate": 9.845598513650104e-06, "loss": 1.2322, "step": 56 }, { "epoch": 0.19322033898305085, "grad_norm": 0.38581666350364685, "learning_rate": 9.83884912187395e-06, "loss": 1.2722, "step": 57 }, { "epoch": 0.19661016949152543, "grad_norm": 0.44798368215560913, "learning_rate": 9.831957763956814e-06, "loss": 1.1941, "step": 58 }, { "epoch": 0.2, "grad_norm": 0.40427738428115845, "learning_rate": 9.824924642083026e-06, "loss": 1.2295, "step": 59 }, { "epoch": 0.2033898305084746, "grad_norm": 0.3983621895313263, "learning_rate": 9.817749962596115e-06, "loss": 1.1937, "step": 60 }, { "epoch": 0.20677966101694914, "grad_norm": 0.4414602518081665, "learning_rate": 9.810433935992734e-06, "loss": 1.1965, "step": 61 }, { "epoch": 0.21016949152542372, "grad_norm": 0.39355960488319397, "learning_rate": 9.802976776916493e-06, "loss": 1.1339, "step": 62 }, { "epoch": 0.2135593220338983, "grad_norm": 0.4074147939682007, "learning_rate": 9.795378704151675e-06, "loss": 1.2277, "step": 63 }, { "epoch": 0.21694915254237288, "grad_norm": 0.5208883881568909, "learning_rate": 9.787639940616789e-06, "loss": 1.2077, "step": 64 }, { "epoch": 0.22033898305084745, "grad_norm": 0.45397087931632996, "learning_rate": 9.77976071335806e-06, "loss": 1.2425, "step": 65 }, { "epoch": 0.22372881355932203, "grad_norm": 0.4047125279903412, "learning_rate": 9.771741253542742e-06, "loss": 1.1817, "step": 66 }, { "epoch": 0.2271186440677966, "grad_norm": 0.44132372736930847, "learning_rate": 9.763581796452353e-06, "loss": 1.1835, "step": 67 }, { "epoch": 0.2305084745762712, "grad_norm": 0.3882867693901062, "learning_rate": 9.755282581475769e-06, "loss": 1.1488, "step": 68 }, { "epoch": 0.23389830508474577, "grad_norm": 0.42960307002067566, "learning_rate": 9.746843852102191e-06, "loss": 1.1559, "step": 69 }, { "epoch": 0.23728813559322035, "grad_norm": 0.38017773628234863, "learning_rate": 9.738265855914014e-06, "loss": 1.1617, "step": 70 }, { "epoch": 0.24067796610169492, "grad_norm": 0.470213383436203, "learning_rate": 9.729548844579552e-06, "loss": 1.1536, "step": 71 }, { "epoch": 0.2440677966101695, "grad_norm": 0.5659909844398499, "learning_rate": 9.720693073845668e-06, "loss": 1.1216, "step": 72 }, { "epoch": 0.24745762711864408, "grad_norm": 0.48637259006500244, "learning_rate": 9.711698803530253e-06, "loss": 1.1202, "step": 73 }, { "epoch": 0.25084745762711863, "grad_norm": 0.42485398054122925, "learning_rate": 9.70256629751462e-06, "loss": 1.1159, "step": 74 }, { "epoch": 0.25084745762711863, "eval_loss": 1.136481523513794, "eval_runtime": 31.4712, "eval_samples_per_second": 9.215, "eval_steps_per_second": 1.176, "step": 74 }, { "epoch": 0.2542372881355932, "grad_norm": 0.5143113136291504, "learning_rate": 9.693295823735754e-06, "loss": 1.1204, "step": 75 }, { "epoch": 0.2576271186440678, "grad_norm": 0.5175806283950806, "learning_rate": 9.683887654178446e-06, "loss": 1.1709, "step": 76 }, { "epoch": 0.26101694915254237, "grad_norm": 0.4077480137348175, "learning_rate": 9.674342064867326e-06, "loss": 1.1532, "step": 77 }, { "epoch": 0.26440677966101694, "grad_norm": 0.44241201877593994, "learning_rate": 9.664659335858755e-06, "loss": 1.1327, "step": 78 }, { "epoch": 0.2677966101694915, "grad_norm": 0.4448195695877075, "learning_rate": 9.654839751232612e-06, "loss": 1.1447, "step": 79 }, { "epoch": 0.2711864406779661, "grad_norm": 0.40797024965286255, "learning_rate": 9.644883599083959e-06, "loss": 1.1103, "step": 80 }, { "epoch": 0.2745762711864407, "grad_norm": 0.36770811676979065, "learning_rate": 9.634791171514585e-06, "loss": 1.1323, "step": 81 }, { "epoch": 0.27796610169491526, "grad_norm": 0.44905734062194824, "learning_rate": 9.624562764624445e-06, "loss": 1.1859, "step": 82 }, { "epoch": 0.28135593220338984, "grad_norm": 0.4534432888031006, "learning_rate": 9.614198678502965e-06, "loss": 1.1314, "step": 83 }, { "epoch": 0.2847457627118644, "grad_norm": 0.4798562824726105, "learning_rate": 9.603699217220239e-06, "loss": 1.0829, "step": 84 }, { "epoch": 0.288135593220339, "grad_norm": 0.3493732511997223, "learning_rate": 9.59306468881811e-06, "loss": 1.159, "step": 85 }, { "epoch": 0.29152542372881357, "grad_norm": 0.4375908076763153, "learning_rate": 9.582295405301131e-06, "loss": 1.1565, "step": 86 }, { "epoch": 0.29491525423728815, "grad_norm": 0.4635830521583557, "learning_rate": 9.571391682627413e-06, "loss": 1.1295, "step": 87 }, { "epoch": 0.2983050847457627, "grad_norm": 0.43029898405075073, "learning_rate": 9.56035384069935e-06, "loss": 1.1393, "step": 88 }, { "epoch": 0.3016949152542373, "grad_norm": 0.45764777064323425, "learning_rate": 9.549182203354241e-06, "loss": 1.1185, "step": 89 }, { "epoch": 0.3050847457627119, "grad_norm": 0.5191004276275635, "learning_rate": 9.537877098354787e-06, "loss": 1.0814, "step": 90 }, { "epoch": 0.30847457627118646, "grad_norm": 0.43570736050605774, "learning_rate": 9.526438857379463e-06, "loss": 1.0788, "step": 91 }, { "epoch": 0.31186440677966104, "grad_norm": 0.38071703910827637, "learning_rate": 9.514867816012809e-06, "loss": 1.1122, "step": 92 }, { "epoch": 0.3152542372881356, "grad_norm": 0.4297149181365967, "learning_rate": 9.503164313735566e-06, "loss": 1.1574, "step": 93 }, { "epoch": 0.31864406779661014, "grad_norm": 0.430363267660141, "learning_rate": 9.491328693914723e-06, "loss": 1.0897, "step": 94 }, { "epoch": 0.3220338983050847, "grad_norm": 0.47374778985977173, "learning_rate": 9.479361303793441e-06, "loss": 1.0983, "step": 95 }, { "epoch": 0.3254237288135593, "grad_norm": 0.4537319540977478, "learning_rate": 9.46726249448087e-06, "loss": 1.1346, "step": 96 }, { "epoch": 0.3288135593220339, "grad_norm": 0.4462335705757141, "learning_rate": 9.45503262094184e-06, "loss": 1.0899, "step": 97 }, { "epoch": 0.33220338983050846, "grad_norm": 0.41826269030570984, "learning_rate": 9.442672041986456e-06, "loss": 1.1208, "step": 98 }, { "epoch": 0.33559322033898303, "grad_norm": 0.4252929985523224, "learning_rate": 9.430181120259566e-06, "loss": 1.1231, "step": 99 }, { "epoch": 0.3389830508474576, "grad_norm": 0.4414180815219879, "learning_rate": 9.417560222230115e-06, "loss": 1.0666, "step": 100 }, { "epoch": 0.3423728813559322, "grad_norm": 0.4924032688140869, "learning_rate": 9.404809718180408e-06, "loss": 1.1042, "step": 101 }, { "epoch": 0.34576271186440677, "grad_norm": 0.4684174954891205, "learning_rate": 9.391929982195233e-06, "loss": 1.1262, "step": 102 }, { "epoch": 0.34915254237288135, "grad_norm": 0.5047671794891357, "learning_rate": 9.378921392150893e-06, "loss": 1.1174, "step": 103 }, { "epoch": 0.3525423728813559, "grad_norm": 0.45903801918029785, "learning_rate": 9.365784329704114e-06, "loss": 1.1046, "step": 104 }, { "epoch": 0.3559322033898305, "grad_norm": 0.46311357617378235, "learning_rate": 9.352519180280862e-06, "loss": 1.0749, "step": 105 }, { "epoch": 0.3593220338983051, "grad_norm": 0.5181329250335693, "learning_rate": 9.339126333065008e-06, "loss": 1.1037, "step": 106 }, { "epoch": 0.36271186440677966, "grad_norm": 0.4740346372127533, "learning_rate": 9.325606180986938e-06, "loss": 1.0855, "step": 107 }, { "epoch": 0.36610169491525424, "grad_norm": 0.554834246635437, "learning_rate": 9.311959120712012e-06, "loss": 1.1364, "step": 108 }, { "epoch": 0.3694915254237288, "grad_norm": 0.5633101463317871, "learning_rate": 9.298185552628917e-06, "loss": 1.0482, "step": 109 }, { "epoch": 0.3728813559322034, "grad_norm": 0.5091131329536438, "learning_rate": 9.284285880837947e-06, "loss": 1.0801, "step": 110 }, { "epoch": 0.376271186440678, "grad_norm": 0.5129575729370117, "learning_rate": 9.270260513139116e-06, "loss": 1.0575, "step": 111 }, { "epoch": 0.37966101694915255, "grad_norm": 0.4866485893726349, "learning_rate": 9.256109861020213e-06, "loss": 1.0705, "step": 112 }, { "epoch": 0.38305084745762713, "grad_norm": 0.48480409383773804, "learning_rate": 9.241834339644726e-06, "loss": 1.0744, "step": 113 }, { "epoch": 0.3864406779661017, "grad_norm": 0.5094970464706421, "learning_rate": 9.22743436783966e-06, "loss": 1.035, "step": 114 }, { "epoch": 0.3898305084745763, "grad_norm": 0.5411089658737183, "learning_rate": 9.212910368083246e-06, "loss": 1.067, "step": 115 }, { "epoch": 0.39322033898305087, "grad_norm": 0.5899820327758789, "learning_rate": 9.198262766492554e-06, "loss": 1.0643, "step": 116 }, { "epoch": 0.39661016949152544, "grad_norm": 0.40834295749664307, "learning_rate": 9.18349199281098e-06, "loss": 1.0478, "step": 117 }, { "epoch": 0.4, "grad_norm": 0.5265842080116272, "learning_rate": 9.168598480395653e-06, "loss": 1.0743, "step": 118 }, { "epoch": 0.4033898305084746, "grad_norm": 0.5842476487159729, "learning_rate": 9.153582666204702e-06, "loss": 1.0714, "step": 119 }, { "epoch": 0.4067796610169492, "grad_norm": 0.4064609110355377, "learning_rate": 9.138444990784455e-06, "loss": 1.0881, "step": 120 }, { "epoch": 0.4101694915254237, "grad_norm": 0.5088958740234375, "learning_rate": 9.123185898256497e-06, "loss": 1.0558, "step": 121 }, { "epoch": 0.4135593220338983, "grad_norm": 0.5272684097290039, "learning_rate": 9.107805836304658e-06, "loss": 1.0971, "step": 122 }, { "epoch": 0.41694915254237286, "grad_norm": 0.5298842191696167, "learning_rate": 9.09230525616186e-06, "loss": 1.0715, "step": 123 }, { "epoch": 0.42033898305084744, "grad_norm": 0.4694008231163025, "learning_rate": 9.076684612596891e-06, "loss": 1.0486, "step": 124 }, { "epoch": 0.423728813559322, "grad_norm": 0.4768994152545929, "learning_rate": 9.060944363901057e-06, "loss": 1.0653, "step": 125 }, { "epoch": 0.4271186440677966, "grad_norm": 0.6186073422431946, "learning_rate": 9.045084971874738e-06, "loss": 1.1184, "step": 126 }, { "epoch": 0.43050847457627117, "grad_norm": 0.5240334272384644, "learning_rate": 9.02910690181384e-06, "loss": 1.0814, "step": 127 }, { "epoch": 0.43389830508474575, "grad_norm": 0.4750120937824249, "learning_rate": 9.013010622496145e-06, "loss": 1.0713, "step": 128 }, { "epoch": 0.43728813559322033, "grad_norm": 0.47448915243148804, "learning_rate": 8.996796606167549e-06, "loss": 1.0743, "step": 129 }, { "epoch": 0.4406779661016949, "grad_norm": 0.5730359554290771, "learning_rate": 8.98046532852822e-06, "loss": 1.1518, "step": 130 }, { "epoch": 0.4440677966101695, "grad_norm": 0.4681292176246643, "learning_rate": 8.964017268718632e-06, "loss": 1.087, "step": 131 }, { "epoch": 0.44745762711864406, "grad_norm": 0.5501366853713989, "learning_rate": 8.94745290930551e-06, "loss": 1.0116, "step": 132 }, { "epoch": 0.45084745762711864, "grad_norm": 0.6889796257019043, "learning_rate": 8.930772736267675e-06, "loss": 1.0958, "step": 133 }, { "epoch": 0.4542372881355932, "grad_norm": 0.581390380859375, "learning_rate": 8.91397723898178e-06, "loss": 1.0244, "step": 134 }, { "epoch": 0.4576271186440678, "grad_norm": 0.6266252994537354, "learning_rate": 8.897066910207958e-06, "loss": 1.0986, "step": 135 }, { "epoch": 0.4610169491525424, "grad_norm": 0.6138602495193481, "learning_rate": 8.880042246075366e-06, "loss": 1.0705, "step": 136 }, { "epoch": 0.46440677966101696, "grad_norm": 0.5058969259262085, "learning_rate": 8.862903746067619e-06, "loss": 1.0749, "step": 137 }, { "epoch": 0.46779661016949153, "grad_norm": 0.6257705092430115, "learning_rate": 8.845651913008145e-06, "loss": 1.0774, "step": 138 }, { "epoch": 0.4711864406779661, "grad_norm": 0.5636258125305176, "learning_rate": 8.828287253045436e-06, "loss": 1.1003, "step": 139 }, { "epoch": 0.4745762711864407, "grad_norm": 0.554627537727356, "learning_rate": 8.810810275638183e-06, "loss": 1.066, "step": 140 }, { "epoch": 0.47796610169491527, "grad_norm": 0.5035117268562317, "learning_rate": 8.793221493540347e-06, "loss": 1.0334, "step": 141 }, { "epoch": 0.48135593220338985, "grad_norm": 0.5991939902305603, "learning_rate": 8.775521422786104e-06, "loss": 0.9872, "step": 142 }, { "epoch": 0.4847457627118644, "grad_norm": 0.5974786281585693, "learning_rate": 8.757710582674708e-06, "loss": 1.0796, "step": 143 }, { "epoch": 0.488135593220339, "grad_norm": 0.46773356199264526, "learning_rate": 8.739789495755254e-06, "loss": 1.115, "step": 144 }, { "epoch": 0.4915254237288136, "grad_norm": 0.6154885292053223, "learning_rate": 8.721758687811353e-06, "loss": 1.0288, "step": 145 }, { "epoch": 0.49491525423728816, "grad_norm": 0.5579652190208435, "learning_rate": 8.703618687845697e-06, "loss": 1.0959, "step": 146 }, { "epoch": 0.49830508474576274, "grad_norm": 0.5460482835769653, "learning_rate": 8.685370028064546e-06, "loss": 1.0491, "step": 147 }, { "epoch": 0.5016949152542373, "grad_norm": 0.6327131390571594, "learning_rate": 8.667013243862113e-06, "loss": 1.0075, "step": 148 }, { "epoch": 0.5016949152542373, "eval_loss": 1.0498446226119995, "eval_runtime": 31.4652, "eval_samples_per_second": 9.217, "eval_steps_per_second": 1.176, "step": 148 }, { "epoch": 0.5050847457627119, "grad_norm": 0.5511671304702759, "learning_rate": 8.64854887380485e-06, "loss": 1.1018, "step": 149 }, { "epoch": 0.5084745762711864, "grad_norm": 0.5637195706367493, "learning_rate": 8.629977459615655e-06, "loss": 1.0287, "step": 150 }, { "epoch": 0.511864406779661, "grad_norm": 0.5637701749801636, "learning_rate": 8.611299546157973e-06, "loss": 1.0745, "step": 151 }, { "epoch": 0.5152542372881356, "grad_norm": 0.47854217886924744, "learning_rate": 8.592515681419812e-06, "loss": 1.0817, "step": 152 }, { "epoch": 0.5186440677966102, "grad_norm": 0.5699596405029297, "learning_rate": 8.573626416497669e-06, "loss": 1.043, "step": 153 }, { "epoch": 0.5220338983050847, "grad_norm": 0.44490182399749756, "learning_rate": 8.554632305580355e-06, "loss": 1.0804, "step": 154 }, { "epoch": 0.5254237288135594, "grad_norm": 0.5301121473312378, "learning_rate": 8.535533905932739e-06, "loss": 1.0301, "step": 155 }, { "epoch": 0.5288135593220339, "grad_norm": 0.4578869938850403, "learning_rate": 8.5163317778794e-06, "loss": 1.0626, "step": 156 }, { "epoch": 0.5322033898305085, "grad_norm": 0.4965490698814392, "learning_rate": 8.497026484788189e-06, "loss": 1.0129, "step": 157 }, { "epoch": 0.535593220338983, "grad_norm": 0.4961901307106018, "learning_rate": 8.477618593053693e-06, "loss": 1.0996, "step": 158 }, { "epoch": 0.5389830508474577, "grad_norm": 0.6383612155914307, "learning_rate": 8.458108672080624e-06, "loss": 0.9412, "step": 159 }, { "epoch": 0.5423728813559322, "grad_norm": 0.6696265339851379, "learning_rate": 8.438497294267117e-06, "loss": 1.0061, "step": 160 }, { "epoch": 0.5457627118644067, "grad_norm": 0.6069437861442566, "learning_rate": 8.418785034987921e-06, "loss": 1.062, "step": 161 }, { "epoch": 0.5491525423728814, "grad_norm": 0.49983924627304077, "learning_rate": 8.39897247257754e-06, "loss": 1.0299, "step": 162 }, { "epoch": 0.5525423728813559, "grad_norm": 0.6177976727485657, "learning_rate": 8.379060188313244e-06, "loss": 1.0177, "step": 163 }, { "epoch": 0.5559322033898305, "grad_norm": 0.4884355962276459, "learning_rate": 8.359048766398032e-06, "loss": 1.0552, "step": 164 }, { "epoch": 0.559322033898305, "grad_norm": 0.5263038277626038, "learning_rate": 8.338938793943478e-06, "loss": 1.0601, "step": 165 }, { "epoch": 0.5627118644067797, "grad_norm": 0.6198043823242188, "learning_rate": 8.318730860952523e-06, "loss": 0.9993, "step": 166 }, { "epoch": 0.5661016949152542, "grad_norm": 0.7637753486633301, "learning_rate": 8.298425560302146e-06, "loss": 1.0305, "step": 167 }, { "epoch": 0.5694915254237288, "grad_norm": 0.5884391069412231, "learning_rate": 8.278023487725981e-06, "loss": 1.1051, "step": 168 }, { "epoch": 0.5728813559322034, "grad_norm": 0.6029988527297974, "learning_rate": 8.257525241796837e-06, "loss": 1.0193, "step": 169 }, { "epoch": 0.576271186440678, "grad_norm": 0.5312538743019104, "learning_rate": 8.23693142390914e-06, "loss": 1.094, "step": 170 }, { "epoch": 0.5796610169491525, "grad_norm": 0.5297631025314331, "learning_rate": 8.216242638261277e-06, "loss": 1.0379, "step": 171 }, { "epoch": 0.5830508474576271, "grad_norm": 0.5840802192687988, "learning_rate": 8.195459491837881e-06, "loss": 1.0919, "step": 172 }, { "epoch": 0.5864406779661017, "grad_norm": 0.6274826526641846, "learning_rate": 8.17458259439202e-06, "loss": 1.088, "step": 173 }, { "epoch": 0.5898305084745763, "grad_norm": 0.6879580020904541, "learning_rate": 8.153612558427311e-06, "loss": 1.073, "step": 174 }, { "epoch": 0.5932203389830508, "grad_norm": 0.501899242401123, "learning_rate": 8.132549999179934e-06, "loss": 1.0645, "step": 175 }, { "epoch": 0.5966101694915255, "grad_norm": 0.574945330619812, "learning_rate": 8.111395534600604e-06, "loss": 1.0918, "step": 176 }, { "epoch": 0.6, "grad_norm": 0.5933839082717896, "learning_rate": 8.090149785336426e-06, "loss": 1.0155, "step": 177 }, { "epoch": 0.6033898305084746, "grad_norm": 0.5565395355224609, "learning_rate": 8.068813374712689e-06, "loss": 1.0388, "step": 178 }, { "epoch": 0.6067796610169491, "grad_norm": 0.4744894206523895, "learning_rate": 8.047386928714583e-06, "loss": 1.032, "step": 179 }, { "epoch": 0.6101694915254238, "grad_norm": 0.6712637543678284, "learning_rate": 8.025871075968828e-06, "loss": 1.0434, "step": 180 }, { "epoch": 0.6135593220338983, "grad_norm": 0.596839964389801, "learning_rate": 8.00426644772523e-06, "loss": 1.0737, "step": 181 }, { "epoch": 0.6169491525423729, "grad_norm": 0.4570152759552002, "learning_rate": 7.982573677838172e-06, "loss": 1.0367, "step": 182 }, { "epoch": 0.6203389830508474, "grad_norm": 0.5980694890022278, "learning_rate": 7.960793402748001e-06, "loss": 1.0677, "step": 183 }, { "epoch": 0.6237288135593221, "grad_norm": 0.5124939680099487, "learning_rate": 7.938926261462366e-06, "loss": 1.0164, "step": 184 }, { "epoch": 0.6271186440677966, "grad_norm": 0.46987423300743103, "learning_rate": 7.916972895537471e-06, "loss": 1.0339, "step": 185 }, { "epoch": 0.6305084745762712, "grad_norm": 0.47326841950416565, "learning_rate": 7.894933949059245e-06, "loss": 1.0243, "step": 186 }, { "epoch": 0.6338983050847458, "grad_norm": 0.6061602830886841, "learning_rate": 7.872810068624452e-06, "loss": 1.0294, "step": 187 }, { "epoch": 0.6372881355932203, "grad_norm": 0.47816479206085205, "learning_rate": 7.850601903321717e-06, "loss": 1.0019, "step": 188 }, { "epoch": 0.6406779661016949, "grad_norm": 0.6350972056388855, "learning_rate": 7.828310104712488e-06, "loss": 1.0369, "step": 189 }, { "epoch": 0.6440677966101694, "grad_norm": 0.591119647026062, "learning_rate": 7.805935326811913e-06, "loss": 1.0366, "step": 190 }, { "epoch": 0.6474576271186441, "grad_norm": 0.5823162198066711, "learning_rate": 7.783478226069652e-06, "loss": 1.0244, "step": 191 }, { "epoch": 0.6508474576271186, "grad_norm": 0.6485019326210022, "learning_rate": 7.760939461350622e-06, "loss": 1.0323, "step": 192 }, { "epoch": 0.6542372881355932, "grad_norm": 0.6895255446434021, "learning_rate": 7.738319693915673e-06, "loss": 1.0157, "step": 193 }, { "epoch": 0.6576271186440678, "grad_norm": 0.5625537037849426, "learning_rate": 7.715619587402165e-06, "loss": 1.085, "step": 194 }, { "epoch": 0.6610169491525424, "grad_norm": 0.5819742679595947, "learning_rate": 7.692839807804522e-06, "loss": 1.0436, "step": 195 }, { "epoch": 0.6644067796610169, "grad_norm": 0.5372628569602966, "learning_rate": 7.669981023454682e-06, "loss": 1.0725, "step": 196 }, { "epoch": 0.6677966101694915, "grad_norm": 0.6635042428970337, "learning_rate": 7.647043905002485e-06, "loss": 1.0267, "step": 197 }, { "epoch": 0.6711864406779661, "grad_norm": 0.5099017024040222, "learning_rate": 7.624029125396004e-06, "loss": 1.0086, "step": 198 }, { "epoch": 0.6745762711864407, "grad_norm": 0.47169557213783264, "learning_rate": 7.600937359861799e-06, "loss": 1.0346, "step": 199 }, { "epoch": 0.6779661016949152, "grad_norm": 0.6285570859909058, "learning_rate": 7.57776928588511e-06, "loss": 1.0014, "step": 200 }, { "epoch": 0.6813559322033899, "grad_norm": 0.7339727878570557, "learning_rate": 7.554525583189969e-06, "loss": 1.0974, "step": 201 }, { "epoch": 0.6847457627118644, "grad_norm": 0.6427012085914612, "learning_rate": 7.53120693371927e-06, "loss": 1.0749, "step": 202 }, { "epoch": 0.688135593220339, "grad_norm": 0.5695547461509705, "learning_rate": 7.507814021614761e-06, "loss": 1.0267, "step": 203 }, { "epoch": 0.6915254237288135, "grad_norm": 0.5251569151878357, "learning_rate": 7.4843475331969614e-06, "loss": 1.0319, "step": 204 }, { "epoch": 0.6949152542372882, "grad_norm": 0.5034914612770081, "learning_rate": 7.4608081569450365e-06, "loss": 1.0222, "step": 205 }, { "epoch": 0.6983050847457627, "grad_norm": 0.6644279956817627, "learning_rate": 7.437196583476597e-06, "loss": 1.0822, "step": 206 }, { "epoch": 0.7016949152542373, "grad_norm": 0.5474566221237183, "learning_rate": 7.41351350552743e-06, "loss": 1.0419, "step": 207 }, { "epoch": 0.7050847457627119, "grad_norm": 0.5675274133682251, "learning_rate": 7.389759617931183e-06, "loss": 1.0225, "step": 208 }, { "epoch": 0.7084745762711865, "grad_norm": 0.6349363923072815, "learning_rate": 7.365935617598975e-06, "loss": 1.0587, "step": 209 }, { "epoch": 0.711864406779661, "grad_norm": 0.5804888606071472, "learning_rate": 7.342042203498952e-06, "loss": 1.0544, "step": 210 }, { "epoch": 0.7152542372881356, "grad_norm": 0.5343108177185059, "learning_rate": 7.318080076635773e-06, "loss": 1.0429, "step": 211 }, { "epoch": 0.7186440677966102, "grad_norm": 0.550864577293396, "learning_rate": 7.294049940030055e-06, "loss": 1.0503, "step": 212 }, { "epoch": 0.7220338983050848, "grad_norm": 0.4380170702934265, "learning_rate": 7.269952498697734e-06, "loss": 1.0209, "step": 213 }, { "epoch": 0.7254237288135593, "grad_norm": 0.7789644598960876, "learning_rate": 7.245788459629397e-06, "loss": 1.0665, "step": 214 }, { "epoch": 0.7288135593220338, "grad_norm": 0.5640416741371155, "learning_rate": 7.221558531769519e-06, "loss": 1.0429, "step": 215 }, { "epoch": 0.7322033898305085, "grad_norm": 0.6263359785079956, "learning_rate": 7.197263425995682e-06, "loss": 1.0652, "step": 216 }, { "epoch": 0.735593220338983, "grad_norm": 0.6411235928535461, "learning_rate": 7.172903855097712e-06, "loss": 1.0597, "step": 217 }, { "epoch": 0.7389830508474576, "grad_norm": 0.5862070322036743, "learning_rate": 7.148480533756759e-06, "loss": 1.0372, "step": 218 }, { "epoch": 0.7423728813559322, "grad_norm": 0.5596318244934082, "learning_rate": 7.123994178524345e-06, "loss": 1.024, "step": 219 }, { "epoch": 0.7457627118644068, "grad_norm": 0.6051028370857239, "learning_rate": 7.099445507801324e-06, "loss": 0.9988, "step": 220 }, { "epoch": 0.7491525423728813, "grad_norm": 0.6190935373306274, "learning_rate": 7.0748352418168174e-06, "loss": 1.0302, "step": 221 }, { "epoch": 0.752542372881356, "grad_norm": 0.6556586623191833, "learning_rate": 7.050164102607081e-06, "loss": 1.0646, "step": 222 }, { "epoch": 0.752542372881356, "eval_loss": 1.0214256048202515, "eval_runtime": 31.4525, "eval_samples_per_second": 9.22, "eval_steps_per_second": 1.176, "step": 222 }, { "epoch": 0.7559322033898305, "grad_norm": 0.6047313809394836, "learning_rate": 7.025432813994315e-06, "loss": 1.0599, "step": 223 }, { "epoch": 0.7593220338983051, "grad_norm": 0.5894531011581421, "learning_rate": 7.000642101565434e-06, "loss": 1.0479, "step": 224 }, { "epoch": 0.7627118644067796, "grad_norm": 0.6461945176124573, "learning_rate": 6.975792692650778e-06, "loss": 1.0377, "step": 225 }, { "epoch": 0.7661016949152543, "grad_norm": 0.7174557447433472, "learning_rate": 6.950885316302773e-06, "loss": 0.9929, "step": 226 }, { "epoch": 0.7694915254237288, "grad_norm": 0.62993985414505, "learning_rate": 6.9259207032745415e-06, "loss": 1.0639, "step": 227 }, { "epoch": 0.7728813559322034, "grad_norm": 0.5091562867164612, "learning_rate": 6.90089958599846e-06, "loss": 1.0476, "step": 228 }, { "epoch": 0.7762711864406779, "grad_norm": 0.5700099468231201, "learning_rate": 6.875822698564678e-06, "loss": 1.0678, "step": 229 }, { "epoch": 0.7796610169491526, "grad_norm": 0.5657733082771301, "learning_rate": 6.850690776699574e-06, "loss": 1.0234, "step": 230 }, { "epoch": 0.7830508474576271, "grad_norm": 0.5933859348297119, "learning_rate": 6.825504557744167e-06, "loss": 1.034, "step": 231 }, { "epoch": 0.7864406779661017, "grad_norm": 0.6932422518730164, "learning_rate": 6.800264780632495e-06, "loss": 1.0357, "step": 232 }, { "epoch": 0.7898305084745763, "grad_norm": 0.6227273941040039, "learning_rate": 6.774972185869928e-06, "loss": 0.9694, "step": 233 }, { "epoch": 0.7932203389830509, "grad_norm": 0.5673425793647766, "learning_rate": 6.749627515511443e-06, "loss": 1.0535, "step": 234 }, { "epoch": 0.7966101694915254, "grad_norm": 0.7180874943733215, "learning_rate": 6.724231513139853e-06, "loss": 0.983, "step": 235 }, { "epoch": 0.8, "grad_norm": 0.623993992805481, "learning_rate": 6.698784923843993e-06, "loss": 0.9727, "step": 236 }, { "epoch": 0.8033898305084746, "grad_norm": 0.7041409611701965, "learning_rate": 6.673288494196858e-06, "loss": 0.9864, "step": 237 }, { "epoch": 0.8067796610169492, "grad_norm": 0.5427469611167908, "learning_rate": 6.647742972233703e-06, "loss": 1.0213, "step": 238 }, { "epoch": 0.8101694915254237, "grad_norm": 0.4915582835674286, "learning_rate": 6.622149107430088e-06, "loss": 1.0006, "step": 239 }, { "epoch": 0.8135593220338984, "grad_norm": 0.6874458193778992, "learning_rate": 6.5965076506799e-06, "loss": 1.0339, "step": 240 }, { "epoch": 0.8169491525423729, "grad_norm": 0.5942449569702148, "learning_rate": 6.570819354273317e-06, "loss": 1.0453, "step": 241 }, { "epoch": 0.8203389830508474, "grad_norm": 0.5021138787269592, "learning_rate": 6.545084971874738e-06, "loss": 1.0256, "step": 242 }, { "epoch": 0.823728813559322, "grad_norm": 0.562307596206665, "learning_rate": 6.5193052585006666e-06, "loss": 1.0223, "step": 243 }, { "epoch": 0.8271186440677966, "grad_norm": 0.5904672145843506, "learning_rate": 6.493480970497569e-06, "loss": 1.0564, "step": 244 }, { "epoch": 0.8305084745762712, "grad_norm": 0.5653520822525024, "learning_rate": 6.467612865519674e-06, "loss": 1.0672, "step": 245 }, { "epoch": 0.8338983050847457, "grad_norm": 0.5326948165893555, "learning_rate": 6.441701702506755e-06, "loss": 1.0086, "step": 246 }, { "epoch": 0.8372881355932204, "grad_norm": 0.6869088411331177, "learning_rate": 6.4157482416618514e-06, "loss": 1.0101, "step": 247 }, { "epoch": 0.8406779661016949, "grad_norm": 0.5359118580818176, "learning_rate": 6.389753244428973e-06, "loss": 0.9606, "step": 248 }, { "epoch": 0.8440677966101695, "grad_norm": 0.5426267385482788, "learning_rate": 6.36371747347076e-06, "loss": 1.0501, "step": 249 }, { "epoch": 0.847457627118644, "grad_norm": 0.6245284676551819, "learning_rate": 6.337641692646106e-06, "loss": 1.0042, "step": 250 }, { "epoch": 0.8508474576271187, "grad_norm": 0.7445155382156372, "learning_rate": 6.3115266669877425e-06, "loss": 1.0026, "step": 251 }, { "epoch": 0.8542372881355932, "grad_norm": 0.5616058707237244, "learning_rate": 6.285373162679804e-06, "loss": 1.0548, "step": 252 }, { "epoch": 0.8576271186440678, "grad_norm": 0.5358186960220337, "learning_rate": 6.2591819470353424e-06, "loss": 1.0241, "step": 253 }, { "epoch": 0.8610169491525423, "grad_norm": 0.584170401096344, "learning_rate": 6.2329537884738115e-06, "loss": 1.0343, "step": 254 }, { "epoch": 0.864406779661017, "grad_norm": 0.6548037528991699, "learning_rate": 6.206689456498529e-06, "loss": 1.0545, "step": 255 }, { "epoch": 0.8677966101694915, "grad_norm": 0.6002088785171509, "learning_rate": 6.180389721674101e-06, "loss": 1.0202, "step": 256 }, { "epoch": 0.8711864406779661, "grad_norm": 0.5085415244102478, "learning_rate": 6.1540553556038075e-06, "loss": 1.0072, "step": 257 }, { "epoch": 0.8745762711864407, "grad_norm": 0.5137361288070679, "learning_rate": 6.127687130906972e-06, "loss": 1.0174, "step": 258 }, { "epoch": 0.8779661016949153, "grad_norm": 0.7367336750030518, "learning_rate": 6.101285821196285e-06, "loss": 0.9879, "step": 259 }, { "epoch": 0.8813559322033898, "grad_norm": 0.7270784378051758, "learning_rate": 6.074852201055121e-06, "loss": 0.9795, "step": 260 }, { "epoch": 0.8847457627118644, "grad_norm": 0.6950981020927429, "learning_rate": 6.048387046014795e-06, "loss": 0.9937, "step": 261 }, { "epoch": 0.888135593220339, "grad_norm": 0.4810272753238678, "learning_rate": 6.021891132531825e-06, "loss": 1.0255, "step": 262 }, { "epoch": 0.8915254237288136, "grad_norm": 0.7551162242889404, "learning_rate": 5.995365237965144e-06, "loss": 1.0436, "step": 263 }, { "epoch": 0.8949152542372881, "grad_norm": 0.7334685921669006, "learning_rate": 5.968810140553292e-06, "loss": 1.0115, "step": 264 }, { "epoch": 0.8983050847457628, "grad_norm": 0.6817261576652527, "learning_rate": 5.942226619391592e-06, "loss": 1.0132, "step": 265 }, { "epoch": 0.9016949152542373, "grad_norm": 0.6429016590118408, "learning_rate": 5.915615454409281e-06, "loss": 0.98, "step": 266 }, { "epoch": 0.9050847457627119, "grad_norm": 0.5944652557373047, "learning_rate": 5.888977426346636e-06, "loss": 0.9893, "step": 267 }, { "epoch": 0.9084745762711864, "grad_norm": 0.5805054306983948, "learning_rate": 5.862313316732064e-06, "loss": 0.9879, "step": 268 }, { "epoch": 0.911864406779661, "grad_norm": 0.5468863844871521, "learning_rate": 5.835623907859173e-06, "loss": 1.0164, "step": 269 }, { "epoch": 0.9152542372881356, "grad_norm": 0.5630771517753601, "learning_rate": 5.808909982763825e-06, "loss": 1.0465, "step": 270 }, { "epoch": 0.9186440677966101, "grad_norm": 0.6584744453430176, "learning_rate": 5.782172325201155e-06, "loss": 0.9923, "step": 271 }, { "epoch": 0.9220338983050848, "grad_norm": 0.6021252274513245, "learning_rate": 5.755411719622584e-06, "loss": 1.0068, "step": 272 }, { "epoch": 0.9254237288135593, "grad_norm": 0.6880324482917786, "learning_rate": 5.728628951152799e-06, "loss": 0.9824, "step": 273 }, { "epoch": 0.9288135593220339, "grad_norm": 0.4966793358325958, "learning_rate": 5.701824805566722e-06, "loss": 1.0191, "step": 274 }, { "epoch": 0.9322033898305084, "grad_norm": 0.624777615070343, "learning_rate": 5.675000069266451e-06, "loss": 1.0242, "step": 275 }, { "epoch": 0.9355932203389831, "grad_norm": 0.6443299651145935, "learning_rate": 5.648155529258195e-06, "loss": 1.0128, "step": 276 }, { "epoch": 0.9389830508474576, "grad_norm": 0.5580403804779053, "learning_rate": 5.621291973129177e-06, "loss": 1.0257, "step": 277 }, { "epoch": 0.9423728813559322, "grad_norm": 0.6010347604751587, "learning_rate": 5.594410189024533e-06, "loss": 1.0072, "step": 278 }, { "epoch": 0.9457627118644067, "grad_norm": 0.6926978230476379, "learning_rate": 5.567510965624187e-06, "loss": 1.0112, "step": 279 }, { "epoch": 0.9491525423728814, "grad_norm": 0.5698837041854858, "learning_rate": 5.540595092119709e-06, "loss": 0.9803, "step": 280 }, { "epoch": 0.9525423728813559, "grad_norm": 0.5894476771354675, "learning_rate": 5.513663358191166e-06, "loss": 1.0068, "step": 281 }, { "epoch": 0.9559322033898305, "grad_norm": 0.5834940671920776, "learning_rate": 5.4867165539839505e-06, "loss": 0.9661, "step": 282 }, { "epoch": 0.9593220338983051, "grad_norm": 0.6299657225608826, "learning_rate": 5.459755470085595e-06, "loss": 1.0068, "step": 283 }, { "epoch": 0.9627118644067797, "grad_norm": 0.7412062287330627, "learning_rate": 5.432780897502588e-06, "loss": 1.0291, "step": 284 }, { "epoch": 0.9661016949152542, "grad_norm": 0.5718345046043396, "learning_rate": 5.405793627637157e-06, "loss": 1.0044, "step": 285 }, { "epoch": 0.9694915254237289, "grad_norm": 0.6688190698623657, "learning_rate": 5.378794452264053e-06, "loss": 1.0451, "step": 286 }, { "epoch": 0.9728813559322034, "grad_norm": 0.48207801580429077, "learning_rate": 5.351784163507319e-06, "loss": 1.0157, "step": 287 }, { "epoch": 0.976271186440678, "grad_norm": 0.6743605136871338, "learning_rate": 5.3247635538170536e-06, "loss": 1.0568, "step": 288 }, { "epoch": 0.9796610169491525, "grad_norm": 0.5271384716033936, "learning_rate": 5.297733415946161e-06, "loss": 0.988, "step": 289 }, { "epoch": 0.9830508474576272, "grad_norm": 0.8239924311637878, "learning_rate": 5.270694542927089e-06, "loss": 1.0132, "step": 290 }, { "epoch": 0.9864406779661017, "grad_norm": 0.5944933295249939, "learning_rate": 5.243647728048561e-06, "loss": 0.9985, "step": 291 }, { "epoch": 0.9898305084745763, "grad_norm": 0.7668391466140747, "learning_rate": 5.2165937648323115e-06, "loss": 1.01, "step": 292 }, { "epoch": 0.9932203389830508, "grad_norm": 0.7769994139671326, "learning_rate": 5.189533447009795e-06, "loss": 1.0759, "step": 293 }, { "epoch": 0.9966101694915255, "grad_norm": 0.7191136479377747, "learning_rate": 5.1624675684989035e-06, "loss": 1.0242, "step": 294 }, { "epoch": 1.0, "grad_norm": 0.5792292952537537, "learning_rate": 5.1353969233806735e-06, "loss": 0.9792, "step": 295 }, { "epoch": 1.0033898305084745, "grad_norm": 0.7428760528564453, "learning_rate": 5.108322305875988e-06, "loss": 0.9919, "step": 296 }, { "epoch": 1.0033898305084745, "eval_loss": 1.0072969198226929, "eval_runtime": 31.5304, "eval_samples_per_second": 9.197, "eval_steps_per_second": 1.173, "step": 296 }, { "epoch": 1.006779661016949, "grad_norm": 0.5366787910461426, "learning_rate": 5.0812445103222745e-06, "loss": 0.9889, "step": 297 }, { "epoch": 1.0101694915254238, "grad_norm": 0.6666207313537598, "learning_rate": 5.054164331150199e-06, "loss": 1.0394, "step": 298 }, { "epoch": 1.0135593220338983, "grad_norm": 0.712552547454834, "learning_rate": 5.027082562860368e-06, "loss": 1.0023, "step": 299 }, { "epoch": 1.0169491525423728, "grad_norm": 0.7301045060157776, "learning_rate": 5e-06, "loss": 1.081, "step": 300 }, { "epoch": 1.0203389830508474, "grad_norm": 0.7409658432006836, "learning_rate": 4.972917437139634e-06, "loss": 0.9906, "step": 301 }, { "epoch": 1.023728813559322, "grad_norm": 0.6821278929710388, "learning_rate": 4.945835668849801e-06, "loss": 0.9937, "step": 302 }, { "epoch": 1.0271186440677966, "grad_norm": 0.6895297169685364, "learning_rate": 4.918755489677729e-06, "loss": 0.9867, "step": 303 }, { "epoch": 1.0305084745762711, "grad_norm": 0.8565642833709717, "learning_rate": 4.891677694124013e-06, "loss": 1.0715, "step": 304 }, { "epoch": 1.0033898305084745, "grad_norm": 0.6300548911094666, "learning_rate": 4.864603076619329e-06, "loss": 1.0337, "step": 305 }, { "epoch": 1.006779661016949, "grad_norm": 0.6456012725830078, "learning_rate": 4.837532431501098e-06, "loss": 0.9473, "step": 306 }, { "epoch": 1.0101694915254238, "grad_norm": 0.7895907759666443, "learning_rate": 4.8104665529902075e-06, "loss": 0.9585, "step": 307 }, { "epoch": 1.0135593220338983, "grad_norm": 0.5499475598335266, "learning_rate": 4.783406235167689e-06, "loss": 0.9791, "step": 308 }, { "epoch": 1.0169491525423728, "grad_norm": 0.7060081958770752, "learning_rate": 4.756352271951441e-06, "loss": 0.9473, "step": 309 }, { "epoch": 1.0203389830508474, "grad_norm": 0.6436624526977539, "learning_rate": 4.729305457072913e-06, "loss": 0.954, "step": 310 }, { "epoch": 1.023728813559322, "grad_norm": 0.5318639874458313, "learning_rate": 4.70226658405384e-06, "loss": 1.0073, "step": 311 }, { "epoch": 1.0271186440677966, "grad_norm": 0.6243753433227539, "learning_rate": 4.6752364461829456e-06, "loss": 0.9943, "step": 312 }, { "epoch": 1.0305084745762711, "grad_norm": 0.5925084352493286, "learning_rate": 4.648215836492682e-06, "loss": 0.9736, "step": 313 }, { "epoch": 1.0338983050847457, "grad_norm": 0.5721198916435242, "learning_rate": 4.621205547735949e-06, "loss": 1.0315, "step": 314 }, { "epoch": 1.0372881355932204, "grad_norm": 0.6614224910736084, "learning_rate": 4.594206372362845e-06, "loss": 1.0123, "step": 315 }, { "epoch": 1.040677966101695, "grad_norm": 0.6062408089637756, "learning_rate": 4.567219102497413e-06, "loss": 0.9978, "step": 316 }, { "epoch": 1.0440677966101695, "grad_norm": 0.6464442014694214, "learning_rate": 4.540244529914406e-06, "loss": 0.9826, "step": 317 }, { "epoch": 1.047457627118644, "grad_norm": 0.5992658734321594, "learning_rate": 4.513283446016052e-06, "loss": 1.0148, "step": 318 }, { "epoch": 1.0508474576271187, "grad_norm": 0.6314042210578918, "learning_rate": 4.486336641808835e-06, "loss": 0.9987, "step": 319 }, { "epoch": 1.0542372881355933, "grad_norm": 0.6192302107810974, "learning_rate": 4.459404907880293e-06, "loss": 0.9688, "step": 320 }, { "epoch": 1.0576271186440678, "grad_norm": 0.5361487865447998, "learning_rate": 4.4324890343758134e-06, "loss": 1.0046, "step": 321 }, { "epoch": 1.0610169491525423, "grad_norm": 0.7211394906044006, "learning_rate": 4.4055898109754684e-06, "loss": 0.9851, "step": 322 }, { "epoch": 1.064406779661017, "grad_norm": 0.5352877974510193, "learning_rate": 4.378708026870825e-06, "loss": 0.9618, "step": 323 }, { "epoch": 1.0677966101694916, "grad_norm": 0.6018617153167725, "learning_rate": 4.351844470741808e-06, "loss": 1.0266, "step": 324 }, { "epoch": 1.071186440677966, "grad_norm": 0.6201012134552002, "learning_rate": 4.32499993073355e-06, "loss": 1.0007, "step": 325 }, { "epoch": 1.0745762711864406, "grad_norm": 0.6259652972221375, "learning_rate": 4.298175194433279e-06, "loss": 0.9581, "step": 326 }, { "epoch": 1.0779661016949154, "grad_norm": 0.5345995426177979, "learning_rate": 4.271371048847201e-06, "loss": 1.0333, "step": 327 }, { "epoch": 1.0813559322033899, "grad_norm": 0.7936511039733887, "learning_rate": 4.244588280377417e-06, "loss": 1.0423, "step": 328 }, { "epoch": 1.0847457627118644, "grad_norm": 1.0520579814910889, "learning_rate": 4.217827674798845e-06, "loss": 0.9842, "step": 329 }, { "epoch": 1.088135593220339, "grad_norm": 0.8607239723205566, "learning_rate": 4.191090017236177e-06, "loss": 1.0018, "step": 330 }, { "epoch": 1.0915254237288137, "grad_norm": 0.6893454194068909, "learning_rate": 4.164376092140828e-06, "loss": 1.0463, "step": 331 }, { "epoch": 1.0949152542372882, "grad_norm": 0.781074047088623, "learning_rate": 4.137686683267939e-06, "loss": 0.9685, "step": 332 }, { "epoch": 1.0983050847457627, "grad_norm": 0.6798998117446899, "learning_rate": 4.111022573653366e-06, "loss": 1.0383, "step": 333 }, { "epoch": 1.1016949152542372, "grad_norm": 0.5226808786392212, "learning_rate": 4.0843845455907195e-06, "loss": 1.0229, "step": 334 }, { "epoch": 1.1050847457627118, "grad_norm": 0.7190439701080322, "learning_rate": 4.057773380608411e-06, "loss": 0.9515, "step": 335 }, { "epoch": 1.1084745762711865, "grad_norm": 0.5823246240615845, "learning_rate": 4.0311898594467084e-06, "loss": 0.9663, "step": 336 }, { "epoch": 1.111864406779661, "grad_norm": 0.5484724044799805, "learning_rate": 4.004634762034858e-06, "loss": 1.0038, "step": 337 }, { "epoch": 1.1152542372881356, "grad_norm": 0.6896758079528809, "learning_rate": 3.9781088674681764e-06, "loss": 0.9926, "step": 338 }, { "epoch": 1.11864406779661, "grad_norm": 0.6316563487052917, "learning_rate": 3.951612953985207e-06, "loss": 1.0141, "step": 339 }, { "epoch": 1.1220338983050848, "grad_norm": 0.7392005324363708, "learning_rate": 3.92514779894488e-06, "loss": 0.9543, "step": 340 }, { "epoch": 1.1254237288135593, "grad_norm": 0.5992228984832764, "learning_rate": 3.898714178803716e-06, "loss": 1.0353, "step": 341 }, { "epoch": 1.1288135593220339, "grad_norm": 0.6449342966079712, "learning_rate": 3.87231286909303e-06, "loss": 1.0054, "step": 342 }, { "epoch": 1.1322033898305084, "grad_norm": 0.706109881401062, "learning_rate": 3.845944644396194e-06, "loss": 0.989, "step": 343 }, { "epoch": 1.1355932203389831, "grad_norm": 0.7084924578666687, "learning_rate": 3.8196102783259e-06, "loss": 1.0253, "step": 344 }, { "epoch": 1.1389830508474577, "grad_norm": 0.49735644459724426, "learning_rate": 3.7933105435014727e-06, "loss": 0.9648, "step": 345 }, { "epoch": 1.1423728813559322, "grad_norm": 0.7136998176574707, "learning_rate": 3.767046211526191e-06, "loss": 0.9882, "step": 346 }, { "epoch": 1.1457627118644067, "grad_norm": 0.6371163725852966, "learning_rate": 3.7408180529646597e-06, "loss": 0.9773, "step": 347 }, { "epoch": 1.1491525423728814, "grad_norm": 0.5686894655227661, "learning_rate": 3.7146268373201956e-06, "loss": 0.9334, "step": 348 }, { "epoch": 1.152542372881356, "grad_norm": 0.7179729342460632, "learning_rate": 3.6884733330122583e-06, "loss": 1.0156, "step": 349 }, { "epoch": 1.1559322033898305, "grad_norm": 0.6962030529975891, "learning_rate": 3.662358307353897e-06, "loss": 0.9608, "step": 350 }, { "epoch": 1.159322033898305, "grad_norm": 0.6895543932914734, "learning_rate": 3.6362825265292424e-06, "loss": 1.0191, "step": 351 }, { "epoch": 1.1627118644067798, "grad_norm": 0.6924142241477966, "learning_rate": 3.61024675557103e-06, "loss": 1.0078, "step": 352 }, { "epoch": 1.1661016949152543, "grad_norm": 0.5956156253814697, "learning_rate": 3.584251758338151e-06, "loss": 0.9755, "step": 353 }, { "epoch": 1.1694915254237288, "grad_norm": 0.7389259338378906, "learning_rate": 3.5582982974932467e-06, "loss": 1.0219, "step": 354 }, { "epoch": 1.1728813559322033, "grad_norm": 0.6344757080078125, "learning_rate": 3.532387134480327e-06, "loss": 1.0028, "step": 355 }, { "epoch": 1.1762711864406779, "grad_norm": 0.6760934591293335, "learning_rate": 3.5065190295024334e-06, "loss": 0.9891, "step": 356 }, { "epoch": 1.1796610169491526, "grad_norm": 0.6028621196746826, "learning_rate": 3.4806947414993342e-06, "loss": 1.0029, "step": 357 }, { "epoch": 1.1830508474576271, "grad_norm": 0.6282196044921875, "learning_rate": 3.4549150281252635e-06, "loss": 1.0102, "step": 358 }, { "epoch": 1.1864406779661016, "grad_norm": 0.6155880093574524, "learning_rate": 3.429180645726683e-06, "loss": 1.0118, "step": 359 }, { "epoch": 1.1898305084745764, "grad_norm": 0.6115932464599609, "learning_rate": 3.403492349320101e-06, "loss": 1.0167, "step": 360 }, { "epoch": 1.193220338983051, "grad_norm": 0.6650276184082031, "learning_rate": 3.3778508925699126e-06, "loss": 0.9822, "step": 361 }, { "epoch": 1.1966101694915254, "grad_norm": 0.5415568947792053, "learning_rate": 3.3522570277662986e-06, "loss": 0.9273, "step": 362 }, { "epoch": 1.2, "grad_norm": 0.6332775950431824, "learning_rate": 3.3267115058031418e-06, "loss": 0.9554, "step": 363 }, { "epoch": 1.2033898305084745, "grad_norm": 0.5640422701835632, "learning_rate": 3.3012150761560085e-06, "loss": 1.0298, "step": 364 }, { "epoch": 1.2067796610169492, "grad_norm": 0.6500956416130066, "learning_rate": 3.275768486860149e-06, "loss": 1.0414, "step": 365 }, { "epoch": 1.2101694915254237, "grad_norm": 0.5981314182281494, "learning_rate": 3.250372484488558e-06, "loss": 1.0077, "step": 366 }, { "epoch": 1.2135593220338983, "grad_norm": 0.5373485684394836, "learning_rate": 3.225027814130074e-06, "loss": 0.9927, "step": 367 }, { "epoch": 1.2169491525423728, "grad_norm": 0.666456937789917, "learning_rate": 3.199735219367507e-06, "loss": 1.0094, "step": 368 }, { "epoch": 1.2203389830508475, "grad_norm": 0.6833539009094238, "learning_rate": 3.174495442255836e-06, "loss": 0.9963, "step": 369 }, { "epoch": 1.223728813559322, "grad_norm": 0.716399610042572, "learning_rate": 3.149309223300428e-06, "loss": 0.9595, "step": 370 }, { "epoch": 1.223728813559322, "eval_loss": 0.9998968839645386, "eval_runtime": 31.4549, "eval_samples_per_second": 9.22, "eval_steps_per_second": 1.176, "step": 370 }, { "epoch": 1.2271186440677966, "grad_norm": 0.6079496741294861, "learning_rate": 3.124177301435324e-06, "loss": 1.0654, "step": 371 }, { "epoch": 1.230508474576271, "grad_norm": 0.521492600440979, "learning_rate": 3.09910041400154e-06, "loss": 0.9759, "step": 372 }, { "epoch": 1.2338983050847459, "grad_norm": 0.5222616791725159, "learning_rate": 3.0740792967254606e-06, "loss": 1.026, "step": 373 }, { "epoch": 1.2372881355932204, "grad_norm": 0.6758270263671875, "learning_rate": 3.0491146836972273e-06, "loss": 0.9977, "step": 374 }, { "epoch": 1.240677966101695, "grad_norm": 0.5093096494674683, "learning_rate": 3.0242073073492238e-06, "loss": 0.989, "step": 375 }, { "epoch": 1.2440677966101694, "grad_norm": 0.6237245202064514, "learning_rate": 2.9993578984345673e-06, "loss": 1.0425, "step": 376 }, { "epoch": 1.2474576271186442, "grad_norm": 0.6397922039031982, "learning_rate": 2.974567186005687e-06, "loss": 1.0184, "step": 377 }, { "epoch": 1.2508474576271187, "grad_norm": 0.6754893064498901, "learning_rate": 2.9498358973929197e-06, "loss": 0.9612, "step": 378 }, { "epoch": 1.2542372881355932, "grad_norm": 0.5369151830673218, "learning_rate": 2.925164758183184e-06, "loss": 0.9973, "step": 379 }, { "epoch": 1.2576271186440677, "grad_norm": 0.5667172074317932, "learning_rate": 2.9005544921986774e-06, "loss": 0.996, "step": 380 }, { "epoch": 1.2610169491525425, "grad_norm": 0.6440627574920654, "learning_rate": 2.876005821475657e-06, "loss": 1.0211, "step": 381 }, { "epoch": 1.264406779661017, "grad_norm": 0.6473886370658875, "learning_rate": 2.8515194662432423e-06, "loss": 0.9899, "step": 382 }, { "epoch": 1.2677966101694915, "grad_norm": 0.6933432817459106, "learning_rate": 2.827096144902289e-06, "loss": 1.0256, "step": 383 }, { "epoch": 1.271186440677966, "grad_norm": 0.49265170097351074, "learning_rate": 2.8027365740043188e-06, "loss": 1.004, "step": 384 }, { "epoch": 1.2745762711864406, "grad_norm": 0.7598005533218384, "learning_rate": 2.778441468230483e-06, "loss": 0.9867, "step": 385 }, { "epoch": 1.2779661016949153, "grad_norm": 0.6760120987892151, "learning_rate": 2.7542115403706067e-06, "loss": 0.9729, "step": 386 }, { "epoch": 1.2813559322033898, "grad_norm": 0.6402390599250793, "learning_rate": 2.7300475013022666e-06, "loss": 0.9735, "step": 387 }, { "epoch": 1.2847457627118644, "grad_norm": 0.6378870606422424, "learning_rate": 2.705950059969948e-06, "loss": 0.9638, "step": 388 }, { "epoch": 1.288135593220339, "grad_norm": 0.6482399106025696, "learning_rate": 2.681919923364228e-06, "loss": 1.024, "step": 389 }, { "epoch": 1.2915254237288136, "grad_norm": 0.6057458519935608, "learning_rate": 2.65795779650105e-06, "loss": 0.9725, "step": 390 }, { "epoch": 1.2949152542372881, "grad_norm": 0.7679187655448914, "learning_rate": 2.634064382401025e-06, "loss": 1.0331, "step": 391 }, { "epoch": 1.2983050847457627, "grad_norm": 0.6576074957847595, "learning_rate": 2.610240382068818e-06, "loss": 0.9493, "step": 392 }, { "epoch": 1.3016949152542372, "grad_norm": 0.7018663287162781, "learning_rate": 2.586486494472572e-06, "loss": 0.9663, "step": 393 }, { "epoch": 1.305084745762712, "grad_norm": 0.6951818466186523, "learning_rate": 2.562803416523405e-06, "loss": 0.9909, "step": 394 }, { "epoch": 1.3084745762711865, "grad_norm": 0.64813232421875, "learning_rate": 2.5391918430549635e-06, "loss": 1.0237, "step": 395 }, { "epoch": 1.311864406779661, "grad_norm": 0.5666842460632324, "learning_rate": 2.5156524668030402e-06, "loss": 0.9773, "step": 396 }, { "epoch": 1.3152542372881357, "grad_norm": 0.5688617825508118, "learning_rate": 2.492185978385241e-06, "loss": 1.0121, "step": 397 }, { "epoch": 1.31864406779661, "grad_norm": 0.702538788318634, "learning_rate": 2.46879306628073e-06, "loss": 1.0189, "step": 398 }, { "epoch": 1.3220338983050848, "grad_norm": 0.6049535274505615, "learning_rate": 2.445474416810033e-06, "loss": 1.0312, "step": 399 }, { "epoch": 1.3254237288135593, "grad_norm": 0.7152976989746094, "learning_rate": 2.422230714114891e-06, "loss": 1.0052, "step": 400 }, { "epoch": 1.3288135593220338, "grad_norm": 0.6938731670379639, "learning_rate": 2.399062640138201e-06, "loss": 1.0026, "step": 401 }, { "epoch": 1.3322033898305086, "grad_norm": 0.8566974997520447, "learning_rate": 2.375970874603998e-06, "loss": 1.0209, "step": 402 }, { "epoch": 1.335593220338983, "grad_norm": 0.6485055685043335, "learning_rate": 2.3529560949975184e-06, "loss": 0.9945, "step": 403 }, { "epoch": 1.3389830508474576, "grad_norm": 0.5885186195373535, "learning_rate": 2.3300189765453198e-06, "loss": 1.0213, "step": 404 }, { "epoch": 1.3423728813559321, "grad_norm": 0.799167275428772, "learning_rate": 2.3071601921954797e-06, "loss": 1.0164, "step": 405 }, { "epoch": 1.3457627118644067, "grad_norm": 0.778938889503479, "learning_rate": 2.2843804125978356e-06, "loss": 1.0186, "step": 406 }, { "epoch": 1.3491525423728814, "grad_norm": 0.6615436673164368, "learning_rate": 2.2616803060843283e-06, "loss": 0.922, "step": 407 }, { "epoch": 1.352542372881356, "grad_norm": 0.5986219048500061, "learning_rate": 2.2390605386493758e-06, "loss": 1.0299, "step": 408 }, { "epoch": 1.3559322033898304, "grad_norm": 0.7239742875099182, "learning_rate": 2.216521773930351e-06, "loss": 0.9646, "step": 409 }, { "epoch": 1.3593220338983052, "grad_norm": 0.5689971446990967, "learning_rate": 2.1940646731880887e-06, "loss": 0.9917, "step": 410 }, { "epoch": 1.3627118644067797, "grad_norm": 0.6450715065002441, "learning_rate": 2.1716898952875132e-06, "loss": 0.935, "step": 411 }, { "epoch": 1.3661016949152542, "grad_norm": 0.5985400676727295, "learning_rate": 2.149398096678283e-06, "loss": 0.9601, "step": 412 }, { "epoch": 1.3694915254237288, "grad_norm": 0.5657823085784912, "learning_rate": 2.12718993137555e-06, "loss": 1.0098, "step": 413 }, { "epoch": 1.3728813559322033, "grad_norm": 0.7647202610969543, "learning_rate": 2.105066050940758e-06, "loss": 0.9961, "step": 414 }, { "epoch": 1.376271186440678, "grad_norm": 0.711418628692627, "learning_rate": 2.08302710446253e-06, "loss": 0.9871, "step": 415 }, { "epoch": 1.3796610169491526, "grad_norm": 0.6339724063873291, "learning_rate": 2.061073738537635e-06, "loss": 0.9975, "step": 416 }, { "epoch": 1.383050847457627, "grad_norm": 0.7132736444473267, "learning_rate": 2.0392065972520008e-06, "loss": 1.0405, "step": 417 }, { "epoch": 1.3864406779661018, "grad_norm": 0.7711553573608398, "learning_rate": 2.0174263221618307e-06, "loss": 1.0732, "step": 418 }, { "epoch": 1.3898305084745763, "grad_norm": 0.6596791744232178, "learning_rate": 1.9957335522747707e-06, "loss": 1.0162, "step": 419 }, { "epoch": 1.3932203389830509, "grad_norm": 0.6543793082237244, "learning_rate": 1.9741289240311757e-06, "loss": 0.9685, "step": 420 }, { "epoch": 1.3966101694915254, "grad_norm": 0.5620580911636353, "learning_rate": 1.9526130712854186e-06, "loss": 1.0035, "step": 421 }, { "epoch": 1.4, "grad_norm": 0.6134817004203796, "learning_rate": 1.931186625287313e-06, "loss": 1.0138, "step": 422 }, { "epoch": 1.4033898305084747, "grad_norm": 0.5695359706878662, "learning_rate": 1.909850214663575e-06, "loss": 0.993, "step": 423 }, { "epoch": 1.4067796610169492, "grad_norm": 0.6079448461532593, "learning_rate": 1.8886044653993968e-06, "loss": 1.0357, "step": 424 }, { "epoch": 1.4101694915254237, "grad_norm": 0.540916383266449, "learning_rate": 1.8674500008200675e-06, "loss": 0.9811, "step": 425 }, { "epoch": 1.4135593220338982, "grad_norm": 0.604753851890564, "learning_rate": 1.8463874415726918e-06, "loss": 0.9735, "step": 426 }, { "epoch": 1.4169491525423727, "grad_norm": 0.7127838730812073, "learning_rate": 1.8254174056079798e-06, "loss": 1.018, "step": 427 }, { "epoch": 1.4203389830508475, "grad_norm": 0.723049521446228, "learning_rate": 1.8045405081621215e-06, "loss": 0.9786, "step": 428 }, { "epoch": 1.423728813559322, "grad_norm": 0.4959903955459595, "learning_rate": 1.7837573617387266e-06, "loss": 0.9938, "step": 429 }, { "epoch": 1.4271186440677965, "grad_norm": 0.7217829823493958, "learning_rate": 1.7630685760908623e-06, "loss": 1.0038, "step": 430 }, { "epoch": 1.4305084745762713, "grad_norm": 0.6313320994377136, "learning_rate": 1.7424747582031638e-06, "loss": 0.9668, "step": 431 }, { "epoch": 1.4338983050847458, "grad_norm": 0.608250617980957, "learning_rate": 1.7219765122740202e-06, "loss": 1.0189, "step": 432 }, { "epoch": 1.4372881355932203, "grad_norm": 0.7577348947525024, "learning_rate": 1.7015744396978557e-06, "loss": 1.0339, "step": 433 }, { "epoch": 1.4406779661016949, "grad_norm": 0.7023765444755554, "learning_rate": 1.6812691390474788e-06, "loss": 1.0342, "step": 434 }, { "epoch": 1.4440677966101694, "grad_norm": 0.6080233454704285, "learning_rate": 1.6610612060565235e-06, "loss": 0.992, "step": 435 }, { "epoch": 1.4474576271186441, "grad_norm": 0.8760459423065186, "learning_rate": 1.64095123360197e-06, "loss": 0.9686, "step": 436 }, { "epoch": 1.4508474576271186, "grad_norm": 0.736131489276886, "learning_rate": 1.6209398116867575e-06, "loss": 1.0133, "step": 437 }, { "epoch": 1.4542372881355932, "grad_norm": 0.7509693503379822, "learning_rate": 1.6010275274224607e-06, "loss": 1.0223, "step": 438 }, { "epoch": 1.457627118644068, "grad_norm": 0.5902605056762695, "learning_rate": 1.5812149650120784e-06, "loss": 1.0167, "step": 439 }, { "epoch": 1.4610169491525424, "grad_norm": 0.5759404897689819, "learning_rate": 1.561502705732883e-06, "loss": 0.9995, "step": 440 }, { "epoch": 1.464406779661017, "grad_norm": 0.758961021900177, "learning_rate": 1.5418913279193748e-06, "loss": 0.9975, "step": 441 }, { "epoch": 1.4677966101694915, "grad_norm": 0.5977769494056702, "learning_rate": 1.5223814069463077e-06, "loss": 1.0058, "step": 442 }, { "epoch": 1.471186440677966, "grad_norm": 0.6775996088981628, "learning_rate": 1.5029735152118125e-06, "loss": 1.0012, "step": 443 }, { "epoch": 1.4745762711864407, "grad_norm": 0.7276020646095276, "learning_rate": 1.4836682221206e-06, "loss": 0.954, "step": 444 }, { "epoch": 1.4745762711864407, "eval_loss": 0.9963757395744324, "eval_runtime": 31.6058, "eval_samples_per_second": 9.176, "eval_steps_per_second": 1.171, "step": 444 }, { "epoch": 1.4779661016949153, "grad_norm": 0.7590934038162231, "learning_rate": 1.4644660940672628e-06, "loss": 1.0088, "step": 445 }, { "epoch": 1.4813559322033898, "grad_norm": 0.6979360580444336, "learning_rate": 1.4453676944196477e-06, "loss": 0.9666, "step": 446 }, { "epoch": 1.4847457627118645, "grad_norm": 0.6411654353141785, "learning_rate": 1.4263735835023318e-06, "loss": 1.0149, "step": 447 }, { "epoch": 1.488135593220339, "grad_norm": 0.713005006313324, "learning_rate": 1.4074843185801885e-06, "loss": 1.0251, "step": 448 }, { "epoch": 1.4915254237288136, "grad_norm": 0.5988379120826721, "learning_rate": 1.388700453842029e-06, "loss": 0.9815, "step": 449 }, { "epoch": 1.494915254237288, "grad_norm": 0.6960732936859131, "learning_rate": 1.370022540384347e-06, "loss": 1.0176, "step": 450 }, { "epoch": 1.4983050847457626, "grad_norm": 0.6121718287467957, "learning_rate": 1.3514511261951514e-06, "loss": 1.0086, "step": 451 }, { "epoch": 1.5016949152542374, "grad_norm": 0.6415910720825195, "learning_rate": 1.332986756137889e-06, "loss": 1.0276, "step": 452 }, { "epoch": 1.505084745762712, "grad_norm": 0.7554261684417725, "learning_rate": 1.3146299719354544e-06, "loss": 0.9857, "step": 453 }, { "epoch": 1.5084745762711864, "grad_norm": 0.737715482711792, "learning_rate": 1.296381312154305e-06, "loss": 1.0135, "step": 454 }, { "epoch": 1.5118644067796612, "grad_norm": 0.6117502450942993, "learning_rate": 1.2782413121886483e-06, "loss": 1.0216, "step": 455 }, { "epoch": 1.5152542372881355, "grad_norm": 0.5583542585372925, "learning_rate": 1.2602105042447472e-06, "loss": 0.9898, "step": 456 }, { "epoch": 1.5186440677966102, "grad_norm": 0.723007321357727, "learning_rate": 1.2422894173252937e-06, "loss": 0.9926, "step": 457 }, { "epoch": 1.5220338983050847, "grad_norm": 0.7295446991920471, "learning_rate": 1.2244785772138972e-06, "loss": 1.0315, "step": 458 }, { "epoch": 1.5254237288135593, "grad_norm": 0.7366341948509216, "learning_rate": 1.2067785064596532e-06, "loss": 1.0289, "step": 459 }, { "epoch": 1.528813559322034, "grad_norm": 0.606618344783783, "learning_rate": 1.1891897243618184e-06, "loss": 1.0174, "step": 460 }, { "epoch": 1.5322033898305085, "grad_norm": 0.5922709703445435, "learning_rate": 1.171712746954566e-06, "loss": 0.988, "step": 461 }, { "epoch": 1.535593220338983, "grad_norm": 0.5724343657493591, "learning_rate": 1.1543480869918555e-06, "loss": 0.9473, "step": 462 }, { "epoch": 1.5389830508474578, "grad_norm": 0.5893242359161377, "learning_rate": 1.1370962539323837e-06, "loss": 0.9924, "step": 463 }, { "epoch": 1.542372881355932, "grad_norm": 0.7837064862251282, "learning_rate": 1.1199577539246348e-06, "loss": 0.985, "step": 464 }, { "epoch": 1.5457627118644068, "grad_norm": 0.6561700701713562, "learning_rate": 1.102933089792042e-06, "loss": 0.9693, "step": 465 }, { "epoch": 1.5491525423728814, "grad_norm": 0.7721257209777832, "learning_rate": 1.0860227610182222e-06, "loss": 0.96, "step": 466 }, { "epoch": 1.5525423728813559, "grad_norm": 0.651480495929718, "learning_rate": 1.0692272637323281e-06, "loss": 1.0225, "step": 467 }, { "epoch": 1.5559322033898306, "grad_norm": 0.6150879859924316, "learning_rate": 1.0525470906944919e-06, "loss": 1.0227, "step": 468 }, { "epoch": 1.559322033898305, "grad_norm": 0.5740178823471069, "learning_rate": 1.0359827312813702e-06, "loss": 0.9839, "step": 469 }, { "epoch": 1.5627118644067797, "grad_norm": 0.8018378019332886, "learning_rate": 1.0195346714717813e-06, "loss": 0.9557, "step": 470 }, { "epoch": 1.5661016949152542, "grad_norm": 0.8956387639045715, "learning_rate": 1.0032033938324527e-06, "loss": 1.0126, "step": 471 }, { "epoch": 1.5694915254237287, "grad_norm": 0.7544394135475159, "learning_rate": 9.869893775038558e-07, "loss": 0.9709, "step": 472 }, { "epoch": 1.5728813559322035, "grad_norm": 0.7016942501068115, "learning_rate": 9.708930981861603e-07, "loss": 1.0269, "step": 473 }, { "epoch": 1.576271186440678, "grad_norm": 0.8289680480957031, "learning_rate": 9.549150281252633e-07, "loss": 1.0506, "step": 474 }, { "epoch": 1.5796610169491525, "grad_norm": 0.63932865858078, "learning_rate": 9.39055636098945e-07, "loss": 1.054, "step": 475 }, { "epoch": 1.5830508474576273, "grad_norm": 0.6578360199928284, "learning_rate": 9.233153874031103e-07, "loss": 1.0128, "step": 476 }, { "epoch": 1.5864406779661016, "grad_norm": 0.6743602752685547, "learning_rate": 9.076947438381411e-07, "loss": 0.9818, "step": 477 }, { "epoch": 1.5898305084745763, "grad_norm": 0.5281490683555603, "learning_rate": 8.921941636953435e-07, "loss": 0.9995, "step": 478 }, { "epoch": 1.5932203389830508, "grad_norm": 0.6595844626426697, "learning_rate": 8.768141017435033e-07, "loss": 1.0524, "step": 479 }, { "epoch": 1.5966101694915253, "grad_norm": 0.6927159428596497, "learning_rate": 8.615550092155478e-07, "loss": 1.0238, "step": 480 }, { "epoch": 1.6, "grad_norm": 0.599197268486023, "learning_rate": 8.464173337952991e-07, "loss": 0.9844, "step": 481 }, { "epoch": 1.6033898305084746, "grad_norm": 0.6616894006729126, "learning_rate": 8.314015196043501e-07, "loss": 1.0204, "step": 482 }, { "epoch": 1.6067796610169491, "grad_norm": 0.5358266830444336, "learning_rate": 8.165080071890208e-07, "loss": 0.9899, "step": 483 }, { "epoch": 1.6101694915254239, "grad_norm": 0.4989199638366699, "learning_rate": 8.017372335074486e-07, "loss": 0.992, "step": 484 }, { "epoch": 1.6135593220338982, "grad_norm": 0.803619921207428, "learning_rate": 7.870896319167548e-07, "loss": 0.9961, "step": 485 }, { "epoch": 1.616949152542373, "grad_norm": 0.7678002119064331, "learning_rate": 7.725656321603414e-07, "loss": 0.9701, "step": 486 }, { "epoch": 1.6203389830508474, "grad_norm": 0.6170638799667358, "learning_rate": 7.581656603552745e-07, "loss": 1.0242, "step": 487 }, { "epoch": 1.623728813559322, "grad_norm": 0.6519885063171387, "learning_rate": 7.438901389797881e-07, "loss": 1.0362, "step": 488 }, { "epoch": 1.6271186440677967, "grad_norm": 0.6771361231803894, "learning_rate": 7.297394868608859e-07, "loss": 0.9726, "step": 489 }, { "epoch": 1.6305084745762712, "grad_norm": 0.6116876602172852, "learning_rate": 7.157141191620548e-07, "loss": 1.0192, "step": 490 }, { "epoch": 1.6338983050847458, "grad_norm": 0.6291380524635315, "learning_rate": 7.018144473710825e-07, "loss": 1.0201, "step": 491 }, { "epoch": 1.6372881355932203, "grad_norm": 0.6927295923233032, "learning_rate": 6.880408792879905e-07, "loss": 0.9699, "step": 492 }, { "epoch": 1.6406779661016948, "grad_norm": 0.5858490467071533, "learning_rate": 6.743938190130616e-07, "loss": 0.9487, "step": 493 }, { "epoch": 1.6440677966101696, "grad_norm": 0.6488326787948608, "learning_rate": 6.60873666934993e-07, "loss": 0.9852, "step": 494 }, { "epoch": 1.647457627118644, "grad_norm": 0.5244084596633911, "learning_rate": 6.474808197191401e-07, "loss": 0.9579, "step": 495 }, { "epoch": 1.6508474576271186, "grad_norm": 0.6233291625976562, "learning_rate": 6.342156702958851e-07, "loss": 0.9717, "step": 496 }, { "epoch": 1.6542372881355933, "grad_norm": 0.5940622687339783, "learning_rate": 6.210786078491088e-07, "loss": 0.9849, "step": 497 }, { "epoch": 1.6576271186440676, "grad_norm": 0.8331617116928101, "learning_rate": 6.080700178047688e-07, "loss": 1.0039, "step": 498 }, { "epoch": 1.6610169491525424, "grad_norm": 0.7853448987007141, "learning_rate": 5.951902818195937e-07, "loss": 1.0566, "step": 499 }, { "epoch": 1.664406779661017, "grad_norm": 0.920109212398529, "learning_rate": 5.824397777698859e-07, "loss": 0.9812, "step": 500 }, { "epoch": 1.6677966101694914, "grad_norm": 0.5814661979675293, "learning_rate": 5.698188797404358e-07, "loss": 0.9987, "step": 501 }, { "epoch": 1.6711864406779662, "grad_norm": 0.654880940914154, "learning_rate": 5.573279580135438e-07, "loss": 1.0066, "step": 502 }, { "epoch": 1.6745762711864407, "grad_norm": 0.6218942403793335, "learning_rate": 5.449673790581611e-07, "loss": 1.0203, "step": 503 }, { "epoch": 1.6779661016949152, "grad_norm": 0.7496787309646606, "learning_rate": 5.327375055191313e-07, "loss": 0.9407, "step": 504 }, { "epoch": 1.68135593220339, "grad_norm": 0.7124380469322205, "learning_rate": 5.206386962065601e-07, "loss": 1.0253, "step": 505 }, { "epoch": 1.6847457627118643, "grad_norm": 0.58067387342453, "learning_rate": 5.086713060852788e-07, "loss": 0.9541, "step": 506 }, { "epoch": 1.688135593220339, "grad_norm": 0.7176159620285034, "learning_rate": 4.968356862644352e-07, "loss": 1.0041, "step": 507 }, { "epoch": 1.6915254237288135, "grad_norm": 0.6429619789123535, "learning_rate": 4.851321839871908e-07, "loss": 0.9693, "step": 508 }, { "epoch": 1.694915254237288, "grad_norm": 0.6526856422424316, "learning_rate": 4.735611426205372e-07, "loss": 1.029, "step": 509 }, { "epoch": 1.6983050847457628, "grad_norm": 0.6652457118034363, "learning_rate": 4.6212290164521554e-07, "loss": 0.9892, "step": 510 }, { "epoch": 1.7016949152542373, "grad_norm": 0.578776478767395, "learning_rate": 4.5081779664575887e-07, "loss": 1.0041, "step": 511 }, { "epoch": 1.7050847457627119, "grad_norm": 0.5799604654312134, "learning_rate": 4.3964615930065126e-07, "loss": 1.0018, "step": 512 }, { "epoch": 1.7084745762711866, "grad_norm": 0.6132419109344482, "learning_rate": 4.2860831737258857e-07, "loss": 1.0143, "step": 513 }, { "epoch": 1.711864406779661, "grad_norm": 0.5185167193412781, "learning_rate": 4.1770459469887003e-07, "loss": 0.9891, "step": 514 }, { "epoch": 1.7152542372881356, "grad_norm": 0.6948317885398865, "learning_rate": 4.069353111818913e-07, "loss": 0.974, "step": 515 }, { "epoch": 1.7186440677966102, "grad_norm": 0.7650367021560669, "learning_rate": 3.963007827797627e-07, "loss": 1.0522, "step": 516 }, { "epoch": 1.7220338983050847, "grad_norm": 0.61717689037323, "learning_rate": 3.858013214970363e-07, "loss": 0.9634, "step": 517 }, { "epoch": 1.7254237288135594, "grad_norm": 0.6066478490829468, "learning_rate": 3.754372353755559e-07, "loss": 1.0132, "step": 518 }, { "epoch": 1.7254237288135594, "eval_loss": 0.9942722916603088, "eval_runtime": 31.566, "eval_samples_per_second": 9.187, "eval_steps_per_second": 1.172, "step": 518 }, { "epoch": 1.7288135593220337, "grad_norm": 0.5332470536231995, "learning_rate": 3.6520882848541606e-07, "loss": 0.9421, "step": 519 }, { "epoch": 1.7322033898305085, "grad_norm": 0.6434182524681091, "learning_rate": 3.5511640091604293e-07, "loss": 0.983, "step": 520 }, { "epoch": 1.735593220338983, "grad_norm": 0.5391385555267334, "learning_rate": 3.451602487673889e-07, "loss": 0.9888, "step": 521 }, { "epoch": 1.7389830508474575, "grad_norm": 0.7483072876930237, "learning_rate": 3.35340664141246e-07, "loss": 1.0695, "step": 522 }, { "epoch": 1.7423728813559323, "grad_norm": 0.6904388666152954, "learning_rate": 3.256579351326744e-07, "loss": 0.964, "step": 523 }, { "epoch": 1.7457627118644068, "grad_norm": 0.6558634638786316, "learning_rate": 3.161123458215554e-07, "loss": 0.9782, "step": 524 }, { "epoch": 1.7491525423728813, "grad_norm": 0.5622076392173767, "learning_rate": 3.067041762642475e-07, "loss": 0.9756, "step": 525 }, { "epoch": 1.752542372881356, "grad_norm": 0.5735416412353516, "learning_rate": 2.974337024853802e-07, "loss": 1.0201, "step": 526 }, { "epoch": 1.7559322033898304, "grad_norm": 0.6704559922218323, "learning_rate": 2.8830119646974796e-07, "loss": 0.9783, "step": 527 }, { "epoch": 1.759322033898305, "grad_norm": 0.8068829774856567, "learning_rate": 2.7930692615433353e-07, "loss": 0.9855, "step": 528 }, { "epoch": 1.7627118644067796, "grad_norm": 0.6776360273361206, "learning_rate": 2.704511554204486e-07, "loss": 1.0272, "step": 529 }, { "epoch": 1.7661016949152541, "grad_norm": 0.6782450079917908, "learning_rate": 2.617341440859883e-07, "loss": 0.9517, "step": 530 }, { "epoch": 1.769491525423729, "grad_norm": 0.6880106329917908, "learning_rate": 2.5315614789781064e-07, "loss": 0.9797, "step": 531 }, { "epoch": 1.7728813559322034, "grad_norm": 0.6763340830802917, "learning_rate": 2.447174185242324e-07, "loss": 1.0141, "step": 532 }, { "epoch": 1.776271186440678, "grad_norm": 0.5976564288139343, "learning_rate": 2.3641820354764755e-07, "loss": 0.9757, "step": 533 }, { "epoch": 1.7796610169491527, "grad_norm": 0.7546095252037048, "learning_rate": 2.2825874645725942e-07, "loss": 0.9813, "step": 534 }, { "epoch": 1.783050847457627, "grad_norm": 0.7561364769935608, "learning_rate": 2.2023928664194229e-07, "loss": 0.9373, "step": 535 }, { "epoch": 1.7864406779661017, "grad_norm": 0.6165825724601746, "learning_rate": 2.1236005938321092e-07, "loss": 1.0318, "step": 536 }, { "epoch": 1.7898305084745763, "grad_norm": 0.8413949608802795, "learning_rate": 2.046212958483268e-07, "loss": 1.0708, "step": 537 }, { "epoch": 1.7932203389830508, "grad_norm": 0.6189547181129456, "learning_rate": 1.9702322308350675e-07, "loss": 0.9673, "step": 538 }, { "epoch": 1.7966101694915255, "grad_norm": 0.8398978114128113, "learning_rate": 1.895660640072683e-07, "loss": 1.0174, "step": 539 }, { "epoch": 1.8, "grad_norm": 0.6388535499572754, "learning_rate": 1.8225003740388546e-07, "loss": 0.9457, "step": 540 }, { "epoch": 1.8033898305084746, "grad_norm": 0.599389374256134, "learning_rate": 1.7507535791697338e-07, "loss": 0.9797, "step": 541 }, { "epoch": 1.8067796610169493, "grad_norm": 0.7360069751739502, "learning_rate": 1.6804223604318825e-07, "loss": 1.0202, "step": 542 }, { "epoch": 1.8101694915254236, "grad_norm": 0.5618528723716736, "learning_rate": 1.6115087812605123e-07, "loss": 0.9735, "step": 543 }, { "epoch": 1.8135593220338984, "grad_norm": 0.8250850439071655, "learning_rate": 1.5440148634989827e-07, "loss": 1.0022, "step": 544 }, { "epoch": 1.8169491525423729, "grad_norm": 0.6865666508674622, "learning_rate": 1.477942587339426e-07, "loss": 1.0159, "step": 545 }, { "epoch": 1.8203389830508474, "grad_norm": 0.7297092080116272, "learning_rate": 1.413293891264722e-07, "loss": 1.0056, "step": 546 }, { "epoch": 1.8237288135593221, "grad_norm": 0.7617512345314026, "learning_rate": 1.350070671991549e-07, "loss": 0.9938, "step": 547 }, { "epoch": 1.8271186440677964, "grad_norm": 0.5772307515144348, "learning_rate": 1.2882747844147893e-07, "loss": 1.0221, "step": 548 }, { "epoch": 1.8305084745762712, "grad_norm": 0.7228500247001648, "learning_rate": 1.2279080415530832e-07, "loss": 1.0242, "step": 549 }, { "epoch": 1.8338983050847457, "grad_norm": 0.6391293406486511, "learning_rate": 1.1689722144956672e-07, "loss": 0.9978, "step": 550 }, { "epoch": 1.8372881355932202, "grad_norm": 0.768792986869812, "learning_rate": 1.1114690323503652e-07, "loss": 1.0402, "step": 551 }, { "epoch": 1.840677966101695, "grad_norm": 0.5624731779098511, "learning_rate": 1.0554001821929061e-07, "loss": 1.003, "step": 552 }, { "epoch": 1.8440677966101695, "grad_norm": 0.6495040655136108, "learning_rate": 1.0007673090173808e-07, "loss": 1.0152, "step": 553 }, { "epoch": 1.847457627118644, "grad_norm": 0.7147095203399658, "learning_rate": 9.475720156880419e-08, "loss": 0.9971, "step": 554 }, { "epoch": 1.8508474576271188, "grad_norm": 0.7212803959846497, "learning_rate": 8.95815862892202e-08, "loss": 0.9694, "step": 555 }, { "epoch": 1.854237288135593, "grad_norm": 0.6671217679977417, "learning_rate": 8.45500369094504e-08, "loss": 0.9383, "step": 556 }, { "epoch": 1.8576271186440678, "grad_norm": 0.6936548948287964, "learning_rate": 7.966270104923457e-08, "loss": 0.9831, "step": 557 }, { "epoch": 1.8610169491525423, "grad_norm": 0.763325572013855, "learning_rate": 7.491972209725807e-08, "loss": 0.9723, "step": 558 }, { "epoch": 1.8644067796610169, "grad_norm": 0.5692989230155945, "learning_rate": 7.032123920694356e-08, "loss": 0.9671, "step": 559 }, { "epoch": 1.8677966101694916, "grad_norm": 0.7116250991821289, "learning_rate": 6.58673872923693e-08, "loss": 1.0206, "step": 560 }, { "epoch": 1.8711864406779661, "grad_norm": 0.7291508316993713, "learning_rate": 6.15582970243117e-08, "loss": 1.0373, "step": 561 }, { "epoch": 1.8745762711864407, "grad_norm": 0.62852543592453, "learning_rate": 5.739409482640956e-08, "loss": 0.9715, "step": 562 }, { "epoch": 1.8779661016949154, "grad_norm": 0.5750467777252197, "learning_rate": 5.3374902871456965e-08, "loss": 0.9895, "step": 563 }, { "epoch": 1.8813559322033897, "grad_norm": 0.7081973552703857, "learning_rate": 4.950083907781733e-08, "loss": 0.9947, "step": 564 }, { "epoch": 1.8847457627118644, "grad_norm": 0.7674915790557861, "learning_rate": 4.577201710596613e-08, "loss": 1.0153, "step": 565 }, { "epoch": 1.888135593220339, "grad_norm": 0.6501796841621399, "learning_rate": 4.2188546355153016e-08, "loss": 1.0088, "step": 566 }, { "epoch": 1.8915254237288135, "grad_norm": 0.7513859868049622, "learning_rate": 3.8750531960194405e-08, "loss": 1.0003, "step": 567 }, { "epoch": 1.8949152542372882, "grad_norm": 0.7440161108970642, "learning_rate": 3.5458074788387585e-08, "loss": 0.9836, "step": 568 }, { "epoch": 1.8983050847457628, "grad_norm": 0.5496635437011719, "learning_rate": 3.231127143655422e-08, "loss": 1.007, "step": 569 }, { "epoch": 1.9016949152542373, "grad_norm": 0.6116195917129517, "learning_rate": 2.9310214228202016e-08, "loss": 0.9547, "step": 570 }, { "epoch": 1.905084745762712, "grad_norm": 0.6816879510879517, "learning_rate": 2.645499121081918e-08, "loss": 0.9336, "step": 571 }, { "epoch": 1.9084745762711863, "grad_norm": 0.7093566060066223, "learning_rate": 2.3745686153290314e-08, "loss": 0.9822, "step": 572 }, { "epoch": 1.911864406779661, "grad_norm": 0.6873040199279785, "learning_rate": 2.1182378543438408e-08, "loss": 0.9721, "step": 573 }, { "epoch": 1.9152542372881356, "grad_norm": 0.7039247751235962, "learning_rate": 1.8765143585693924e-08, "loss": 0.9994, "step": 574 }, { "epoch": 1.9186440677966101, "grad_norm": 0.6063643097877502, "learning_rate": 1.6494052198886557e-08, "loss": 0.9976, "step": 575 }, { "epoch": 1.9220338983050849, "grad_norm": 0.6409500241279602, "learning_rate": 1.4369171014165795e-08, "loss": 1.0173, "step": 576 }, { "epoch": 1.9254237288135592, "grad_norm": 0.6052128672599792, "learning_rate": 1.2390562373046367e-08, "loss": 1.0256, "step": 577 }, { "epoch": 1.928813559322034, "grad_norm": 0.6699828505516052, "learning_rate": 1.0558284325578038e-08, "loss": 0.9708, "step": 578 }, { "epoch": 1.9322033898305084, "grad_norm": 0.6533128023147583, "learning_rate": 8.872390628643645e-09, "loss": 1.0471, "step": 579 }, { "epoch": 1.935593220338983, "grad_norm": 0.7308453917503357, "learning_rate": 7.332930744380906e-09, "loss": 0.9762, "step": 580 }, { "epoch": 1.9389830508474577, "grad_norm": 0.6827077865600586, "learning_rate": 5.939949838731363e-09, "loss": 1.0043, "step": 581 }, { "epoch": 1.9423728813559322, "grad_norm": 0.5866710543632507, "learning_rate": 4.69348878011644e-09, "loss": 0.9443, "step": 582 }, { "epoch": 1.9457627118644067, "grad_norm": 0.6618573069572449, "learning_rate": 3.593584138237294e-09, "loss": 0.9786, "step": 583 }, { "epoch": 1.9491525423728815, "grad_norm": 0.5637463331222534, "learning_rate": 2.640268183002337e-09, "loss": 1.0212, "step": 584 }, { "epoch": 1.9525423728813558, "grad_norm": 0.6369325518608093, "learning_rate": 1.8335688835802169e-09, "loss": 0.9553, "step": 585 }, { "epoch": 1.9559322033898305, "grad_norm": 0.6241326928138733, "learning_rate": 1.173509907579362e-09, "loss": 0.961, "step": 586 }, { "epoch": 1.959322033898305, "grad_norm": 0.5839018225669861, "learning_rate": 6.601106203535379e-10, "loss": 0.9988, "step": 587 }, { "epoch": 1.9627118644067796, "grad_norm": 0.668761134147644, "learning_rate": 2.9338608443452154e-10, "loss": 1.0126, "step": 588 }, { "epoch": 1.9661016949152543, "grad_norm": 0.595515787601471, "learning_rate": 7.334705908745854e-11, "loss": 1.0217, "step": 589 }, { "epoch": 1.9694915254237289, "grad_norm": 0.6760256886482239, "learning_rate": 0.0, "loss": 1.0221, "step": 590 } ], "logging_steps": 1, "max_steps": 590, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 148, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.26368805808767e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }