{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.5050505050505051, "eval_steps": 38, "global_step": 150, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.003367003367003367, "grad_norm": 2.0, "learning_rate": 3.5714285714285716e-07, "loss": 1.0833, "step": 1 }, { "epoch": 0.003367003367003367, "eval_loss": 1.032954454421997, "eval_runtime": 8.7744, "eval_samples_per_second": 56.984, "eval_steps_per_second": 3.647, "step": 1 }, { "epoch": 0.006734006734006734, "grad_norm": 2.265625, "learning_rate": 7.142857142857143e-07, "loss": 0.996, "step": 2 }, { "epoch": 0.010101010101010102, "grad_norm": 1.640625, "learning_rate": 1.0714285714285714e-06, "loss": 1.0487, "step": 3 }, { "epoch": 0.013468013468013467, "grad_norm": 2.046875, "learning_rate": 1.4285714285714286e-06, "loss": 1.0553, "step": 4 }, { "epoch": 0.016835016835016835, "grad_norm": 2.453125, "learning_rate": 1.7857142857142859e-06, "loss": 1.0339, "step": 5 }, { "epoch": 0.020202020202020204, "grad_norm": 1.984375, "learning_rate": 2.1428571428571427e-06, "loss": 1.014, "step": 6 }, { "epoch": 0.02356902356902357, "grad_norm": 2.609375, "learning_rate": 2.5e-06, "loss": 1.0298, "step": 7 }, { "epoch": 0.026936026936026935, "grad_norm": 2.328125, "learning_rate": 2.8571428571428573e-06, "loss": 1.0226, "step": 8 }, { "epoch": 0.030303030303030304, "grad_norm": 2.25, "learning_rate": 3.2142857142857147e-06, "loss": 1.0739, "step": 9 }, { "epoch": 0.03367003367003367, "grad_norm": 2.28125, "learning_rate": 3.5714285714285718e-06, "loss": 1.0224, "step": 10 }, { "epoch": 0.037037037037037035, "grad_norm": 2.4375, "learning_rate": 3.928571428571429e-06, "loss": 1.0089, "step": 11 }, { "epoch": 0.04040404040404041, "grad_norm": 2.03125, "learning_rate": 4.2857142857142855e-06, "loss": 1.0529, "step": 12 }, { "epoch": 0.04377104377104377, "grad_norm": 2.03125, "learning_rate": 4.642857142857144e-06, "loss": 1.068, "step": 13 }, { "epoch": 0.04713804713804714, "grad_norm": 1.7265625, "learning_rate": 5e-06, "loss": 1.0318, "step": 14 }, { "epoch": 0.050505050505050504, "grad_norm": 1.8359375, "learning_rate": 4.9998459603839726e-06, "loss": 1.0439, "step": 15 }, { "epoch": 0.05387205387205387, "grad_norm": 2.15625, "learning_rate": 4.9993838605184505e-06, "loss": 1.0647, "step": 16 }, { "epoch": 0.05723905723905724, "grad_norm": 1.6015625, "learning_rate": 4.998613757348784e-06, "loss": 1.0081, "step": 17 }, { "epoch": 0.06060606060606061, "grad_norm": 1.65625, "learning_rate": 4.99753574577609e-06, "loss": 1.0098, "step": 18 }, { "epoch": 0.06397306397306397, "grad_norm": 1.4140625, "learning_rate": 4.996149958645559e-06, "loss": 0.9822, "step": 19 }, { "epoch": 0.06734006734006734, "grad_norm": 1.375, "learning_rate": 4.994456566730085e-06, "loss": 1.0474, "step": 20 }, { "epoch": 0.0707070707070707, "grad_norm": 1.2265625, "learning_rate": 4.992455778709222e-06, "loss": 1.0421, "step": 21 }, { "epoch": 0.07407407407407407, "grad_norm": 1.4609375, "learning_rate": 4.990147841143462e-06, "loss": 0.9845, "step": 22 }, { "epoch": 0.07744107744107744, "grad_norm": 1.2421875, "learning_rate": 4.98753303844386e-06, "loss": 0.9773, "step": 23 }, { "epoch": 0.08080808080808081, "grad_norm": 1.4296875, "learning_rate": 4.984611692836979e-06, "loss": 1.0511, "step": 24 }, { "epoch": 0.08417508417508418, "grad_norm": 1.1875, "learning_rate": 4.981384164325184e-06, "loss": 0.9915, "step": 25 }, { "epoch": 0.08754208754208755, "grad_norm": 1.15625, "learning_rate": 4.977850850642275e-06, "loss": 0.9881, "step": 26 }, { "epoch": 0.09090909090909091, "grad_norm": 1.296875, "learning_rate": 4.97401218720448e-06, "loss": 1.0623, "step": 27 }, { "epoch": 0.09427609427609428, "grad_norm": 1.2734375, "learning_rate": 4.969868647056793e-06, "loss": 1.0282, "step": 28 }, { "epoch": 0.09764309764309764, "grad_norm": 1.125, "learning_rate": 4.965420740814679e-06, "loss": 1.0122, "step": 29 }, { "epoch": 0.10101010101010101, "grad_norm": 1.25, "learning_rate": 4.960669016601155e-06, "loss": 1.0342, "step": 30 }, { "epoch": 0.10437710437710437, "grad_norm": 1.0625, "learning_rate": 4.95561405997924e-06, "loss": 0.9911, "step": 31 }, { "epoch": 0.10774410774410774, "grad_norm": 1.21875, "learning_rate": 4.950256493879795e-06, "loss": 1.025, "step": 32 }, { "epoch": 0.1111111111111111, "grad_norm": 1.1328125, "learning_rate": 4.94459697852476e-06, "loss": 1.022, "step": 33 }, { "epoch": 0.11447811447811448, "grad_norm": 1.1953125, "learning_rate": 4.938636211345792e-06, "loss": 1.0085, "step": 34 }, { "epoch": 0.11784511784511785, "grad_norm": 1.1484375, "learning_rate": 4.932374926898321e-06, "loss": 1.0109, "step": 35 }, { "epoch": 0.12121212121212122, "grad_norm": 1.0390625, "learning_rate": 4.92581389677103e-06, "loss": 0.9654, "step": 36 }, { "epoch": 0.12457912457912458, "grad_norm": 1.109375, "learning_rate": 4.918953929490768e-06, "loss": 1.0041, "step": 37 }, { "epoch": 0.12794612794612795, "grad_norm": 1.09375, "learning_rate": 4.911795870422916e-06, "loss": 1.0118, "step": 38 }, { "epoch": 0.12794612794612795, "eval_loss": 0.9947459697723389, "eval_runtime": 8.7184, "eval_samples_per_second": 57.35, "eval_steps_per_second": 3.67, "step": 38 }, { "epoch": 0.13131313131313133, "grad_norm": 0.9765625, "learning_rate": 4.904340601667208e-06, "loss": 0.9841, "step": 39 }, { "epoch": 0.13468013468013468, "grad_norm": 1.046875, "learning_rate": 4.896589041949036e-06, "loss": 1.0055, "step": 40 }, { "epoch": 0.13804713804713806, "grad_norm": 1.171875, "learning_rate": 4.888542146506224e-06, "loss": 0.9728, "step": 41 }, { "epoch": 0.1414141414141414, "grad_norm": 0.9765625, "learning_rate": 4.880200906971321e-06, "loss": 0.975, "step": 42 }, { "epoch": 0.1447811447811448, "grad_norm": 1.0234375, "learning_rate": 4.8715663512493924e-06, "loss": 0.9628, "step": 43 }, { "epoch": 0.14814814814814814, "grad_norm": 0.92578125, "learning_rate": 4.8626395433913595e-06, "loss": 0.9668, "step": 44 }, { "epoch": 0.15151515151515152, "grad_norm": 1.078125, "learning_rate": 4.853421583462866e-06, "loss": 0.9539, "step": 45 }, { "epoch": 0.15488215488215487, "grad_norm": 1.1484375, "learning_rate": 4.8439136074087165e-06, "loss": 1.0145, "step": 46 }, { "epoch": 0.15824915824915825, "grad_norm": 1.015625, "learning_rate": 4.834116786912897e-06, "loss": 0.9738, "step": 47 }, { "epoch": 0.16161616161616163, "grad_norm": 1.0390625, "learning_rate": 4.82403232925418e-06, "loss": 0.9907, "step": 48 }, { "epoch": 0.16498316498316498, "grad_norm": 1.0546875, "learning_rate": 4.813661477157355e-06, "loss": 0.9773, "step": 49 }, { "epoch": 0.16835016835016836, "grad_norm": 1.015625, "learning_rate": 4.803005508640083e-06, "loss": 0.9636, "step": 50 }, { "epoch": 0.1717171717171717, "grad_norm": 0.9375, "learning_rate": 4.7920657368554e-06, "loss": 0.9589, "step": 51 }, { "epoch": 0.1750841750841751, "grad_norm": 0.9765625, "learning_rate": 4.780843509929905e-06, "loss": 0.9753, "step": 52 }, { "epoch": 0.17845117845117844, "grad_norm": 1.015625, "learning_rate": 4.769340210797618e-06, "loss": 0.9324, "step": 53 }, { "epoch": 0.18181818181818182, "grad_norm": 0.96875, "learning_rate": 4.757557257029563e-06, "loss": 0.9439, "step": 54 }, { "epoch": 0.18518518518518517, "grad_norm": 1.0078125, "learning_rate": 4.745496100659083e-06, "loss": 0.9925, "step": 55 }, { "epoch": 0.18855218855218855, "grad_norm": 0.8828125, "learning_rate": 4.733158228002891e-06, "loss": 0.9294, "step": 56 }, { "epoch": 0.1919191919191919, "grad_norm": 0.95703125, "learning_rate": 4.720545159477921e-06, "loss": 0.9502, "step": 57 }, { "epoch": 0.19528619528619529, "grad_norm": 0.90234375, "learning_rate": 4.707658449413961e-06, "loss": 0.9538, "step": 58 }, { "epoch": 0.19865319865319866, "grad_norm": 1.03125, "learning_rate": 4.694499685862106e-06, "loss": 0.9484, "step": 59 }, { "epoch": 0.20202020202020202, "grad_norm": 1.0390625, "learning_rate": 4.681070490399064e-06, "loss": 0.9515, "step": 60 }, { "epoch": 0.2053872053872054, "grad_norm": 0.921875, "learning_rate": 4.667372517927323e-06, "loss": 0.956, "step": 61 }, { "epoch": 0.20875420875420875, "grad_norm": 0.88671875, "learning_rate": 4.653407456471222e-06, "loss": 0.9478, "step": 62 }, { "epoch": 0.21212121212121213, "grad_norm": 1.0390625, "learning_rate": 4.639177026968924e-06, "loss": 0.9427, "step": 63 }, { "epoch": 0.21548821548821548, "grad_norm": 0.9296875, "learning_rate": 4.624682983060346e-06, "loss": 1.0028, "step": 64 }, { "epoch": 0.21885521885521886, "grad_norm": 0.8828125, "learning_rate": 4.609927110871053e-06, "loss": 0.9442, "step": 65 }, { "epoch": 0.2222222222222222, "grad_norm": 0.890625, "learning_rate": 4.594911228792156e-06, "loss": 0.9695, "step": 66 }, { "epoch": 0.2255892255892256, "grad_norm": 0.94140625, "learning_rate": 4.579637187256222e-06, "loss": 0.9407, "step": 67 }, { "epoch": 0.22895622895622897, "grad_norm": 0.9609375, "learning_rate": 4.564106868509246e-06, "loss": 1.0014, "step": 68 }, { "epoch": 0.23232323232323232, "grad_norm": 0.9140625, "learning_rate": 4.5483221863786965e-06, "loss": 0.9249, "step": 69 }, { "epoch": 0.2356902356902357, "grad_norm": 0.83984375, "learning_rate": 4.5322850860376744e-06, "loss": 0.9623, "step": 70 }, { "epoch": 0.23905723905723905, "grad_norm": 0.9765625, "learning_rate": 4.515997543765202e-06, "loss": 0.9321, "step": 71 }, { "epoch": 0.24242424242424243, "grad_norm": 0.94921875, "learning_rate": 4.499461566702685e-06, "loss": 0.9407, "step": 72 }, { "epoch": 0.24579124579124578, "grad_norm": 0.86328125, "learning_rate": 4.48267919260657e-06, "loss": 0.9081, "step": 73 }, { "epoch": 0.24915824915824916, "grad_norm": 0.87890625, "learning_rate": 4.465652489597226e-06, "loss": 0.9259, "step": 74 }, { "epoch": 0.25252525252525254, "grad_norm": 0.984375, "learning_rate": 4.4483835559040885e-06, "loss": 0.9386, "step": 75 }, { "epoch": 0.2558922558922559, "grad_norm": 1.0546875, "learning_rate": 4.430874519607089e-06, "loss": 0.9884, "step": 76 }, { "epoch": 0.2558922558922559, "eval_loss": 0.9393355846405029, "eval_runtime": 8.6902, "eval_samples_per_second": 57.536, "eval_steps_per_second": 3.682, "step": 76 }, { "epoch": 0.25925925925925924, "grad_norm": 0.9765625, "learning_rate": 4.413127538374411e-06, "loss": 0.9479, "step": 77 }, { "epoch": 0.26262626262626265, "grad_norm": 0.8671875, "learning_rate": 4.395144799196593e-06, "loss": 0.9165, "step": 78 }, { "epoch": 0.265993265993266, "grad_norm": 1.0390625, "learning_rate": 4.376928518117028e-06, "loss": 0.9428, "step": 79 }, { "epoch": 0.26936026936026936, "grad_norm": 1.0703125, "learning_rate": 4.358480939958867e-06, "loss": 0.9752, "step": 80 }, { "epoch": 0.2727272727272727, "grad_norm": 1.0078125, "learning_rate": 4.339804338048397e-06, "loss": 0.8951, "step": 81 }, { "epoch": 0.2760942760942761, "grad_norm": 0.87890625, "learning_rate": 4.320901013934887e-06, "loss": 0.9329, "step": 82 }, { "epoch": 0.27946127946127947, "grad_norm": 0.84765625, "learning_rate": 4.301773297106968e-06, "loss": 0.9086, "step": 83 }, { "epoch": 0.2828282828282828, "grad_norm": 0.84765625, "learning_rate": 4.282423544705564e-06, "loss": 0.9417, "step": 84 }, { "epoch": 0.28619528619528617, "grad_norm": 0.91015625, "learning_rate": 4.262854141233419e-06, "loss": 0.9344, "step": 85 }, { "epoch": 0.2895622895622896, "grad_norm": 0.921875, "learning_rate": 4.243067498261251e-06, "loss": 0.905, "step": 86 }, { "epoch": 0.29292929292929293, "grad_norm": 0.8671875, "learning_rate": 4.223066054130568e-06, "loss": 0.8943, "step": 87 }, { "epoch": 0.2962962962962963, "grad_norm": 0.91015625, "learning_rate": 4.2028522736531895e-06, "loss": 0.9407, "step": 88 }, { "epoch": 0.2996632996632997, "grad_norm": 0.95703125, "learning_rate": 4.182428647807503e-06, "loss": 0.957, "step": 89 }, { "epoch": 0.30303030303030304, "grad_norm": 1.03125, "learning_rate": 4.161797693431493e-06, "loss": 0.9128, "step": 90 }, { "epoch": 0.3063973063973064, "grad_norm": 0.8515625, "learning_rate": 4.140961952912594e-06, "loss": 0.9008, "step": 91 }, { "epoch": 0.30976430976430974, "grad_norm": 1.09375, "learning_rate": 4.11992399387438e-06, "loss": 0.9073, "step": 92 }, { "epoch": 0.31313131313131315, "grad_norm": 0.90625, "learning_rate": 4.098686408860157e-06, "loss": 1.0136, "step": 93 }, { "epoch": 0.3164983164983165, "grad_norm": 0.9140625, "learning_rate": 4.077251815013477e-06, "loss": 0.965, "step": 94 }, { "epoch": 0.31986531986531985, "grad_norm": 0.9375, "learning_rate": 4.055622853755627e-06, "loss": 0.8822, "step": 95 }, { "epoch": 0.32323232323232326, "grad_norm": 0.859375, "learning_rate": 4.033802190460114e-06, "loss": 0.9241, "step": 96 }, { "epoch": 0.3265993265993266, "grad_norm": 1.015625, "learning_rate": 4.011792514124217e-06, "loss": 0.9491, "step": 97 }, { "epoch": 0.32996632996632996, "grad_norm": 0.890625, "learning_rate": 3.989596537037608e-06, "loss": 0.9461, "step": 98 }, { "epoch": 0.3333333333333333, "grad_norm": 1.0, "learning_rate": 3.967216994448116e-06, "loss": 0.9298, "step": 99 }, { "epoch": 0.3367003367003367, "grad_norm": 1.0078125, "learning_rate": 3.9446566442246615e-06, "loss": 0.9133, "step": 100 }, { "epoch": 0.3400673400673401, "grad_norm": 0.9765625, "learning_rate": 3.921918266517392e-06, "loss": 0.9334, "step": 101 }, { "epoch": 0.3434343434343434, "grad_norm": 0.79296875, "learning_rate": 3.899004663415083e-06, "loss": 0.9477, "step": 102 }, { "epoch": 0.3468013468013468, "grad_norm": 0.87109375, "learning_rate": 3.875918658599837e-06, "loss": 0.9307, "step": 103 }, { "epoch": 0.3501683501683502, "grad_norm": 1.0078125, "learning_rate": 3.852663096999104e-06, "loss": 0.8982, "step": 104 }, { "epoch": 0.35353535353535354, "grad_norm": 0.99609375, "learning_rate": 3.829240844435109e-06, "loss": 0.9564, "step": 105 }, { "epoch": 0.3569023569023569, "grad_norm": 0.890625, "learning_rate": 3.8056547872716865e-06, "loss": 0.916, "step": 106 }, { "epoch": 0.3602693602693603, "grad_norm": 0.90234375, "learning_rate": 3.7819078320585865e-06, "loss": 0.9077, "step": 107 }, { "epoch": 0.36363636363636365, "grad_norm": 0.9296875, "learning_rate": 3.7580029051732992e-06, "loss": 0.9215, "step": 108 }, { "epoch": 0.367003367003367, "grad_norm": 1.25, "learning_rate": 3.733942952460432e-06, "loss": 0.9287, "step": 109 }, { "epoch": 0.37037037037037035, "grad_norm": 0.828125, "learning_rate": 3.7097309388686865e-06, "loss": 0.9261, "step": 110 }, { "epoch": 0.37373737373737376, "grad_norm": 0.91796875, "learning_rate": 3.6853698480854853e-06, "loss": 0.8885, "step": 111 }, { "epoch": 0.3771043771043771, "grad_norm": 0.90234375, "learning_rate": 3.660862682169283e-06, "loss": 0.905, "step": 112 }, { "epoch": 0.38047138047138046, "grad_norm": 0.703125, "learning_rate": 3.636212461179623e-06, "loss": 0.8435, "step": 113 }, { "epoch": 0.3838383838383838, "grad_norm": 0.7734375, "learning_rate": 3.6114222228049657e-06, "loss": 0.9277, "step": 114 }, { "epoch": 0.3838383838383838, "eval_loss": 0.8986863493919373, "eval_runtime": 8.7365, "eval_samples_per_second": 57.231, "eval_steps_per_second": 3.663, "step": 114 }, { "epoch": 0.3872053872053872, "grad_norm": 0.8828125, "learning_rate": 3.5864950219883514e-06, "loss": 0.9274, "step": 115 }, { "epoch": 0.39057239057239057, "grad_norm": 0.8125, "learning_rate": 3.561433930550934e-06, "loss": 0.8989, "step": 116 }, { "epoch": 0.3939393939393939, "grad_norm": 0.81640625, "learning_rate": 3.536242036813436e-06, "loss": 0.9125, "step": 117 }, { "epoch": 0.39730639730639733, "grad_norm": 0.80859375, "learning_rate": 3.510922445215568e-06, "loss": 0.883, "step": 118 }, { "epoch": 0.4006734006734007, "grad_norm": 0.7578125, "learning_rate": 3.4854782759334625e-06, "loss": 0.8644, "step": 119 }, { "epoch": 0.40404040404040403, "grad_norm": 0.84765625, "learning_rate": 3.4599126644951758e-06, "loss": 0.907, "step": 120 }, { "epoch": 0.4074074074074074, "grad_norm": 0.796875, "learning_rate": 3.4342287613942804e-06, "loss": 0.889, "step": 121 }, { "epoch": 0.4107744107744108, "grad_norm": 0.80859375, "learning_rate": 3.4084297317016353e-06, "loss": 0.896, "step": 122 }, { "epoch": 0.41414141414141414, "grad_norm": 0.8359375, "learning_rate": 3.3825187546753426e-06, "loss": 0.8874, "step": 123 }, { "epoch": 0.4175084175084175, "grad_norm": 0.7890625, "learning_rate": 3.3564990233689632e-06, "loss": 0.8811, "step": 124 }, { "epoch": 0.4208754208754209, "grad_norm": 0.8203125, "learning_rate": 3.330373744238033e-06, "loss": 0.9437, "step": 125 }, { "epoch": 0.42424242424242425, "grad_norm": 0.890625, "learning_rate": 3.3041461367449256e-06, "loss": 0.8829, "step": 126 }, { "epoch": 0.4276094276094276, "grad_norm": 0.77734375, "learning_rate": 3.2778194329621104e-06, "loss": 0.8412, "step": 127 }, { "epoch": 0.43097643097643096, "grad_norm": 0.83984375, "learning_rate": 3.2513968771738606e-06, "loss": 0.8916, "step": 128 }, { "epoch": 0.43434343434343436, "grad_norm": 1.0078125, "learning_rate": 3.224881725476456e-06, "loss": 0.9644, "step": 129 }, { "epoch": 0.4377104377104377, "grad_norm": 0.78125, "learning_rate": 3.198277245376924e-06, "loss": 0.8623, "step": 130 }, { "epoch": 0.44107744107744107, "grad_norm": 0.71484375, "learning_rate": 3.1715867153903844e-06, "loss": 0.8786, "step": 131 }, { "epoch": 0.4444444444444444, "grad_norm": 0.69140625, "learning_rate": 3.144813424636031e-06, "loss": 0.8786, "step": 132 }, { "epoch": 0.4478114478114478, "grad_norm": 0.91015625, "learning_rate": 3.1179606724318052e-06, "loss": 0.9163, "step": 133 }, { "epoch": 0.4511784511784512, "grad_norm": 0.78515625, "learning_rate": 3.091031767887817e-06, "loss": 0.8656, "step": 134 }, { "epoch": 0.45454545454545453, "grad_norm": 0.72265625, "learning_rate": 3.0640300294985613e-06, "loss": 0.8901, "step": 135 }, { "epoch": 0.45791245791245794, "grad_norm": 0.83203125, "learning_rate": 3.036958784733967e-06, "loss": 0.8706, "step": 136 }, { "epoch": 0.4612794612794613, "grad_norm": 0.75390625, "learning_rate": 3.0098213696293542e-06, "loss": 0.8987, "step": 137 }, { "epoch": 0.46464646464646464, "grad_norm": 0.77734375, "learning_rate": 2.982621128374325e-06, "loss": 0.8984, "step": 138 }, { "epoch": 0.468013468013468, "grad_norm": 0.8828125, "learning_rate": 2.9553614129006543e-06, "loss": 0.8986, "step": 139 }, { "epoch": 0.4713804713804714, "grad_norm": 0.78515625, "learning_rate": 2.9280455824692255e-06, "loss": 0.8514, "step": 140 }, { "epoch": 0.47474747474747475, "grad_norm": 0.6875, "learning_rate": 2.9006770032560637e-06, "loss": 0.9182, "step": 141 }, { "epoch": 0.4781144781144781, "grad_norm": 1.1171875, "learning_rate": 2.8732590479375167e-06, "loss": 0.8728, "step": 142 }, { "epoch": 0.48148148148148145, "grad_norm": 0.68359375, "learning_rate": 2.8457950952746293e-06, "loss": 0.8692, "step": 143 }, { "epoch": 0.48484848484848486, "grad_norm": 1.2734375, "learning_rate": 2.8182885296967833e-06, "loss": 0.907, "step": 144 }, { "epoch": 0.4882154882154882, "grad_norm": 0.88671875, "learning_rate": 2.7907427408846156e-06, "loss": 0.8318, "step": 145 }, { "epoch": 0.49158249158249157, "grad_norm": 1.0703125, "learning_rate": 2.763161123352314e-06, "loss": 0.8542, "step": 146 }, { "epoch": 0.494949494949495, "grad_norm": 0.67578125, "learning_rate": 2.735547076029296e-06, "loss": 0.8602, "step": 147 }, { "epoch": 0.4983164983164983, "grad_norm": 1.0625, "learning_rate": 2.7079040018413586e-06, "loss": 0.8775, "step": 148 }, { "epoch": 0.5016835016835017, "grad_norm": 0.96484375, "learning_rate": 2.6802353072913307e-06, "loss": 0.8884, "step": 149 }, { "epoch": 0.5050505050505051, "grad_norm": 0.87890625, "learning_rate": 2.6525444020392794e-06, "loss": 0.8886, "step": 150 } ], "logging_steps": 1, "max_steps": 297, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 75, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.72875344887808e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }