{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.82857142857143, "eval_steps": 500, "global_step": 430, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 1.5384615384615387e-05, "loss": 5.1473, "step": 1 }, { "epoch": 0.05, "learning_rate": 3.0769230769230774e-05, "loss": 5.1654, "step": 2 }, { "epoch": 0.07, "learning_rate": 4.615384615384616e-05, "loss": 4.4404, "step": 3 }, { "epoch": 0.09, "learning_rate": 6.153846153846155e-05, "loss": 2.1218, "step": 4 }, { "epoch": 0.11, "learning_rate": 7.692307692307693e-05, "loss": 0.7891, "step": 5 }, { "epoch": 0.14, "learning_rate": 9.230769230769232e-05, "loss": 0.4084, "step": 6 }, { "epoch": 0.16, "learning_rate": 0.0001076923076923077, "loss": 0.2285, "step": 7 }, { "epoch": 0.18, "learning_rate": 0.0001230769230769231, "loss": 0.2082, "step": 8 }, { "epoch": 0.21, "learning_rate": 0.00013846153846153847, "loss": 0.1934, "step": 9 }, { "epoch": 0.23, "learning_rate": 0.00015384615384615385, "loss": 0.1951, "step": 10 }, { "epoch": 0.25, "learning_rate": 0.00016923076923076923, "loss": 0.1867, "step": 11 }, { "epoch": 0.27, "learning_rate": 0.00018461538461538463, "loss": 0.1931, "step": 12 }, { "epoch": 0.3, "learning_rate": 0.0002, "loss": 0.1895, "step": 13 }, { "epoch": 0.32, "learning_rate": 0.00019999716210981734, "loss": 0.16, "step": 14 }, { "epoch": 0.34, "learning_rate": 0.00019998864860034169, "loss": 0.2126, "step": 15 }, { "epoch": 0.37, "learning_rate": 0.0001999744599547812, "loss": 0.1772, "step": 16 }, { "epoch": 0.39, "learning_rate": 0.0001999545969784522, "loss": 0.1949, "step": 17 }, { "epoch": 0.41, "learning_rate": 0.00019992906079873365, "loss": 0.1923, "step": 18 }, { "epoch": 0.43, "learning_rate": 0.00019989785286500295, "loss": 0.1709, "step": 19 }, { "epoch": 0.46, "learning_rate": 0.0001998609749485539, "loss": 0.1823, "step": 20 }, { "epoch": 0.48, "learning_rate": 0.0001998184291424961, "loss": 0.1656, "step": 21 }, { "epoch": 0.5, "learning_rate": 0.00019977021786163598, "loss": 0.194, "step": 22 }, { "epoch": 0.53, "learning_rate": 0.00019971634384234003, "loss": 0.174, "step": 23 }, { "epoch": 0.55, "learning_rate": 0.00019965681014237917, "loss": 0.1699, "step": 24 }, { "epoch": 0.57, "learning_rate": 0.00019959162014075553, "loss": 0.1771, "step": 25 }, { "epoch": 0.59, "learning_rate": 0.00019952077753751036, "loss": 0.1942, "step": 26 }, { "epoch": 0.62, "learning_rate": 0.00019944428635351426, "loss": 0.1818, "step": 27 }, { "epoch": 0.64, "learning_rate": 0.00019936215093023884, "loss": 0.1956, "step": 28 }, { "epoch": 0.66, "learning_rate": 0.0001992743759295103, "loss": 0.1738, "step": 29 }, { "epoch": 0.69, "learning_rate": 0.00019918096633324492, "loss": 0.1897, "step": 30 }, { "epoch": 0.71, "learning_rate": 0.0001990819274431662, "loss": 0.1558, "step": 31 }, { "epoch": 0.73, "learning_rate": 0.00019897726488050406, "loss": 0.2183, "step": 32 }, { "epoch": 0.75, "learning_rate": 0.00019886698458567562, "loss": 0.1844, "step": 33 }, { "epoch": 0.78, "learning_rate": 0.00019875109281794825, "loss": 0.1774, "step": 34 }, { "epoch": 0.8, "learning_rate": 0.00019862959615508417, "loss": 0.1709, "step": 35 }, { "epoch": 0.82, "learning_rate": 0.00019850250149296703, "loss": 0.2023, "step": 36 }, { "epoch": 0.85, "learning_rate": 0.00019836981604521076, "loss": 0.1717, "step": 37 }, { "epoch": 0.87, "learning_rate": 0.00019823154734274997, "loss": 0.1751, "step": 38 }, { "epoch": 0.89, "learning_rate": 0.0001980877032334125, "loss": 0.1903, "step": 39 }, { "epoch": 0.91, "learning_rate": 0.00019793829188147406, "loss": 0.2017, "step": 40 }, { "epoch": 0.94, "learning_rate": 0.00019778332176719483, "loss": 0.1651, "step": 41 }, { "epoch": 0.96, "learning_rate": 0.00019762280168633814, "loss": 0.1913, "step": 42 }, { "epoch": 0.98, "learning_rate": 0.0001974567407496712, "loss": 0.1749, "step": 43 }, { "epoch": 1.01, "learning_rate": 0.0001972851483824481, "loss": 0.1953, "step": 44 }, { "epoch": 1.03, "learning_rate": 0.00019710803432387465, "loss": 0.2126, "step": 45 }, { "epoch": 1.05, "learning_rate": 0.00019692540862655585, "loss": 0.181, "step": 46 }, { "epoch": 1.07, "learning_rate": 0.0001967372816559252, "loss": 0.176, "step": 47 }, { "epoch": 1.1, "learning_rate": 0.00019654366408965635, "loss": 0.1655, "step": 48 }, { "epoch": 1.12, "learning_rate": 0.00019634456691705702, "loss": 0.1676, "step": 49 }, { "epoch": 1.14, "learning_rate": 0.00019614000143844558, "loss": 0.1438, "step": 50 }, { "epoch": 1.17, "learning_rate": 0.0001959299792645092, "loss": 0.1915, "step": 51 }, { "epoch": 1.19, "learning_rate": 0.00019571451231564525, "loss": 0.2314, "step": 52 }, { "epoch": 1.21, "learning_rate": 0.00019549361282128445, "loss": 0.1736, "step": 53 }, { "epoch": 1.23, "learning_rate": 0.00019526729331919697, "loss": 0.1765, "step": 54 }, { "epoch": 1.26, "learning_rate": 0.00019503556665478067, "loss": 0.1969, "step": 55 }, { "epoch": 1.28, "learning_rate": 0.00019479844598033202, "loss": 0.1968, "step": 56 }, { "epoch": 1.3, "learning_rate": 0.0001945559447542998, "loss": 0.1724, "step": 57 }, { "epoch": 1.33, "learning_rate": 0.00019430807674052092, "loss": 0.2096, "step": 58 }, { "epoch": 1.35, "learning_rate": 0.00019405485600743942, "loss": 0.1861, "step": 59 }, { "epoch": 1.37, "learning_rate": 0.00019379629692730798, "loss": 0.1752, "step": 60 }, { "epoch": 1.39, "learning_rate": 0.00019353241417537214, "loss": 0.1746, "step": 61 }, { "epoch": 1.42, "learning_rate": 0.00019326322272903722, "loss": 0.175, "step": 62 }, { "epoch": 1.44, "learning_rate": 0.00019298873786701857, "loss": 0.1752, "step": 63 }, { "epoch": 1.46, "learning_rate": 0.00019270897516847403, "loss": 0.1719, "step": 64 }, { "epoch": 1.49, "learning_rate": 0.00019242395051212, "loss": 0.1814, "step": 65 }, { "epoch": 1.51, "learning_rate": 0.00019213368007532986, "loss": 0.1792, "step": 66 }, { "epoch": 1.53, "learning_rate": 0.00019183818033321614, "loss": 0.1683, "step": 67 }, { "epoch": 1.55, "learning_rate": 0.00019153746805769512, "loss": 0.175, "step": 68 }, { "epoch": 1.58, "learning_rate": 0.00019123156031653515, "loss": 0.1558, "step": 69 }, { "epoch": 1.6, "learning_rate": 0.00019092047447238773, "loss": 0.1907, "step": 70 }, { "epoch": 1.62, "learning_rate": 0.00019060422818180207, "loss": 0.2572, "step": 71 }, { "epoch": 1.65, "learning_rate": 0.00019028283939422308, "loss": 0.1781, "step": 72 }, { "epoch": 1.67, "learning_rate": 0.0001899563263509725, "loss": 0.1659, "step": 73 }, { "epoch": 1.69, "learning_rate": 0.00018962470758421342, "loss": 0.174, "step": 74 }, { "epoch": 1.71, "learning_rate": 0.0001892880019158988, "loss": 0.1795, "step": 75 }, { "epoch": 1.74, "learning_rate": 0.00018894622845670283, "loss": 0.1712, "step": 76 }, { "epoch": 1.76, "learning_rate": 0.00018859940660493634, "loss": 0.1628, "step": 77 }, { "epoch": 1.78, "learning_rate": 0.00018824755604544594, "loss": 0.1901, "step": 78 }, { "epoch": 1.81, "learning_rate": 0.0001878906967484966, "loss": 0.1549, "step": 79 }, { "epoch": 1.83, "learning_rate": 0.0001875288489686382, "loss": 0.1972, "step": 80 }, { "epoch": 1.85, "learning_rate": 0.00018716203324355607, "loss": 0.1758, "step": 81 }, { "epoch": 1.87, "learning_rate": 0.00018679027039290497, "loss": 0.1772, "step": 82 }, { "epoch": 1.9, "learning_rate": 0.0001864135815171279, "loss": 0.1839, "step": 83 }, { "epoch": 1.92, "learning_rate": 0.00018603198799625807, "loss": 0.192, "step": 84 }, { "epoch": 1.94, "learning_rate": 0.00018564551148870563, "loss": 0.1795, "step": 85 }, { "epoch": 1.97, "learning_rate": 0.00018525417393002824, "loss": 0.1731, "step": 86 }, { "epoch": 1.99, "learning_rate": 0.00018485799753168634, "loss": 0.1965, "step": 87 }, { "epoch": 2.01, "learning_rate": 0.00018445700477978205, "loss": 0.214, "step": 88 }, { "epoch": 2.03, "learning_rate": 0.0001840512184337833, "loss": 0.2024, "step": 89 }, { "epoch": 2.06, "learning_rate": 0.00018364066152523183, "loss": 0.1786, "step": 90 }, { "epoch": 2.08, "learning_rate": 0.00018322535735643605, "loss": 0.1938, "step": 91 }, { "epoch": 2.1, "learning_rate": 0.00018280532949914842, "loss": 0.1634, "step": 92 }, { "epoch": 2.13, "learning_rate": 0.0001823806017932276, "loss": 0.1655, "step": 93 }, { "epoch": 2.15, "learning_rate": 0.00018195119834528534, "loss": 0.1635, "step": 94 }, { "epoch": 2.17, "learning_rate": 0.00018151714352731822, "loss": 0.1938, "step": 95 }, { "epoch": 2.19, "learning_rate": 0.00018107846197532433, "loss": 0.1696, "step": 96 }, { "epoch": 2.22, "learning_rate": 0.00018063517858790516, "loss": 0.1806, "step": 97 }, { "epoch": 2.24, "learning_rate": 0.00018018731852485206, "loss": 0.1826, "step": 98 }, { "epoch": 2.26, "learning_rate": 0.00017973490720571864, "loss": 0.1976, "step": 99 }, { "epoch": 2.29, "learning_rate": 0.00017927797030837768, "loss": 0.1815, "step": 100 }, { "epoch": 2.31, "learning_rate": 0.00017881653376756394, "loss": 0.1818, "step": 101 }, { "epoch": 2.33, "learning_rate": 0.0001783506237734019, "loss": 0.1649, "step": 102 }, { "epoch": 2.35, "learning_rate": 0.00017788026676991963, "loss": 0.1684, "step": 103 }, { "epoch": 2.38, "learning_rate": 0.00017740548945354752, "loss": 0.1517, "step": 104 }, { "epoch": 2.4, "learning_rate": 0.00017692631877160326, "loss": 0.1871, "step": 105 }, { "epoch": 2.42, "learning_rate": 0.0001764427819207624, "loss": 0.1921, "step": 106 }, { "epoch": 2.45, "learning_rate": 0.0001759549063455145, "loss": 0.1659, "step": 107 }, { "epoch": 2.47, "learning_rate": 0.00017546271973660574, "loss": 0.1807, "step": 108 }, { "epoch": 2.49, "learning_rate": 0.000174966250029467, "loss": 0.1808, "step": 109 }, { "epoch": 2.51, "learning_rate": 0.00017446552540262844, "loss": 0.1844, "step": 110 }, { "epoch": 2.54, "learning_rate": 0.0001739605742761201, "loss": 0.174, "step": 111 }, { "epoch": 2.56, "learning_rate": 0.00017345142530985887, "loss": 0.1752, "step": 112 }, { "epoch": 2.58, "learning_rate": 0.00017293810740202182, "loss": 0.1788, "step": 113 }, { "epoch": 2.61, "learning_rate": 0.00017242064968740598, "loss": 0.1748, "step": 114 }, { "epoch": 2.63, "learning_rate": 0.00017189908153577473, "loss": 0.1711, "step": 115 }, { "epoch": 2.65, "learning_rate": 0.0001713734325501908, "loss": 0.1741, "step": 116 }, { "epoch": 2.67, "learning_rate": 0.00017084373256533603, "loss": 0.1779, "step": 117 }, { "epoch": 2.7, "learning_rate": 0.00017031001164581828, "loss": 0.1761, "step": 118 }, { "epoch": 2.72, "learning_rate": 0.00016977230008446466, "loss": 0.1771, "step": 119 }, { "epoch": 2.74, "learning_rate": 0.00016923062840060234, "loss": 0.1682, "step": 120 }, { "epoch": 2.77, "learning_rate": 0.00016868502733832644, "loss": 0.175, "step": 121 }, { "epoch": 2.79, "learning_rate": 0.00016813552786475495, "loss": 0.1804, "step": 122 }, { "epoch": 2.81, "learning_rate": 0.00016758216116827105, "loss": 0.1724, "step": 123 }, { "epoch": 2.83, "learning_rate": 0.0001670249586567531, "loss": 0.2074, "step": 124 }, { "epoch": 2.86, "learning_rate": 0.00016646395195579178, "loss": 0.1881, "step": 125 }, { "epoch": 2.88, "learning_rate": 0.00016589917290689532, "loss": 0.1791, "step": 126 }, { "epoch": 2.9, "learning_rate": 0.00016533065356568206, "loss": 0.1841, "step": 127 }, { "epoch": 2.93, "learning_rate": 0.00016475842620006118, "loss": 0.1779, "step": 128 }, { "epoch": 2.95, "learning_rate": 0.0001641825232884011, "loss": 0.1812, "step": 129 }, { "epoch": 2.97, "learning_rate": 0.0001636029775176862, "loss": 0.1699, "step": 130 }, { "epoch": 2.99, "learning_rate": 0.0001630198217816616, "loss": 0.1741, "step": 131 }, { "epoch": 3.02, "learning_rate": 0.000162433089178966, "loss": 0.1683, "step": 132 }, { "epoch": 3.04, "learning_rate": 0.0001618428130112533, "loss": 0.1808, "step": 133 }, { "epoch": 3.06, "learning_rate": 0.0001612490267813023, "loss": 0.1663, "step": 134 }, { "epoch": 3.09, "learning_rate": 0.0001606517641911153, "loss": 0.1684, "step": 135 }, { "epoch": 3.11, "learning_rate": 0.00016005105914000507, "loss": 0.1675, "step": 136 }, { "epoch": 3.13, "learning_rate": 0.00015944694572267096, "loss": 0.1706, "step": 137 }, { "epoch": 3.15, "learning_rate": 0.00015883945822726372, "loss": 0.1773, "step": 138 }, { "epoch": 3.18, "learning_rate": 0.00015822863113343935, "loss": 0.1763, "step": 139 }, { "epoch": 3.2, "learning_rate": 0.00015761449911040208, "loss": 0.1799, "step": 140 }, { "epoch": 3.22, "learning_rate": 0.00015699709701493667, "loss": 0.1684, "step": 141 }, { "epoch": 3.25, "learning_rate": 0.0001563764598894301, "loss": 0.1742, "step": 142 }, { "epoch": 3.27, "learning_rate": 0.0001557526229598824, "loss": 0.1751, "step": 143 }, { "epoch": 3.29, "learning_rate": 0.0001551256216339076, "loss": 0.1754, "step": 144 }, { "epoch": 3.31, "learning_rate": 0.00015449549149872376, "loss": 0.1764, "step": 145 }, { "epoch": 3.34, "learning_rate": 0.00015386226831913348, "loss": 0.1703, "step": 146 }, { "epoch": 3.36, "learning_rate": 0.00015322598803549356, "loss": 0.1731, "step": 147 }, { "epoch": 3.38, "learning_rate": 0.00015258668676167546, "loss": 0.1741, "step": 148 }, { "epoch": 3.41, "learning_rate": 0.00015194440078301536, "loss": 0.1703, "step": 149 }, { "epoch": 3.43, "learning_rate": 0.00015129916655425468, "loss": 0.167, "step": 150 }, { "epoch": 3.45, "learning_rate": 0.00015065102069747118, "loss": 0.1876, "step": 151 }, { "epoch": 3.47, "learning_rate": 0.00015000000000000001, "loss": 0.1761, "step": 152 }, { "epoch": 3.5, "learning_rate": 0.00014934614141234618, "loss": 0.1592, "step": 153 }, { "epoch": 3.52, "learning_rate": 0.000148689482046087, "loss": 0.1581, "step": 154 }, { "epoch": 3.54, "learning_rate": 0.00014803005917176585, "loss": 0.1804, "step": 155 }, { "epoch": 3.57, "learning_rate": 0.00014736791021677676, "loss": 0.1699, "step": 156 }, { "epoch": 3.59, "learning_rate": 0.0001467030727632401, "loss": 0.2209, "step": 157 }, { "epoch": 3.61, "learning_rate": 0.0001460355845458695, "loss": 0.177, "step": 158 }, { "epoch": 3.63, "learning_rate": 0.00014536548344983016, "loss": 0.1828, "step": 159 }, { "epoch": 3.66, "learning_rate": 0.00014469280750858854, "loss": 0.1725, "step": 160 }, { "epoch": 3.68, "learning_rate": 0.00014401759490175362, "loss": 0.1645, "step": 161 }, { "epoch": 3.7, "learning_rate": 0.00014333988395290992, "loss": 0.1754, "step": 162 }, { "epoch": 3.73, "learning_rate": 0.00014265971312744252, "loss": 0.1867, "step": 163 }, { "epoch": 3.75, "learning_rate": 0.00014197712103035346, "loss": 0.1735, "step": 164 }, { "epoch": 3.77, "learning_rate": 0.00014129214640407102, "loss": 0.1767, "step": 165 }, { "epoch": 3.79, "learning_rate": 0.00014060482812625055, "loss": 0.1657, "step": 166 }, { "epoch": 3.82, "learning_rate": 0.0001399152052075679, "loss": 0.1734, "step": 167 }, { "epoch": 3.84, "learning_rate": 0.00013922331678950525, "loss": 0.1821, "step": 168 }, { "epoch": 3.86, "learning_rate": 0.00013852920214212964, "loss": 0.1839, "step": 169 }, { "epoch": 3.89, "learning_rate": 0.00013783290066186391, "loss": 0.1958, "step": 170 }, { "epoch": 3.91, "learning_rate": 0.00013713445186925075, "loss": 0.1815, "step": 171 }, { "epoch": 3.93, "learning_rate": 0.00013643389540670962, "loss": 0.1716, "step": 172 }, { "epoch": 3.95, "learning_rate": 0.00013573127103628667, "loss": 0.1688, "step": 173 }, { "epoch": 3.98, "learning_rate": 0.00013502661863739793, "loss": 0.1664, "step": 174 }, { "epoch": 4.0, "learning_rate": 0.00013431997820456592, "loss": 0.1638, "step": 175 }, { "epoch": 4.02, "learning_rate": 0.0001336113898451496, "loss": 0.2074, "step": 176 }, { "epoch": 4.05, "learning_rate": 0.0001329008937770679, "loss": 0.1675, "step": 177 }, { "epoch": 4.07, "learning_rate": 0.0001321885303265172, "loss": 0.1556, "step": 178 }, { "epoch": 4.09, "learning_rate": 0.00013147433992568227, "loss": 0.1653, "step": 179 }, { "epoch": 4.11, "learning_rate": 0.00013075836311044175, "loss": 0.1603, "step": 180 }, { "epoch": 4.14, "learning_rate": 0.0001300406405180671, "loss": 0.1758, "step": 181 }, { "epoch": 4.16, "learning_rate": 0.0001293212128849163, "loss": 0.1949, "step": 182 }, { "epoch": 4.18, "learning_rate": 0.00012860012104412165, "loss": 0.17, "step": 183 }, { "epoch": 4.21, "learning_rate": 0.0001278774059232723, "loss": 0.1662, "step": 184 }, { "epoch": 4.23, "learning_rate": 0.00012715310854209124, "loss": 0.1571, "step": 185 }, { "epoch": 4.25, "learning_rate": 0.00012642727001010694, "loss": 0.1979, "step": 186 }, { "epoch": 4.27, "learning_rate": 0.00012569993152432028, "loss": 0.1666, "step": 187 }, { "epoch": 4.3, "learning_rate": 0.00012497113436686627, "loss": 0.1065, "step": 188 }, { "epoch": 4.32, "learning_rate": 0.00012424091990267087, "loss": 0.1146, "step": 189 }, { "epoch": 4.34, "learning_rate": 0.0001235093295771032, "loss": 0.1749, "step": 190 }, { "epoch": 4.37, "learning_rate": 0.00012277640491362341, "loss": 0.1256, "step": 191 }, { "epoch": 4.39, "learning_rate": 0.0001220421875114256, "loss": 0.1835, "step": 192 }, { "epoch": 4.41, "learning_rate": 0.0001213067190430769, "loss": 0.1628, "step": 193 }, { "epoch": 4.43, "learning_rate": 0.00012057004125215223, "loss": 0.256, "step": 194 }, { "epoch": 4.46, "learning_rate": 0.00011983219595086506, "loss": 0.146, "step": 195 }, { "epoch": 4.48, "learning_rate": 0.00011909322501769406, "loss": 0.1682, "step": 196 }, { "epoch": 4.5, "learning_rate": 0.0001183531703950064, "loss": 0.1794, "step": 197 }, { "epoch": 4.53, "learning_rate": 0.00011761207408667703, "loss": 0.1905, "step": 198 }, { "epoch": 4.55, "learning_rate": 0.00011686997815570473, "loss": 0.1749, "step": 199 }, { "epoch": 4.57, "learning_rate": 0.00011612692472182463, "loss": 0.1775, "step": 200 }, { "epoch": 4.59, "learning_rate": 0.00011538295595911764, "loss": 0.1672, "step": 201 }, { "epoch": 4.62, "learning_rate": 0.00011463811409361667, "loss": 0.2042, "step": 202 }, { "epoch": 4.64, "learning_rate": 0.00011389244140091013, "loss": 0.1714, "step": 203 }, { "epoch": 4.66, "learning_rate": 0.00011314598020374231, "loss": 0.1637, "step": 204 }, { "epoch": 4.69, "learning_rate": 0.00011239877286961122, "loss": 0.1786, "step": 205 }, { "epoch": 4.71, "learning_rate": 0.00011165086180836406, "loss": 0.175, "step": 206 }, { "epoch": 4.73, "learning_rate": 0.00011090228946979, "loss": 0.1763, "step": 207 }, { "epoch": 4.75, "learning_rate": 0.00011015309834121081, "loss": 0.1941, "step": 208 }, { "epoch": 4.78, "learning_rate": 0.00010940333094506952, "loss": 0.1452, "step": 209 }, { "epoch": 4.8, "learning_rate": 0.00010865302983651673, "loss": 0.1719, "step": 210 }, { "epoch": 4.82, "learning_rate": 0.00010790223760099549, "loss": 0.1697, "step": 211 }, { "epoch": 4.85, "learning_rate": 0.00010715099685182408, "loss": 0.1644, "step": 212 }, { "epoch": 4.87, "learning_rate": 0.00010639935022777741, "loss": 0.1683, "step": 213 }, { "epoch": 4.89, "learning_rate": 0.00010564734039066699, "loss": 0.1746, "step": 214 }, { "epoch": 4.91, "learning_rate": 0.00010489501002291952, "loss": 0.1606, "step": 215 }, { "epoch": 4.94, "learning_rate": 0.00010414240182515429, "loss": 0.1841, "step": 216 }, { "epoch": 4.96, "learning_rate": 0.00010338955851375962, "loss": 0.1833, "step": 217 }, { "epoch": 4.98, "learning_rate": 0.00010263652281846837, "loss": 0.1802, "step": 218 }, { "epoch": 5.01, "learning_rate": 0.00010188333747993264, "loss": 0.1675, "step": 219 }, { "epoch": 5.03, "learning_rate": 0.00010113004524729799, "loss": 0.1598, "step": 220 }, { "epoch": 5.05, "learning_rate": 0.00010037668887577709, "loss": 0.1612, "step": 221 }, { "epoch": 5.07, "learning_rate": 9.962331112422293e-05, "loss": 0.1812, "step": 222 }, { "epoch": 5.1, "learning_rate": 9.886995475270205e-05, "loss": 0.1853, "step": 223 }, { "epoch": 5.12, "learning_rate": 9.811666252006742e-05, "loss": 0.1369, "step": 224 }, { "epoch": 5.14, "learning_rate": 9.73634771815317e-05, "loss": 0.1563, "step": 225 }, { "epoch": 5.17, "learning_rate": 9.661044148624037e-05, "loss": 0.1466, "step": 226 }, { "epoch": 5.19, "learning_rate": 9.58575981748457e-05, "loss": 0.1343, "step": 227 }, { "epoch": 5.21, "learning_rate": 9.510498997708049e-05, "loss": 0.1231, "step": 228 }, { "epoch": 5.23, "learning_rate": 9.435265960933302e-05, "loss": 0.1472, "step": 229 }, { "epoch": 5.26, "learning_rate": 9.360064977222262e-05, "loss": 0.1681, "step": 230 }, { "epoch": 5.28, "learning_rate": 9.284900314817597e-05, "loss": 0.2364, "step": 231 }, { "epoch": 5.3, "learning_rate": 9.209776239900453e-05, "loss": 0.1228, "step": 232 }, { "epoch": 5.33, "learning_rate": 9.134697016348327e-05, "loss": 0.1417, "step": 233 }, { "epoch": 5.35, "learning_rate": 9.05966690549305e-05, "loss": 0.1512, "step": 234 }, { "epoch": 5.37, "learning_rate": 8.984690165878921e-05, "loss": 0.1248, "step": 235 }, { "epoch": 5.39, "learning_rate": 8.909771053021002e-05, "loss": 0.1252, "step": 236 }, { "epoch": 5.42, "learning_rate": 8.834913819163595e-05, "loss": 0.1341, "step": 237 }, { "epoch": 5.44, "learning_rate": 8.760122713038881e-05, "loss": 0.1644, "step": 238 }, { "epoch": 5.46, "learning_rate": 8.685401979625774e-05, "loss": 0.0977, "step": 239 }, { "epoch": 5.49, "learning_rate": 8.610755859908991e-05, "loss": 0.1699, "step": 240 }, { "epoch": 5.51, "learning_rate": 8.536188590638334e-05, "loss": 0.1196, "step": 241 }, { "epoch": 5.53, "learning_rate": 8.46170440408824e-05, "loss": 0.0777, "step": 242 }, { "epoch": 5.55, "learning_rate": 8.387307527817539e-05, "loss": 0.1266, "step": 243 }, { "epoch": 5.58, "learning_rate": 8.313002184429529e-05, "loss": 0.1463, "step": 244 }, { "epoch": 5.6, "learning_rate": 8.238792591332299e-05, "loss": 0.1037, "step": 245 }, { "epoch": 5.62, "learning_rate": 8.164682960499361e-05, "loss": 0.1385, "step": 246 }, { "epoch": 5.65, "learning_rate": 8.090677498230596e-05, "loss": 0.0932, "step": 247 }, { "epoch": 5.67, "learning_rate": 8.016780404913496e-05, "loss": 0.1294, "step": 248 }, { "epoch": 5.69, "learning_rate": 7.942995874784776e-05, "loss": 0.191, "step": 249 }, { "epoch": 5.71, "learning_rate": 7.869328095692312e-05, "loss": 0.1488, "step": 250 }, { "epoch": 5.74, "learning_rate": 7.795781248857443e-05, "loss": 0.1259, "step": 251 }, { "epoch": 5.76, "learning_rate": 7.72235950863766e-05, "loss": 0.1266, "step": 252 }, { "epoch": 5.78, "learning_rate": 7.64906704228968e-05, "loss": 0.1172, "step": 253 }, { "epoch": 5.81, "learning_rate": 7.575908009732918e-05, "loss": 0.1032, "step": 254 }, { "epoch": 5.83, "learning_rate": 7.502886563313376e-05, "loss": 0.0891, "step": 255 }, { "epoch": 5.85, "learning_rate": 7.430006847567972e-05, "loss": 0.0909, "step": 256 }, { "epoch": 5.87, "learning_rate": 7.357272998989308e-05, "loss": 0.1367, "step": 257 }, { "epoch": 5.9, "learning_rate": 7.284689145790878e-05, "loss": 0.0965, "step": 258 }, { "epoch": 5.92, "learning_rate": 7.21225940767277e-05, "loss": 0.1868, "step": 259 }, { "epoch": 5.94, "learning_rate": 7.139987895587836e-05, "loss": 0.3087, "step": 260 }, { "epoch": 5.97, "learning_rate": 7.067878711508375e-05, "loss": 0.1388, "step": 261 }, { "epoch": 5.99, "learning_rate": 6.995935948193294e-05, "loss": 0.142, "step": 262 }, { "epoch": 6.01, "learning_rate": 6.924163688955825e-05, "loss": 0.1212, "step": 263 }, { "epoch": 6.03, "learning_rate": 6.852566007431773e-05, "loss": 0.1369, "step": 264 }, { "epoch": 6.06, "learning_rate": 6.781146967348284e-05, "loss": 0.0927, "step": 265 }, { "epoch": 6.08, "learning_rate": 6.709910622293212e-05, "loss": 0.1146, "step": 266 }, { "epoch": 6.1, "learning_rate": 6.638861015485043e-05, "loss": 0.1059, "step": 267 }, { "epoch": 6.13, "learning_rate": 6.568002179543409e-05, "loss": 0.1108, "step": 268 }, { "epoch": 6.15, "learning_rate": 6.497338136260209e-05, "loss": 0.1333, "step": 269 }, { "epoch": 6.17, "learning_rate": 6.426872896371331e-05, "loss": 0.115, "step": 270 }, { "epoch": 6.19, "learning_rate": 6.356610459329038e-05, "loss": 0.0776, "step": 271 }, { "epoch": 6.22, "learning_rate": 6.286554813074925e-05, "loss": 0.1038, "step": 272 }, { "epoch": 6.24, "learning_rate": 6.21670993381361e-05, "loss": 0.0796, "step": 273 }, { "epoch": 6.26, "learning_rate": 6.147079785787038e-05, "loss": 0.0982, "step": 274 }, { "epoch": 6.29, "learning_rate": 6.0776683210494766e-05, "loss": 0.114, "step": 275 }, { "epoch": 6.31, "learning_rate": 6.0084794792432155e-05, "loss": 0.0922, "step": 276 }, { "epoch": 6.33, "learning_rate": 5.93951718737495e-05, "loss": 0.0725, "step": 277 }, { "epoch": 6.35, "learning_rate": 5.8707853595928985e-05, "loss": 0.0855, "step": 278 }, { "epoch": 6.38, "learning_rate": 5.802287896964658e-05, "loss": 0.1254, "step": 279 }, { "epoch": 6.4, "learning_rate": 5.734028687255751e-05, "loss": 0.1193, "step": 280 }, { "epoch": 6.42, "learning_rate": 5.666011604709005e-05, "loss": 0.1212, "step": 281 }, { "epoch": 6.45, "learning_rate": 5.598240509824642e-05, "loss": 0.1744, "step": 282 }, { "epoch": 6.47, "learning_rate": 5.530719249141147e-05, "loss": 0.062, "step": 283 }, { "epoch": 6.49, "learning_rate": 5.463451655016988e-05, "loss": 0.1408, "step": 284 }, { "epoch": 6.51, "learning_rate": 5.39644154541305e-05, "loss": 0.0819, "step": 285 }, { "epoch": 6.54, "learning_rate": 5.329692723675994e-05, "loss": 0.118, "step": 286 }, { "epoch": 6.56, "learning_rate": 5.263208978322326e-05, "loss": 0.0602, "step": 287 }, { "epoch": 6.58, "learning_rate": 5.1969940828234184e-05, "loss": 0.0708, "step": 288 }, { "epoch": 6.61, "learning_rate": 5.131051795391302e-05, "loss": 0.107, "step": 289 }, { "epoch": 6.63, "learning_rate": 5.065385858765383e-05, "loss": 0.0621, "step": 290 }, { "epoch": 6.65, "learning_rate": 5.000000000000002e-05, "loss": 0.0428, "step": 291 }, { "epoch": 6.67, "learning_rate": 4.934897930252886e-05, "loss": 0.111, "step": 292 }, { "epoch": 6.7, "learning_rate": 4.870083344574531e-05, "loss": 0.1184, "step": 293 }, { "epoch": 6.72, "learning_rate": 4.805559921698464e-05, "loss": 0.0919, "step": 294 }, { "epoch": 6.74, "learning_rate": 4.7413313238324556e-05, "loss": 0.0477, "step": 295 }, { "epoch": 6.77, "learning_rate": 4.6774011964506435e-05, "loss": 0.0738, "step": 296 }, { "epoch": 6.79, "learning_rate": 4.613773168086657e-05, "loss": 0.101, "step": 297 }, { "epoch": 6.81, "learning_rate": 4.550450850127625e-05, "loss": 0.0585, "step": 298 }, { "epoch": 6.83, "learning_rate": 4.4874378366092476e-05, "loss": 0.0443, "step": 299 }, { "epoch": 6.86, "learning_rate": 4.42473770401176e-05, "loss": 0.1272, "step": 300 }, { "epoch": 6.88, "learning_rate": 4.3623540110569935e-05, "loss": 0.1569, "step": 301 }, { "epoch": 6.9, "learning_rate": 4.300290298506333e-05, "loss": 0.0314, "step": 302 }, { "epoch": 6.93, "learning_rate": 4.238550088959796e-05, "loss": 0.1179, "step": 303 }, { "epoch": 6.95, "learning_rate": 4.1771368866560665e-05, "loss": 0.1037, "step": 304 }, { "epoch": 6.97, "learning_rate": 4.116054177273627e-05, "loss": 0.0898, "step": 305 }, { "epoch": 6.99, "learning_rate": 4.0553054277329074e-05, "loss": 0.1015, "step": 306 }, { "epoch": 7.02, "learning_rate": 3.9948940859994966e-05, "loss": 0.0652, "step": 307 }, { "epoch": 7.04, "learning_rate": 3.9348235808884724e-05, "loss": 0.0403, "step": 308 }, { "epoch": 7.06, "learning_rate": 3.875097321869768e-05, "loss": 0.0501, "step": 309 }, { "epoch": 7.09, "learning_rate": 3.815718698874672e-05, "loss": 0.0874, "step": 310 }, { "epoch": 7.11, "learning_rate": 3.7566910821034005e-05, "loss": 0.0336, "step": 311 }, { "epoch": 7.13, "learning_rate": 3.698017821833844e-05, "loss": 0.0606, "step": 312 }, { "epoch": 7.15, "learning_rate": 3.6397022482313805e-05, "loss": 0.0154, "step": 313 }, { "epoch": 7.18, "learning_rate": 3.5817476711598906e-05, "loss": 0.0232, "step": 314 }, { "epoch": 7.2, "learning_rate": 3.524157379993882e-05, "loss": 0.0202, "step": 315 }, { "epoch": 7.22, "learning_rate": 3.466934643431795e-05, "loss": 0.0991, "step": 316 }, { "epoch": 7.25, "learning_rate": 3.4100827093104694e-05, "loss": 0.1159, "step": 317 }, { "epoch": 7.27, "learning_rate": 3.353604804420821e-05, "loss": 0.012, "step": 318 }, { "epoch": 7.29, "learning_rate": 3.2975041343246936e-05, "loss": 0.0735, "step": 319 }, { "epoch": 7.31, "learning_rate": 3.241783883172895e-05, "loss": 0.0097, "step": 320 }, { "epoch": 7.34, "learning_rate": 3.186447213524508e-05, "loss": 0.03, "step": 321 }, { "epoch": 7.36, "learning_rate": 3.131497266167357e-05, "loss": 0.0764, "step": 322 }, { "epoch": 7.38, "learning_rate": 3.076937159939768e-05, "loss": 0.0166, "step": 323 }, { "epoch": 7.41, "learning_rate": 3.0227699915535367e-05, "loss": 0.1195, "step": 324 }, { "epoch": 7.43, "learning_rate": 2.968998835418174e-05, "loss": 0.117, "step": 325 }, { "epoch": 7.45, "learning_rate": 2.9156267434663963e-05, "loss": 0.0241, "step": 326 }, { "epoch": 7.47, "learning_rate": 2.862656744980926e-05, "loss": 0.0874, "step": 327 }, { "epoch": 7.5, "learning_rate": 2.81009184642253e-05, "loss": 0.062, "step": 328 }, { "epoch": 7.52, "learning_rate": 2.757935031259402e-05, "loss": 0.0262, "step": 329 }, { "epoch": 7.54, "learning_rate": 2.7061892597978177e-05, "loss": 0.1282, "step": 330 }, { "epoch": 7.57, "learning_rate": 2.6548574690141125e-05, "loss": 0.0045, "step": 331 }, { "epoch": 7.59, "learning_rate": 2.603942572387993e-05, "loss": 0.0423, "step": 332 }, { "epoch": 7.61, "learning_rate": 2.553447459737157e-05, "loss": 0.0448, "step": 333 }, { "epoch": 7.63, "learning_rate": 2.5033749970533015e-05, "loss": 0.0534, "step": 334 }, { "epoch": 7.66, "learning_rate": 2.4537280263394258e-05, "loss": 0.04, "step": 335 }, { "epoch": 7.68, "learning_rate": 2.4045093654485518e-05, "loss": 0.0356, "step": 336 }, { "epoch": 7.7, "learning_rate": 2.355721807923761e-05, "loss": 0.0786, "step": 337 }, { "epoch": 7.73, "learning_rate": 2.307368122839675e-05, "loss": 0.0441, "step": 338 }, { "epoch": 7.75, "learning_rate": 2.2594510546452507e-05, "loss": 0.0155, "step": 339 }, { "epoch": 7.77, "learning_rate": 2.2119733230080408e-05, "loss": 0.0217, "step": 340 }, { "epoch": 7.79, "learning_rate": 2.1649376226598106e-05, "loss": 0.0472, "step": 341 }, { "epoch": 7.82, "learning_rate": 2.1183466232436088e-05, "loss": 0.0354, "step": 342 }, { "epoch": 7.84, "learning_rate": 2.0722029691622336e-05, "loss": 0.0702, "step": 343 }, { "epoch": 7.86, "learning_rate": 2.026509279428137e-05, "loss": 0.0759, "step": 344 }, { "epoch": 7.89, "learning_rate": 1.9812681475147942e-05, "loss": 0.1333, "step": 345 }, { "epoch": 7.91, "learning_rate": 1.9364821412094857e-05, "loss": 0.0323, "step": 346 }, { "epoch": 7.93, "learning_rate": 1.8921538024675678e-05, "loss": 0.0105, "step": 347 }, { "epoch": 7.95, "learning_rate": 1.848285647268181e-05, "loss": 0.0554, "step": 348 }, { "epoch": 7.98, "learning_rate": 1.8048801654714688e-05, "loss": 0.045, "step": 349 }, { "epoch": 8.0, "learning_rate": 1.761939820677241e-05, "loss": 0.0068, "step": 350 }, { "epoch": 8.02, "learning_rate": 1.7194670500851616e-05, "loss": 0.024, "step": 351 }, { "epoch": 8.05, "learning_rate": 1.6774642643563953e-05, "loss": 0.0245, "step": 352 }, { "epoch": 8.07, "learning_rate": 1.6359338474768193e-05, "loss": 0.0177, "step": 353 }, { "epoch": 8.09, "learning_rate": 1.594878156621672e-05, "loss": 0.0234, "step": 354 }, { "epoch": 8.11, "learning_rate": 1.554299522021796e-05, "loss": 0.0174, "step": 355 }, { "epoch": 8.14, "learning_rate": 1.5142002468313699e-05, "loss": 0.0074, "step": 356 }, { "epoch": 8.16, "learning_rate": 1.4745826069971758e-05, "loss": 0.0468, "step": 357 }, { "epoch": 8.18, "learning_rate": 1.4354488511294417e-05, "loss": 0.0051, "step": 358 }, { "epoch": 8.21, "learning_rate": 1.3968012003741948e-05, "loss": 0.0042, "step": 359 }, { "epoch": 8.23, "learning_rate": 1.35864184828721e-05, "loss": 0.0071, "step": 360 }, { "epoch": 8.25, "learning_rate": 1.3209729607095023e-05, "loss": 0.0074, "step": 361 }, { "epoch": 8.27, "learning_rate": 1.2837966756443975e-05, "loss": 0.0087, "step": 362 }, { "epoch": 8.3, "learning_rate": 1.2471151031361794e-05, "loss": 0.0081, "step": 363 }, { "epoch": 8.32, "learning_rate": 1.2109303251503434e-05, "loss": 0.0068, "step": 364 }, { "epoch": 8.34, "learning_rate": 1.1752443954554082e-05, "loss": 0.0068, "step": 365 }, { "epoch": 8.37, "learning_rate": 1.1400593395063686e-05, "loss": 0.01, "step": 366 }, { "epoch": 8.39, "learning_rate": 1.1053771543297198e-05, "loss": 0.0078, "step": 367 }, { "epoch": 8.41, "learning_rate": 1.0711998084101205e-05, "loss": 0.0106, "step": 368 }, { "epoch": 8.43, "learning_rate": 1.0375292415786575e-05, "loss": 0.0035, "step": 369 }, { "epoch": 8.46, "learning_rate": 1.0043673649027518e-05, "loss": 0.0715, "step": 370 }, { "epoch": 8.48, "learning_rate": 9.717160605776932e-06, "loss": 0.0093, "step": 371 }, { "epoch": 8.5, "learning_rate": 9.39577181819794e-06, "loss": 0.0815, "step": 372 }, { "epoch": 8.53, "learning_rate": 9.07952552761232e-06, "loss": 0.0086, "step": 373 }, { "epoch": 8.55, "learning_rate": 8.768439683464868e-06, "loss": 0.0138, "step": 374 }, { "epoch": 8.57, "learning_rate": 8.462531942304896e-06, "loss": 0.0132, "step": 375 }, { "epoch": 8.59, "learning_rate": 8.161819666783888e-06, "loss": 0.0141, "step": 376 }, { "epoch": 8.62, "learning_rate": 7.866319924670163e-06, "loss": 0.0477, "step": 377 }, { "epoch": 8.64, "learning_rate": 7.576049487880033e-06, "loss": 0.0103, "step": 378 }, { "epoch": 8.66, "learning_rate": 7.291024831525961e-06, "loss": 0.0044, "step": 379 }, { "epoch": 8.69, "learning_rate": 7.011262132981456e-06, "loss": 0.051, "step": 380 }, { "epoch": 8.71, "learning_rate": 6.7367772709627905e-06, "loss": 0.0031, "step": 381 }, { "epoch": 8.73, "learning_rate": 6.467585824627887e-06, "loss": 0.016, "step": 382 }, { "epoch": 8.75, "learning_rate": 6.203703072692013e-06, "loss": 0.0054, "step": 383 }, { "epoch": 8.78, "learning_rate": 5.945143992560587e-06, "loss": 0.0042, "step": 384 }, { "epoch": 8.8, "learning_rate": 5.691923259479093e-06, "loss": 0.0133, "step": 385 }, { "epoch": 8.82, "learning_rate": 5.444055245700208e-06, "loss": 0.026, "step": 386 }, { "epoch": 8.85, "learning_rate": 5.201554019667965e-06, "loss": 0.0347, "step": 387 }, { "epoch": 8.87, "learning_rate": 4.964433345219355e-06, "loss": 0.0498, "step": 388 }, { "epoch": 8.89, "learning_rate": 4.732706680803045e-06, "loss": 0.0218, "step": 389 }, { "epoch": 8.91, "learning_rate": 4.506387178715565e-06, "loss": 0.0094, "step": 390 }, { "epoch": 8.94, "learning_rate": 4.285487684354772e-06, "loss": 0.0798, "step": 391 }, { "epoch": 8.96, "learning_rate": 4.070020735490809e-06, "loss": 0.0036, "step": 392 }, { "epoch": 8.98, "learning_rate": 3.859998561554434e-06, "loss": 0.0086, "step": 393 }, { "epoch": 9.01, "learning_rate": 3.655433082942972e-06, "loss": 0.0673, "step": 394 }, { "epoch": 9.03, "learning_rate": 3.4563359103436886e-06, "loss": 0.0103, "step": 395 }, { "epoch": 9.05, "learning_rate": 3.262718344074811e-06, "loss": 0.0038, "step": 396 }, { "epoch": 9.07, "learning_rate": 3.0745913734441355e-06, "loss": 0.0127, "step": 397 }, { "epoch": 9.1, "learning_rate": 2.891965676125352e-06, "loss": 0.0093, "step": 398 }, { "epoch": 9.12, "learning_rate": 2.7148516175519277e-06, "loss": 0.0137, "step": 399 }, { "epoch": 9.14, "learning_rate": 2.5432592503288e-06, "loss": 0.0027, "step": 400 }, { "epoch": 9.17, "learning_rate": 2.377198313661877e-06, "loss": 0.0102, "step": 401 }, { "epoch": 9.19, "learning_rate": 2.2166782328051803e-06, "loss": 0.0051, "step": 402 }, { "epoch": 9.21, "learning_rate": 2.0617081185259512e-06, "loss": 0.0027, "step": 403 }, { "epoch": 9.23, "learning_rate": 1.912296766587507e-06, "loss": 0.0074, "step": 404 }, { "epoch": 9.26, "learning_rate": 1.7684526572500416e-06, "loss": 0.0029, "step": 405 }, { "epoch": 9.28, "learning_rate": 1.6301839547892328e-06, "loss": 0.0106, "step": 406 }, { "epoch": 9.3, "learning_rate": 1.4974985070329683e-06, "loss": 0.0243, "step": 407 }, { "epoch": 9.33, "learning_rate": 1.3704038449158573e-06, "loss": 0.0031, "step": 408 }, { "epoch": 9.35, "learning_rate": 1.2489071820517396e-06, "loss": 0.0041, "step": 409 }, { "epoch": 9.37, "learning_rate": 1.1330154143243787e-06, "loss": 0.0096, "step": 410 }, { "epoch": 9.39, "learning_rate": 1.0227351194959545e-06, "loss": 0.0103, "step": 411 }, { "epoch": 9.42, "learning_rate": 9.180725568338044e-07, "loss": 0.0069, "step": 412 }, { "epoch": 9.44, "learning_rate": 8.190336667550868e-07, "loss": 0.0048, "step": 413 }, { "epoch": 9.46, "learning_rate": 7.256240704897166e-07, "loss": 0.0057, "step": 414 }, { "epoch": 9.49, "learning_rate": 6.378490697611761e-07, "loss": 0.003, "step": 415 }, { "epoch": 9.51, "learning_rate": 5.55713646485756e-07, "loss": 0.0067, "step": 416 }, { "epoch": 9.53, "learning_rate": 4.79222462489648e-07, "loss": 0.0036, "step": 417 }, { "epoch": 9.55, "learning_rate": 4.0837985924448984e-07, "loss": 0.0055, "step": 418 }, { "epoch": 9.58, "learning_rate": 3.431898576208292e-07, "loss": 0.0027, "step": 419 }, { "epoch": 9.6, "learning_rate": 2.836561576599839e-07, "loss": 0.0069, "step": 420 }, { "epoch": 9.62, "learning_rate": 2.2978213836400975e-07, "loss": 0.0185, "step": 421 }, { "epoch": 9.65, "learning_rate": 1.815708575038988e-07, "loss": 0.0238, "step": 422 }, { "epoch": 9.67, "learning_rate": 1.3902505144608446e-07, "loss": 0.012, "step": 423 }, { "epoch": 9.69, "learning_rate": 1.0214713499706597e-07, "loss": 0.003, "step": 424 }, { "epoch": 9.71, "learning_rate": 7.093920126638454e-08, "loss": 0.0022, "step": 425 }, { "epoch": 9.74, "learning_rate": 4.54030215478074e-08, "loss": 0.0037, "step": 426 }, { "epoch": 9.76, "learning_rate": 2.5540045218819253e-08, "loss": 0.0024, "step": 427 }, { "epoch": 9.78, "learning_rate": 1.1351399658321438e-08, "loss": 0.0027, "step": 428 }, { "epoch": 9.81, "learning_rate": 2.8378901826831005e-09, "loss": 0.0033, "step": 429 }, { "epoch": 9.83, "learning_rate": 0.0, "loss": 0.0051, "step": 430 }, { "epoch": 9.83, "step": 430, "total_flos": 2.686707530150707e+16, "train_loss": 0.1627144819580365, "train_runtime": 335.6804, "train_samples_per_second": 20.794, "train_steps_per_second": 1.281 } ], "logging_steps": 1.0, "max_steps": 430, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 50000, "total_flos": 2.686707530150707e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }