{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 2226, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 0.00029999626535870253, "loss": 1.7113, "step": 10 }, { "epoch": 0.03, "learning_rate": 0.0002999850616207776, "loss": 1.6682, "step": 20 }, { "epoch": 0.04, "learning_rate": 0.00029996638934411774, "loss": 1.6829, "step": 30 }, { "epoch": 0.05, "learning_rate": 0.00029994024945851293, "loss": 1.4132, "step": 40 }, { "epoch": 0.07, "learning_rate": 0.00029990664326560453, "loss": 1.4066, "step": 50 }, { "epoch": 0.08, "learning_rate": 0.0002998655724388202, "loss": 1.5205, "step": 60 }, { "epoch": 0.09, "learning_rate": 0.0002998170390232907, "loss": 1.5003, "step": 70 }, { "epoch": 0.11, "learning_rate": 0.000299761045435748, "loss": 1.4407, "step": 80 }, { "epoch": 0.12, "learning_rate": 0.0002996975944644049, "loss": 1.4323, "step": 90 }, { "epoch": 0.13, "learning_rate": 0.0002996266892688164, "loss": 1.4167, "step": 100 }, { "epoch": 0.15, "learning_rate": 0.00029954833337972206, "loss": 1.4195, "step": 110 }, { "epoch": 0.16, "learning_rate": 0.0002994625306988704, "loss": 1.4177, "step": 120 }, { "epoch": 0.18, "learning_rate": 0.0002993692854988246, "loss": 1.4279, "step": 130 }, { "epoch": 0.19, "learning_rate": 0.0002992686024227496, "loss": 1.3698, "step": 140 }, { "epoch": 0.2, "learning_rate": 0.0002991604864841811, "loss": 1.4032, "step": 150 }, { "epoch": 0.22, "learning_rate": 0.00029904494306677576, "loss": 1.2865, "step": 160 }, { "epoch": 0.23, "learning_rate": 0.00029892197792404313, "loss": 1.2427, "step": 170 }, { "epoch": 0.24, "learning_rate": 0.0002987915971790592, "loss": 1.2646, "step": 180 }, { "epoch": 0.26, "learning_rate": 0.00029865380732416153, "loss": 1.2246, "step": 190 }, { "epoch": 0.27, "learning_rate": 0.00029850861522062586, "loss": 1.2994, "step": 200 }, { "epoch": 0.28, "learning_rate": 0.00029835602809832456, "loss": 1.4174, "step": 210 }, { "epoch": 0.3, "learning_rate": 0.00029819605355536655, "loss": 1.1977, "step": 220 }, { "epoch": 0.31, "learning_rate": 0.0002980286995577189, "loss": 1.2475, "step": 230 }, { "epoch": 0.32, "learning_rate": 0.0002978539744388104, "loss": 1.3046, "step": 240 }, { "epoch": 0.34, "learning_rate": 0.00029767188689911616, "loss": 1.231, "step": 250 }, { "epoch": 0.35, "learning_rate": 0.00029748244600572493, "loss": 1.1623, "step": 260 }, { "epoch": 0.36, "learning_rate": 0.0002972856611918871, "loss": 1.22, "step": 270 }, { "epoch": 0.38, "learning_rate": 0.00029708154225654526, "loss": 1.3153, "step": 280 }, { "epoch": 0.39, "learning_rate": 0.00029687009936384606, "loss": 1.0245, "step": 290 }, { "epoch": 0.4, "learning_rate": 0.0002966513430426344, "loss": 1.1617, "step": 300 }, { "epoch": 0.42, "learning_rate": 0.0002964252841859287, "loss": 1.3038, "step": 310 }, { "epoch": 0.43, "learning_rate": 0.00029619193405037905, "loss": 1.2176, "step": 320 }, { "epoch": 0.44, "learning_rate": 0.000295951304255706, "loss": 1.0473, "step": 330 }, { "epoch": 0.46, "learning_rate": 0.0002957034067841225, "loss": 1.0024, "step": 340 }, { "epoch": 0.47, "learning_rate": 0.00029544825397973706, "loss": 1.0392, "step": 350 }, { "epoch": 0.49, "learning_rate": 0.00029518585854793896, "loss": 1.1253, "step": 360 }, { "epoch": 0.5, "learning_rate": 0.00029491623355476574, "loss": 1.2854, "step": 370 }, { "epoch": 0.51, "learning_rate": 0.0002946393924262526, "loss": 1.2807, "step": 380 }, { "epoch": 0.53, "learning_rate": 0.0002943553489477636, "loss": 1.1513, "step": 390 }, { "epoch": 0.54, "learning_rate": 0.00029406411726330553, "loss": 0.971, "step": 400 }, { "epoch": 0.55, "learning_rate": 0.0002937657118748234, "loss": 0.97, "step": 410 }, { "epoch": 0.57, "learning_rate": 0.00029346014764147836, "loss": 1.0773, "step": 420 }, { "epoch": 0.58, "learning_rate": 0.0002931474397789078, "loss": 1.1714, "step": 430 }, { "epoch": 0.59, "learning_rate": 0.0002928276038584677, "loss": 1.1828, "step": 440 }, { "epoch": 0.61, "learning_rate": 0.0002925006558064572, "loss": 1.0298, "step": 450 }, { "epoch": 0.62, "learning_rate": 0.0002921666119033256, "loss": 1.0366, "step": 460 }, { "epoch": 0.63, "learning_rate": 0.0002918254887828617, "loss": 0.9101, "step": 470 }, { "epoch": 0.65, "learning_rate": 0.0002914773034313653, "loss": 0.9801, "step": 480 }, { "epoch": 0.66, "learning_rate": 0.0002911220731868018, "loss": 0.9764, "step": 490 }, { "epoch": 0.67, "learning_rate": 0.00029075981573793827, "loss": 0.8117, "step": 500 }, { "epoch": 0.69, "learning_rate": 0.00029039054912346313, "loss": 0.9689, "step": 510 }, { "epoch": 0.7, "learning_rate": 0.0002900142917310877, "loss": 1.068, "step": 520 }, { "epoch": 0.71, "learning_rate": 0.00028963106229663063, "loss": 0.9002, "step": 530 }, { "epoch": 0.73, "learning_rate": 0.0002892408799030848, "loss": 0.7903, "step": 540 }, { "epoch": 0.74, "learning_rate": 0.00028884376397966734, "loss": 0.8156, "step": 550 }, { "epoch": 0.75, "learning_rate": 0.00028843973430085204, "loss": 1.0247, "step": 560 }, { "epoch": 0.77, "learning_rate": 0.00028802881098538433, "loss": 0.8413, "step": 570 }, { "epoch": 0.78, "learning_rate": 0.0002876110144952802, "loss": 0.9629, "step": 580 }, { "epoch": 0.8, "learning_rate": 0.00028718636563480654, "loss": 0.9488, "step": 590 }, { "epoch": 0.81, "learning_rate": 0.0002867548855494457, "loss": 0.74, "step": 600 }, { "epoch": 0.82, "learning_rate": 0.00028631659572484223, "loss": 0.8782, "step": 610 }, { "epoch": 0.84, "learning_rate": 0.0002858715179857333, "loss": 0.7538, "step": 620 }, { "epoch": 0.85, "learning_rate": 0.0002854196744948615, "loss": 1.0124, "step": 630 }, { "epoch": 0.86, "learning_rate": 0.00028496108775187177, "loss": 1.0094, "step": 640 }, { "epoch": 0.88, "learning_rate": 0.00028449578059219045, "loss": 0.9834, "step": 650 }, { "epoch": 0.89, "learning_rate": 0.0002840237761858889, "loss": 0.8183, "step": 660 }, { "epoch": 0.9, "learning_rate": 0.00028354509803652894, "loss": 0.7833, "step": 670 }, { "epoch": 0.92, "learning_rate": 0.00028305976997999307, "loss": 1.0735, "step": 680 }, { "epoch": 0.93, "learning_rate": 0.00028256781618329733, "loss": 0.6456, "step": 690 }, { "epoch": 0.94, "learning_rate": 0.0002820692611433879, "loss": 0.8017, "step": 700 }, { "epoch": 0.96, "learning_rate": 0.00028156412968592144, "loss": 0.639, "step": 710 }, { "epoch": 0.97, "learning_rate": 0.0002810524469640285, "loss": 0.9926, "step": 720 }, { "epoch": 0.98, "learning_rate": 0.0002805342384570614, "loss": 0.6367, "step": 730 }, { "epoch": 1.0, "learning_rate": 0.00028000952996932554, "loss": 0.7394, "step": 740 }, { "epoch": 1.01, "learning_rate": 0.0002794783476287939, "loss": 0.5386, "step": 750 }, { "epoch": 1.02, "learning_rate": 0.0002789407178858066, "loss": 0.7581, "step": 760 }, { "epoch": 1.04, "learning_rate": 0.00027839666751175354, "loss": 0.7513, "step": 770 }, { "epoch": 1.05, "learning_rate": 0.0002778462235977413, "loss": 0.9431, "step": 780 }, { "epoch": 1.06, "learning_rate": 0.0002772894135532442, "loss": 0.9494, "step": 790 }, { "epoch": 1.08, "learning_rate": 0.00027672626510473936, "loss": 0.6138, "step": 800 }, { "epoch": 1.09, "learning_rate": 0.0002761568062943261, "loss": 0.8516, "step": 810 }, { "epoch": 1.11, "learning_rate": 0.00027558106547832985, "loss": 0.7844, "step": 820 }, { "epoch": 1.12, "learning_rate": 0.0002749990713258895, "loss": 0.6772, "step": 830 }, { "epoch": 1.13, "learning_rate": 0.00027441085281753024, "loss": 0.6081, "step": 840 }, { "epoch": 1.15, "learning_rate": 0.0002738164392437207, "loss": 0.7722, "step": 850 }, { "epoch": 1.16, "learning_rate": 0.0002732158602034138, "loss": 0.6299, "step": 860 }, { "epoch": 1.17, "learning_rate": 0.00027260914560257345, "loss": 0.6504, "step": 870 }, { "epoch": 1.19, "learning_rate": 0.00027199632565268504, "loss": 0.637, "step": 880 }, { "epoch": 1.2, "learning_rate": 0.0002713774308692511, "loss": 0.7156, "step": 890 }, { "epoch": 1.21, "learning_rate": 0.00027075249207027187, "loss": 0.797, "step": 900 }, { "epoch": 1.23, "learning_rate": 0.00027012154037471065, "loss": 0.8322, "step": 910 }, { "epoch": 1.24, "learning_rate": 0.00026948460720094416, "loss": 0.7509, "step": 920 }, { "epoch": 1.25, "learning_rate": 0.0002688417242651983, "loss": 0.716, "step": 930 }, { "epoch": 1.27, "learning_rate": 0.00026819292357996847, "loss": 0.6985, "step": 940 }, { "epoch": 1.28, "learning_rate": 0.00026753823745242583, "loss": 0.7565, "step": 950 }, { "epoch": 1.29, "learning_rate": 0.0002668776984828083, "loss": 0.9529, "step": 960 }, { "epoch": 1.31, "learning_rate": 0.0002662113395627975, "loss": 0.7075, "step": 970 }, { "epoch": 1.32, "learning_rate": 0.0002655391938738806, "loss": 0.7943, "step": 980 }, { "epoch": 1.33, "learning_rate": 0.00026486129488569824, "loss": 0.8068, "step": 990 }, { "epoch": 1.35, "learning_rate": 0.0002641776763543778, "loss": 0.7974, "step": 1000 }, { "epoch": 1.36, "learning_rate": 0.0002634883723208527, "loss": 0.667, "step": 1010 }, { "epoch": 1.37, "learning_rate": 0.0002627934171091669, "loss": 0.8704, "step": 1020 }, { "epoch": 1.39, "learning_rate": 0.00026209284532476636, "loss": 0.5226, "step": 1030 }, { "epoch": 1.4, "learning_rate": 0.0002613866918527752, "loss": 0.6797, "step": 1040 }, { "epoch": 1.42, "learning_rate": 0.0002606749918562591, "loss": 0.6037, "step": 1050 }, { "epoch": 1.43, "learning_rate": 0.0002599577807744739, "loss": 0.539, "step": 1060 }, { "epoch": 1.44, "learning_rate": 0.0002592350943211014, "loss": 0.628, "step": 1070 }, { "epoch": 1.46, "learning_rate": 0.0002585069684824706, "loss": 0.7272, "step": 1080 }, { "epoch": 1.47, "learning_rate": 0.0002577734395157657, "loss": 0.6978, "step": 1090 }, { "epoch": 1.48, "learning_rate": 0.00025703454394722115, "loss": 0.5347, "step": 1100 }, { "epoch": 1.5, "learning_rate": 0.00025629031857030225, "loss": 0.6832, "step": 1110 }, { "epoch": 1.51, "learning_rate": 0.0002555408004438734, "loss": 0.4517, "step": 1120 }, { "epoch": 1.52, "learning_rate": 0.00025478602689035253, "loss": 0.4694, "step": 1130 }, { "epoch": 1.54, "learning_rate": 0.00025402603549385284, "loss": 0.5531, "step": 1140 }, { "epoch": 1.55, "learning_rate": 0.000253260864098311, "loss": 0.965, "step": 1150 }, { "epoch": 1.56, "learning_rate": 0.00025249055080560297, "loss": 0.5112, "step": 1160 }, { "epoch": 1.58, "learning_rate": 0.0002517151339736464, "loss": 0.6728, "step": 1170 }, { "epoch": 1.59, "learning_rate": 0.00025093465221449115, "loss": 0.7728, "step": 1180 }, { "epoch": 1.6, "learning_rate": 0.0002501491443923959, "loss": 0.5934, "step": 1190 }, { "epoch": 1.62, "learning_rate": 0.0002493586496218933, "loss": 0.6981, "step": 1200 }, { "epoch": 1.63, "learning_rate": 0.0002485632072658423, "loss": 0.5475, "step": 1210 }, { "epoch": 1.64, "learning_rate": 0.0002477628569334679, "loss": 0.691, "step": 1220 }, { "epoch": 1.66, "learning_rate": 0.00024695763847838866, "loss": 0.7188, "step": 1230 }, { "epoch": 1.67, "learning_rate": 0.00024614759199663265, "loss": 0.6256, "step": 1240 }, { "epoch": 1.68, "learning_rate": 0.0002453327578246404, "loss": 0.4491, "step": 1250 }, { "epoch": 1.7, "learning_rate": 0.0002445131765372567, "loss": 0.7007, "step": 1260 }, { "epoch": 1.71, "learning_rate": 0.00024368888894570962, "loss": 0.6256, "step": 1270 }, { "epoch": 1.73, "learning_rate": 0.000242859936095579, "loss": 0.7769, "step": 1280 }, { "epoch": 1.74, "learning_rate": 0.00024202635926475223, "loss": 0.5488, "step": 1290 }, { "epoch": 1.75, "learning_rate": 0.00024118819996136865, "loss": 0.4278, "step": 1300 }, { "epoch": 1.77, "learning_rate": 0.00024034549992175288, "loss": 0.4966, "step": 1310 }, { "epoch": 1.78, "learning_rate": 0.0002394983011083366, "loss": 0.4736, "step": 1320 }, { "epoch": 1.79, "learning_rate": 0.00023864664570756873, "loss": 0.6513, "step": 1330 }, { "epoch": 1.81, "learning_rate": 0.00023779057612781506, "loss": 0.5389, "step": 1340 }, { "epoch": 1.82, "learning_rate": 0.00023693013499724632, "loss": 0.3802, "step": 1350 }, { "epoch": 1.83, "learning_rate": 0.0002360653651617156, "loss": 0.754, "step": 1360 }, { "epoch": 1.85, "learning_rate": 0.00023519630968262477, "loss": 0.6096, "step": 1370 }, { "epoch": 1.86, "learning_rate": 0.00023432301183478018, "loss": 0.5114, "step": 1380 }, { "epoch": 1.87, "learning_rate": 0.00023344551510423808, "loss": 0.6215, "step": 1390 }, { "epoch": 1.89, "learning_rate": 0.00023256386318613873, "loss": 0.5015, "step": 1400 }, { "epoch": 1.9, "learning_rate": 0.00023167809998253102, "loss": 0.6841, "step": 1410 }, { "epoch": 1.91, "learning_rate": 0.00023078826960018612, "loss": 0.7431, "step": 1420 }, { "epoch": 1.93, "learning_rate": 0.00022989441634840128, "loss": 0.6028, "step": 1430 }, { "epoch": 1.94, "learning_rate": 0.00022899658473679344, "loss": 0.6164, "step": 1440 }, { "epoch": 1.95, "learning_rate": 0.00022809481947308276, "loss": 0.7823, "step": 1450 }, { "epoch": 1.97, "learning_rate": 0.0002271891654608665, "loss": 0.5562, "step": 1460 }, { "epoch": 1.98, "learning_rate": 0.00022627966779738306, "loss": 0.6174, "step": 1470 }, { "epoch": 1.99, "learning_rate": 0.00022536637177126615, "loss": 0.671, "step": 1480 }, { "epoch": 2.01, "learning_rate": 0.00022444932286028987, "loss": 0.6333, "step": 1490 }, { "epoch": 2.02, "learning_rate": 0.00022352856672910404, "loss": 0.5254, "step": 1500 }, { "epoch": 2.04, "learning_rate": 0.00022260414922696027, "loss": 0.4731, "step": 1510 }, { "epoch": 2.05, "learning_rate": 0.00022167611638542896, "loss": 0.7305, "step": 1520 }, { "epoch": 2.06, "learning_rate": 0.00022074451441610708, "loss": 0.5911, "step": 1530 }, { "epoch": 2.08, "learning_rate": 0.00021980938970831717, "loss": 0.4527, "step": 1540 }, { "epoch": 2.09, "learning_rate": 0.00021887078882679723, "loss": 0.4437, "step": 1550 }, { "epoch": 2.1, "learning_rate": 0.0002179287585093822, "loss": 0.5298, "step": 1560 }, { "epoch": 2.12, "learning_rate": 0.00021698334566467626, "loss": 0.7712, "step": 1570 }, { "epoch": 2.13, "learning_rate": 0.0002160345973697176, "loss": 0.5333, "step": 1580 }, { "epoch": 2.14, "learning_rate": 0.00021508256086763368, "loss": 0.6186, "step": 1590 }, { "epoch": 2.16, "learning_rate": 0.00021412728356528905, "loss": 0.5444, "step": 1600 }, { "epoch": 2.17, "learning_rate": 0.00021316881303092445, "loss": 0.385, "step": 1610 }, { "epoch": 2.18, "learning_rate": 0.00021220719699178848, "loss": 0.5459, "step": 1620 }, { "epoch": 2.2, "learning_rate": 0.00021124248333176079, "loss": 0.4447, "step": 1630 }, { "epoch": 2.21, "learning_rate": 0.0002102747200889677, "loss": 0.5434, "step": 1640 }, { "epoch": 2.22, "learning_rate": 0.00020930395545339008, "loss": 0.5391, "step": 1650 }, { "epoch": 2.24, "learning_rate": 0.00020833023776446407, "loss": 0.5926, "step": 1660 }, { "epoch": 2.25, "learning_rate": 0.00020735361550867345, "loss": 0.6304, "step": 1670 }, { "epoch": 2.26, "learning_rate": 0.0002063741373171357, "loss": 0.4942, "step": 1680 }, { "epoch": 2.28, "learning_rate": 0.00020539185196318023, "loss": 0.507, "step": 1690 }, { "epoch": 2.29, "learning_rate": 0.00020440680835991969, "loss": 0.4658, "step": 1700 }, { "epoch": 2.3, "learning_rate": 0.00020341905555781433, "loss": 0.4042, "step": 1710 }, { "epoch": 2.32, "learning_rate": 0.00020242864274222955, "loss": 0.5539, "step": 1720 }, { "epoch": 2.33, "learning_rate": 0.0002014356192309868, "loss": 0.4031, "step": 1730 }, { "epoch": 2.35, "learning_rate": 0.00020044003447190756, "loss": 0.4963, "step": 1740 }, { "epoch": 2.36, "learning_rate": 0.00019944193804035117, "loss": 0.3302, "step": 1750 }, { "epoch": 2.37, "learning_rate": 0.00019844137963674643, "loss": 0.5527, "step": 1760 }, { "epoch": 2.39, "learning_rate": 0.0001974384090841164, "loss": 0.485, "step": 1770 }, { "epoch": 2.4, "learning_rate": 0.00019643307632559776, "loss": 0.6018, "step": 1780 }, { "epoch": 2.41, "learning_rate": 0.0001954254314219536, "loss": 0.4818, "step": 1790 }, { "epoch": 2.43, "learning_rate": 0.00019441552454908096, "loss": 0.5312, "step": 1800 }, { "epoch": 2.44, "learning_rate": 0.00019340340599551193, "loss": 0.5679, "step": 1810 }, { "epoch": 2.45, "learning_rate": 0.00019238912615990983, "loss": 0.3859, "step": 1820 }, { "epoch": 2.47, "learning_rate": 0.0001913727355485595, "loss": 0.4745, "step": 1830 }, { "epoch": 2.48, "learning_rate": 0.0001903542847728523, "loss": 0.5523, "step": 1840 }, { "epoch": 2.49, "learning_rate": 0.00018933382454676588, "loss": 0.3833, "step": 1850 }, { "epoch": 2.51, "learning_rate": 0.00018831140568433897, "loss": 0.4132, "step": 1860 }, { "epoch": 2.52, "learning_rate": 0.000187287079097141, "loss": 0.3685, "step": 1870 }, { "epoch": 2.53, "learning_rate": 0.000186260895791737, "loss": 0.6318, "step": 1880 }, { "epoch": 2.55, "learning_rate": 0.00018523290686714756, "loss": 0.4088, "step": 1890 }, { "epoch": 2.56, "learning_rate": 0.0001842031635123046, "loss": 0.5499, "step": 1900 }, { "epoch": 2.57, "learning_rate": 0.00018317171700350224, "loss": 0.4856, "step": 1910 }, { "epoch": 2.59, "learning_rate": 0.0001821386187018435, "loss": 0.6596, "step": 1920 }, { "epoch": 2.6, "learning_rate": 0.00018110392005068286, "loss": 0.3197, "step": 1930 }, { "epoch": 2.61, "learning_rate": 0.00018006767257306447, "loss": 0.3975, "step": 1940 }, { "epoch": 2.63, "learning_rate": 0.00017902992786915663, "loss": 0.3733, "step": 1950 }, { "epoch": 2.64, "learning_rate": 0.00017799073761368234, "loss": 0.4203, "step": 1960 }, { "epoch": 2.65, "learning_rate": 0.00017695015355334624, "loss": 0.4533, "step": 1970 }, { "epoch": 2.67, "learning_rate": 0.00017590822750425774, "loss": 0.3846, "step": 1980 }, { "epoch": 2.68, "learning_rate": 0.0001748650113493508, "loss": 0.4219, "step": 1990 }, { "epoch": 2.7, "learning_rate": 0.0001738205570358006, "loss": 0.2895, "step": 2000 }, { "epoch": 2.71, "learning_rate": 0.00017277491657243668, "loss": 0.3751, "step": 2010 }, { "epoch": 2.72, "learning_rate": 0.000171728142027153, "loss": 0.508, "step": 2020 }, { "epoch": 2.74, "learning_rate": 0.00017068028552431566, "loss": 0.5577, "step": 2030 }, { "epoch": 2.75, "learning_rate": 0.00016963139924216675, "loss": 0.4342, "step": 2040 }, { "epoch": 2.76, "learning_rate": 0.00016858153541022676, "loss": 0.3891, "step": 2050 }, { "epoch": 2.78, "learning_rate": 0.00016753074630669327, "loss": 0.4064, "step": 2060 }, { "epoch": 2.79, "learning_rate": 0.00016647908425583804, "loss": 0.3571, "step": 2070 }, { "epoch": 2.8, "learning_rate": 0.00016542660162540136, "loss": 0.53, "step": 2080 }, { "epoch": 2.82, "learning_rate": 0.00016437335082398455, "loss": 0.4457, "step": 2090 }, { "epoch": 2.83, "learning_rate": 0.00016331938429844022, "loss": 0.4608, "step": 2100 }, { "epoch": 2.84, "learning_rate": 0.0001622647545312604, "loss": 0.4084, "step": 2110 }, { "epoch": 2.86, "learning_rate": 0.00016120951403796364, "loss": 0.6194, "step": 2120 }, { "epoch": 2.87, "learning_rate": 0.0001601537153644795, "loss": 0.5458, "step": 2130 }, { "epoch": 2.88, "learning_rate": 0.00015909741108453243, "loss": 0.3484, "step": 2140 }, { "epoch": 2.9, "learning_rate": 0.00015804065379702352, "loss": 0.2758, "step": 2150 }, { "epoch": 2.91, "learning_rate": 0.00015698349612341156, "loss": 0.2401, "step": 2160 }, { "epoch": 2.92, "learning_rate": 0.00015592599070509265, "loss": 0.4804, "step": 2170 }, { "epoch": 2.94, "learning_rate": 0.00015486819020077886, "loss": 0.4722, "step": 2180 }, { "epoch": 2.95, "learning_rate": 0.0001538101472838762, "loss": 0.4195, "step": 2190 }, { "epoch": 2.96, "learning_rate": 0.00015275191463986159, "loss": 0.4658, "step": 2200 }, { "epoch": 2.98, "learning_rate": 0.00015169354496365948, "loss": 0.5397, "step": 2210 }, { "epoch": 2.99, "learning_rate": 0.0001506350909570179, "loss": 0.4333, "step": 2220 } ], "logging_steps": 10, "max_steps": 4452, "num_train_epochs": 6, "save_steps": 500, "total_flos": 1.163304048181248e+16, "trial_name": null, "trial_params": null }