|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.023342261748452116, |
|
"eval_steps": 500, |
|
"global_step": 1000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00023342261748452117, |
|
"grad_norm": 0.2070077508687973, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7672, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.00046684523496904234, |
|
"grad_norm": 0.2195936143398285, |
|
"learning_rate": 0.0001959183673469388, |
|
"loss": 1.1291, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0007002678524535634, |
|
"grad_norm": 0.1822710633277893, |
|
"learning_rate": 0.00019183673469387756, |
|
"loss": 0.6041, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0009336904699380847, |
|
"grad_norm": 0.10866066068410873, |
|
"learning_rate": 0.00018775510204081634, |
|
"loss": 0.5399, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0011671130874226058, |
|
"grad_norm": 0.06832244247198105, |
|
"learning_rate": 0.00018367346938775512, |
|
"loss": 0.4337, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0014005357049071269, |
|
"grad_norm": 0.13112975656986237, |
|
"learning_rate": 0.0001795918367346939, |
|
"loss": 0.4785, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.0016339583223916481, |
|
"grad_norm": 0.05374117195606232, |
|
"learning_rate": 0.00017551020408163265, |
|
"loss": 0.4458, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.0018673809398761694, |
|
"grad_norm": 0.049559202045202255, |
|
"learning_rate": 0.00017142857142857143, |
|
"loss": 0.4517, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.0021008035573606906, |
|
"grad_norm": 0.10584782809019089, |
|
"learning_rate": 0.00016734693877551023, |
|
"loss": 0.4592, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.0023342261748452117, |
|
"grad_norm": 0.062141530215740204, |
|
"learning_rate": 0.00016326530612244898, |
|
"loss": 0.4625, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0025676487923297327, |
|
"grad_norm": 0.14701640605926514, |
|
"learning_rate": 0.00015918367346938776, |
|
"loss": 0.4818, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.0028010714098142537, |
|
"grad_norm": 0.048784978687763214, |
|
"learning_rate": 0.00015510204081632654, |
|
"loss": 0.4687, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.003034494027298775, |
|
"grad_norm": 0.05522393435239792, |
|
"learning_rate": 0.0001510204081632653, |
|
"loss": 0.4576, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.0032679166447832962, |
|
"grad_norm": 0.05478575825691223, |
|
"learning_rate": 0.0001469387755102041, |
|
"loss": 0.4666, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.0035013392622678173, |
|
"grad_norm": 0.09066344797611237, |
|
"learning_rate": 0.00014285714285714287, |
|
"loss": 0.4168, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.0037347618797523388, |
|
"grad_norm": 0.054524753242731094, |
|
"learning_rate": 0.00013877551020408165, |
|
"loss": 0.4813, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.00396818449723686, |
|
"grad_norm": 0.12929686903953552, |
|
"learning_rate": 0.0001346938775510204, |
|
"loss": 0.4981, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.004201607114721381, |
|
"grad_norm": 0.05895541235804558, |
|
"learning_rate": 0.00013061224489795917, |
|
"loss": 0.4078, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.004435029732205902, |
|
"grad_norm": 0.05701744183897972, |
|
"learning_rate": 0.00012653061224489798, |
|
"loss": 0.4323, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.004668452349690423, |
|
"grad_norm": 0.10815092921257019, |
|
"learning_rate": 0.00012244897959183676, |
|
"loss": 0.5232, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.004901874967174945, |
|
"grad_norm": 0.1338973492383957, |
|
"learning_rate": 0.00011836734693877552, |
|
"loss": 0.5053, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.005135297584659465, |
|
"grad_norm": 0.04165051504969597, |
|
"learning_rate": 0.00011428571428571428, |
|
"loss": 0.4149, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.005368720202143987, |
|
"grad_norm": 0.05300717428326607, |
|
"learning_rate": 0.00011020408163265306, |
|
"loss": 0.444, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.0056021428196285075, |
|
"grad_norm": 0.1370624154806137, |
|
"learning_rate": 0.00010612244897959185, |
|
"loss": 0.4525, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.005835565437113029, |
|
"grad_norm": 0.049909207969903946, |
|
"learning_rate": 0.00010204081632653062, |
|
"loss": 0.4497, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.00606898805459755, |
|
"grad_norm": 0.110743448138237, |
|
"learning_rate": 9.79591836734694e-05, |
|
"loss": 0.4837, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.006302410672082071, |
|
"grad_norm": 0.09541227668523788, |
|
"learning_rate": 9.387755102040817e-05, |
|
"loss": 0.49, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.0065358332895665925, |
|
"grad_norm": 0.05263066291809082, |
|
"learning_rate": 8.979591836734695e-05, |
|
"loss": 0.4437, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.006769255907051114, |
|
"grad_norm": 0.09211356937885284, |
|
"learning_rate": 8.571428571428571e-05, |
|
"loss": 0.4479, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.007002678524535635, |
|
"grad_norm": 0.05164729803800583, |
|
"learning_rate": 8.163265306122449e-05, |
|
"loss": 0.4329, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.007236101142020156, |
|
"grad_norm": 0.08837030827999115, |
|
"learning_rate": 7.755102040816327e-05, |
|
"loss": 0.4533, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.0074695237595046775, |
|
"grad_norm": 0.0369272343814373, |
|
"learning_rate": 7.346938775510205e-05, |
|
"loss": 0.3667, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.007702946376989198, |
|
"grad_norm": 0.059746578335762024, |
|
"learning_rate": 6.938775510204082e-05, |
|
"loss": 0.424, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.00793636899447372, |
|
"grad_norm": 0.04736114665865898, |
|
"learning_rate": 6.530612244897959e-05, |
|
"loss": 0.4538, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.00816979161195824, |
|
"grad_norm": 0.04814208671450615, |
|
"learning_rate": 6.122448979591838e-05, |
|
"loss": 0.4894, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.008403214229442763, |
|
"grad_norm": 0.04663668945431709, |
|
"learning_rate": 5.714285714285714e-05, |
|
"loss": 0.5158, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.008636636846927283, |
|
"grad_norm": 0.08329813182353973, |
|
"learning_rate": 5.3061224489795926e-05, |
|
"loss": 0.5901, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.008870059464411804, |
|
"grad_norm": 0.0947406217455864, |
|
"learning_rate": 4.89795918367347e-05, |
|
"loss": 0.4438, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.009103482081896326, |
|
"grad_norm": 0.048670731484889984, |
|
"learning_rate": 4.4897959183673474e-05, |
|
"loss": 0.4304, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.009336904699380847, |
|
"grad_norm": 0.12740883231163025, |
|
"learning_rate": 4.0816326530612245e-05, |
|
"loss": 0.5186, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.009570327316865367, |
|
"grad_norm": 0.13359272480010986, |
|
"learning_rate": 3.673469387755102e-05, |
|
"loss": 0.5146, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.00980374993434989, |
|
"grad_norm": 0.07435787469148636, |
|
"learning_rate": 3.265306122448979e-05, |
|
"loss": 0.4666, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.01003717255183441, |
|
"grad_norm": 0.05466726794838905, |
|
"learning_rate": 2.857142857142857e-05, |
|
"loss": 0.3812, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.01027059516931893, |
|
"grad_norm": 0.05390426889061928, |
|
"learning_rate": 2.448979591836735e-05, |
|
"loss": 0.4026, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.010504017786803453, |
|
"grad_norm": 0.055242184549570084, |
|
"learning_rate": 2.0408163265306123e-05, |
|
"loss": 0.437, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.010737440404287974, |
|
"grad_norm": 0.03994165360927582, |
|
"learning_rate": 1.6326530612244897e-05, |
|
"loss": 0.4343, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.010970863021772494, |
|
"grad_norm": 0.04847300797700882, |
|
"learning_rate": 1.2244897959183674e-05, |
|
"loss": 0.4618, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.011204285639257015, |
|
"grad_norm": 0.08686497807502747, |
|
"learning_rate": 8.163265306122448e-06, |
|
"loss": 0.4264, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.011437708256741537, |
|
"grad_norm": 0.09176526963710785, |
|
"learning_rate": 4.081632653061224e-06, |
|
"loss": 0.5168, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.011671130874226058, |
|
"grad_norm": 0.10465481132268906, |
|
"learning_rate": 0.0, |
|
"loss": 0.4519, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.011904553491710579, |
|
"grad_norm": 0.051657382398843765, |
|
"learning_rate": 9.8989898989899e-05, |
|
"loss": 0.4728, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.0121379761091951, |
|
"grad_norm": 0.062193650752305984, |
|
"learning_rate": 9.696969696969698e-05, |
|
"loss": 0.4483, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.012371398726679621, |
|
"grad_norm": 0.06362653523683548, |
|
"learning_rate": 9.494949494949495e-05, |
|
"loss": 0.4215, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.012604821344164142, |
|
"grad_norm": 0.06238653138279915, |
|
"learning_rate": 9.292929292929293e-05, |
|
"loss": 0.4224, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.012838243961648664, |
|
"grad_norm": 0.0477604866027832, |
|
"learning_rate": 9.090909090909092e-05, |
|
"loss": 0.448, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.013071666579133185, |
|
"grad_norm": 0.09850312024354935, |
|
"learning_rate": 8.888888888888889e-05, |
|
"loss": 0.4424, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.013305089196617706, |
|
"grad_norm": 0.06217048689723015, |
|
"learning_rate": 8.686868686868688e-05, |
|
"loss": 0.3644, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.013538511814102228, |
|
"grad_norm": 0.043189432471990585, |
|
"learning_rate": 8.484848484848486e-05, |
|
"loss": 0.4564, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.013771934431586749, |
|
"grad_norm": 0.10206077247858047, |
|
"learning_rate": 8.282828282828283e-05, |
|
"loss": 0.4176, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.01400535704907127, |
|
"grad_norm": 0.05712655559182167, |
|
"learning_rate": 8.080808080808081e-05, |
|
"loss": 0.3896, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.014238779666555791, |
|
"grad_norm": 0.04486239328980446, |
|
"learning_rate": 7.878787878787879e-05, |
|
"loss": 0.3761, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.014472202284040312, |
|
"grad_norm": 0.043401289731264114, |
|
"learning_rate": 7.676767676767676e-05, |
|
"loss": 0.4471, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.014705624901524833, |
|
"grad_norm": 0.4940922260284424, |
|
"learning_rate": 7.474747474747475e-05, |
|
"loss": 0.4569, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.014939047519009355, |
|
"grad_norm": 0.10270397365093231, |
|
"learning_rate": 7.272727272727273e-05, |
|
"loss": 0.4805, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.015172470136493876, |
|
"grad_norm": 0.13152533769607544, |
|
"learning_rate": 7.07070707070707e-05, |
|
"loss": 0.5194, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.015405892753978396, |
|
"grad_norm": 0.07382863759994507, |
|
"learning_rate": 6.86868686868687e-05, |
|
"loss": 0.4161, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.015639315371462917, |
|
"grad_norm": 0.08843934535980225, |
|
"learning_rate": 6.666666666666667e-05, |
|
"loss": 0.5265, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.01587273798894744, |
|
"grad_norm": 0.053686585277318954, |
|
"learning_rate": 6.464646464646466e-05, |
|
"loss": 0.4667, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.01610616060643196, |
|
"grad_norm": 0.05910225212574005, |
|
"learning_rate": 6.262626262626264e-05, |
|
"loss": 0.4254, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.01633958322391648, |
|
"grad_norm": 0.039652127772569656, |
|
"learning_rate": 6.060606060606061e-05, |
|
"loss": 0.4511, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.016573005841401003, |
|
"grad_norm": 0.0999956876039505, |
|
"learning_rate": 5.858585858585859e-05, |
|
"loss": 0.4396, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.016806428458885525, |
|
"grad_norm": 0.03926937282085419, |
|
"learning_rate": 5.6565656565656563e-05, |
|
"loss": 0.4178, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.017039851076370044, |
|
"grad_norm": 0.09462181478738785, |
|
"learning_rate": 5.4545454545454546e-05, |
|
"loss": 0.4092, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.017273273693854566, |
|
"grad_norm": 0.05022445321083069, |
|
"learning_rate": 5.2525252525252536e-05, |
|
"loss": 0.422, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.01750669631133909, |
|
"grad_norm": 0.10167255997657776, |
|
"learning_rate": 5.050505050505051e-05, |
|
"loss": 0.4028, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.017740118928823607, |
|
"grad_norm": 0.0910029336810112, |
|
"learning_rate": 4.848484848484849e-05, |
|
"loss": 0.4341, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.01797354154630813, |
|
"grad_norm": 0.047616615891456604, |
|
"learning_rate": 4.6464646464646464e-05, |
|
"loss": 0.411, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.018206964163792652, |
|
"grad_norm": 0.08828525990247726, |
|
"learning_rate": 4.4444444444444447e-05, |
|
"loss": 0.4616, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.01844038678127717, |
|
"grad_norm": 0.044807884842157364, |
|
"learning_rate": 4.242424242424243e-05, |
|
"loss": 0.4865, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.018673809398761693, |
|
"grad_norm": 0.08502307534217834, |
|
"learning_rate": 4.0404040404040405e-05, |
|
"loss": 0.4624, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.018907232016246216, |
|
"grad_norm": 0.1129627451300621, |
|
"learning_rate": 3.838383838383838e-05, |
|
"loss": 0.4338, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.019140654633730735, |
|
"grad_norm": 0.10634730011224747, |
|
"learning_rate": 3.6363636363636364e-05, |
|
"loss": 0.5142, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.019374077251215257, |
|
"grad_norm": 0.04792294651269913, |
|
"learning_rate": 3.434343434343435e-05, |
|
"loss": 0.4286, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.01960749986869978, |
|
"grad_norm": 0.046725083142519, |
|
"learning_rate": 3.232323232323233e-05, |
|
"loss": 0.4116, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.019840922486184298, |
|
"grad_norm": 0.052620138972997665, |
|
"learning_rate": 3.0303030303030306e-05, |
|
"loss": 0.4141, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.02007434510366882, |
|
"grad_norm": 0.10660973191261292, |
|
"learning_rate": 2.8282828282828282e-05, |
|
"loss": 0.4347, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.020307767721153343, |
|
"grad_norm": 0.0386226549744606, |
|
"learning_rate": 2.6262626262626268e-05, |
|
"loss": 0.4461, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.02054119033863786, |
|
"grad_norm": 0.07292847335338593, |
|
"learning_rate": 2.4242424242424244e-05, |
|
"loss": 0.4355, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.020774612956122384, |
|
"grad_norm": 0.06434612721204758, |
|
"learning_rate": 2.2222222222222223e-05, |
|
"loss": 0.5035, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.021008035573606906, |
|
"grad_norm": 0.10716721415519714, |
|
"learning_rate": 2.0202020202020203e-05, |
|
"loss": 0.3866, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.021241458191091425, |
|
"grad_norm": 0.04890590161085129, |
|
"learning_rate": 1.8181818181818182e-05, |
|
"loss": 0.3767, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.021474880808575948, |
|
"grad_norm": 0.06173992156982422, |
|
"learning_rate": 1.6161616161616165e-05, |
|
"loss": 0.4063, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.021708303426060466, |
|
"grad_norm": 0.053141020238399506, |
|
"learning_rate": 1.4141414141414141e-05, |
|
"loss": 0.4751, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.02194172604354499, |
|
"grad_norm": 0.05243794620037079, |
|
"learning_rate": 1.2121212121212122e-05, |
|
"loss": 0.4625, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.02217514866102951, |
|
"grad_norm": 0.061262525618076324, |
|
"learning_rate": 1.0101010101010101e-05, |
|
"loss": 0.4492, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.02240857127851403, |
|
"grad_norm": 0.10454926639795303, |
|
"learning_rate": 8.080808080808082e-06, |
|
"loss": 0.4522, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.022641993895998552, |
|
"grad_norm": 0.05348167195916176, |
|
"learning_rate": 6.060606060606061e-06, |
|
"loss": 0.434, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.022875416513483075, |
|
"grad_norm": 0.1315009742975235, |
|
"learning_rate": 4.040404040404041e-06, |
|
"loss": 0.5003, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.023108839130967593, |
|
"grad_norm": 0.0520632266998291, |
|
"learning_rate": 2.0202020202020206e-06, |
|
"loss": 0.3953, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.023342261748452116, |
|
"grad_norm": 0.08680278062820435, |
|
"learning_rate": 0.0, |
|
"loss": 0.432, |
|
"step": 1000 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 200, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 9.170835390089626e+17, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|