|
{ |
|
"best_metric": 0.8914665579795837, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-110", |
|
"epoch": 0.26252983293556087, |
|
"eval_steps": 5, |
|
"global_step": 110, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.002386634844868735, |
|
"grad_norm": 1.6264880895614624, |
|
"learning_rate": 2e-05, |
|
"loss": 1.7576, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.002386634844868735, |
|
"eval_loss": 1.4666008949279785, |
|
"eval_runtime": 26.2232, |
|
"eval_samples_per_second": 6.75, |
|
"eval_steps_per_second": 3.394, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.00477326968973747, |
|
"grad_norm": 0.8592613935470581, |
|
"learning_rate": 4e-05, |
|
"loss": 0.8915, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.007159904534606206, |
|
"grad_norm": 0.985676646232605, |
|
"learning_rate": 6e-05, |
|
"loss": 1.1643, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.00954653937947494, |
|
"grad_norm": 1.3740118741989136, |
|
"learning_rate": 8e-05, |
|
"loss": 1.5305, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.011933174224343675, |
|
"grad_norm": 0.9007270336151123, |
|
"learning_rate": 0.0001, |
|
"loss": 1.191, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.011933174224343675, |
|
"eval_loss": 1.421371340751648, |
|
"eval_runtime": 26.2293, |
|
"eval_samples_per_second": 6.748, |
|
"eval_steps_per_second": 3.393, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.014319809069212411, |
|
"grad_norm": 2.1037585735321045, |
|
"learning_rate": 0.00012, |
|
"loss": 1.6708, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.016706443914081145, |
|
"grad_norm": 0.9997168183326721, |
|
"learning_rate": 0.00014, |
|
"loss": 1.2175, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.01909307875894988, |
|
"grad_norm": 1.029247522354126, |
|
"learning_rate": 0.00016, |
|
"loss": 1.2724, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.021479713603818614, |
|
"grad_norm": 1.2429360151290894, |
|
"learning_rate": 0.00018, |
|
"loss": 1.3846, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.02386634844868735, |
|
"grad_norm": 0.4330953061580658, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4449, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02386634844868735, |
|
"eval_loss": 1.1578279733657837, |
|
"eval_runtime": 27.585, |
|
"eval_samples_per_second": 6.417, |
|
"eval_steps_per_second": 3.226, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.026252983293556086, |
|
"grad_norm": 0.9032881259918213, |
|
"learning_rate": 0.0001999979446958366, |
|
"loss": 1.1913, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.028639618138424822, |
|
"grad_norm": 1.043502926826477, |
|
"learning_rate": 0.00019999177886783194, |
|
"loss": 1.067, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.031026252983293555, |
|
"grad_norm": 0.984898030757904, |
|
"learning_rate": 0.00019998150276943902, |
|
"loss": 1.2715, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.03341288782816229, |
|
"grad_norm": 0.8400496244430542, |
|
"learning_rate": 0.000199967116823068, |
|
"loss": 0.8158, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.03579952267303103, |
|
"grad_norm": 0.925491452217102, |
|
"learning_rate": 0.0001999486216200688, |
|
"loss": 1.6707, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.03579952267303103, |
|
"eval_loss": 1.0528874397277832, |
|
"eval_runtime": 26.2645, |
|
"eval_samples_per_second": 6.739, |
|
"eval_steps_per_second": 3.389, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.03818615751789976, |
|
"grad_norm": 0.749546468257904, |
|
"learning_rate": 0.00019992601792070679, |
|
"loss": 0.9433, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.0405727923627685, |
|
"grad_norm": 0.9893090128898621, |
|
"learning_rate": 0.00019989930665413147, |
|
"loss": 1.0754, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.04295942720763723, |
|
"grad_norm": 0.8510335087776184, |
|
"learning_rate": 0.00019986848891833845, |
|
"loss": 1.2892, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.045346062052505964, |
|
"grad_norm": 0.8910903334617615, |
|
"learning_rate": 0.0001998335659801241, |
|
"loss": 0.8846, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.0477326968973747, |
|
"grad_norm": 0.8577730059623718, |
|
"learning_rate": 0.00019979453927503364, |
|
"loss": 0.9342, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0477326968973747, |
|
"eval_loss": 1.0087474584579468, |
|
"eval_runtime": 26.2663, |
|
"eval_samples_per_second": 6.739, |
|
"eval_steps_per_second": 3.388, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.050119331742243436, |
|
"grad_norm": 0.9688092470169067, |
|
"learning_rate": 0.00019975141040730207, |
|
"loss": 1.1933, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.05250596658711217, |
|
"grad_norm": 0.7774447202682495, |
|
"learning_rate": 0.0001997041811497882, |
|
"loss": 0.8412, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.05489260143198091, |
|
"grad_norm": 0.7570910453796387, |
|
"learning_rate": 0.00019965285344390184, |
|
"loss": 0.8872, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.057279236276849645, |
|
"grad_norm": 0.5232482552528381, |
|
"learning_rate": 0.00019959742939952392, |
|
"loss": 0.9852, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.059665871121718374, |
|
"grad_norm": 1.0864367485046387, |
|
"learning_rate": 0.00019953791129491983, |
|
"loss": 1.1447, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.059665871121718374, |
|
"eval_loss": 0.9940932393074036, |
|
"eval_runtime": 26.2586, |
|
"eval_samples_per_second": 6.741, |
|
"eval_steps_per_second": 3.389, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.06205250596658711, |
|
"grad_norm": 0.9833227396011353, |
|
"learning_rate": 0.00019947430157664576, |
|
"loss": 1.4313, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.06443914081145585, |
|
"grad_norm": 0.7902147769927979, |
|
"learning_rate": 0.00019940660285944803, |
|
"loss": 1.0728, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.06682577565632458, |
|
"grad_norm": 0.6701146364212036, |
|
"learning_rate": 0.00019933481792615583, |
|
"loss": 0.8888, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.06921241050119331, |
|
"grad_norm": 0.845893383026123, |
|
"learning_rate": 0.0001992589497275665, |
|
"loss": 0.9849, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.07159904534606205, |
|
"grad_norm": 0.7827535271644592, |
|
"learning_rate": 0.0001991790013823246, |
|
"loss": 1.331, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07159904534606205, |
|
"eval_loss": 0.9710781574249268, |
|
"eval_runtime": 26.2207, |
|
"eval_samples_per_second": 6.75, |
|
"eval_steps_per_second": 3.394, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07398568019093078, |
|
"grad_norm": 0.9344210028648376, |
|
"learning_rate": 0.00019909497617679348, |
|
"loss": 1.013, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.07637231503579953, |
|
"grad_norm": 0.6921480894088745, |
|
"learning_rate": 0.0001990068775649202, |
|
"loss": 0.6896, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.07875894988066826, |
|
"grad_norm": 0.804854154586792, |
|
"learning_rate": 0.00019891470916809362, |
|
"loss": 0.8884, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.081145584725537, |
|
"grad_norm": 0.7858572602272034, |
|
"learning_rate": 0.00019881847477499557, |
|
"loss": 0.8469, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.08353221957040573, |
|
"grad_norm": 0.7166551947593689, |
|
"learning_rate": 0.00019871817834144504, |
|
"loss": 0.9763, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.08353221957040573, |
|
"eval_loss": 0.9543755054473877, |
|
"eval_runtime": 26.2507, |
|
"eval_samples_per_second": 6.743, |
|
"eval_steps_per_second": 3.39, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.08591885441527446, |
|
"grad_norm": 0.7922567129135132, |
|
"learning_rate": 0.0001986138239902355, |
|
"loss": 1.0582, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.0883054892601432, |
|
"grad_norm": 1.0015374422073364, |
|
"learning_rate": 0.0001985054160109657, |
|
"loss": 1.0839, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.09069212410501193, |
|
"grad_norm": 1.455461025238037, |
|
"learning_rate": 0.00019839295885986296, |
|
"loss": 1.2132, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.09307875894988067, |
|
"grad_norm": 0.781535267829895, |
|
"learning_rate": 0.0001982764571596004, |
|
"loss": 0.7504, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.0954653937947494, |
|
"grad_norm": 0.8738696575164795, |
|
"learning_rate": 0.00019815591569910654, |
|
"loss": 0.6841, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0954653937947494, |
|
"eval_loss": 0.9494202136993408, |
|
"eval_runtime": 26.2561, |
|
"eval_samples_per_second": 6.741, |
|
"eval_steps_per_second": 3.39, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.09785202863961814, |
|
"grad_norm": 1.0066485404968262, |
|
"learning_rate": 0.00019803133943336874, |
|
"loss": 1.1701, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.10023866348448687, |
|
"grad_norm": 0.8418964147567749, |
|
"learning_rate": 0.0001979027334832293, |
|
"loss": 1.0454, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.1026252983293556, |
|
"grad_norm": 0.9529140591621399, |
|
"learning_rate": 0.00019777010313517518, |
|
"loss": 0.9311, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.10501193317422435, |
|
"grad_norm": 0.9450991749763489, |
|
"learning_rate": 0.00019763345384112043, |
|
"loss": 0.9843, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.10739856801909307, |
|
"grad_norm": 0.8391886353492737, |
|
"learning_rate": 0.00019749279121818235, |
|
"loss": 0.7273, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.10739856801909307, |
|
"eval_loss": 0.9417486786842346, |
|
"eval_runtime": 26.2253, |
|
"eval_samples_per_second": 6.749, |
|
"eval_steps_per_second": 3.394, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.10978520286396182, |
|
"grad_norm": 0.8711974620819092, |
|
"learning_rate": 0.00019734812104845047, |
|
"loss": 1.0276, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.11217183770883055, |
|
"grad_norm": 0.6002468466758728, |
|
"learning_rate": 0.00019719944927874881, |
|
"loss": 0.7602, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.11455847255369929, |
|
"grad_norm": 1.059345006942749, |
|
"learning_rate": 0.0001970467820203915, |
|
"loss": 1.3701, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.11694510739856802, |
|
"grad_norm": 0.9398312568664551, |
|
"learning_rate": 0.00019689012554893154, |
|
"loss": 0.9587, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.11933174224343675, |
|
"grad_norm": 0.8663720488548279, |
|
"learning_rate": 0.00019672948630390294, |
|
"loss": 0.766, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.11933174224343675, |
|
"eval_loss": 0.9387193918228149, |
|
"eval_runtime": 26.2728, |
|
"eval_samples_per_second": 6.737, |
|
"eval_steps_per_second": 3.388, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.12171837708830549, |
|
"grad_norm": 0.7873828411102295, |
|
"learning_rate": 0.00019656487088855592, |
|
"loss": 1.2591, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.12410501193317422, |
|
"grad_norm": 0.6708641648292542, |
|
"learning_rate": 0.00019639628606958533, |
|
"loss": 0.7497, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.12649164677804295, |
|
"grad_norm": 0.5296116471290588, |
|
"learning_rate": 0.0001962237387768529, |
|
"loss": 0.9209, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.1288782816229117, |
|
"grad_norm": 0.701153576374054, |
|
"learning_rate": 0.00019604723610310194, |
|
"loss": 0.9039, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.13126491646778043, |
|
"grad_norm": 0.607571005821228, |
|
"learning_rate": 0.00019586678530366606, |
|
"loss": 0.9841, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.13126491646778043, |
|
"eval_loss": 0.9331147074699402, |
|
"eval_runtime": 26.2365, |
|
"eval_samples_per_second": 6.746, |
|
"eval_steps_per_second": 3.392, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.13365155131264916, |
|
"grad_norm": 0.7479352951049805, |
|
"learning_rate": 0.00019568239379617088, |
|
"loss": 0.8665, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.1360381861575179, |
|
"grad_norm": 0.8115050196647644, |
|
"learning_rate": 0.00019549406916022905, |
|
"loss": 1.0235, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.13842482100238662, |
|
"grad_norm": 0.5173358917236328, |
|
"learning_rate": 0.00019530181913712872, |
|
"loss": 0.8548, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.14081145584725538, |
|
"grad_norm": 0.917434811592102, |
|
"learning_rate": 0.00019510565162951537, |
|
"loss": 0.9314, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.1431980906921241, |
|
"grad_norm": 0.7886952757835388, |
|
"learning_rate": 0.00019490557470106686, |
|
"loss": 0.8733, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.1431980906921241, |
|
"eval_loss": 0.9312522411346436, |
|
"eval_runtime": 26.2683, |
|
"eval_samples_per_second": 6.738, |
|
"eval_steps_per_second": 3.388, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.14558472553699284, |
|
"grad_norm": 0.7969014644622803, |
|
"learning_rate": 0.00019470159657616215, |
|
"loss": 0.6603, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.14797136038186157, |
|
"grad_norm": 0.9583745002746582, |
|
"learning_rate": 0.00019449372563954293, |
|
"loss": 1.466, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.15035799522673032, |
|
"grad_norm": 0.7162270545959473, |
|
"learning_rate": 0.0001942819704359693, |
|
"loss": 0.8824, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.15274463007159905, |
|
"grad_norm": 0.47627589106559753, |
|
"learning_rate": 0.00019406633966986828, |
|
"loss": 0.6296, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.15513126491646778, |
|
"grad_norm": 0.7122361063957214, |
|
"learning_rate": 0.00019384684220497605, |
|
"loss": 0.64, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.15513126491646778, |
|
"eval_loss": 0.9237020015716553, |
|
"eval_runtime": 26.2941, |
|
"eval_samples_per_second": 6.732, |
|
"eval_steps_per_second": 3.385, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.1575178997613365, |
|
"grad_norm": 0.8619460463523865, |
|
"learning_rate": 0.00019362348706397373, |
|
"loss": 1.1417, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.15990453460620524, |
|
"grad_norm": 0.7796176075935364, |
|
"learning_rate": 0.00019339628342811632, |
|
"loss": 0.859, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.162291169451074, |
|
"grad_norm": 0.8874034285545349, |
|
"learning_rate": 0.0001931652406368554, |
|
"loss": 0.9786, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.16467780429594273, |
|
"grad_norm": 0.7292467951774597, |
|
"learning_rate": 0.0001929303681874552, |
|
"loss": 0.8093, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.16706443914081145, |
|
"grad_norm": 0.8368440270423889, |
|
"learning_rate": 0.0001926916757346022, |
|
"loss": 1.3638, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.16706443914081145, |
|
"eval_loss": 0.9212387800216675, |
|
"eval_runtime": 26.2497, |
|
"eval_samples_per_second": 6.743, |
|
"eval_steps_per_second": 3.391, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.16945107398568018, |
|
"grad_norm": 0.6919611692428589, |
|
"learning_rate": 0.00019244917309000817, |
|
"loss": 0.9292, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.1718377088305489, |
|
"grad_norm": 0.8706820607185364, |
|
"learning_rate": 0.00019220287022200707, |
|
"loss": 1.122, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.17422434367541767, |
|
"grad_norm": 0.6912488341331482, |
|
"learning_rate": 0.0001919527772551451, |
|
"loss": 1.0077, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.1766109785202864, |
|
"grad_norm": 0.6089036464691162, |
|
"learning_rate": 0.00019169890446976454, |
|
"loss": 0.9816, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.17899761336515513, |
|
"grad_norm": 0.7333152294158936, |
|
"learning_rate": 0.00019144126230158127, |
|
"loss": 0.8958, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.17899761336515513, |
|
"eval_loss": 0.9194671511650085, |
|
"eval_runtime": 26.2263, |
|
"eval_samples_per_second": 6.749, |
|
"eval_steps_per_second": 3.394, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.18138424821002386, |
|
"grad_norm": 0.7732102870941162, |
|
"learning_rate": 0.0001911798613412557, |
|
"loss": 0.8417, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.18377088305489261, |
|
"grad_norm": 0.6444439888000488, |
|
"learning_rate": 0.0001909147123339575, |
|
"loss": 0.9722, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.18615751789976134, |
|
"grad_norm": 0.522572934627533, |
|
"learning_rate": 0.0001906458261789238, |
|
"loss": 0.7848, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.18854415274463007, |
|
"grad_norm": 0.6048774719238281, |
|
"learning_rate": 0.00019037321392901136, |
|
"loss": 1.0934, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.1909307875894988, |
|
"grad_norm": 0.7766179442405701, |
|
"learning_rate": 0.0001900968867902419, |
|
"loss": 0.8923, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.1909307875894988, |
|
"eval_loss": 0.9133721590042114, |
|
"eval_runtime": 26.2379, |
|
"eval_samples_per_second": 6.746, |
|
"eval_steps_per_second": 3.392, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.19331742243436753, |
|
"grad_norm": 0.6204676032066345, |
|
"learning_rate": 0.0001898168561213419, |
|
"loss": 0.7503, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.1957040572792363, |
|
"grad_norm": 0.5830619931221008, |
|
"learning_rate": 0.0001895331334332753, |
|
"loss": 0.7373, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.19809069212410502, |
|
"grad_norm": 0.6380212306976318, |
|
"learning_rate": 0.0001892457303887706, |
|
"loss": 0.789, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.20047732696897375, |
|
"grad_norm": 0.6974972486495972, |
|
"learning_rate": 0.0001889546588018412, |
|
"loss": 0.8987, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.20286396181384247, |
|
"grad_norm": 0.6037717461585999, |
|
"learning_rate": 0.00018865993063730004, |
|
"loss": 1.1555, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.20286396181384247, |
|
"eval_loss": 0.9121592044830322, |
|
"eval_runtime": 26.2683, |
|
"eval_samples_per_second": 6.738, |
|
"eval_steps_per_second": 3.388, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.2052505966587112, |
|
"grad_norm": 0.5582723617553711, |
|
"learning_rate": 0.00018836155801026753, |
|
"loss": 0.6236, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.20763723150357996, |
|
"grad_norm": 0.7444891929626465, |
|
"learning_rate": 0.0001880595531856738, |
|
"loss": 0.6428, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.2100238663484487, |
|
"grad_norm": 0.5842642188072205, |
|
"learning_rate": 0.00018775392857775432, |
|
"loss": 0.9149, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.21241050119331742, |
|
"grad_norm": 0.6907650232315063, |
|
"learning_rate": 0.00018744469674953956, |
|
"loss": 0.8653, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.21479713603818615, |
|
"grad_norm": 0.7942304611206055, |
|
"learning_rate": 0.00018713187041233896, |
|
"loss": 0.7069, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.21479713603818615, |
|
"eval_loss": 0.910210132598877, |
|
"eval_runtime": 26.2879, |
|
"eval_samples_per_second": 6.733, |
|
"eval_steps_per_second": 3.386, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.2171837708830549, |
|
"grad_norm": 0.7289912104606628, |
|
"learning_rate": 0.00018681546242521786, |
|
"loss": 0.7671, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.21957040572792363, |
|
"grad_norm": 0.6675541400909424, |
|
"learning_rate": 0.00018649548579446936, |
|
"loss": 0.6645, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.22195704057279236, |
|
"grad_norm": 0.4568694829940796, |
|
"learning_rate": 0.0001861719536730795, |
|
"loss": 0.7777, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.2243436754176611, |
|
"grad_norm": 0.715552568435669, |
|
"learning_rate": 0.00018584487936018661, |
|
"loss": 1.1339, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.22673031026252982, |
|
"grad_norm": 0.8277347087860107, |
|
"learning_rate": 0.00018551427630053463, |
|
"loss": 0.9398, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.22673031026252982, |
|
"eval_loss": 0.9004982113838196, |
|
"eval_runtime": 26.2476, |
|
"eval_samples_per_second": 6.743, |
|
"eval_steps_per_second": 3.391, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.22911694510739858, |
|
"grad_norm": 0.7335620522499084, |
|
"learning_rate": 0.00018518015808392045, |
|
"loss": 0.8793, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.2315035799522673, |
|
"grad_norm": 0.5709030032157898, |
|
"learning_rate": 0.00018484253844463526, |
|
"loss": 0.7161, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.23389021479713604, |
|
"grad_norm": 0.6625402569770813, |
|
"learning_rate": 0.00018450143126090015, |
|
"loss": 0.8631, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.23627684964200477, |
|
"grad_norm": 0.3961223065853119, |
|
"learning_rate": 0.00018415685055429533, |
|
"loss": 0.6657, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.2386634844868735, |
|
"grad_norm": 0.8134841322898865, |
|
"learning_rate": 0.00018380881048918405, |
|
"loss": 1.1527, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.2386634844868735, |
|
"eval_loss": 0.8966440558433533, |
|
"eval_runtime": 26.2452, |
|
"eval_samples_per_second": 6.744, |
|
"eval_steps_per_second": 3.391, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.24105011933174225, |
|
"grad_norm": 0.6637277007102966, |
|
"learning_rate": 0.00018345732537213027, |
|
"loss": 1.175, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.24343675417661098, |
|
"grad_norm": 0.5118420124053955, |
|
"learning_rate": 0.00018310240965131041, |
|
"loss": 0.852, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.2458233890214797, |
|
"grad_norm": 0.6100435256958008, |
|
"learning_rate": 0.00018274407791591966, |
|
"loss": 0.8036, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.24821002386634844, |
|
"grad_norm": 0.8629751801490784, |
|
"learning_rate": 0.00018238234489557215, |
|
"loss": 0.9638, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.25059665871121717, |
|
"grad_norm": 0.6925487518310547, |
|
"learning_rate": 0.0001820172254596956, |
|
"loss": 1.0022, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.25059665871121717, |
|
"eval_loss": 0.8950950503349304, |
|
"eval_runtime": 26.2737, |
|
"eval_samples_per_second": 6.737, |
|
"eval_steps_per_second": 3.387, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.2529832935560859, |
|
"grad_norm": 0.6932248473167419, |
|
"learning_rate": 0.00018164873461691986, |
|
"loss": 0.7119, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.2553699284009546, |
|
"grad_norm": 0.6727349162101746, |
|
"learning_rate": 0.00018127688751446027, |
|
"loss": 1.0059, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.2577565632458234, |
|
"grad_norm": 0.7314417362213135, |
|
"learning_rate": 0.00018090169943749476, |
|
"loss": 0.8165, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.26014319809069214, |
|
"grad_norm": 0.5489733219146729, |
|
"learning_rate": 0.0001805231858085356, |
|
"loss": 0.8262, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.26252983293556087, |
|
"grad_norm": 0.5698820948600769, |
|
"learning_rate": 0.00018014136218679567, |
|
"loss": 0.7001, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.26252983293556087, |
|
"eval_loss": 0.8914665579795837, |
|
"eval_runtime": 26.2981, |
|
"eval_samples_per_second": 6.731, |
|
"eval_steps_per_second": 3.384, |
|
"step": 110 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 500, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 10, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 1, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.153367887413248e+16, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|