|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.007874009289690544, |
|
"eval_steps": 500, |
|
"global_step": 600, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.3123348816150905e-05, |
|
"grad_norm": 7.072159767150879, |
|
"learning_rate": 5e-06, |
|
"loss": 2.0831, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 2.624669763230181e-05, |
|
"grad_norm": 6.047312259674072, |
|
"learning_rate": 1e-05, |
|
"loss": 1.888, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 3.9370046448452714e-05, |
|
"grad_norm": 4.877658843994141, |
|
"learning_rate": 1.5e-05, |
|
"loss": 1.8215, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 5.249339526460362e-05, |
|
"grad_norm": 5.105004787445068, |
|
"learning_rate": 2e-05, |
|
"loss": 1.7382, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 6.561674408075452e-05, |
|
"grad_norm": 4.398859024047852, |
|
"learning_rate": 2.5e-05, |
|
"loss": 1.519, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 7.874009289690543e-05, |
|
"grad_norm": 4.532390117645264, |
|
"learning_rate": 3e-05, |
|
"loss": 1.6656, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 9.186344171305633e-05, |
|
"grad_norm": 5.0148749351501465, |
|
"learning_rate": 3.5e-05, |
|
"loss": 1.6811, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.00010498679052920724, |
|
"grad_norm": 4.4031877517700195, |
|
"learning_rate": 4e-05, |
|
"loss": 1.7468, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.00011811013934535815, |
|
"grad_norm": 5.005381107330322, |
|
"learning_rate": 4.5e-05, |
|
"loss": 1.3947, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.00013123348816150904, |
|
"grad_norm": 3.713514566421509, |
|
"learning_rate": 5e-05, |
|
"loss": 1.405, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.00014435683697765995, |
|
"grad_norm": 9.202157974243164, |
|
"learning_rate": 4.995798319327731e-05, |
|
"loss": 1.5283, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.00015748018579381085, |
|
"grad_norm": 4.239948749542236, |
|
"learning_rate": 4.991596638655463e-05, |
|
"loss": 1.3864, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.00017060353460996176, |
|
"grad_norm": 2.7827541828155518, |
|
"learning_rate": 4.9873949579831936e-05, |
|
"loss": 1.4856, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.00018372688342611267, |
|
"grad_norm": 3.2062172889709473, |
|
"learning_rate": 4.9831932773109245e-05, |
|
"loss": 1.1977, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.00019685023224226357, |
|
"grad_norm": 3.271573066711426, |
|
"learning_rate": 4.978991596638656e-05, |
|
"loss": 1.2708, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.00020997358105841448, |
|
"grad_norm": 2.543915033340454, |
|
"learning_rate": 4.974789915966387e-05, |
|
"loss": 1.4229, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.0002230969298745654, |
|
"grad_norm": 3.8253746032714844, |
|
"learning_rate": 4.970588235294118e-05, |
|
"loss": 1.1775, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.0002362202786907163, |
|
"grad_norm": 3.0684823989868164, |
|
"learning_rate": 4.966386554621849e-05, |
|
"loss": 1.1546, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.00024934362750686723, |
|
"grad_norm": 3.0834896564483643, |
|
"learning_rate": 4.9621848739495804e-05, |
|
"loss": 1.3198, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.0002624669763230181, |
|
"grad_norm": 3.402059555053711, |
|
"learning_rate": 4.957983193277311e-05, |
|
"loss": 1.2536, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.000275590325139169, |
|
"grad_norm": 2.9694066047668457, |
|
"learning_rate": 4.953781512605042e-05, |
|
"loss": 1.2723, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.0002887136739553199, |
|
"grad_norm": 2.3450403213500977, |
|
"learning_rate": 4.949579831932774e-05, |
|
"loss": 1.4902, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.0003018370227714708, |
|
"grad_norm": 3.2001914978027344, |
|
"learning_rate": 4.9453781512605046e-05, |
|
"loss": 1.3031, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.0003149603715876217, |
|
"grad_norm": 2.4799821376800537, |
|
"learning_rate": 4.9411764705882355e-05, |
|
"loss": 1.3414, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.0003280837204037726, |
|
"grad_norm": 2.330723285675049, |
|
"learning_rate": 4.936974789915967e-05, |
|
"loss": 1.133, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.0003412070692199235, |
|
"grad_norm": 2.308879852294922, |
|
"learning_rate": 4.932773109243698e-05, |
|
"loss": 1.4592, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.00035433041803607443, |
|
"grad_norm": 3.835871934890747, |
|
"learning_rate": 4.928571428571429e-05, |
|
"loss": 0.8886, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.00036745376685222534, |
|
"grad_norm": 2.9454944133758545, |
|
"learning_rate": 4.9243697478991605e-05, |
|
"loss": 1.1211, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.00038057711566837624, |
|
"grad_norm": 2.6786818504333496, |
|
"learning_rate": 4.920168067226891e-05, |
|
"loss": 1.1148, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.00039370046448452715, |
|
"grad_norm": 2.5377731323242188, |
|
"learning_rate": 4.9159663865546216e-05, |
|
"loss": 1.1519, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.00040682381330067806, |
|
"grad_norm": 3.0859804153442383, |
|
"learning_rate": 4.911764705882353e-05, |
|
"loss": 1.2233, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.00041994716211682896, |
|
"grad_norm": 2.6658570766448975, |
|
"learning_rate": 4.907563025210084e-05, |
|
"loss": 1.2685, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.00043307051093297987, |
|
"grad_norm": 2.3701725006103516, |
|
"learning_rate": 4.903361344537815e-05, |
|
"loss": 1.274, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.0004461938597491308, |
|
"grad_norm": 2.379979133605957, |
|
"learning_rate": 4.8991596638655466e-05, |
|
"loss": 1.4761, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.0004593172085652817, |
|
"grad_norm": 2.8867926597595215, |
|
"learning_rate": 4.8949579831932775e-05, |
|
"loss": 1.0537, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.0004724405573814326, |
|
"grad_norm": 2.3834433555603027, |
|
"learning_rate": 4.8907563025210084e-05, |
|
"loss": 0.9892, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.0004855639061975835, |
|
"grad_norm": 2.4166579246520996, |
|
"learning_rate": 4.886554621848739e-05, |
|
"loss": 1.3343, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.0004986872550137345, |
|
"grad_norm": 2.471297264099121, |
|
"learning_rate": 4.882352941176471e-05, |
|
"loss": 0.8938, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.0005118106038298853, |
|
"grad_norm": 2.4426910877227783, |
|
"learning_rate": 4.878151260504202e-05, |
|
"loss": 0.8963, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.0005249339526460362, |
|
"grad_norm": 2.60628342628479, |
|
"learning_rate": 4.8739495798319326e-05, |
|
"loss": 1.1995, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0005380573014621871, |
|
"grad_norm": 2.7314293384552, |
|
"learning_rate": 4.869747899159664e-05, |
|
"loss": 1.3207, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.000551180650278338, |
|
"grad_norm": 2.0626285076141357, |
|
"learning_rate": 4.865546218487395e-05, |
|
"loss": 1.0551, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.0005643039990944889, |
|
"grad_norm": 2.429570436477661, |
|
"learning_rate": 4.861344537815126e-05, |
|
"loss": 1.163, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.0005774273479106398, |
|
"grad_norm": 2.1089115142822266, |
|
"learning_rate": 4.8571428571428576e-05, |
|
"loss": 1.1919, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.0005905506967267908, |
|
"grad_norm": 2.2090256214141846, |
|
"learning_rate": 4.8529411764705885e-05, |
|
"loss": 1.0066, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.0006036740455429416, |
|
"grad_norm": 2.6260828971862793, |
|
"learning_rate": 4.8487394957983194e-05, |
|
"loss": 1.1277, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.0006167973943590926, |
|
"grad_norm": 2.6351325511932373, |
|
"learning_rate": 4.844537815126051e-05, |
|
"loss": 1.1907, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.0006299207431752434, |
|
"grad_norm": 2.4064207077026367, |
|
"learning_rate": 4.840336134453782e-05, |
|
"loss": 1.1826, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.0006430440919913944, |
|
"grad_norm": 2.2323639392852783, |
|
"learning_rate": 4.836134453781513e-05, |
|
"loss": 1.2202, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.0006561674408075452, |
|
"grad_norm": 2.712174654006958, |
|
"learning_rate": 4.831932773109244e-05, |
|
"loss": 1.0955, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0006692907896236962, |
|
"grad_norm": 1.9196995496749878, |
|
"learning_rate": 4.827731092436975e-05, |
|
"loss": 0.9956, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.000682414138439847, |
|
"grad_norm": 2.415881395339966, |
|
"learning_rate": 4.823529411764706e-05, |
|
"loss": 1.1644, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.000695537487255998, |
|
"grad_norm": 2.6141293048858643, |
|
"learning_rate": 4.819327731092437e-05, |
|
"loss": 1.2137, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.0007086608360721489, |
|
"grad_norm": 2.353332757949829, |
|
"learning_rate": 4.8151260504201686e-05, |
|
"loss": 1.0817, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.0007217841848882998, |
|
"grad_norm": 2.222496271133423, |
|
"learning_rate": 4.8109243697478995e-05, |
|
"loss": 0.9575, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.0007349075337044507, |
|
"grad_norm": 2.616074323654175, |
|
"learning_rate": 4.8067226890756304e-05, |
|
"loss": 1.1872, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.0007480308825206016, |
|
"grad_norm": 2.496767520904541, |
|
"learning_rate": 4.802521008403362e-05, |
|
"loss": 1.1774, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.0007611542313367525, |
|
"grad_norm": 2.3791749477386475, |
|
"learning_rate": 4.798319327731093e-05, |
|
"loss": 1.0514, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.0007742775801529034, |
|
"grad_norm": 2.317368745803833, |
|
"learning_rate": 4.794117647058824e-05, |
|
"loss": 1.1349, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.0007874009289690543, |
|
"grad_norm": 2.3613386154174805, |
|
"learning_rate": 4.7899159663865554e-05, |
|
"loss": 0.9665, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.0008005242777852053, |
|
"grad_norm": 2.117526054382324, |
|
"learning_rate": 4.785714285714286e-05, |
|
"loss": 1.1252, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.0008136476266013561, |
|
"grad_norm": 2.248126745223999, |
|
"learning_rate": 4.781512605042017e-05, |
|
"loss": 1.0003, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.0008267709754175071, |
|
"grad_norm": 2.05367374420166, |
|
"learning_rate": 4.777310924369748e-05, |
|
"loss": 1.4639, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.0008398943242336579, |
|
"grad_norm": 2.026202440261841, |
|
"learning_rate": 4.7731092436974796e-05, |
|
"loss": 1.1478, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.0008530176730498089, |
|
"grad_norm": 2.1997697353363037, |
|
"learning_rate": 4.7689075630252105e-05, |
|
"loss": 1.2204, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.0008661410218659597, |
|
"grad_norm": 1.8945584297180176, |
|
"learning_rate": 4.7647058823529414e-05, |
|
"loss": 1.0535, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.0008792643706821107, |
|
"grad_norm": 2.010007619857788, |
|
"learning_rate": 4.760504201680672e-05, |
|
"loss": 1.0941, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.0008923877194982616, |
|
"grad_norm": 2.593773365020752, |
|
"learning_rate": 4.756302521008403e-05, |
|
"loss": 1.154, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.0009055110683144125, |
|
"grad_norm": 2.3330702781677246, |
|
"learning_rate": 4.752100840336134e-05, |
|
"loss": 1.1325, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.0009186344171305634, |
|
"grad_norm": 2.130695343017578, |
|
"learning_rate": 4.747899159663866e-05, |
|
"loss": 0.916, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.0009317577659467143, |
|
"grad_norm": 2.6290881633758545, |
|
"learning_rate": 4.7436974789915966e-05, |
|
"loss": 1.295, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.0009448811147628652, |
|
"grad_norm": 2.073453187942505, |
|
"learning_rate": 4.7394957983193275e-05, |
|
"loss": 1.2464, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.0009580044635790161, |
|
"grad_norm": 2.0779201984405518, |
|
"learning_rate": 4.735294117647059e-05, |
|
"loss": 1.3151, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.000971127812395167, |
|
"grad_norm": 2.802860975265503, |
|
"learning_rate": 4.73109243697479e-05, |
|
"loss": 0.872, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.000984251161211318, |
|
"grad_norm": 2.034449815750122, |
|
"learning_rate": 4.726890756302521e-05, |
|
"loss": 1.1338, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.000997374510027469, |
|
"grad_norm": 1.8365809917449951, |
|
"learning_rate": 4.7226890756302525e-05, |
|
"loss": 1.268, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.0010104978588436197, |
|
"grad_norm": 2.7444915771484375, |
|
"learning_rate": 4.7184873949579834e-05, |
|
"loss": 1.1891, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.0010236212076597706, |
|
"grad_norm": 2.1176328659057617, |
|
"learning_rate": 4.714285714285714e-05, |
|
"loss": 1.3602, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.0010367445564759216, |
|
"grad_norm": 2.3768794536590576, |
|
"learning_rate": 4.710084033613446e-05, |
|
"loss": 1.2641, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.0010498679052920723, |
|
"grad_norm": 2.1677494049072266, |
|
"learning_rate": 4.705882352941177e-05, |
|
"loss": 1.1084, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.0010629912541082233, |
|
"grad_norm": 2.5021719932556152, |
|
"learning_rate": 4.7016806722689076e-05, |
|
"loss": 1.0985, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.0010761146029243742, |
|
"grad_norm": 2.0837371349334717, |
|
"learning_rate": 4.6974789915966385e-05, |
|
"loss": 1.3129, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.0010892379517405252, |
|
"grad_norm": 2.4068455696105957, |
|
"learning_rate": 4.69327731092437e-05, |
|
"loss": 1.0889, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.001102361300556676, |
|
"grad_norm": 2.3378102779388428, |
|
"learning_rate": 4.689075630252101e-05, |
|
"loss": 1.1516, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.001115484649372827, |
|
"grad_norm": 2.165783166885376, |
|
"learning_rate": 4.684873949579832e-05, |
|
"loss": 1.2695, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.0011286079981889779, |
|
"grad_norm": 2.403777837753296, |
|
"learning_rate": 4.6806722689075635e-05, |
|
"loss": 1.0025, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.0011417313470051288, |
|
"grad_norm": 2.1567893028259277, |
|
"learning_rate": 4.6764705882352944e-05, |
|
"loss": 1.0176, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.0011548546958212796, |
|
"grad_norm": 2.3139235973358154, |
|
"learning_rate": 4.672268907563025e-05, |
|
"loss": 0.9044, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.0011679780446374305, |
|
"grad_norm": 2.56440806388855, |
|
"learning_rate": 4.668067226890757e-05, |
|
"loss": 0.7672, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.0011811013934535815, |
|
"grad_norm": 2.718919515609741, |
|
"learning_rate": 4.663865546218488e-05, |
|
"loss": 1.1918, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.0011942247422697325, |
|
"grad_norm": 1.8234260082244873, |
|
"learning_rate": 4.6596638655462187e-05, |
|
"loss": 0.9833, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.0012073480910858832, |
|
"grad_norm": 2.540635585784912, |
|
"learning_rate": 4.65546218487395e-05, |
|
"loss": 0.7035, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.0012204714399020342, |
|
"grad_norm": 2.1362297534942627, |
|
"learning_rate": 4.651260504201681e-05, |
|
"loss": 0.9999, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.0012335947887181851, |
|
"grad_norm": 2.085761547088623, |
|
"learning_rate": 4.647058823529412e-05, |
|
"loss": 1.1759, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.001246718137534336, |
|
"grad_norm": 2.096830129623413, |
|
"learning_rate": 4.642857142857143e-05, |
|
"loss": 1.0324, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.0012598414863504868, |
|
"grad_norm": 2.817042112350464, |
|
"learning_rate": 4.6386554621848745e-05, |
|
"loss": 1.1897, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.0012729648351666378, |
|
"grad_norm": 2.062160015106201, |
|
"learning_rate": 4.6344537815126054e-05, |
|
"loss": 0.8634, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.0012860881839827888, |
|
"grad_norm": 1.8099182844161987, |
|
"learning_rate": 4.630252100840336e-05, |
|
"loss": 1.1785, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.0012992115327989397, |
|
"grad_norm": 2.1783230304718018, |
|
"learning_rate": 4.626050420168068e-05, |
|
"loss": 1.2024, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.0013123348816150905, |
|
"grad_norm": 1.9956905841827393, |
|
"learning_rate": 4.621848739495799e-05, |
|
"loss": 0.9637, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0013254582304312414, |
|
"grad_norm": 2.4066596031188965, |
|
"learning_rate": 4.61764705882353e-05, |
|
"loss": 1.2583, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.0013385815792473924, |
|
"grad_norm": 2.0673747062683105, |
|
"learning_rate": 4.613445378151261e-05, |
|
"loss": 1.1186, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.0013517049280635433, |
|
"grad_norm": 1.8033102750778198, |
|
"learning_rate": 4.6092436974789915e-05, |
|
"loss": 1.1626, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.001364828276879694, |
|
"grad_norm": 2.032924175262451, |
|
"learning_rate": 4.6050420168067224e-05, |
|
"loss": 1.0937, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.001377951625695845, |
|
"grad_norm": 2.554765462875366, |
|
"learning_rate": 4.600840336134454e-05, |
|
"loss": 1.0015, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.001391074974511996, |
|
"grad_norm": 2.19063401222229, |
|
"learning_rate": 4.596638655462185e-05, |
|
"loss": 0.9031, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.001404198323328147, |
|
"grad_norm": 2.0013623237609863, |
|
"learning_rate": 4.592436974789916e-05, |
|
"loss": 1.1653, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.0014173216721442977, |
|
"grad_norm": 1.965734601020813, |
|
"learning_rate": 4.588235294117647e-05, |
|
"loss": 0.9685, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.0014304450209604487, |
|
"grad_norm": 2.040010690689087, |
|
"learning_rate": 4.584033613445378e-05, |
|
"loss": 1.1904, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.0014435683697765996, |
|
"grad_norm": 2.6654715538024902, |
|
"learning_rate": 4.579831932773109e-05, |
|
"loss": 1.2973, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.0014566917185927506, |
|
"grad_norm": 2.138746500015259, |
|
"learning_rate": 4.575630252100841e-05, |
|
"loss": 1.0583, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.0014698150674089013, |
|
"grad_norm": 2.018465995788574, |
|
"learning_rate": 4.5714285714285716e-05, |
|
"loss": 1.2435, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.0014829384162250523, |
|
"grad_norm": 1.9513568878173828, |
|
"learning_rate": 4.5672268907563025e-05, |
|
"loss": 1.0105, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.0014960617650412033, |
|
"grad_norm": 2.152390718460083, |
|
"learning_rate": 4.5630252100840334e-05, |
|
"loss": 1.279, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.0015091851138573542, |
|
"grad_norm": 2.2886104583740234, |
|
"learning_rate": 4.558823529411765e-05, |
|
"loss": 1.106, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.001522308462673505, |
|
"grad_norm": 2.221177816390991, |
|
"learning_rate": 4.554621848739496e-05, |
|
"loss": 0.8511, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.001535431811489656, |
|
"grad_norm": 1.791222095489502, |
|
"learning_rate": 4.550420168067227e-05, |
|
"loss": 1.082, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.001548555160305807, |
|
"grad_norm": 2.0436720848083496, |
|
"learning_rate": 4.5462184873949584e-05, |
|
"loss": 1.4781, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.0015616785091219576, |
|
"grad_norm": 2.416624069213867, |
|
"learning_rate": 4.542016806722689e-05, |
|
"loss": 1.0889, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.0015748018579381086, |
|
"grad_norm": 2.185920476913452, |
|
"learning_rate": 4.53781512605042e-05, |
|
"loss": 1.0695, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.0015879252067542596, |
|
"grad_norm": 2.1533713340759277, |
|
"learning_rate": 4.533613445378152e-05, |
|
"loss": 0.9034, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.0016010485555704105, |
|
"grad_norm": 2.1179723739624023, |
|
"learning_rate": 4.5294117647058826e-05, |
|
"loss": 1.094, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.0016141719043865613, |
|
"grad_norm": 2.0470693111419678, |
|
"learning_rate": 4.5252100840336135e-05, |
|
"loss": 1.0756, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.0016272952532027122, |
|
"grad_norm": 1.9948326349258423, |
|
"learning_rate": 4.521008403361345e-05, |
|
"loss": 1.1651, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.0016404186020188632, |
|
"grad_norm": 1.8133087158203125, |
|
"learning_rate": 4.516806722689076e-05, |
|
"loss": 1.356, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.0016535419508350141, |
|
"grad_norm": 2.027836322784424, |
|
"learning_rate": 4.512605042016807e-05, |
|
"loss": 1.0432, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.001666665299651165, |
|
"grad_norm": 2.365647792816162, |
|
"learning_rate": 4.508403361344538e-05, |
|
"loss": 1.0608, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.0016797886484673159, |
|
"grad_norm": 2.0502471923828125, |
|
"learning_rate": 4.5042016806722694e-05, |
|
"loss": 0.9935, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.0016929119972834668, |
|
"grad_norm": 2.469956398010254, |
|
"learning_rate": 4.5e-05, |
|
"loss": 1.0578, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.0017060353460996178, |
|
"grad_norm": 2.224716901779175, |
|
"learning_rate": 4.495798319327731e-05, |
|
"loss": 1.0054, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.0017191586949157685, |
|
"grad_norm": 2.0795905590057373, |
|
"learning_rate": 4.491596638655463e-05, |
|
"loss": 0.9304, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.0017322820437319195, |
|
"grad_norm": 2.5419461727142334, |
|
"learning_rate": 4.4873949579831937e-05, |
|
"loss": 1.0532, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.0017454053925480704, |
|
"grad_norm": 1.9789624214172363, |
|
"learning_rate": 4.4831932773109246e-05, |
|
"loss": 0.7064, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.0017585287413642214, |
|
"grad_norm": 2.256998062133789, |
|
"learning_rate": 4.478991596638656e-05, |
|
"loss": 1.0151, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.0017716520901803721, |
|
"grad_norm": 1.9684431552886963, |
|
"learning_rate": 4.474789915966387e-05, |
|
"loss": 1.3969, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.001784775438996523, |
|
"grad_norm": 1.9599086046218872, |
|
"learning_rate": 4.470588235294118e-05, |
|
"loss": 0.9254, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.001797898787812674, |
|
"grad_norm": 2.0463593006134033, |
|
"learning_rate": 4.4663865546218495e-05, |
|
"loss": 1.1675, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.001811022136628825, |
|
"grad_norm": 2.3988492488861084, |
|
"learning_rate": 4.4621848739495804e-05, |
|
"loss": 1.1021, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.0018241454854449758, |
|
"grad_norm": 2.1077089309692383, |
|
"learning_rate": 4.457983193277311e-05, |
|
"loss": 1.0832, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.0018372688342611267, |
|
"grad_norm": 2.027130365371704, |
|
"learning_rate": 4.453781512605042e-05, |
|
"loss": 1.1428, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.0018503921830772777, |
|
"grad_norm": 1.8151514530181885, |
|
"learning_rate": 4.449579831932773e-05, |
|
"loss": 1.6232, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.0018635155318934287, |
|
"grad_norm": 2.0072662830352783, |
|
"learning_rate": 4.445378151260504e-05, |
|
"loss": 0.9918, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.0018766388807095794, |
|
"grad_norm": 2.0975875854492188, |
|
"learning_rate": 4.4411764705882356e-05, |
|
"loss": 1.1645, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.0018897622295257304, |
|
"grad_norm": 1.8917436599731445, |
|
"learning_rate": 4.4369747899159665e-05, |
|
"loss": 0.9891, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.0019028855783418813, |
|
"grad_norm": 2.3437271118164062, |
|
"learning_rate": 4.4327731092436974e-05, |
|
"loss": 0.9545, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.0019160089271580323, |
|
"grad_norm": 1.8604134321212769, |
|
"learning_rate": 4.428571428571428e-05, |
|
"loss": 1.0553, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.001929132275974183, |
|
"grad_norm": 2.1565375328063965, |
|
"learning_rate": 4.42436974789916e-05, |
|
"loss": 1.411, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.001942255624790334, |
|
"grad_norm": 2.3755760192871094, |
|
"learning_rate": 4.420168067226891e-05, |
|
"loss": 0.9843, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.0019553789736064847, |
|
"grad_norm": 2.2564103603363037, |
|
"learning_rate": 4.4159663865546217e-05, |
|
"loss": 0.9633, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.001968502322422636, |
|
"grad_norm": 2.1097161769866943, |
|
"learning_rate": 4.411764705882353e-05, |
|
"loss": 0.935, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.0019816256712387867, |
|
"grad_norm": 1.8181250095367432, |
|
"learning_rate": 4.407563025210084e-05, |
|
"loss": 1.0492, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.001994749020054938, |
|
"grad_norm": 1.8607770204544067, |
|
"learning_rate": 4.403361344537815e-05, |
|
"loss": 0.9848, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.0020078723688710886, |
|
"grad_norm": 1.7969127893447876, |
|
"learning_rate": 4.3991596638655466e-05, |
|
"loss": 1.0657, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.0020209957176872393, |
|
"grad_norm": 2.0169637203216553, |
|
"learning_rate": 4.3949579831932775e-05, |
|
"loss": 1.0059, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.0020341190665033905, |
|
"grad_norm": 1.8811042308807373, |
|
"learning_rate": 4.3907563025210084e-05, |
|
"loss": 1.0386, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.0020472424153195412, |
|
"grad_norm": 1.9799407720565796, |
|
"learning_rate": 4.38655462184874e-05, |
|
"loss": 0.8287, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.002060365764135692, |
|
"grad_norm": 1.5626941919326782, |
|
"learning_rate": 4.382352941176471e-05, |
|
"loss": 0.9688, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.002073489112951843, |
|
"grad_norm": 2.183609962463379, |
|
"learning_rate": 4.378151260504202e-05, |
|
"loss": 1.3839, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.002086612461767994, |
|
"grad_norm": 1.854836106300354, |
|
"learning_rate": 4.373949579831933e-05, |
|
"loss": 1.2027, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.0020997358105841447, |
|
"grad_norm": 2.2002053260803223, |
|
"learning_rate": 4.369747899159664e-05, |
|
"loss": 1.078, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.002112859159400296, |
|
"grad_norm": 1.8938093185424805, |
|
"learning_rate": 4.365546218487395e-05, |
|
"loss": 1.2866, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.0021259825082164466, |
|
"grad_norm": 2.015660524368286, |
|
"learning_rate": 4.361344537815126e-05, |
|
"loss": 1.0272, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.0021391058570325978, |
|
"grad_norm": 1.8368442058563232, |
|
"learning_rate": 4.3571428571428576e-05, |
|
"loss": 1.2469, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.0021522292058487485, |
|
"grad_norm": 2.056487560272217, |
|
"learning_rate": 4.3529411764705885e-05, |
|
"loss": 1.1621, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.0021653525546648992, |
|
"grad_norm": 1.8423100709915161, |
|
"learning_rate": 4.3487394957983194e-05, |
|
"loss": 1.0864, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.0021784759034810504, |
|
"grad_norm": 1.7012152671813965, |
|
"learning_rate": 4.344537815126051e-05, |
|
"loss": 0.7503, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.002191599252297201, |
|
"grad_norm": 2.687711000442505, |
|
"learning_rate": 4.340336134453782e-05, |
|
"loss": 1.2613, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.002204722601113352, |
|
"grad_norm": 1.9029107093811035, |
|
"learning_rate": 4.336134453781513e-05, |
|
"loss": 1.0015, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.002217845949929503, |
|
"grad_norm": 1.7663654088974, |
|
"learning_rate": 4.3319327731092444e-05, |
|
"loss": 0.9883, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.002230969298745654, |
|
"grad_norm": 1.8921157121658325, |
|
"learning_rate": 4.327731092436975e-05, |
|
"loss": 1.1788, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.002244092647561805, |
|
"grad_norm": 1.870092511177063, |
|
"learning_rate": 4.323529411764706e-05, |
|
"loss": 0.8684, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.0022572159963779558, |
|
"grad_norm": 1.918101191520691, |
|
"learning_rate": 4.319327731092437e-05, |
|
"loss": 0.7238, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.0022703393451941065, |
|
"grad_norm": 2.038949966430664, |
|
"learning_rate": 4.3151260504201687e-05, |
|
"loss": 1.1109, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.0022834626940102577, |
|
"grad_norm": 2.1337692737579346, |
|
"learning_rate": 4.3109243697478996e-05, |
|
"loss": 1.0716, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.0022965860428264084, |
|
"grad_norm": 1.7539408206939697, |
|
"learning_rate": 4.3067226890756305e-05, |
|
"loss": 1.1325, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.002309709391642559, |
|
"grad_norm": 2.0798778533935547, |
|
"learning_rate": 4.302521008403362e-05, |
|
"loss": 0.9844, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.0023228327404587103, |
|
"grad_norm": 2.239736318588257, |
|
"learning_rate": 4.298319327731093e-05, |
|
"loss": 0.9153, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.002335956089274861, |
|
"grad_norm": 1.7551506757736206, |
|
"learning_rate": 4.294117647058823e-05, |
|
"loss": 0.7961, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.0023490794380910123, |
|
"grad_norm": 1.6866077184677124, |
|
"learning_rate": 4.289915966386555e-05, |
|
"loss": 0.8045, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.002362202786907163, |
|
"grad_norm": 2.037832260131836, |
|
"learning_rate": 4.2857142857142856e-05, |
|
"loss": 1.0526, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.0023753261357233138, |
|
"grad_norm": 1.9151948690414429, |
|
"learning_rate": 4.2815126050420165e-05, |
|
"loss": 1.0393, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.002388449484539465, |
|
"grad_norm": 1.807278037071228, |
|
"learning_rate": 4.277310924369748e-05, |
|
"loss": 0.7957, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.0024015728333556157, |
|
"grad_norm": 1.9136124849319458, |
|
"learning_rate": 4.273109243697479e-05, |
|
"loss": 1.0114, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.0024146961821717664, |
|
"grad_norm": 2.3868467807769775, |
|
"learning_rate": 4.26890756302521e-05, |
|
"loss": 1.1214, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.0024278195309879176, |
|
"grad_norm": 2.0041465759277344, |
|
"learning_rate": 4.2647058823529415e-05, |
|
"loss": 1.2091, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.0024409428798040683, |
|
"grad_norm": 1.787702202796936, |
|
"learning_rate": 4.2605042016806724e-05, |
|
"loss": 1.2995, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.0024540662286202195, |
|
"grad_norm": 1.82904052734375, |
|
"learning_rate": 4.256302521008403e-05, |
|
"loss": 1.129, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.0024671895774363703, |
|
"grad_norm": 2.173250675201416, |
|
"learning_rate": 4.252100840336135e-05, |
|
"loss": 1.0799, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.002480312926252521, |
|
"grad_norm": 1.7813307046890259, |
|
"learning_rate": 4.247899159663866e-05, |
|
"loss": 1.0202, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.002493436275068672, |
|
"grad_norm": 2.2016477584838867, |
|
"learning_rate": 4.2436974789915967e-05, |
|
"loss": 0.718, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.002506559623884823, |
|
"grad_norm": 2.0901100635528564, |
|
"learning_rate": 4.2394957983193276e-05, |
|
"loss": 1.0118, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.0025196829727009737, |
|
"grad_norm": 2.0191895961761475, |
|
"learning_rate": 4.235294117647059e-05, |
|
"loss": 1.1978, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.002532806321517125, |
|
"grad_norm": 1.896297812461853, |
|
"learning_rate": 4.23109243697479e-05, |
|
"loss": 0.9719, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.0025459296703332756, |
|
"grad_norm": 1.894870400428772, |
|
"learning_rate": 4.226890756302521e-05, |
|
"loss": 1.229, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.0025590530191494263, |
|
"grad_norm": 1.902392029762268, |
|
"learning_rate": 4.2226890756302525e-05, |
|
"loss": 1.3133, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.0025721763679655775, |
|
"grad_norm": 2.120286464691162, |
|
"learning_rate": 4.2184873949579834e-05, |
|
"loss": 1.1485, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.0025852997167817283, |
|
"grad_norm": 2.0686750411987305, |
|
"learning_rate": 4.214285714285714e-05, |
|
"loss": 1.0732, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.0025984230655978794, |
|
"grad_norm": 1.6315714120864868, |
|
"learning_rate": 4.210084033613446e-05, |
|
"loss": 0.6762, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.00261154641441403, |
|
"grad_norm": 1.7602286338806152, |
|
"learning_rate": 4.205882352941177e-05, |
|
"loss": 0.8897, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.002624669763230181, |
|
"grad_norm": 2.136775493621826, |
|
"learning_rate": 4.201680672268908e-05, |
|
"loss": 1.1179, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.002637793112046332, |
|
"grad_norm": 1.9759247303009033, |
|
"learning_rate": 4.197478991596639e-05, |
|
"loss": 1.078, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.002650916460862483, |
|
"grad_norm": 2.1391897201538086, |
|
"learning_rate": 4.19327731092437e-05, |
|
"loss": 1.0673, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.0026640398096786336, |
|
"grad_norm": 2.3699793815612793, |
|
"learning_rate": 4.189075630252101e-05, |
|
"loss": 1.0816, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.0026771631584947848, |
|
"grad_norm": 1.7057411670684814, |
|
"learning_rate": 4.184873949579832e-05, |
|
"loss": 1.1088, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.0026902865073109355, |
|
"grad_norm": 1.6120874881744385, |
|
"learning_rate": 4.1806722689075635e-05, |
|
"loss": 1.2588, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.0027034098561270867, |
|
"grad_norm": 2.3143081665039062, |
|
"learning_rate": 4.1764705882352944e-05, |
|
"loss": 1.3962, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.0027165332049432374, |
|
"grad_norm": 2.304960250854492, |
|
"learning_rate": 4.172268907563025e-05, |
|
"loss": 1.0249, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.002729656553759388, |
|
"grad_norm": 1.8955740928649902, |
|
"learning_rate": 4.168067226890757e-05, |
|
"loss": 0.7611, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.0027427799025755394, |
|
"grad_norm": 2.069223403930664, |
|
"learning_rate": 4.163865546218488e-05, |
|
"loss": 0.9099, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.00275590325139169, |
|
"grad_norm": 1.9223281145095825, |
|
"learning_rate": 4.159663865546219e-05, |
|
"loss": 1.3042, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.002769026600207841, |
|
"grad_norm": 1.9752320051193237, |
|
"learning_rate": 4.15546218487395e-05, |
|
"loss": 1.0339, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.002782149949023992, |
|
"grad_norm": 1.8929498195648193, |
|
"learning_rate": 4.151260504201681e-05, |
|
"loss": 0.9886, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.0027952732978401428, |
|
"grad_norm": 2.2697014808654785, |
|
"learning_rate": 4.147058823529412e-05, |
|
"loss": 1.1485, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.002808396646656294, |
|
"grad_norm": 1.9803391695022583, |
|
"learning_rate": 4.1428571428571437e-05, |
|
"loss": 1.1344, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.0028215199954724447, |
|
"grad_norm": 2.1848607063293457, |
|
"learning_rate": 4.138655462184874e-05, |
|
"loss": 1.1566, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.0028346433442885954, |
|
"grad_norm": 1.7310750484466553, |
|
"learning_rate": 4.134453781512605e-05, |
|
"loss": 0.7865, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.0028477666931047466, |
|
"grad_norm": 1.832837462425232, |
|
"learning_rate": 4.1302521008403364e-05, |
|
"loss": 1.0452, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.0028608900419208974, |
|
"grad_norm": 2.0909769535064697, |
|
"learning_rate": 4.126050420168067e-05, |
|
"loss": 0.965, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.002874013390737048, |
|
"grad_norm": 2.217707872390747, |
|
"learning_rate": 4.121848739495798e-05, |
|
"loss": 1.0826, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.0028871367395531993, |
|
"grad_norm": 1.8040285110473633, |
|
"learning_rate": 4.11764705882353e-05, |
|
"loss": 0.8233, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.00290026008836935, |
|
"grad_norm": 1.7424750328063965, |
|
"learning_rate": 4.1134453781512606e-05, |
|
"loss": 0.9798, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.002913383437185501, |
|
"grad_norm": 2.0254833698272705, |
|
"learning_rate": 4.1092436974789915e-05, |
|
"loss": 0.9843, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.002926506786001652, |
|
"grad_norm": 2.2267329692840576, |
|
"learning_rate": 4.1050420168067224e-05, |
|
"loss": 0.8196, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.0029396301348178027, |
|
"grad_norm": 2.4124982357025146, |
|
"learning_rate": 4.100840336134454e-05, |
|
"loss": 1.3434, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.002952753483633954, |
|
"grad_norm": 1.988885760307312, |
|
"learning_rate": 4.096638655462185e-05, |
|
"loss": 1.1987, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.0029658768324501046, |
|
"grad_norm": 1.7969712018966675, |
|
"learning_rate": 4.092436974789916e-05, |
|
"loss": 1.2787, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.0029790001812662554, |
|
"grad_norm": 1.963279128074646, |
|
"learning_rate": 4.0882352941176474e-05, |
|
"loss": 1.0341, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.0029921235300824065, |
|
"grad_norm": 1.8143019676208496, |
|
"learning_rate": 4.084033613445378e-05, |
|
"loss": 1.3237, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.0030052468788985573, |
|
"grad_norm": 2.0594189167022705, |
|
"learning_rate": 4.079831932773109e-05, |
|
"loss": 0.8073, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.0030183702277147085, |
|
"grad_norm": 1.9188764095306396, |
|
"learning_rate": 4.075630252100841e-05, |
|
"loss": 1.1112, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.003031493576530859, |
|
"grad_norm": 1.802120327949524, |
|
"learning_rate": 4.0714285714285717e-05, |
|
"loss": 1.1549, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.00304461692534701, |
|
"grad_norm": 1.954909324645996, |
|
"learning_rate": 4.0672268907563026e-05, |
|
"loss": 1.1407, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.003057740274163161, |
|
"grad_norm": 1.897289752960205, |
|
"learning_rate": 4.063025210084034e-05, |
|
"loss": 0.9183, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.003070863622979312, |
|
"grad_norm": 2.0453414916992188, |
|
"learning_rate": 4.058823529411765e-05, |
|
"loss": 1.2498, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.0030839869717954626, |
|
"grad_norm": 1.9474778175354004, |
|
"learning_rate": 4.054621848739496e-05, |
|
"loss": 1.018, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.003097110320611614, |
|
"grad_norm": 1.6720625162124634, |
|
"learning_rate": 4.050420168067227e-05, |
|
"loss": 1.0404, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.0031102336694277645, |
|
"grad_norm": 1.9552992582321167, |
|
"learning_rate": 4.0462184873949584e-05, |
|
"loss": 0.9688, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.0031233570182439153, |
|
"grad_norm": 2.513448476791382, |
|
"learning_rate": 4.042016806722689e-05, |
|
"loss": 1.0062, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.0031364803670600665, |
|
"grad_norm": 2.00137996673584, |
|
"learning_rate": 4.03781512605042e-05, |
|
"loss": 0.9615, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.003149603715876217, |
|
"grad_norm": 2.3891799449920654, |
|
"learning_rate": 4.033613445378152e-05, |
|
"loss": 0.8978, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.0031627270646923684, |
|
"grad_norm": 2.0855042934417725, |
|
"learning_rate": 4.029411764705883e-05, |
|
"loss": 0.852, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.003175850413508519, |
|
"grad_norm": 2.079238176345825, |
|
"learning_rate": 4.0252100840336136e-05, |
|
"loss": 0.9779, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.00318897376232467, |
|
"grad_norm": 1.7385294437408447, |
|
"learning_rate": 4.021008403361345e-05, |
|
"loss": 0.9277, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.003202097111140821, |
|
"grad_norm": 1.709777593612671, |
|
"learning_rate": 4.016806722689076e-05, |
|
"loss": 1.1463, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.003215220459956972, |
|
"grad_norm": 1.9025888442993164, |
|
"learning_rate": 4.012605042016807e-05, |
|
"loss": 1.1001, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.0032283438087731225, |
|
"grad_norm": 1.8409806489944458, |
|
"learning_rate": 4.0084033613445385e-05, |
|
"loss": 1.0485, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.0032414671575892737, |
|
"grad_norm": 1.6577681303024292, |
|
"learning_rate": 4.0042016806722694e-05, |
|
"loss": 0.92, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.0032545905064054245, |
|
"grad_norm": 1.6596322059631348, |
|
"learning_rate": 4e-05, |
|
"loss": 1.1192, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.0032677138552215756, |
|
"grad_norm": 2.784494161605835, |
|
"learning_rate": 3.995798319327731e-05, |
|
"loss": 1.0173, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.0032808372040377264, |
|
"grad_norm": 2.500913381576538, |
|
"learning_rate": 3.991596638655463e-05, |
|
"loss": 1.1406, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.003293960552853877, |
|
"grad_norm": 1.7924182415008545, |
|
"learning_rate": 3.987394957983194e-05, |
|
"loss": 1.0221, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.0033070839016700283, |
|
"grad_norm": 2.1434240341186523, |
|
"learning_rate": 3.9831932773109246e-05, |
|
"loss": 1.1815, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.003320207250486179, |
|
"grad_norm": 1.7933728694915771, |
|
"learning_rate": 3.9789915966386555e-05, |
|
"loss": 1.2489, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.00333333059930233, |
|
"grad_norm": 1.8706622123718262, |
|
"learning_rate": 3.9747899159663864e-05, |
|
"loss": 1.1719, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.003346453948118481, |
|
"grad_norm": 1.8657987117767334, |
|
"learning_rate": 3.970588235294117e-05, |
|
"loss": 1.0179, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.0033595772969346317, |
|
"grad_norm": 1.7262569665908813, |
|
"learning_rate": 3.966386554621849e-05, |
|
"loss": 0.975, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.003372700645750783, |
|
"grad_norm": 1.865169882774353, |
|
"learning_rate": 3.96218487394958e-05, |
|
"loss": 0.8847, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.0033858239945669336, |
|
"grad_norm": 1.642320156097412, |
|
"learning_rate": 3.957983193277311e-05, |
|
"loss": 1.0513, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.0033989473433830844, |
|
"grad_norm": 2.1787827014923096, |
|
"learning_rate": 3.953781512605042e-05, |
|
"loss": 0.9024, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.0034120706921992356, |
|
"grad_norm": 1.8304857015609741, |
|
"learning_rate": 3.949579831932773e-05, |
|
"loss": 1.2173, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.0034251940410153863, |
|
"grad_norm": 1.6914318799972534, |
|
"learning_rate": 3.945378151260504e-05, |
|
"loss": 1.2612, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.003438317389831537, |
|
"grad_norm": 2.179232597351074, |
|
"learning_rate": 3.9411764705882356e-05, |
|
"loss": 0.9205, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.003451440738647688, |
|
"grad_norm": 1.780931830406189, |
|
"learning_rate": 3.9369747899159665e-05, |
|
"loss": 1.0604, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.003464564087463839, |
|
"grad_norm": 2.5096867084503174, |
|
"learning_rate": 3.9327731092436974e-05, |
|
"loss": 1.3598, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.00347768743627999, |
|
"grad_norm": 1.8979699611663818, |
|
"learning_rate": 3.928571428571429e-05, |
|
"loss": 1.1095, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.003490810785096141, |
|
"grad_norm": 1.8904502391815186, |
|
"learning_rate": 3.92436974789916e-05, |
|
"loss": 0.883, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.0035039341339122916, |
|
"grad_norm": 1.938879132270813, |
|
"learning_rate": 3.920168067226891e-05, |
|
"loss": 0.9752, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.003517057482728443, |
|
"grad_norm": 2.0486090183258057, |
|
"learning_rate": 3.915966386554622e-05, |
|
"loss": 1.2316, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.0035301808315445935, |
|
"grad_norm": 1.877327561378479, |
|
"learning_rate": 3.911764705882353e-05, |
|
"loss": 1.117, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.0035433041803607443, |
|
"grad_norm": 1.7931269407272339, |
|
"learning_rate": 3.907563025210084e-05, |
|
"loss": 1.2002, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.0035564275291768955, |
|
"grad_norm": 1.9284062385559082, |
|
"learning_rate": 3.903361344537815e-05, |
|
"loss": 1.3203, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.003569550877993046, |
|
"grad_norm": 2.175030469894409, |
|
"learning_rate": 3.8991596638655467e-05, |
|
"loss": 1.1273, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.003582674226809197, |
|
"grad_norm": 1.7806055545806885, |
|
"learning_rate": 3.8949579831932776e-05, |
|
"loss": 1.1313, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.003595797575625348, |
|
"grad_norm": 1.6851775646209717, |
|
"learning_rate": 3.8907563025210084e-05, |
|
"loss": 1.2696, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.003608920924441499, |
|
"grad_norm": 2.279461622238159, |
|
"learning_rate": 3.88655462184874e-05, |
|
"loss": 1.0396, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.00362204427325765, |
|
"grad_norm": 2.158630847930908, |
|
"learning_rate": 3.882352941176471e-05, |
|
"loss": 0.8833, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.003635167622073801, |
|
"grad_norm": 2.1691272258758545, |
|
"learning_rate": 3.878151260504202e-05, |
|
"loss": 1.0293, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.0036482909708899515, |
|
"grad_norm": 1.891581654548645, |
|
"learning_rate": 3.8739495798319334e-05, |
|
"loss": 0.8519, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.0036614143197061027, |
|
"grad_norm": 1.8272262811660767, |
|
"learning_rate": 3.869747899159664e-05, |
|
"loss": 0.8203, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.0036745376685222535, |
|
"grad_norm": 1.7315893173217773, |
|
"learning_rate": 3.865546218487395e-05, |
|
"loss": 1.2325, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.003687661017338404, |
|
"grad_norm": 1.6825525760650635, |
|
"learning_rate": 3.861344537815126e-05, |
|
"loss": 1.2273, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.0037007843661545554, |
|
"grad_norm": 2.0799367427825928, |
|
"learning_rate": 3.857142857142858e-05, |
|
"loss": 1.2272, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.003713907714970706, |
|
"grad_norm": 2.270346164703369, |
|
"learning_rate": 3.8529411764705886e-05, |
|
"loss": 0.998, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.0037270310637868573, |
|
"grad_norm": 1.8360576629638672, |
|
"learning_rate": 3.8487394957983195e-05, |
|
"loss": 1.044, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.003740154412603008, |
|
"grad_norm": 1.993119716644287, |
|
"learning_rate": 3.844537815126051e-05, |
|
"loss": 1.1743, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.003753277761419159, |
|
"grad_norm": 1.6042628288269043, |
|
"learning_rate": 3.840336134453782e-05, |
|
"loss": 1.2144, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.00376640111023531, |
|
"grad_norm": 1.707850694656372, |
|
"learning_rate": 3.836134453781513e-05, |
|
"loss": 1.1868, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.0037795244590514607, |
|
"grad_norm": 1.8242610692977905, |
|
"learning_rate": 3.8319327731092444e-05, |
|
"loss": 1.228, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.0037926478078676115, |
|
"grad_norm": 2.129991054534912, |
|
"learning_rate": 3.8277310924369746e-05, |
|
"loss": 0.9623, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.0038057711566837626, |
|
"grad_norm": 1.8809386491775513, |
|
"learning_rate": 3.8235294117647055e-05, |
|
"loss": 1.0932, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.0038188945054999134, |
|
"grad_norm": 1.7827450037002563, |
|
"learning_rate": 3.819327731092437e-05, |
|
"loss": 0.6651, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.0038320178543160646, |
|
"grad_norm": 1.965325951576233, |
|
"learning_rate": 3.815126050420168e-05, |
|
"loss": 1.0463, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.0038451412031322153, |
|
"grad_norm": 1.9733631610870361, |
|
"learning_rate": 3.810924369747899e-05, |
|
"loss": 1.1021, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.003858264551948366, |
|
"grad_norm": 1.8928642272949219, |
|
"learning_rate": 3.8067226890756305e-05, |
|
"loss": 1.0295, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.0038713879007645172, |
|
"grad_norm": 1.9470313787460327, |
|
"learning_rate": 3.8025210084033614e-05, |
|
"loss": 0.9477, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.003884511249580668, |
|
"grad_norm": 1.9431153535842896, |
|
"learning_rate": 3.798319327731092e-05, |
|
"loss": 1.2317, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.0038976345983968187, |
|
"grad_norm": 1.8935089111328125, |
|
"learning_rate": 3.794117647058824e-05, |
|
"loss": 0.7785, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.0039107579472129695, |
|
"grad_norm": 1.6975841522216797, |
|
"learning_rate": 3.789915966386555e-05, |
|
"loss": 0.7581, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.003923881296029121, |
|
"grad_norm": 1.9243444204330444, |
|
"learning_rate": 3.785714285714286e-05, |
|
"loss": 0.9032, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.003937004644845272, |
|
"grad_norm": 1.9851335287094116, |
|
"learning_rate": 3.7815126050420166e-05, |
|
"loss": 1.055, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.003950127993661423, |
|
"grad_norm": 1.655465841293335, |
|
"learning_rate": 3.777310924369748e-05, |
|
"loss": 0.9511, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.003963251342477573, |
|
"grad_norm": 1.923627495765686, |
|
"learning_rate": 3.773109243697479e-05, |
|
"loss": 1.3182, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.003976374691293724, |
|
"grad_norm": 1.884634017944336, |
|
"learning_rate": 3.76890756302521e-05, |
|
"loss": 1.2622, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.003989498040109876, |
|
"grad_norm": 2.0541749000549316, |
|
"learning_rate": 3.7647058823529415e-05, |
|
"loss": 1.2305, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.004002621388926026, |
|
"grad_norm": 2.058009147644043, |
|
"learning_rate": 3.7605042016806724e-05, |
|
"loss": 1.0203, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.004015744737742177, |
|
"grad_norm": 2.1403768062591553, |
|
"learning_rate": 3.756302521008403e-05, |
|
"loss": 1.1616, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.004028868086558328, |
|
"grad_norm": 2.3530690670013428, |
|
"learning_rate": 3.752100840336135e-05, |
|
"loss": 0.8852, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.004041991435374479, |
|
"grad_norm": 1.6284990310668945, |
|
"learning_rate": 3.747899159663866e-05, |
|
"loss": 1.0244, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.004055114784190629, |
|
"grad_norm": 2.03271746635437, |
|
"learning_rate": 3.743697478991597e-05, |
|
"loss": 1.0321, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.004068238133006781, |
|
"grad_norm": 2.220259666442871, |
|
"learning_rate": 3.739495798319328e-05, |
|
"loss": 0.9091, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.004081361481822932, |
|
"grad_norm": 1.762771487236023, |
|
"learning_rate": 3.735294117647059e-05, |
|
"loss": 1.0977, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.0040944848306390825, |
|
"grad_norm": 1.9179171323776245, |
|
"learning_rate": 3.73109243697479e-05, |
|
"loss": 1.238, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.004107608179455233, |
|
"grad_norm": 1.933432936668396, |
|
"learning_rate": 3.726890756302521e-05, |
|
"loss": 0.9684, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.004120731528271384, |
|
"grad_norm": 1.7434195280075073, |
|
"learning_rate": 3.7226890756302525e-05, |
|
"loss": 1.0966, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.004133854877087536, |
|
"grad_norm": 1.692446231842041, |
|
"learning_rate": 3.7184873949579834e-05, |
|
"loss": 0.8348, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.004146978225903686, |
|
"grad_norm": 2.00734281539917, |
|
"learning_rate": 3.7142857142857143e-05, |
|
"loss": 0.9317, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.004160101574719837, |
|
"grad_norm": 2.176311492919922, |
|
"learning_rate": 3.710084033613446e-05, |
|
"loss": 1.1143, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.004173224923535988, |
|
"grad_norm": 1.5961421728134155, |
|
"learning_rate": 3.705882352941177e-05, |
|
"loss": 1.1549, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.0041863482723521386, |
|
"grad_norm": 1.9222772121429443, |
|
"learning_rate": 3.701680672268908e-05, |
|
"loss": 1.0957, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.004199471621168289, |
|
"grad_norm": 1.6824790239334106, |
|
"learning_rate": 3.697478991596639e-05, |
|
"loss": 0.9843, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.004212594969984441, |
|
"grad_norm": 2.092710256576538, |
|
"learning_rate": 3.69327731092437e-05, |
|
"loss": 0.9426, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.004225718318800592, |
|
"grad_norm": 1.9568061828613281, |
|
"learning_rate": 3.689075630252101e-05, |
|
"loss": 1.1585, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.004238841667616742, |
|
"grad_norm": 1.711948275566101, |
|
"learning_rate": 3.684873949579833e-05, |
|
"loss": 1.0928, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.004251965016432893, |
|
"grad_norm": 2.0425591468811035, |
|
"learning_rate": 3.6806722689075636e-05, |
|
"loss": 1.0866, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.004265088365249044, |
|
"grad_norm": 1.9059627056121826, |
|
"learning_rate": 3.6764705882352945e-05, |
|
"loss": 1.1211, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.0042782117140651955, |
|
"grad_norm": 1.7678598165512085, |
|
"learning_rate": 3.6722689075630254e-05, |
|
"loss": 0.9953, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.004291335062881346, |
|
"grad_norm": 1.829349160194397, |
|
"learning_rate": 3.668067226890756e-05, |
|
"loss": 1.2049, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.004304458411697497, |
|
"grad_norm": 1.9673535823822021, |
|
"learning_rate": 3.663865546218487e-05, |
|
"loss": 1.1124, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.004317581760513648, |
|
"grad_norm": 1.9089158773422241, |
|
"learning_rate": 3.659663865546219e-05, |
|
"loss": 0.9055, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.0043307051093297985, |
|
"grad_norm": 1.805672287940979, |
|
"learning_rate": 3.6554621848739496e-05, |
|
"loss": 0.815, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.00434382845814595, |
|
"grad_norm": 2.055886745452881, |
|
"learning_rate": 3.6512605042016805e-05, |
|
"loss": 0.923, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.004356951806962101, |
|
"grad_norm": 2.1708016395568848, |
|
"learning_rate": 3.6470588235294114e-05, |
|
"loss": 1.1073, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.004370075155778252, |
|
"grad_norm": 1.728820562362671, |
|
"learning_rate": 3.642857142857143e-05, |
|
"loss": 0.9458, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.004383198504594402, |
|
"grad_norm": 1.920433521270752, |
|
"learning_rate": 3.638655462184874e-05, |
|
"loss": 1.3087, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.004396321853410553, |
|
"grad_norm": 2.3520448207855225, |
|
"learning_rate": 3.634453781512605e-05, |
|
"loss": 1.0046, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.004409445202226704, |
|
"grad_norm": 1.5997459888458252, |
|
"learning_rate": 3.6302521008403364e-05, |
|
"loss": 1.0329, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.004422568551042855, |
|
"grad_norm": 1.7262095212936401, |
|
"learning_rate": 3.626050420168067e-05, |
|
"loss": 0.7392, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.004435691899859006, |
|
"grad_norm": 1.6960225105285645, |
|
"learning_rate": 3.621848739495798e-05, |
|
"loss": 0.8531, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.004448815248675157, |
|
"grad_norm": 2.0684216022491455, |
|
"learning_rate": 3.61764705882353e-05, |
|
"loss": 1.1234, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.004461938597491308, |
|
"grad_norm": 2.0957415103912354, |
|
"learning_rate": 3.613445378151261e-05, |
|
"loss": 1.0791, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.004475061946307458, |
|
"grad_norm": 1.9028066396713257, |
|
"learning_rate": 3.6092436974789916e-05, |
|
"loss": 1.1277, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.00448818529512361, |
|
"grad_norm": 1.9382456541061401, |
|
"learning_rate": 3.605042016806723e-05, |
|
"loss": 1.0221, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.004501308643939761, |
|
"grad_norm": 1.6922097206115723, |
|
"learning_rate": 3.600840336134454e-05, |
|
"loss": 1.0441, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.0045144319927559115, |
|
"grad_norm": 1.9319812059402466, |
|
"learning_rate": 3.596638655462185e-05, |
|
"loss": 1.1206, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.004527555341572062, |
|
"grad_norm": 1.809410572052002, |
|
"learning_rate": 3.592436974789916e-05, |
|
"loss": 1.3128, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.004540678690388213, |
|
"grad_norm": 2.202211380004883, |
|
"learning_rate": 3.5882352941176474e-05, |
|
"loss": 1.2144, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.004553802039204364, |
|
"grad_norm": 1.9447896480560303, |
|
"learning_rate": 3.584033613445378e-05, |
|
"loss": 0.931, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.004566925388020515, |
|
"grad_norm": 1.7502014636993408, |
|
"learning_rate": 3.579831932773109e-05, |
|
"loss": 1.0937, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.004580048736836666, |
|
"grad_norm": 2.021968364715576, |
|
"learning_rate": 3.575630252100841e-05, |
|
"loss": 1.1214, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.004593172085652817, |
|
"grad_norm": 1.980770230293274, |
|
"learning_rate": 3.571428571428572e-05, |
|
"loss": 0.8333, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.004606295434468968, |
|
"grad_norm": 1.9700994491577148, |
|
"learning_rate": 3.5672268907563026e-05, |
|
"loss": 1.2036, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.004619418783285118, |
|
"grad_norm": 1.9649808406829834, |
|
"learning_rate": 3.563025210084034e-05, |
|
"loss": 0.8695, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.00463254213210127, |
|
"grad_norm": 1.7544773817062378, |
|
"learning_rate": 3.558823529411765e-05, |
|
"loss": 0.9898, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.004645665480917421, |
|
"grad_norm": 1.497639536857605, |
|
"learning_rate": 3.554621848739496e-05, |
|
"loss": 1.0418, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.004658788829733571, |
|
"grad_norm": 1.8401079177856445, |
|
"learning_rate": 3.5504201680672275e-05, |
|
"loss": 0.7553, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.004671912178549722, |
|
"grad_norm": 1.866067886352539, |
|
"learning_rate": 3.5462184873949584e-05, |
|
"loss": 1.23, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.004685035527365873, |
|
"grad_norm": 2.1467809677124023, |
|
"learning_rate": 3.5420168067226893e-05, |
|
"loss": 1.021, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.0046981588761820245, |
|
"grad_norm": 1.55268132686615, |
|
"learning_rate": 3.53781512605042e-05, |
|
"loss": 0.7216, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.004711282224998175, |
|
"grad_norm": 2.081575632095337, |
|
"learning_rate": 3.533613445378152e-05, |
|
"loss": 1.0118, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.004724405573814326, |
|
"grad_norm": 2.164594888687134, |
|
"learning_rate": 3.529411764705883e-05, |
|
"loss": 1.1738, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.004737528922630477, |
|
"grad_norm": 2.3375766277313232, |
|
"learning_rate": 3.5252100840336136e-05, |
|
"loss": 1.2937, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.0047506522714466275, |
|
"grad_norm": 1.8707530498504639, |
|
"learning_rate": 3.521008403361345e-05, |
|
"loss": 0.7568, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.004763775620262778, |
|
"grad_norm": 1.7898566722869873, |
|
"learning_rate": 3.516806722689076e-05, |
|
"loss": 1.0675, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.00477689896907893, |
|
"grad_norm": 2.018583059310913, |
|
"learning_rate": 3.512605042016806e-05, |
|
"loss": 1.2629, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.004790022317895081, |
|
"grad_norm": 1.7101985216140747, |
|
"learning_rate": 3.508403361344538e-05, |
|
"loss": 1.2771, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.004803145666711231, |
|
"grad_norm": 1.6018425226211548, |
|
"learning_rate": 3.504201680672269e-05, |
|
"loss": 1.3823, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.004816269015527382, |
|
"grad_norm": 1.9749130010604858, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.8286, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.004829392364343533, |
|
"grad_norm": 1.8984951972961426, |
|
"learning_rate": 3.495798319327731e-05, |
|
"loss": 0.8364, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.0048425157131596844, |
|
"grad_norm": 1.8086698055267334, |
|
"learning_rate": 3.491596638655462e-05, |
|
"loss": 0.9319, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.004855639061975835, |
|
"grad_norm": 1.8288997411727905, |
|
"learning_rate": 3.487394957983193e-05, |
|
"loss": 1.1326, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.004868762410791986, |
|
"grad_norm": 2.247119903564453, |
|
"learning_rate": 3.4831932773109246e-05, |
|
"loss": 0.9275, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.004881885759608137, |
|
"grad_norm": 1.9033137559890747, |
|
"learning_rate": 3.4789915966386555e-05, |
|
"loss": 1.0881, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.004895009108424287, |
|
"grad_norm": 2.0219953060150146, |
|
"learning_rate": 3.4747899159663864e-05, |
|
"loss": 0.787, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.004908132457240439, |
|
"grad_norm": 1.8668859004974365, |
|
"learning_rate": 3.470588235294118e-05, |
|
"loss": 0.7476, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.00492125580605659, |
|
"grad_norm": 1.9940170049667358, |
|
"learning_rate": 3.466386554621849e-05, |
|
"loss": 1.0816, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.0049343791548727405, |
|
"grad_norm": 1.749637246131897, |
|
"learning_rate": 3.46218487394958e-05, |
|
"loss": 1.0829, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.004947502503688891, |
|
"grad_norm": 1.9126695394515991, |
|
"learning_rate": 3.457983193277311e-05, |
|
"loss": 0.9706, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.004960625852505042, |
|
"grad_norm": 2.0018599033355713, |
|
"learning_rate": 3.453781512605042e-05, |
|
"loss": 1.0444, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.004973749201321193, |
|
"grad_norm": 1.6699880361557007, |
|
"learning_rate": 3.449579831932773e-05, |
|
"loss": 0.9605, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.004986872550137344, |
|
"grad_norm": 2.0638937950134277, |
|
"learning_rate": 3.445378151260504e-05, |
|
"loss": 0.8426, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.004999995898953495, |
|
"grad_norm": 1.8554656505584717, |
|
"learning_rate": 3.441176470588236e-05, |
|
"loss": 1.2654, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.005013119247769646, |
|
"grad_norm": 2.101221799850464, |
|
"learning_rate": 3.4369747899159666e-05, |
|
"loss": 0.9724, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.005026242596585797, |
|
"grad_norm": 2.00722336769104, |
|
"learning_rate": 3.4327731092436975e-05, |
|
"loss": 0.9005, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.005039365945401947, |
|
"grad_norm": 1.6129575967788696, |
|
"learning_rate": 3.428571428571429e-05, |
|
"loss": 1.0246, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.005052489294218099, |
|
"grad_norm": 1.8390120267868042, |
|
"learning_rate": 3.42436974789916e-05, |
|
"loss": 1.2598, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.00506561264303425, |
|
"grad_norm": 2.0762507915496826, |
|
"learning_rate": 3.420168067226891e-05, |
|
"loss": 1.3111, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.0050787359918504004, |
|
"grad_norm": 1.7343683242797852, |
|
"learning_rate": 3.4159663865546224e-05, |
|
"loss": 0.8649, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.005091859340666551, |
|
"grad_norm": 2.0712454319000244, |
|
"learning_rate": 3.411764705882353e-05, |
|
"loss": 1.2333, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.005104982689482702, |
|
"grad_norm": 1.7495871782302856, |
|
"learning_rate": 3.407563025210084e-05, |
|
"loss": 1.2728, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.005118106038298853, |
|
"grad_norm": 1.8208390474319458, |
|
"learning_rate": 3.403361344537815e-05, |
|
"loss": 0.873, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.005131229387115004, |
|
"grad_norm": 1.7520668506622314, |
|
"learning_rate": 3.399159663865547e-05, |
|
"loss": 0.8759, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.005144352735931155, |
|
"grad_norm": 1.6596717834472656, |
|
"learning_rate": 3.3949579831932776e-05, |
|
"loss": 1.3544, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.005157476084747306, |
|
"grad_norm": 1.497276782989502, |
|
"learning_rate": 3.3907563025210085e-05, |
|
"loss": 1.13, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.0051705994335634565, |
|
"grad_norm": 2.0411555767059326, |
|
"learning_rate": 3.38655462184874e-05, |
|
"loss": 0.9337, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.005183722782379607, |
|
"grad_norm": 2.373502016067505, |
|
"learning_rate": 3.382352941176471e-05, |
|
"loss": 1.022, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.005196846131195759, |
|
"grad_norm": 1.9800091981887817, |
|
"learning_rate": 3.378151260504202e-05, |
|
"loss": 1.0296, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.00520996948001191, |
|
"grad_norm": 2.000948667526245, |
|
"learning_rate": 3.3739495798319334e-05, |
|
"loss": 0.9769, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.00522309282882806, |
|
"grad_norm": 1.7864891290664673, |
|
"learning_rate": 3.3697478991596643e-05, |
|
"loss": 0.9527, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.005236216177644211, |
|
"grad_norm": 1.7589308023452759, |
|
"learning_rate": 3.365546218487395e-05, |
|
"loss": 0.8136, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.005249339526460362, |
|
"grad_norm": 1.9042621850967407, |
|
"learning_rate": 3.361344537815127e-05, |
|
"loss": 1.371, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.0052624628752765135, |
|
"grad_norm": 2.0386345386505127, |
|
"learning_rate": 3.357142857142857e-05, |
|
"loss": 1.282, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.005275586224092664, |
|
"grad_norm": 1.938333511352539, |
|
"learning_rate": 3.352941176470588e-05, |
|
"loss": 0.9357, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.005288709572908815, |
|
"grad_norm": 2.111893892288208, |
|
"learning_rate": 3.3487394957983195e-05, |
|
"loss": 0.9913, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.005301832921724966, |
|
"grad_norm": 1.8991844654083252, |
|
"learning_rate": 3.3445378151260504e-05, |
|
"loss": 1.0779, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.0053149562705411164, |
|
"grad_norm": 2.0768351554870605, |
|
"learning_rate": 3.340336134453781e-05, |
|
"loss": 1.2548, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.005328079619357267, |
|
"grad_norm": 1.9904977083206177, |
|
"learning_rate": 3.336134453781513e-05, |
|
"loss": 0.9451, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.005341202968173419, |
|
"grad_norm": 1.7511869668960571, |
|
"learning_rate": 3.331932773109244e-05, |
|
"loss": 1.3958, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.0053543263169895695, |
|
"grad_norm": 1.9327552318572998, |
|
"learning_rate": 3.327731092436975e-05, |
|
"loss": 1.1969, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.00536744966580572, |
|
"grad_norm": 1.969369888305664, |
|
"learning_rate": 3.3235294117647056e-05, |
|
"loss": 0.9569, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.005380573014621871, |
|
"grad_norm": 1.5960437059402466, |
|
"learning_rate": 3.319327731092437e-05, |
|
"loss": 1.0643, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.005393696363438022, |
|
"grad_norm": 2.1960508823394775, |
|
"learning_rate": 3.315126050420168e-05, |
|
"loss": 0.9549, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.005406819712254173, |
|
"grad_norm": 2.2554352283477783, |
|
"learning_rate": 3.310924369747899e-05, |
|
"loss": 0.868, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.005419943061070324, |
|
"grad_norm": 1.931188941001892, |
|
"learning_rate": 3.3067226890756305e-05, |
|
"loss": 0.9063, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.005433066409886475, |
|
"grad_norm": 2.1585495471954346, |
|
"learning_rate": 3.3025210084033614e-05, |
|
"loss": 0.8968, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.005446189758702626, |
|
"grad_norm": 2.0901172161102295, |
|
"learning_rate": 3.2983193277310923e-05, |
|
"loss": 1.1077, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.005459313107518776, |
|
"grad_norm": 1.8528449535369873, |
|
"learning_rate": 3.294117647058824e-05, |
|
"loss": 1.0224, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.005472436456334928, |
|
"grad_norm": 2.231144428253174, |
|
"learning_rate": 3.289915966386555e-05, |
|
"loss": 1.0864, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.005485559805151079, |
|
"grad_norm": 1.7245928049087524, |
|
"learning_rate": 3.285714285714286e-05, |
|
"loss": 1.0887, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.0054986831539672295, |
|
"grad_norm": 1.8599853515625, |
|
"learning_rate": 3.281512605042017e-05, |
|
"loss": 0.7638, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.00551180650278338, |
|
"grad_norm": 1.6251364946365356, |
|
"learning_rate": 3.277310924369748e-05, |
|
"loss": 1.0903, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.005524929851599531, |
|
"grad_norm": 1.7949484586715698, |
|
"learning_rate": 3.273109243697479e-05, |
|
"loss": 1.454, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.005538053200415682, |
|
"grad_norm": 1.6007757186889648, |
|
"learning_rate": 3.26890756302521e-05, |
|
"loss": 1.0825, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.005551176549231833, |
|
"grad_norm": 1.8195713758468628, |
|
"learning_rate": 3.2647058823529416e-05, |
|
"loss": 0.7635, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.005564299898047984, |
|
"grad_norm": 2.4276785850524902, |
|
"learning_rate": 3.2605042016806725e-05, |
|
"loss": 1.28, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.005577423246864135, |
|
"grad_norm": 1.8293254375457764, |
|
"learning_rate": 3.2563025210084034e-05, |
|
"loss": 1.1156, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.0055905465956802855, |
|
"grad_norm": 1.8416686058044434, |
|
"learning_rate": 3.252100840336135e-05, |
|
"loss": 1.0777, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.005603669944496436, |
|
"grad_norm": 1.7739605903625488, |
|
"learning_rate": 3.247899159663866e-05, |
|
"loss": 0.7509, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.005616793293312588, |
|
"grad_norm": 1.3967030048370361, |
|
"learning_rate": 3.243697478991597e-05, |
|
"loss": 1.0822, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.005629916642128739, |
|
"grad_norm": 1.643563985824585, |
|
"learning_rate": 3.239495798319328e-05, |
|
"loss": 1.0556, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.005643039990944889, |
|
"grad_norm": 1.8893026113510132, |
|
"learning_rate": 3.235294117647059e-05, |
|
"loss": 0.9656, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.00565616333976104, |
|
"grad_norm": 2.050086736679077, |
|
"learning_rate": 3.23109243697479e-05, |
|
"loss": 1.2009, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.005669286688577191, |
|
"grad_norm": 1.813232660293579, |
|
"learning_rate": 3.226890756302522e-05, |
|
"loss": 0.9385, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.005682410037393342, |
|
"grad_norm": 2.043942928314209, |
|
"learning_rate": 3.2226890756302526e-05, |
|
"loss": 1.2035, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.005695533386209493, |
|
"grad_norm": 2.143899917602539, |
|
"learning_rate": 3.2184873949579835e-05, |
|
"loss": 0.9407, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.005708656735025644, |
|
"grad_norm": 2.0677599906921387, |
|
"learning_rate": 3.2142857142857144e-05, |
|
"loss": 0.9974, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.005721780083841795, |
|
"grad_norm": 1.8135740756988525, |
|
"learning_rate": 3.210084033613446e-05, |
|
"loss": 0.9685, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.0057349034326579455, |
|
"grad_norm": 1.9901634454727173, |
|
"learning_rate": 3.205882352941177e-05, |
|
"loss": 0.7587, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.005748026781474096, |
|
"grad_norm": 2.103733777999878, |
|
"learning_rate": 3.201680672268908e-05, |
|
"loss": 1.0126, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.005761150130290248, |
|
"grad_norm": 2.0034232139587402, |
|
"learning_rate": 3.197478991596639e-05, |
|
"loss": 1.0483, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.0057742734791063986, |
|
"grad_norm": 2.2869062423706055, |
|
"learning_rate": 3.1932773109243696e-05, |
|
"loss": 0.6886, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.005787396827922549, |
|
"grad_norm": 1.5798542499542236, |
|
"learning_rate": 3.1890756302521005e-05, |
|
"loss": 0.8474, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.0058005201767387, |
|
"grad_norm": 1.913794755935669, |
|
"learning_rate": 3.184873949579832e-05, |
|
"loss": 0.8935, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.005813643525554851, |
|
"grad_norm": 1.8879010677337646, |
|
"learning_rate": 3.180672268907563e-05, |
|
"loss": 1.1235, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.005826766874371002, |
|
"grad_norm": 2.1588127613067627, |
|
"learning_rate": 3.176470588235294e-05, |
|
"loss": 1.1129, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.005839890223187153, |
|
"grad_norm": 1.6797983646392822, |
|
"learning_rate": 3.1722689075630254e-05, |
|
"loss": 1.2736, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.005853013572003304, |
|
"grad_norm": 1.5610647201538086, |
|
"learning_rate": 3.168067226890756e-05, |
|
"loss": 1.0033, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.005866136920819455, |
|
"grad_norm": 1.567574381828308, |
|
"learning_rate": 3.163865546218487e-05, |
|
"loss": 1.0611, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.005879260269635605, |
|
"grad_norm": 1.9480379819869995, |
|
"learning_rate": 3.159663865546219e-05, |
|
"loss": 1.3461, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.005892383618451756, |
|
"grad_norm": 1.6626474857330322, |
|
"learning_rate": 3.15546218487395e-05, |
|
"loss": 0.934, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.005905506967267908, |
|
"grad_norm": 1.9532502889633179, |
|
"learning_rate": 3.1512605042016806e-05, |
|
"loss": 1.2752, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.0059186303160840585, |
|
"grad_norm": 1.858870267868042, |
|
"learning_rate": 3.147058823529412e-05, |
|
"loss": 0.8752, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.005931753664900209, |
|
"grad_norm": 1.7586513757705688, |
|
"learning_rate": 3.142857142857143e-05, |
|
"loss": 1.3883, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.00594487701371636, |
|
"grad_norm": 1.7400894165039062, |
|
"learning_rate": 3.138655462184874e-05, |
|
"loss": 1.1679, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.005958000362532511, |
|
"grad_norm": 1.8386495113372803, |
|
"learning_rate": 3.134453781512605e-05, |
|
"loss": 1.1425, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.005971123711348662, |
|
"grad_norm": 1.9906026124954224, |
|
"learning_rate": 3.1302521008403364e-05, |
|
"loss": 0.9552, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.005984247060164813, |
|
"grad_norm": 2.0481302738189697, |
|
"learning_rate": 3.1260504201680673e-05, |
|
"loss": 1.0198, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.005997370408980964, |
|
"grad_norm": 1.8666774034500122, |
|
"learning_rate": 3.121848739495798e-05, |
|
"loss": 0.9435, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.0060104937577971146, |
|
"grad_norm": 1.7503705024719238, |
|
"learning_rate": 3.11764705882353e-05, |
|
"loss": 1.238, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.006023617106613265, |
|
"grad_norm": 1.548405647277832, |
|
"learning_rate": 3.113445378151261e-05, |
|
"loss": 0.9299, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.006036740455429417, |
|
"grad_norm": 1.5533720254898071, |
|
"learning_rate": 3.1092436974789916e-05, |
|
"loss": 1.0623, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.006049863804245568, |
|
"grad_norm": 1.8287532329559326, |
|
"learning_rate": 3.105042016806723e-05, |
|
"loss": 1.0802, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.006062987153061718, |
|
"grad_norm": 1.733933448791504, |
|
"learning_rate": 3.100840336134454e-05, |
|
"loss": 1.527, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.006076110501877869, |
|
"grad_norm": 1.5526071786880493, |
|
"learning_rate": 3.096638655462185e-05, |
|
"loss": 1.0319, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.00608923385069402, |
|
"grad_norm": 1.6806490421295166, |
|
"learning_rate": 3.0924369747899166e-05, |
|
"loss": 1.5237, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.006102357199510171, |
|
"grad_norm": 3.8694965839385986, |
|
"learning_rate": 3.0882352941176475e-05, |
|
"loss": 1.1718, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.006115480548326322, |
|
"grad_norm": 1.8779933452606201, |
|
"learning_rate": 3.0840336134453784e-05, |
|
"loss": 0.6304, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.006128603897142473, |
|
"grad_norm": 1.7891600131988525, |
|
"learning_rate": 3.079831932773109e-05, |
|
"loss": 1.1527, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.006141727245958624, |
|
"grad_norm": 1.9199869632720947, |
|
"learning_rate": 3.075630252100841e-05, |
|
"loss": 1.1132, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.0061548505947747745, |
|
"grad_norm": 1.7727890014648438, |
|
"learning_rate": 3.071428571428572e-05, |
|
"loss": 0.976, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.006167973943590925, |
|
"grad_norm": 2.1416878700256348, |
|
"learning_rate": 3.0672268907563026e-05, |
|
"loss": 1.0768, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.006181097292407077, |
|
"grad_norm": 2.037363290786743, |
|
"learning_rate": 3.063025210084034e-05, |
|
"loss": 1.2424, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.006194220641223228, |
|
"grad_norm": 1.7760509252548218, |
|
"learning_rate": 3.058823529411765e-05, |
|
"loss": 0.881, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.006207343990039378, |
|
"grad_norm": 1.8409169912338257, |
|
"learning_rate": 3.054621848739496e-05, |
|
"loss": 0.9362, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.006220467338855529, |
|
"grad_norm": 1.7716394662857056, |
|
"learning_rate": 3.0504201680672273e-05, |
|
"loss": 0.9846, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.00623359068767168, |
|
"grad_norm": 1.9643959999084473, |
|
"learning_rate": 3.0462184873949578e-05, |
|
"loss": 1.2064, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.0062467140364878306, |
|
"grad_norm": 1.6986589431762695, |
|
"learning_rate": 3.042016806722689e-05, |
|
"loss": 0.7699, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.006259837385303982, |
|
"grad_norm": 1.9324336051940918, |
|
"learning_rate": 3.03781512605042e-05, |
|
"loss": 1.0145, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.006272960734120133, |
|
"grad_norm": 1.7184754610061646, |
|
"learning_rate": 3.0336134453781512e-05, |
|
"loss": 1.2643, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.006286084082936284, |
|
"grad_norm": 1.7461143732070923, |
|
"learning_rate": 3.0294117647058824e-05, |
|
"loss": 1.1703, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.006299207431752434, |
|
"grad_norm": 1.843173861503601, |
|
"learning_rate": 3.0252100840336133e-05, |
|
"loss": 0.9474, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.006312330780568585, |
|
"grad_norm": 2.223266839981079, |
|
"learning_rate": 3.0210084033613446e-05, |
|
"loss": 1.1058, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.006325454129384737, |
|
"grad_norm": 2.2394039630889893, |
|
"learning_rate": 3.0168067226890755e-05, |
|
"loss": 0.9657, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.0063385774782008875, |
|
"grad_norm": 1.9419907331466675, |
|
"learning_rate": 3.0126050420168067e-05, |
|
"loss": 1.1787, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.006351700827017038, |
|
"grad_norm": 1.90862238407135, |
|
"learning_rate": 3.008403361344538e-05, |
|
"loss": 1.5107, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.006364824175833189, |
|
"grad_norm": 1.8972816467285156, |
|
"learning_rate": 3.004201680672269e-05, |
|
"loss": 0.9348, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.00637794752464934, |
|
"grad_norm": 1.6643794775009155, |
|
"learning_rate": 3e-05, |
|
"loss": 0.5877, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.006391070873465491, |
|
"grad_norm": 2.072737216949463, |
|
"learning_rate": 2.995798319327731e-05, |
|
"loss": 1.0851, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.006404194222281642, |
|
"grad_norm": 2.223226308822632, |
|
"learning_rate": 2.9915966386554622e-05, |
|
"loss": 1.1913, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.006417317571097793, |
|
"grad_norm": 1.7678091526031494, |
|
"learning_rate": 2.9873949579831935e-05, |
|
"loss": 0.9093, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.006430440919913944, |
|
"grad_norm": 1.7829846143722534, |
|
"learning_rate": 2.9831932773109244e-05, |
|
"loss": 1.1833, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.006443564268730094, |
|
"grad_norm": 1.6032719612121582, |
|
"learning_rate": 2.9789915966386556e-05, |
|
"loss": 0.6613, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.006456687617546245, |
|
"grad_norm": 1.7531033754348755, |
|
"learning_rate": 2.9747899159663868e-05, |
|
"loss": 1.0188, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.006469810966362397, |
|
"grad_norm": 2.0264689922332764, |
|
"learning_rate": 2.9705882352941177e-05, |
|
"loss": 0.9985, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.006482934315178547, |
|
"grad_norm": 1.5329761505126953, |
|
"learning_rate": 2.966386554621849e-05, |
|
"loss": 1.1659, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.006496057663994698, |
|
"grad_norm": 2.2568962574005127, |
|
"learning_rate": 2.96218487394958e-05, |
|
"loss": 1.2176, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.006509181012810849, |
|
"grad_norm": 1.817195177078247, |
|
"learning_rate": 2.957983193277311e-05, |
|
"loss": 0.704, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.006522304361627, |
|
"grad_norm": 1.8540563583374023, |
|
"learning_rate": 2.9537815126050423e-05, |
|
"loss": 0.9465, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.006535427710443151, |
|
"grad_norm": 1.8276430368423462, |
|
"learning_rate": 2.9495798319327732e-05, |
|
"loss": 1.1567, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.006548551059259302, |
|
"grad_norm": 1.963805913925171, |
|
"learning_rate": 2.9453781512605045e-05, |
|
"loss": 0.9897, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.006561674408075453, |
|
"grad_norm": 1.851785659790039, |
|
"learning_rate": 2.9411764705882354e-05, |
|
"loss": 1.0783, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.0065747977568916035, |
|
"grad_norm": 1.7229440212249756, |
|
"learning_rate": 2.9369747899159666e-05, |
|
"loss": 0.884, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.006587921105707754, |
|
"grad_norm": 1.7793407440185547, |
|
"learning_rate": 2.932773109243698e-05, |
|
"loss": 1.2513, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.006601044454523905, |
|
"grad_norm": 1.5506298542022705, |
|
"learning_rate": 2.9285714285714288e-05, |
|
"loss": 0.9357, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.006614167803340057, |
|
"grad_norm": 1.7655850648880005, |
|
"learning_rate": 2.92436974789916e-05, |
|
"loss": 0.9378, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.006627291152156207, |
|
"grad_norm": 1.7562974691390991, |
|
"learning_rate": 2.9201680672268912e-05, |
|
"loss": 1.1196, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.006640414500972358, |
|
"grad_norm": 1.629323959350586, |
|
"learning_rate": 2.915966386554622e-05, |
|
"loss": 1.1883, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.006653537849788509, |
|
"grad_norm": 1.8579301834106445, |
|
"learning_rate": 2.9117647058823534e-05, |
|
"loss": 1.2, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.00666666119860466, |
|
"grad_norm": 1.8027002811431885, |
|
"learning_rate": 2.9075630252100843e-05, |
|
"loss": 0.9524, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.006679784547420811, |
|
"grad_norm": 1.908895492553711, |
|
"learning_rate": 2.9033613445378155e-05, |
|
"loss": 0.993, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.006692907896236962, |
|
"grad_norm": 1.8436781167984009, |
|
"learning_rate": 2.8991596638655467e-05, |
|
"loss": 0.8254, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.006706031245053113, |
|
"grad_norm": 2.0006208419799805, |
|
"learning_rate": 2.8949579831932776e-05, |
|
"loss": 1.2799, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.006719154593869263, |
|
"grad_norm": 1.581337571144104, |
|
"learning_rate": 2.890756302521009e-05, |
|
"loss": 1.0694, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.006732277942685414, |
|
"grad_norm": 2.022268056869507, |
|
"learning_rate": 2.8865546218487394e-05, |
|
"loss": 0.7551, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.006745401291501566, |
|
"grad_norm": 1.7129675149917603, |
|
"learning_rate": 2.8823529411764703e-05, |
|
"loss": 1.1265, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.0067585246403177165, |
|
"grad_norm": 1.994723916053772, |
|
"learning_rate": 2.8781512605042016e-05, |
|
"loss": 1.0374, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.006771647989133867, |
|
"grad_norm": 1.7831419706344604, |
|
"learning_rate": 2.8739495798319328e-05, |
|
"loss": 0.8946, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.006784771337950018, |
|
"grad_norm": 1.705159306526184, |
|
"learning_rate": 2.8697478991596637e-05, |
|
"loss": 1.0248, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.006797894686766169, |
|
"grad_norm": 1.675284504890442, |
|
"learning_rate": 2.865546218487395e-05, |
|
"loss": 1.0446, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.0068110180355823195, |
|
"grad_norm": 1.8878024816513062, |
|
"learning_rate": 2.861344537815126e-05, |
|
"loss": 1.1455, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.006824141384398471, |
|
"grad_norm": 1.783176302909851, |
|
"learning_rate": 2.857142857142857e-05, |
|
"loss": 1.1697, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.006837264733214622, |
|
"grad_norm": 1.6017670631408691, |
|
"learning_rate": 2.8529411764705883e-05, |
|
"loss": 0.883, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.006850388082030773, |
|
"grad_norm": 2.264923572540283, |
|
"learning_rate": 2.8487394957983192e-05, |
|
"loss": 1.2535, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.006863511430846923, |
|
"grad_norm": 1.7666747570037842, |
|
"learning_rate": 2.8445378151260505e-05, |
|
"loss": 1.0606, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.006876634779663074, |
|
"grad_norm": 1.9199743270874023, |
|
"learning_rate": 2.8403361344537817e-05, |
|
"loss": 1.0026, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.006889758128479226, |
|
"grad_norm": 1.6176460981369019, |
|
"learning_rate": 2.8361344537815126e-05, |
|
"loss": 1.2288, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.006902881477295376, |
|
"grad_norm": 2.1176071166992188, |
|
"learning_rate": 2.831932773109244e-05, |
|
"loss": 0.8439, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.006916004826111527, |
|
"grad_norm": 1.7792637348175049, |
|
"learning_rate": 2.8277310924369747e-05, |
|
"loss": 0.8614, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.006929128174927678, |
|
"grad_norm": 2.24349045753479, |
|
"learning_rate": 2.823529411764706e-05, |
|
"loss": 1.1758, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.006942251523743829, |
|
"grad_norm": 1.985783576965332, |
|
"learning_rate": 2.8193277310924372e-05, |
|
"loss": 0.9556, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.00695537487255998, |
|
"grad_norm": 1.7994742393493652, |
|
"learning_rate": 2.815126050420168e-05, |
|
"loss": 0.8336, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.006968498221376131, |
|
"grad_norm": 1.6311463117599487, |
|
"learning_rate": 2.8109243697478993e-05, |
|
"loss": 0.9338, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.006981621570192282, |
|
"grad_norm": 1.8948392868041992, |
|
"learning_rate": 2.8067226890756302e-05, |
|
"loss": 1.2195, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.0069947449190084325, |
|
"grad_norm": 1.5576555728912354, |
|
"learning_rate": 2.8025210084033615e-05, |
|
"loss": 1.2998, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.007007868267824583, |
|
"grad_norm": 1.9670222997665405, |
|
"learning_rate": 2.7983193277310927e-05, |
|
"loss": 1.1826, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.007020991616640734, |
|
"grad_norm": 1.8046157360076904, |
|
"learning_rate": 2.7941176470588236e-05, |
|
"loss": 1.1026, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.007034114965456886, |
|
"grad_norm": 2.0909459590911865, |
|
"learning_rate": 2.789915966386555e-05, |
|
"loss": 0.9153, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.007047238314273036, |
|
"grad_norm": 1.9456409215927124, |
|
"learning_rate": 2.785714285714286e-05, |
|
"loss": 1.1347, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.007060361663089187, |
|
"grad_norm": 1.6101740598678589, |
|
"learning_rate": 2.781512605042017e-05, |
|
"loss": 0.8582, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.007073485011905338, |
|
"grad_norm": 1.9965813159942627, |
|
"learning_rate": 2.7773109243697482e-05, |
|
"loss": 1.0083, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.007086608360721489, |
|
"grad_norm": 1.5864198207855225, |
|
"learning_rate": 2.773109243697479e-05, |
|
"loss": 0.8058, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.00709973170953764, |
|
"grad_norm": 1.828796625137329, |
|
"learning_rate": 2.7689075630252104e-05, |
|
"loss": 0.9877, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.007112855058353791, |
|
"grad_norm": 2.0490660667419434, |
|
"learning_rate": 2.7647058823529416e-05, |
|
"loss": 0.8834, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.007125978407169942, |
|
"grad_norm": 1.8640308380126953, |
|
"learning_rate": 2.7605042016806725e-05, |
|
"loss": 1.1126, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.007139101755986092, |
|
"grad_norm": 1.790665864944458, |
|
"learning_rate": 2.7563025210084037e-05, |
|
"loss": 1.0966, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.007152225104802243, |
|
"grad_norm": 1.8603692054748535, |
|
"learning_rate": 2.7521008403361346e-05, |
|
"loss": 0.9278, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.007165348453618394, |
|
"grad_norm": 1.5675926208496094, |
|
"learning_rate": 2.747899159663866e-05, |
|
"loss": 0.9246, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.0071784718024345455, |
|
"grad_norm": 2.200451612472534, |
|
"learning_rate": 2.743697478991597e-05, |
|
"loss": 1.3844, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.007191595151250696, |
|
"grad_norm": 1.7244689464569092, |
|
"learning_rate": 2.739495798319328e-05, |
|
"loss": 1.0398, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.007204718500066847, |
|
"grad_norm": 1.7680357694625854, |
|
"learning_rate": 2.7352941176470593e-05, |
|
"loss": 0.9782, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.007217841848882998, |
|
"grad_norm": 1.7877306938171387, |
|
"learning_rate": 2.7310924369747898e-05, |
|
"loss": 0.9214, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.0072309651976991485, |
|
"grad_norm": 1.6443262100219727, |
|
"learning_rate": 2.7268907563025207e-05, |
|
"loss": 1.002, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.0072440885465153, |
|
"grad_norm": 1.9577383995056152, |
|
"learning_rate": 2.722689075630252e-05, |
|
"loss": 1.4069, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.007257211895331451, |
|
"grad_norm": 1.518309235572815, |
|
"learning_rate": 2.7184873949579832e-05, |
|
"loss": 0.7467, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.007270335244147602, |
|
"grad_norm": 1.8443019390106201, |
|
"learning_rate": 2.714285714285714e-05, |
|
"loss": 0.8807, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.007283458592963752, |
|
"grad_norm": 1.7123879194259644, |
|
"learning_rate": 2.7100840336134453e-05, |
|
"loss": 0.8683, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.007296581941779903, |
|
"grad_norm": 1.9878864288330078, |
|
"learning_rate": 2.7058823529411766e-05, |
|
"loss": 1.2636, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.007309705290596055, |
|
"grad_norm": 1.8165278434753418, |
|
"learning_rate": 2.7016806722689075e-05, |
|
"loss": 1.2438, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.0073228286394122055, |
|
"grad_norm": 2.156190872192383, |
|
"learning_rate": 2.6974789915966387e-05, |
|
"loss": 1.0709, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.007335951988228356, |
|
"grad_norm": 1.7182382345199585, |
|
"learning_rate": 2.6932773109243696e-05, |
|
"loss": 0.7785, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.007349075337044507, |
|
"grad_norm": 2.2051150798797607, |
|
"learning_rate": 2.689075630252101e-05, |
|
"loss": 1.0183, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.007362198685860658, |
|
"grad_norm": 1.5881297588348389, |
|
"learning_rate": 2.684873949579832e-05, |
|
"loss": 0.8281, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.007375322034676808, |
|
"grad_norm": 1.5579569339752197, |
|
"learning_rate": 2.680672268907563e-05, |
|
"loss": 1.1472, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.00738844538349296, |
|
"grad_norm": 1.930672287940979, |
|
"learning_rate": 2.6764705882352942e-05, |
|
"loss": 0.9618, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.007401568732309111, |
|
"grad_norm": 1.9797165393829346, |
|
"learning_rate": 2.672268907563025e-05, |
|
"loss": 0.7419, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.0074146920811252615, |
|
"grad_norm": 1.7777879238128662, |
|
"learning_rate": 2.6680672268907564e-05, |
|
"loss": 1.2476, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.007427815429941412, |
|
"grad_norm": 1.7834422588348389, |
|
"learning_rate": 2.6638655462184876e-05, |
|
"loss": 0.9808, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.007440938778757563, |
|
"grad_norm": 4.512395858764648, |
|
"learning_rate": 2.6596638655462185e-05, |
|
"loss": 1.2391, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.007454062127573715, |
|
"grad_norm": 1.4711298942565918, |
|
"learning_rate": 2.6554621848739497e-05, |
|
"loss": 0.9466, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.007467185476389865, |
|
"grad_norm": 2.1497201919555664, |
|
"learning_rate": 2.651260504201681e-05, |
|
"loss": 1.0423, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.007480308825206016, |
|
"grad_norm": 2.1284451484680176, |
|
"learning_rate": 2.647058823529412e-05, |
|
"loss": 1.4471, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.007493432174022167, |
|
"grad_norm": 2.2171754837036133, |
|
"learning_rate": 2.642857142857143e-05, |
|
"loss": 1.0436, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.007506555522838318, |
|
"grad_norm": 2.088338613510132, |
|
"learning_rate": 2.638655462184874e-05, |
|
"loss": 1.0471, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.007519678871654469, |
|
"grad_norm": 1.9278613328933716, |
|
"learning_rate": 2.6344537815126052e-05, |
|
"loss": 1.0237, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.00753280222047062, |
|
"grad_norm": 1.7411606311798096, |
|
"learning_rate": 2.6302521008403365e-05, |
|
"loss": 1.2358, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.007545925569286771, |
|
"grad_norm": 1.9148597717285156, |
|
"learning_rate": 2.6260504201680674e-05, |
|
"loss": 1.0746, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.0075590489181029214, |
|
"grad_norm": 1.8952248096466064, |
|
"learning_rate": 2.6218487394957986e-05, |
|
"loss": 0.9406, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.007572172266919072, |
|
"grad_norm": 1.5231568813323975, |
|
"learning_rate": 2.6176470588235295e-05, |
|
"loss": 1.0948, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.007585295615735223, |
|
"grad_norm": 1.7733741998672485, |
|
"learning_rate": 2.6134453781512608e-05, |
|
"loss": 0.7948, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.0075984189645513745, |
|
"grad_norm": 2.0340850353240967, |
|
"learning_rate": 2.609243697478992e-05, |
|
"loss": 0.8924, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.007611542313367525, |
|
"grad_norm": 2.275388240814209, |
|
"learning_rate": 2.605042016806723e-05, |
|
"loss": 1.245, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.007624665662183676, |
|
"grad_norm": 1.734357237815857, |
|
"learning_rate": 2.600840336134454e-05, |
|
"loss": 1.1312, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.007637789010999827, |
|
"grad_norm": 1.541772723197937, |
|
"learning_rate": 2.5966386554621854e-05, |
|
"loss": 0.7925, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.0076509123598159775, |
|
"grad_norm": 1.9468810558319092, |
|
"learning_rate": 2.5924369747899163e-05, |
|
"loss": 0.868, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.007664035708632129, |
|
"grad_norm": 1.7712815999984741, |
|
"learning_rate": 2.5882352941176475e-05, |
|
"loss": 0.8882, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.00767715905744828, |
|
"grad_norm": 2.1997227668762207, |
|
"learning_rate": 2.5840336134453784e-05, |
|
"loss": 0.8745, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.007690282406264431, |
|
"grad_norm": 1.7818772792816162, |
|
"learning_rate": 2.5798319327731096e-05, |
|
"loss": 1.1102, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.007703405755080581, |
|
"grad_norm": 1.9181782007217407, |
|
"learning_rate": 2.5756302521008402e-05, |
|
"loss": 0.9866, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.007716529103896732, |
|
"grad_norm": 1.757861614227295, |
|
"learning_rate": 2.5714285714285714e-05, |
|
"loss": 0.9548, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.007729652452712883, |
|
"grad_norm": 2.0078554153442383, |
|
"learning_rate": 2.5672268907563023e-05, |
|
"loss": 1.0659, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.0077427758015290345, |
|
"grad_norm": 2.041961669921875, |
|
"learning_rate": 2.5630252100840336e-05, |
|
"loss": 1.1867, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.007755899150345185, |
|
"grad_norm": 1.8703291416168213, |
|
"learning_rate": 2.5588235294117645e-05, |
|
"loss": 0.9826, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.007769022499161336, |
|
"grad_norm": 1.6957659721374512, |
|
"learning_rate": 2.5546218487394957e-05, |
|
"loss": 0.9584, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.007782145847977487, |
|
"grad_norm": 2.2199056148529053, |
|
"learning_rate": 2.550420168067227e-05, |
|
"loss": 1.2266, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.0077952691967936374, |
|
"grad_norm": 1.8995167016983032, |
|
"learning_rate": 2.546218487394958e-05, |
|
"loss": 1.0272, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.007808392545609789, |
|
"grad_norm": 1.8795480728149414, |
|
"learning_rate": 2.542016806722689e-05, |
|
"loss": 1.2015, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.007821515894425939, |
|
"grad_norm": 1.541920781135559, |
|
"learning_rate": 2.53781512605042e-05, |
|
"loss": 0.9476, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.00783463924324209, |
|
"grad_norm": 1.7051913738250732, |
|
"learning_rate": 2.5336134453781512e-05, |
|
"loss": 1.0941, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.007847762592058242, |
|
"grad_norm": 1.6097373962402344, |
|
"learning_rate": 2.5294117647058825e-05, |
|
"loss": 0.9476, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.007860885940874392, |
|
"grad_norm": 1.8319956064224243, |
|
"learning_rate": 2.5252100840336134e-05, |
|
"loss": 1.1252, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.007874009289690544, |
|
"grad_norm": 1.8047981262207031, |
|
"learning_rate": 2.5210084033613446e-05, |
|
"loss": 0.9398, |
|
"step": 600 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 1200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 200, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.644383561996206e+17, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|