{
  "best_metric": 1.47908163,
  "best_model_checkpoint": "D:\\_____NEW_NN\\LLM\\MiniCPM-V\\finetune\\output\\phi3-vision-128k-instruct\\v9-20240710-235159\\checkpoint-500",
  "epoch": 2.8828828828828827,
  "eval_steps": 50,
  "global_step": 800,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "acc": 0.4856407,
      "epoch": 0.0036036036036036037,
      "grad_norm": 0.734375,
      "learning_rate": 2.4107142857142856e-06,
      "loss": 2.42667556,
      "memory(GiB)": 18.11,
      "step": 1,
      "train_speed(iter/s)": 0.072451
    },
    {
      "acc": 0.50815099,
      "epoch": 0.018018018018018018,
      "grad_norm": 0.671875,
      "learning_rate": 1.2053571428571429e-05,
      "loss": 2.28746271,
      "memory(GiB)": 19.3,
      "step": 5,
      "train_speed(iter/s)": 0.081978
    },
    {
      "acc": 0.50680609,
      "epoch": 0.036036036036036036,
      "grad_norm": 0.76953125,
      "learning_rate": 2.4107142857142858e-05,
      "loss": 2.29894772,
      "memory(GiB)": 19.3,
      "step": 10,
      "train_speed(iter/s)": 0.084125
    },
    {
      "acc": 0.51412601,
      "epoch": 0.05405405405405406,
      "grad_norm": 0.76171875,
      "learning_rate": 3.616071428571428e-05,
      "loss": 2.34161263,
      "memory(GiB)": 19.7,
      "step": 15,
      "train_speed(iter/s)": 0.08456
    },
    {
      "acc": 0.52338777,
      "epoch": 0.07207207207207207,
      "grad_norm": 0.6015625,
      "learning_rate": 4.8214285714285716e-05,
      "loss": 2.23036633,
      "memory(GiB)": 19.88,
      "step": 20,
      "train_speed(iter/s)": 0.084117
    },
    {
      "acc": 0.55944238,
      "epoch": 0.09009009009009009,
      "grad_norm": 0.66796875,
      "learning_rate": 6.026785714285715e-05,
      "loss": 2.01084595,
      "memory(GiB)": 19.93,
      "step": 25,
      "train_speed(iter/s)": 0.084444
    },
    {
      "acc": 0.57758675,
      "epoch": 0.10810810810810811,
      "grad_norm": 0.765625,
      "learning_rate": 7.232142857142856e-05,
      "loss": 1.94100876,
      "memory(GiB)": 20.21,
      "step": 30,
      "train_speed(iter/s)": 0.085158
    },
    {
      "acc": 0.5666451,
      "epoch": 0.12612612612612611,
      "grad_norm": 0.796875,
      "learning_rate": 8.4375e-05,
      "loss": 1.96992569,
      "memory(GiB)": 19.42,
      "step": 35,
      "train_speed(iter/s)": 0.085562
    },
    {
      "acc": 0.55766659,
      "epoch": 0.14414414414414414,
      "grad_norm": 0.828125,
      "learning_rate": 9.642857142857143e-05,
      "loss": 2.01305885,
      "memory(GiB)": 19.71,
      "step": 40,
      "train_speed(iter/s)": 0.0857
    },
    {
      "acc": 0.56964116,
      "epoch": 0.16216216216216217,
      "grad_norm": 0.83203125,
      "learning_rate": 0.00010848214285714286,
      "loss": 1.925914,
      "memory(GiB)": 19.68,
      "step": 45,
      "train_speed(iter/s)": 0.08577
    },
    {
      "acc": 0.56270452,
      "epoch": 0.18018018018018017,
      "grad_norm": 0.9375,
      "learning_rate": 0.0001205357142857143,
      "loss": 1.94923038,
      "memory(GiB)": 19.65,
      "step": 50,
      "train_speed(iter/s)": 0.085942
    },
    {
      "epoch": 0.18018018018018017,
      "eval_acc": 0.5890983000739098,
      "eval_loss": 1.795773983001709,
      "eval_runtime": 136.6505,
      "eval_samples_per_second": 1.105,
      "eval_steps_per_second": 0.556,
      "step": 50
    },
    {
      "acc": 0.57772484,
      "epoch": 0.1981981981981982,
      "grad_norm": 0.7265625,
      "learning_rate": 0.00013258928571428571,
      "loss": 1.86195869,
      "memory(GiB)": 23.11,
      "step": 55,
      "train_speed(iter/s)": 0.070857
    },
    {
      "acc": 0.59196444,
      "epoch": 0.21621621621621623,
      "grad_norm": 0.8125,
      "learning_rate": 0.00013499518432841625,
      "loss": 1.74724998,
      "memory(GiB)": 19.42,
      "step": 60,
      "train_speed(iter/s)": 0.071911
    },
    {
      "acc": 0.57253065,
      "epoch": 0.23423423423423423,
      "grad_norm": 0.69921875,
      "learning_rate": 0.00013497562184025362,
      "loss": 1.87580814,
      "memory(GiB)": 19.61,
      "step": 65,
      "train_speed(iter/s)": 0.072807
    },
    {
      "acc": 0.59546819,
      "epoch": 0.25225225225225223,
      "grad_norm": 0.73046875,
      "learning_rate": 0.00013494101591406666,
      "loss": 1.73464546,
      "memory(GiB)": 19.58,
      "step": 70,
      "train_speed(iter/s)": 0.073652
    },
    {
      "acc": 0.59667702,
      "epoch": 0.2702702702702703,
      "grad_norm": 0.8203125,
      "learning_rate": 0.00013489137426511745,
      "loss": 1.69518318,
      "memory(GiB)": 18.19,
      "step": 75,
      "train_speed(iter/s)": 0.074445
    },
    {
      "acc": 0.61824327,
      "epoch": 0.2882882882882883,
      "grad_norm": 0.828125,
      "learning_rate": 0.00013482670796082633,
      "loss": 1.64374161,
      "memory(GiB)": 19.52,
      "step": 80,
      "train_speed(iter/s)": 0.075071
    },
    {
      "acc": 0.60798159,
      "epoch": 0.3063063063063063,
      "grad_norm": 0.7734375,
      "learning_rate": 0.00013474703141830443,
      "loss": 1.68669338,
      "memory(GiB)": 19.57,
      "step": 85,
      "train_speed(iter/s)": 0.07562
    },
    {
      "acc": 0.5981144,
      "epoch": 0.32432432432432434,
      "grad_norm": 0.80078125,
      "learning_rate": 0.00013465236240113953,
      "loss": 1.701264,
      "memory(GiB)": 20.19,
      "step": 90,
      "train_speed(iter/s)": 0.076188
    },
    {
      "acc": 0.59871612,
      "epoch": 0.34234234234234234,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00013454272201543564,
      "loss": 1.76608849,
      "memory(GiB)": 19.35,
      "step": 95,
      "train_speed(iter/s)": 0.076637
    },
    {
      "acc": 0.61396523,
      "epoch": 0.36036036036036034,
      "grad_norm": 0.7109375,
      "learning_rate": 0.00013441813470510747,
      "loss": 1.61449242,
      "memory(GiB)": 19.69,
      "step": 100,
      "train_speed(iter/s)": 0.077075
    },
    {
      "epoch": 0.36036036036036034,
      "eval_acc": 0.6091648189209165,
      "eval_loss": 1.6449466943740845,
      "eval_runtime": 134.5726,
      "eval_samples_per_second": 1.122,
      "eval_steps_per_second": 0.565,
      "step": 100
    },
    {
      "acc": 0.61147785,
      "epoch": 0.3783783783783784,
      "grad_norm": 0.69921875,
      "learning_rate": 0.00013427862824643083,
      "loss": 1.60589867,
      "memory(GiB)": 21.03,
      "step": 105,
      "train_speed(iter/s)": 0.070426
    },
    {
      "acc": 0.6038115,
      "epoch": 0.3963963963963964,
      "grad_norm": 0.88671875,
      "learning_rate": 0.00013412423374184996,
      "loss": 1.69055023,
      "memory(GiB)": 19.44,
      "step": 110,
      "train_speed(iter/s)": 0.07105
    },
    {
      "acc": 0.62303677,
      "epoch": 0.4144144144144144,
      "grad_norm": 0.84375,
      "learning_rate": 0.00013395498561304334,
      "loss": 1.5716897,
      "memory(GiB)": 19.27,
      "step": 115,
      "train_speed(iter/s)": 0.071618
    },
    {
      "acc": 0.6214046,
      "epoch": 0.43243243243243246,
      "grad_norm": 0.640625,
      "learning_rate": 0.00013377092159324956,
      "loss": 1.57531881,
      "memory(GiB)": 19.36,
      "step": 120,
      "train_speed(iter/s)": 0.07209
    },
    {
      "acc": 0.58676672,
      "epoch": 0.45045045045045046,
      "grad_norm": 0.68359375,
      "learning_rate": 0.00013357208271885473,
      "loss": 1.74933128,
      "memory(GiB)": 19.32,
      "step": 125,
      "train_speed(iter/s)": 0.072581
    },
    {
      "acc": 0.59380612,
      "epoch": 0.46846846846846846,
      "grad_norm": 0.7890625,
      "learning_rate": 0.00013335851332024374,
      "loss": 1.69583378,
      "memory(GiB)": 20.18,
      "step": 130,
      "train_speed(iter/s)": 0.073016
    },
    {
      "acc": 0.62007999,
      "epoch": 0.4864864864864865,
      "grad_norm": 0.73828125,
      "learning_rate": 0.0001331302610119168,
      "loss": 1.60020466,
      "memory(GiB)": 19.52,
      "step": 135,
      "train_speed(iter/s)": 0.073417
    },
    {
      "acc": 0.6116991,
      "epoch": 0.5045045045045045,
      "grad_norm": 1.1015625,
      "learning_rate": 0.00013288737668187408,
      "loss": 1.62470894,
      "memory(GiB)": 19.47,
      "step": 140,
      "train_speed(iter/s)": 0.073817
    },
    {
      "acc": 0.60051751,
      "epoch": 0.5225225225225225,
      "grad_norm": 0.87109375,
      "learning_rate": 0.00013262991448027034,
      "loss": 1.6651041,
      "memory(GiB)": 19.42,
      "step": 145,
      "train_speed(iter/s)": 0.074194
    },
    {
      "acc": 0.60736594,
      "epoch": 0.5405405405405406,
      "grad_norm": 0.76953125,
      "learning_rate": 0.00013235793180734238,
      "loss": 1.64281559,
      "memory(GiB)": 19.53,
      "step": 150,
      "train_speed(iter/s)": 0.074547
    },
    {
      "epoch": 0.5405405405405406,
      "eval_acc": 0.6190317812269032,
      "eval_loss": 1.5917434692382812,
      "eval_runtime": 135.0141,
      "eval_samples_per_second": 1.118,
      "eval_steps_per_second": 0.563,
      "step": 150
    },
    {
      "acc": 0.61663914,
      "epoch": 0.5585585585585585,
      "grad_norm": 1.0625,
      "learning_rate": 0.00013207148930061195,
      "loss": 1.60914173,
      "memory(GiB)": 23.05,
      "step": 155,
      "train_speed(iter/s)": 0.070306
    },
    {
      "acc": 0.60967774,
      "epoch": 0.5765765765765766,
      "grad_norm": 0.76953125,
      "learning_rate": 0.00013177065082136668,
      "loss": 1.59582939,
      "memory(GiB)": 19.47,
      "step": 160,
      "train_speed(iter/s)": 0.070712
    },
    {
      "acc": 0.63630972,
      "epoch": 0.5945945945945946,
      "grad_norm": 0.70703125,
      "learning_rate": 0.00013145548344042262,
      "loss": 1.50356016,
      "memory(GiB)": 19.62,
      "step": 165,
      "train_speed(iter/s)": 0.071104
    },
    {
      "acc": 0.60439692,
      "epoch": 0.6126126126126126,
      "grad_norm": 0.73046875,
      "learning_rate": 0.00013112605742317095,
      "loss": 1.67050171,
      "memory(GiB)": 19.41,
      "step": 170,
      "train_speed(iter/s)": 0.071478
    },
    {
      "acc": 0.62380457,
      "epoch": 0.6306306306306306,
      "grad_norm": 0.76171875,
      "learning_rate": 0.0001307824462139125,
      "loss": 1.53042831,
      "memory(GiB)": 19.5,
      "step": 175,
      "train_speed(iter/s)": 0.071843
    },
    {
      "acc": 0.61549187,
      "epoch": 0.6486486486486487,
      "grad_norm": 0.7578125,
      "learning_rate": 0.00013042472641948386,
      "loss": 1.59476538,
      "memory(GiB)": 19.53,
      "step": 180,
      "train_speed(iter/s)": 0.072168
    },
    {
      "acc": 0.64418182,
      "epoch": 0.6666666666666666,
      "grad_norm": 1.1796875,
      "learning_rate": 0.0001300529777921779,
      "loss": 1.47999802,
      "memory(GiB)": 19.32,
      "step": 185,
      "train_speed(iter/s)": 0.072501
    },
    {
      "acc": 0.62201657,
      "epoch": 0.6846846846846847,
      "grad_norm": 0.6484375,
      "learning_rate": 0.00012966728321196346,
      "loss": 1.5685544,
      "memory(GiB)": 19.47,
      "step": 190,
      "train_speed(iter/s)": 0.072821
    },
    {
      "acc": 0.61418505,
      "epoch": 0.7027027027027027,
      "grad_norm": 0.8984375,
      "learning_rate": 0.00012926772866800757,
      "loss": 1.6284462,
      "memory(GiB)": 19.45,
      "step": 195,
      "train_speed(iter/s)": 0.073127
    },
    {
      "acc": 0.62820964,
      "epoch": 0.7207207207207207,
      "grad_norm": 0.8515625,
      "learning_rate": 0.00012885440323950434,
      "loss": 1.54364405,
      "memory(GiB)": 19.53,
      "step": 200,
      "train_speed(iter/s)": 0.073413
    },
    {
      "epoch": 0.7207207207207207,
      "eval_acc": 0.6269770879526977,
      "eval_loss": 1.5466336011886597,
      "eval_runtime": 134.7868,
      "eval_samples_per_second": 1.12,
      "eval_steps_per_second": 0.564,
      "step": 200
    },
    {
      "acc": 0.6605804,
      "epoch": 0.7387387387387387,
      "grad_norm": 0.7578125,
      "learning_rate": 0.00012842739907581525,
      "loss": 1.42957153,
      "memory(GiB)": 23.0,
      "step": 205,
      "train_speed(iter/s)": 0.070232
    },
    {
      "acc": 0.61267309,
      "epoch": 0.7567567567567568,
      "grad_norm": 0.90234375,
      "learning_rate": 0.00012798681137592477,
      "loss": 1.62853241,
      "memory(GiB)": 17.96,
      "step": 210,
      "train_speed(iter/s)": 0.070571
    },
    {
      "acc": 0.63069816,
      "epoch": 0.7747747747747747,
      "grad_norm": 0.89453125,
      "learning_rate": 0.00012753273836721597,
      "loss": 1.56295233,
      "memory(GiB)": 19.4,
      "step": 215,
      "train_speed(iter/s)": 0.070892
    },
    {
      "acc": 0.60362072,
      "epoch": 0.7927927927927928,
      "grad_norm": 1.0703125,
      "learning_rate": 0.00012706528128357127,
      "loss": 1.63038826,
      "memory(GiB)": 19.37,
      "step": 220,
      "train_speed(iter/s)": 0.071181
    },
    {
      "acc": 0.62272639,
      "epoch": 0.8108108108108109,
      "grad_norm": 0.8828125,
      "learning_rate": 0.00012658454434280253,
      "loss": 1.5756237,
      "memory(GiB)": 19.62,
      "step": 225,
      "train_speed(iter/s)": 0.071466
    },
    {
      "acc": 0.59926658,
      "epoch": 0.8288288288288288,
      "grad_norm": 0.75390625,
      "learning_rate": 0.00012609063472341633,
      "loss": 1.60503426,
      "memory(GiB)": 19.63,
      "step": 230,
      "train_speed(iter/s)": 0.071751
    },
    {
      "acc": 0.60133944,
      "epoch": 0.8468468468468469,
      "grad_norm": 1.3515625,
      "learning_rate": 0.0001255836625407187,
      "loss": 1.64450779,
      "memory(GiB)": 19.31,
      "step": 235,
      "train_speed(iter/s)": 0.072034
    },
    {
      "acc": 0.64020758,
      "epoch": 0.8648648648648649,
      "grad_norm": 0.9375,
      "learning_rate": 0.00012506374082226534,
      "loss": 1.47053967,
      "memory(GiB)": 18.85,
      "step": 240,
      "train_speed(iter/s)": 0.072286
    },
    {
      "acc": 0.62713485,
      "epoch": 0.8828828828828829,
      "grad_norm": 0.82421875,
      "learning_rate": 0.00012453098548266276,
      "loss": 1.51464148,
      "memory(GiB)": 19.35,
      "step": 245,
      "train_speed(iter/s)": 0.07254
    },
    {
      "acc": 0.6202302,
      "epoch": 0.9009009009009009,
      "grad_norm": 0.625,
      "learning_rate": 0.0001239855152977253,
      "loss": 1.54778471,
      "memory(GiB)": 19.53,
      "step": 250,
      "train_speed(iter/s)": 0.072758
    },
    {
      "epoch": 0.9009009009009009,
      "eval_acc": 0.6308573540280857,
      "eval_loss": 1.510523796081543,
      "eval_runtime": 134.5445,
      "eval_samples_per_second": 1.122,
      "eval_steps_per_second": 0.565,
      "step": 250
    },
    {
      "acc": 0.63671951,
      "epoch": 0.918918918918919,
      "grad_norm": 1.7109375,
      "learning_rate": 0.00012342745187799459,
      "loss": 1.48321924,
      "memory(GiB)": 19.53,
      "step": 255,
      "train_speed(iter/s)": 0.070273
    },
    {
      "acc": 0.63577223,
      "epoch": 0.9369369369369369,
      "grad_norm": 0.7890625,
      "learning_rate": 0.000122856919641627,
      "loss": 1.50699987,
      "memory(GiB)": 19.94,
      "step": 260,
      "train_speed(iter/s)": 0.070553
    },
    {
      "acc": 0.64953299,
      "epoch": 0.954954954954955,
      "grad_norm": 0.85546875,
      "learning_rate": 0.000122274045786655,
      "loss": 1.46005678,
      "memory(GiB)": 20.1,
      "step": 265,
      "train_speed(iter/s)": 0.070802
    },
    {
      "acc": 0.62153759,
      "epoch": 0.972972972972973,
      "grad_norm": 1.0625,
      "learning_rate": 0.00012167896026262893,
      "loss": 1.55834417,
      "memory(GiB)": 19.86,
      "step": 270,
      "train_speed(iter/s)": 0.071052
    },
    {
      "acc": 0.64055209,
      "epoch": 0.990990990990991,
      "grad_norm": 1.125,
      "learning_rate": 0.00012107179574164504,
      "loss": 1.54932261,
      "memory(GiB)": 20.06,
      "step": 275,
      "train_speed(iter/s)": 0.071274
    },
    {
      "acc": 0.62708969,
      "epoch": 1.009009009009009,
      "grad_norm": 0.671875,
      "learning_rate": 0.00012045268758876699,
      "loss": 1.49731979,
      "memory(GiB)": 19.82,
      "step": 280,
      "train_speed(iter/s)": 0.07152
    },
    {
      "acc": 0.6689836,
      "epoch": 1.027027027027027,
      "grad_norm": 0.859375,
      "learning_rate": 0.00011982177383184648,
      "loss": 1.2817215,
      "memory(GiB)": 19.85,
      "step": 285,
      "train_speed(iter/s)": 0.07175
    },
    {
      "acc": 0.67519293,
      "epoch": 1.045045045045045,
      "grad_norm": 1.046875,
      "learning_rate": 0.00011917919513075066,
      "loss": 1.28632126,
      "memory(GiB)": 19.98,
      "step": 290,
      "train_speed(iter/s)": 0.071951
    },
    {
      "acc": 0.67276659,
      "epoch": 1.063063063063063,
      "grad_norm": 0.8984375,
      "learning_rate": 0.00011852509474600237,
      "loss": 1.27065611,
      "memory(GiB)": 20.03,
      "step": 295,
      "train_speed(iter/s)": 0.072155
    },
    {
      "acc": 0.64641519,
      "epoch": 1.0810810810810811,
      "grad_norm": 0.98046875,
      "learning_rate": 0.00011785961850684083,
      "loss": 1.38271847,
      "memory(GiB)": 19.09,
      "step": 300,
      "train_speed(iter/s)": 0.072371
    },
    {
      "epoch": 1.0810810810810811,
      "eval_acc": 0.6305617147080562,
      "eval_loss": 1.523685097694397,
      "eval_runtime": 134.8234,
      "eval_samples_per_second": 1.12,
      "eval_steps_per_second": 0.564,
      "step": 300
    },
    {
      "acc": 0.67837138,
      "epoch": 1.0990990990990992,
      "grad_norm": 0.953125,
      "learning_rate": 0.00011718291477870959,
      "loss": 1.29290819,
      "memory(GiB)": 22.8,
      "step": 305,
      "train_speed(iter/s)": 0.070277
    },
    {
      "acc": 0.67195911,
      "epoch": 1.117117117117117,
      "grad_norm": 1.796875,
      "learning_rate": 0.00011649513443017889,
      "loss": 1.24073734,
      "memory(GiB)": 19.39,
      "step": 310,
      "train_speed(iter/s)": 0.070516
    },
    {
      "acc": 0.69478951,
      "epoch": 1.135135135135135,
      "grad_norm": 1.203125,
      "learning_rate": 0.00011579643079931018,
      "loss": 1.20378675,
      "memory(GiB)": 19.38,
      "step": 315,
      "train_speed(iter/s)": 0.070713
    },
    {
      "acc": 0.68726826,
      "epoch": 1.1531531531531531,
      "grad_norm": 0.98828125,
      "learning_rate": 0.00011508695965946992,
      "loss": 1.23284683,
      "memory(GiB)": 19.98,
      "step": 320,
      "train_speed(iter/s)": 0.070919
    },
    {
      "acc": 0.65419765,
      "epoch": 1.1711711711711712,
      "grad_norm": 0.93359375,
      "learning_rate": 0.00011436687918460052,
      "loss": 1.37520065,
      "memory(GiB)": 20.02,
      "step": 325,
      "train_speed(iter/s)": 0.071117
    },
    {
      "acc": 0.66610641,
      "epoch": 1.1891891891891893,
      "grad_norm": 0.8671875,
      "learning_rate": 0.000113636349913956,
      "loss": 1.30743008,
      "memory(GiB)": 19.35,
      "step": 330,
      "train_speed(iter/s)": 0.071322
    },
    {
      "acc": 0.67390976,
      "epoch": 1.2072072072072073,
      "grad_norm": 1.6640625,
      "learning_rate": 0.00011289553471631045,
      "loss": 1.28322783,
      "memory(GiB)": 19.49,
      "step": 335,
      "train_speed(iter/s)": 0.071518
    },
    {
      "acc": 0.68137512,
      "epoch": 1.2252252252252251,
      "grad_norm": 0.6953125,
      "learning_rate": 0.00011214459875364693,
      "loss": 1.23027716,
      "memory(GiB)": 19.38,
      "step": 340,
      "train_speed(iter/s)": 0.071692
    },
    {
      "acc": 0.67859125,
      "epoch": 1.2432432432432432,
      "grad_norm": 0.78515625,
      "learning_rate": 0.00011138370944433531,
      "loss": 1.22896252,
      "memory(GiB)": 20.06,
      "step": 345,
      "train_speed(iter/s)": 0.071876
    },
    {
      "acc": 0.66445112,
      "epoch": 1.2612612612612613,
      "grad_norm": 0.90234375,
      "learning_rate": 0.00011061303642580694,
      "loss": 1.30674038,
      "memory(GiB)": 19.49,
      "step": 350,
      "train_speed(iter/s)": 0.072045
    },
    {
      "epoch": 1.2612612612612613,
      "eval_acc": 0.6356245380635624,
      "eval_loss": 1.5072119235992432,
      "eval_runtime": 134.5232,
      "eval_samples_per_second": 1.122,
      "eval_steps_per_second": 0.565,
      "step": 350
    },
    {
      "acc": 0.67729836,
      "epoch": 1.2792792792792793,
      "grad_norm": 0.90625,
      "learning_rate": 0.00010983275151673467,
      "loss": 1.24173574,
      "memory(GiB)": 18.93,
      "step": 355,
      "train_speed(iter/s)": 0.07029
    },
    {
      "acc": 0.7040791,
      "epoch": 1.2972972972972974,
      "grad_norm": 0.84765625,
      "learning_rate": 0.00010904302867872639,
      "loss": 1.17582674,
      "memory(GiB)": 19.29,
      "step": 360,
      "train_speed(iter/s)": 0.070479
    },
    {
      "acc": 0.66356058,
      "epoch": 1.3153153153153152,
      "grad_norm": 0.82421875,
      "learning_rate": 0.00010824404397754104,
      "loss": 1.26798725,
      "memory(GiB)": 19.36,
      "step": 365,
      "train_speed(iter/s)": 0.070661
    },
    {
      "acc": 0.69379635,
      "epoch": 1.3333333333333333,
      "grad_norm": 0.98828125,
      "learning_rate": 0.0001074359755438354,
      "loss": 1.24331112,
      "memory(GiB)": 20.16,
      "step": 370,
      "train_speed(iter/s)": 0.070843
    },
    {
      "acc": 0.68220735,
      "epoch": 1.3513513513513513,
      "grad_norm": 0.94140625,
      "learning_rate": 0.00010661900353345051,
      "loss": 1.20891714,
      "memory(GiB)": 19.61,
      "step": 375,
      "train_speed(iter/s)": 0.071015
    },
    {
      "acc": 0.67620883,
      "epoch": 1.3693693693693694,
      "grad_norm": 1.0625,
      "learning_rate": 0.0001057933100872466,
      "loss": 1.23957863,
      "memory(GiB)": 20.17,
      "step": 380,
      "train_speed(iter/s)": 0.071181
    },
    {
      "acc": 0.63655629,
      "epoch": 1.3873873873873874,
      "grad_norm": 0.78515625,
      "learning_rate": 0.00010495907929049546,
      "loss": 1.44390507,
      "memory(GiB)": 19.25,
      "step": 385,
      "train_speed(iter/s)": 0.071356
    },
    {
      "acc": 0.67883902,
      "epoch": 1.4054054054054055,
      "grad_norm": 0.8828125,
      "learning_rate": 0.00010411649713183925,
      "loss": 1.29691544,
      "memory(GiB)": 18.78,
      "step": 390,
      "train_speed(iter/s)": 0.071515
    },
    {
      "acc": 0.67202511,
      "epoch": 1.4234234234234235,
      "grad_norm": 0.953125,
      "learning_rate": 0.00010326575146182521,
      "loss": 1.31318274,
      "memory(GiB)": 19.88,
      "step": 395,
      "train_speed(iter/s)": 0.071677
    },
    {
      "acc": 0.69274058,
      "epoch": 1.4414414414414414,
      "grad_norm": 0.82421875,
      "learning_rate": 0.00010240703195102489,
      "loss": 1.15976305,
      "memory(GiB)": 19.46,
      "step": 400,
      "train_speed(iter/s)": 0.071832
    },
    {
      "epoch": 1.4414414414414414,
      "eval_acc": 0.6368440502586844,
      "eval_loss": 1.4986343383789062,
      "eval_runtime": 134.3425,
      "eval_samples_per_second": 1.124,
      "eval_steps_per_second": 0.566,
      "step": 400
    },
    {
      "acc": 0.71039405,
      "epoch": 1.4594594594594594,
      "grad_norm": 0.77734375,
      "learning_rate": 0.0001015405300477479,
      "loss": 1.12253609,
      "memory(GiB)": 19.92,
      "step": 405,
      "train_speed(iter/s)": 0.070298
    },
    {
      "acc": 0.71356583,
      "epoch": 1.4774774774774775,
      "grad_norm": 0.84375,
      "learning_rate": 0.0001006664389353592,
      "loss": 1.13753939,
      "memory(GiB)": 19.31,
      "step": 410,
      "train_speed(iter/s)": 0.070457
    },
    {
      "acc": 0.675458,
      "epoch": 1.4954954954954955,
      "grad_norm": 1.1328125,
      "learning_rate": 9.978495348920958e-05,
      "loss": 1.29233532,
      "memory(GiB)": 19.06,
      "step": 415,
      "train_speed(iter/s)": 0.070616
    },
    {
      "acc": 0.67761598,
      "epoch": 1.5135135135135136,
      "grad_norm": 0.6875,
      "learning_rate": 9.889627023318897e-05,
      "loss": 1.22440186,
      "memory(GiB)": 19.16,
      "step": 420,
      "train_speed(iter/s)": 0.070773
    },
    {
      "acc": 0.67492404,
      "epoch": 1.5315315315315314,
      "grad_norm": 0.81640625,
      "learning_rate": 9.800058729591212e-05,
      "loss": 1.22408361,
      "memory(GiB)": 19.97,
      "step": 425,
      "train_speed(iter/s)": 0.070935
    },
    {
      "acc": 0.68050842,
      "epoch": 1.5495495495495497,
      "grad_norm": 0.84765625,
      "learning_rate": 9.70981043665466e-05,
      "loss": 1.2078824,
      "memory(GiB)": 19.92,
      "step": 430,
      "train_speed(iter/s)": 0.07109
    },
    {
      "acc": 0.6750885,
      "epoch": 1.5675675675675675,
      "grad_norm": 0.66796875,
      "learning_rate": 9.618902265029284e-05,
      "loss": 1.28742075,
      "memory(GiB)": 19.27,
      "step": 435,
      "train_speed(iter/s)": 0.071229
    },
    {
      "acc": 0.64411507,
      "epoch": 1.5855855855855856,
      "grad_norm": 0.95703125,
      "learning_rate": 9.527354482352616e-05,
      "loss": 1.37240067,
      "memory(GiB)": 20.21,
      "step": 440,
      "train_speed(iter/s)": 0.071374
    },
    {
      "acc": 0.67574663,
      "epoch": 1.6036036036036037,
      "grad_norm": 0.83984375,
      "learning_rate": 9.435187498861085e-05,
      "loss": 1.27780771,
      "memory(GiB)": 19.95,
      "step": 445,
      "train_speed(iter/s)": 0.071519
    },
    {
      "acc": 0.67897987,
      "epoch": 1.6216216216216215,
      "grad_norm": 1.2265625,
      "learning_rate": 9.342421862839632e-05,
      "loss": 1.26616125,
      "memory(GiB)": 19.32,
      "step": 450,
      "train_speed(iter/s)": 0.071661
    },
    {
      "epoch": 1.6216216216216215,
      "eval_acc": 0.6424611973392461,
      "eval_loss": 1.4772522449493408,
      "eval_runtime": 134.5995,
      "eval_samples_per_second": 1.122,
      "eval_steps_per_second": 0.565,
      "step": 450
    },
    {
      "acc": 0.66755495,
      "epoch": 1.6396396396396398,
      "grad_norm": 1.0390625,
      "learning_rate": 9.249078256040541e-05,
      "loss": 1.30118093,
      "memory(GiB)": 22.82,
      "step": 455,
      "train_speed(iter/s)": 0.070312
    },
    {
      "acc": 0.66560607,
      "epoch": 1.6576576576576576,
      "grad_norm": 1.0546875,
      "learning_rate": 9.155177489072527e-05,
      "loss": 1.31042576,
      "memory(GiB)": 19.56,
      "step": 460,
      "train_speed(iter/s)": 0.070454
    },
    {
      "acc": 0.67957892,
      "epoch": 1.6756756756756757,
      "grad_norm": 1.3828125,
      "learning_rate": 9.060740496761082e-05,
      "loss": 1.31165123,
      "memory(GiB)": 19.38,
      "step": 465,
      "train_speed(iter/s)": 0.070592
    },
    {
      "acc": 0.6744031,
      "epoch": 1.6936936936936937,
      "grad_norm": 1.4140625,
      "learning_rate": 8.965788333481144e-05,
      "loss": 1.26758223,
      "memory(GiB)": 19.42,
      "step": 470,
      "train_speed(iter/s)": 0.070726
    },
    {
      "acc": 0.66551232,
      "epoch": 1.7117117117117115,
      "grad_norm": 0.98046875,
      "learning_rate": 8.870342168463085e-05,
      "loss": 1.27216129,
      "memory(GiB)": 19.27,
      "step": 475,
      "train_speed(iter/s)": 0.070864
    },
    {
      "acc": 0.65833273,
      "epoch": 1.7297297297297298,
      "grad_norm": 0.9140625,
      "learning_rate": 8.77442328107313e-05,
      "loss": 1.32684155,
      "memory(GiB)": 19.48,
      "step": 480,
      "train_speed(iter/s)": 0.070997
    },
    {
      "acc": 0.68646383,
      "epoch": 1.7477477477477477,
      "grad_norm": 1.3671875,
      "learning_rate": 8.678053056069184e-05,
      "loss": 1.2200016,
      "memory(GiB)": 19.24,
      "step": 485,
      "train_speed(iter/s)": 0.071136
    },
    {
      "acc": 0.69040904,
      "epoch": 1.7657657657657657,
      "grad_norm": 1.6171875,
      "learning_rate": 8.581252978833194e-05,
      "loss": 1.18706884,
      "memory(GiB)": 19.53,
      "step": 490,
      "train_speed(iter/s)": 0.07127
    },
    {
      "acc": 0.66571455,
      "epoch": 1.7837837837837838,
      "grad_norm": 0.8515625,
      "learning_rate": 8.484044630581057e-05,
      "loss": 1.29456005,
      "memory(GiB)": 20.09,
      "step": 495,
      "train_speed(iter/s)": 0.071401
    },
    {
      "acc": 0.67682033,
      "epoch": 1.8018018018018018,
      "grad_norm": 1.0,
      "learning_rate": 8.386449683551164e-05,
      "loss": 1.20547714,
      "memory(GiB)": 19.95,
      "step": 500,
      "train_speed(iter/s)": 0.071533
    },
    {
      "epoch": 1.8018018018018018,
      "eval_acc": 0.6413155949741316,
      "eval_loss": 1.479081630706787,
      "eval_runtime": 134.2299,
      "eval_samples_per_second": 1.125,
      "eval_steps_per_second": 0.566,
      "step": 500
    },
    {
      "acc": 0.67326751,
      "epoch": 1.8198198198198199,
      "grad_norm": 1.0546875,
      "learning_rate": 8.288489896172669e-05,
      "loss": 1.25247726,
      "memory(GiB)": 20.29,
      "step": 505,
      "train_speed(iter/s)": 0.070304
    },
    {
      "acc": 0.66375732,
      "epoch": 1.8378378378378377,
      "grad_norm": 0.9296875,
      "learning_rate": 8.190187108214514e-05,
      "loss": 1.28065901,
      "memory(GiB)": 20.04,
      "step": 510,
      "train_speed(iter/s)": 0.070438
    },
    {
      "acc": 0.69006267,
      "epoch": 1.855855855855856,
      "grad_norm": 1.0234375,
      "learning_rate": 8.091563235916343e-05,
      "loss": 1.13905525,
      "memory(GiB)": 20.03,
      "step": 515,
      "train_speed(iter/s)": 0.070569
    },
    {
      "acc": 0.69745221,
      "epoch": 1.8738738738738738,
      "grad_norm": 0.96484375,
      "learning_rate": 7.992640267102351e-05,
      "loss": 1.14712362,
      "memory(GiB)": 18.5,
      "step": 520,
      "train_speed(iter/s)": 0.070709
    },
    {
      "acc": 0.6707756,
      "epoch": 1.8918918918918919,
      "grad_norm": 1.328125,
      "learning_rate": 7.893440256279186e-05,
      "loss": 1.30717278,
      "memory(GiB)": 20.66,
      "step": 525,
      "train_speed(iter/s)": 0.07083
    },
    {
      "acc": 0.66872559,
      "epoch": 1.90990990990991,
      "grad_norm": 0.9765625,
      "learning_rate": 7.793985319718982e-05,
      "loss": 1.28408003,
      "memory(GiB)": 19.48,
      "step": 530,
      "train_speed(iter/s)": 0.070948
    },
    {
      "acc": 0.68111048,
      "epoch": 1.9279279279279278,
      "grad_norm": 0.76171875,
      "learning_rate": 7.694297630528612e-05,
      "loss": 1.21391411,
      "memory(GiB)": 19.88,
      "step": 535,
      "train_speed(iter/s)": 0.071071
    },
    {
      "acc": 0.65094652,
      "epoch": 1.945945945945946,
      "grad_norm": 0.83203125,
      "learning_rate": 7.594399413706277e-05,
      "loss": 1.34138126,
      "memory(GiB)": 19.9,
      "step": 540,
      "train_speed(iter/s)": 0.071193
    },
    {
      "acc": 0.67896776,
      "epoch": 1.9639639639639639,
      "grad_norm": 0.796875,
      "learning_rate": 7.494312941186529e-05,
      "loss": 1.22575331,
      "memory(GiB)": 19.43,
      "step": 545,
      "train_speed(iter/s)": 0.071302
    },
    {
      "acc": 0.6839644,
      "epoch": 1.981981981981982,
      "grad_norm": 0.78515625,
      "learning_rate": 7.394060526874825e-05,
      "loss": 1.25017443,
      "memory(GiB)": 19.25,
      "step": 550,
      "train_speed(iter/s)": 0.07142
    },
    {
      "epoch": 1.981981981981982,
      "eval_acc": 0.645269770879527,
      "eval_loss": 1.4606801271438599,
      "eval_runtime": 134.7756,
      "eval_samples_per_second": 1.12,
      "eval_steps_per_second": 0.564,
      "step": 550
    },
    {
      "acc": 0.68771811,
      "epoch": 2.0,
      "grad_norm": 0.81640625,
      "learning_rate": 7.293664521672729e-05,
      "loss": 1.22415581,
      "memory(GiB)": 22.67,
      "step": 555,
      "train_speed(iter/s)": 0.070304
    },
    {
      "acc": 0.741537,
      "epoch": 2.018018018018018,
      "grad_norm": 0.6171875,
      "learning_rate": 7.193147308494851e-05,
      "loss": 0.95370378,
      "memory(GiB)": 19.64,
      "step": 560,
      "train_speed(iter/s)": 0.070425
    },
    {
      "acc": 0.75044699,
      "epoch": 2.036036036036036,
      "grad_norm": 1.09375,
      "learning_rate": 7.09253129727867e-05,
      "loss": 0.95568914,
      "memory(GiB)": 19.4,
      "step": 565,
      "train_speed(iter/s)": 0.070541
    },
    {
      "acc": 0.75126195,
      "epoch": 2.054054054054054,
      "grad_norm": 1.3671875,
      "learning_rate": 6.991838919988322e-05,
      "loss": 0.92719631,
      "memory(GiB)": 19.54,
      "step": 570,
      "train_speed(iter/s)": 0.070658
    },
    {
      "acc": 0.74883032,
      "epoch": 2.0720720720720722,
      "grad_norm": 1.0078125,
      "learning_rate": 6.891092625613469e-05,
      "loss": 0.92080975,
      "memory(GiB)": 20.17,
      "step": 575,
      "train_speed(iter/s)": 0.07077
    },
    {
      "acc": 0.76222944,
      "epoch": 2.09009009009009,
      "grad_norm": 0.99609375,
      "learning_rate": 6.790314875164393e-05,
      "loss": 0.88407106,
      "memory(GiB)": 19.57,
      "step": 580,
      "train_speed(iter/s)": 0.070882
    },
    {
      "acc": 0.76224823,
      "epoch": 2.108108108108108,
      "grad_norm": 1.0859375,
      "learning_rate": 6.689528136664377e-05,
      "loss": 0.85150976,
      "memory(GiB)": 19.54,
      "step": 585,
      "train_speed(iter/s)": 0.070995
    },
    {
      "acc": 0.73958569,
      "epoch": 2.126126126126126,
      "grad_norm": 1.3828125,
      "learning_rate": 6.588754880140573e-05,
      "loss": 0.92128286,
      "memory(GiB)": 19.58,
      "step": 590,
      "train_speed(iter/s)": 0.071101
    },
    {
      "acc": 0.74549003,
      "epoch": 2.144144144144144,
      "grad_norm": 1.359375,
      "learning_rate": 6.488017572614363e-05,
      "loss": 0.90851021,
      "memory(GiB)": 18.59,
      "step": 595,
      "train_speed(iter/s)": 0.071211
    },
    {
      "acc": 0.73912826,
      "epoch": 2.1621621621621623,
      "grad_norm": 1.3125,
      "learning_rate": 6.387338673092443e-05,
      "loss": 0.92900734,
      "memory(GiB)": 19.54,
      "step": 600,
      "train_speed(iter/s)": 0.071321
    },
    {
      "epoch": 2.1621621621621623,
      "eval_acc": 0.6320768662232077,
      "eval_loss": 1.5818341970443726,
      "eval_runtime": 134.4691,
      "eval_samples_per_second": 1.123,
      "eval_steps_per_second": 0.565,
      "step": 600
    },
    {
      "acc": 0.75979438,
      "epoch": 2.18018018018018,
      "grad_norm": 1.09375,
      "learning_rate": 6.286740627559656e-05,
      "loss": 0.89129753,
      "memory(GiB)": 22.37,
      "step": 605,
      "train_speed(iter/s)": 0.070301
    },
    {
      "acc": 0.72820721,
      "epoch": 2.1981981981981984,
      "grad_norm": 2.15625,
      "learning_rate": 6.186245863974757e-05,
      "loss": 0.96495447,
      "memory(GiB)": 19.6,
      "step": 610,
      "train_speed(iter/s)": 0.070413
    },
    {
      "acc": 0.75764585,
      "epoch": 2.2162162162162162,
      "grad_norm": 1.0078125,
      "learning_rate": 6.0858767872701715e-05,
      "loss": 0.89218092,
      "memory(GiB)": 20.15,
      "step": 615,
      "train_speed(iter/s)": 0.070515
    },
    {
      "acc": 0.75772595,
      "epoch": 2.234234234234234,
      "grad_norm": 1.6328125,
      "learning_rate": 5.985655774356901e-05,
      "loss": 0.89191771,
      "memory(GiB)": 19.46,
      "step": 620,
      "train_speed(iter/s)": 0.070627
    },
    {
      "acc": 0.7377079,
      "epoch": 2.2522522522522523,
      "grad_norm": 1.1875,
      "learning_rate": 5.8856051691356884e-05,
      "loss": 0.94241228,
      "memory(GiB)": 19.35,
      "step": 625,
      "train_speed(iter/s)": 0.070733
    },
    {
      "acc": 0.77948771,
      "epoch": 2.27027027027027,
      "grad_norm": 1.2890625,
      "learning_rate": 5.785747277515506e-05,
      "loss": 0.79317036,
      "memory(GiB)": 20.48,
      "step": 630,
      "train_speed(iter/s)": 0.070844
    },
    {
      "acc": 0.76766949,
      "epoch": 2.2882882882882885,
      "grad_norm": 0.97265625,
      "learning_rate": 5.686104362440552e-05,
      "loss": 0.82855272,
      "memory(GiB)": 20.12,
      "step": 635,
      "train_speed(iter/s)": 0.070945
    },
    {
      "acc": 0.74998231,
      "epoch": 2.3063063063063063,
      "grad_norm": 2.9375,
      "learning_rate": 5.586698638926811e-05,
      "loss": 0.93049393,
      "memory(GiB)": 20.06,
      "step": 640,
      "train_speed(iter/s)": 0.071044
    },
    {
      "acc": 0.75094385,
      "epoch": 2.3243243243243246,
      "grad_norm": 1.1875,
      "learning_rate": 5.487552269109287e-05,
      "loss": 0.86875353,
      "memory(GiB)": 19.33,
      "step": 645,
      "train_speed(iter/s)": 0.071146
    },
    {
      "acc": 0.74836354,
      "epoch": 2.3423423423423424,
      "grad_norm": 1.1328125,
      "learning_rate": 5.388687357301051e-05,
      "loss": 0.88861446,
      "memory(GiB)": 20.11,
      "step": 650,
      "train_speed(iter/s)": 0.071249
    },
    {
      "epoch": 2.3423423423423424,
      "eval_acc": 0.630709534368071,
      "eval_loss": 1.5767972469329834,
      "eval_runtime": 134.3063,
      "eval_samples_per_second": 1.124,
      "eval_steps_per_second": 0.566,
      "step": 650
    },
    {
      "acc": 0.76697993,
      "epoch": 2.3603603603603602,
      "grad_norm": 1.2734375,
      "learning_rate": 5.290125945065162e-05,
      "loss": 0.85701361,
      "memory(GiB)": 22.96,
      "step": 655,
      "train_speed(iter/s)": 0.070324
    },
    {
      "acc": 0.76252317,
      "epoch": 2.3783783783783785,
      "grad_norm": 1.0390625,
      "learning_rate": 5.191890006300573e-05,
      "loss": 0.85787058,
      "memory(GiB)": 20.13,
      "step": 660,
      "train_speed(iter/s)": 0.070422
    },
    {
      "acc": 0.7651772,
      "epoch": 2.3963963963963963,
      "grad_norm": 1.1875,
      "learning_rate": 5.094001442343155e-05,
      "loss": 0.8521904,
      "memory(GiB)": 19.86,
      "step": 665,
      "train_speed(iter/s)": 0.070523
    },
    {
      "acc": 0.73847542,
      "epoch": 2.4144144144144146,
      "grad_norm": 1.2734375,
      "learning_rate": 4.996482077082849e-05,
      "loss": 0.95858736,
      "memory(GiB)": 19.29,
      "step": 670,
      "train_speed(iter/s)": 0.070628
    },
    {
      "acc": 0.74675932,
      "epoch": 2.4324324324324325,
      "grad_norm": 1.2734375,
      "learning_rate": 4.899353652098139e-05,
      "loss": 0.86487961,
      "memory(GiB)": 18.64,
      "step": 675,
      "train_speed(iter/s)": 0.070727
    },
    {
      "acc": 0.73309464,
      "epoch": 2.4504504504504503,
      "grad_norm": 1.8671875,
      "learning_rate": 4.802637821808819e-05,
      "loss": 0.93775883,
      "memory(GiB)": 19.78,
      "step": 680,
      "train_speed(iter/s)": 0.070825
    },
    {
      "acc": 0.76575212,
      "epoch": 2.4684684684684686,
      "grad_norm": 1.03125,
      "learning_rate": 4.706356148648246e-05,
      "loss": 0.8259285,
      "memory(GiB)": 19.9,
      "step": 685,
      "train_speed(iter/s)": 0.07092
    },
    {
      "acc": 0.76865396,
      "epoch": 2.4864864864864864,
      "grad_norm": 1.3125,
      "learning_rate": 4.6105300982560625e-05,
      "loss": 0.84868517,
      "memory(GiB)": 19.19,
      "step": 690,
      "train_speed(iter/s)": 0.071014
    },
    {
      "acc": 0.75694928,
      "epoch": 2.5045045045045047,
      "grad_norm": 1.03125,
      "learning_rate": 4.515181034692515e-05,
      "loss": 0.87043924,
      "memory(GiB)": 19.95,
      "step": 695,
      "train_speed(iter/s)": 0.071105
    },
    {
      "acc": 0.75771561,
      "epoch": 2.5225225225225225,
      "grad_norm": 1.3515625,
      "learning_rate": 4.420330215675415e-05,
      "loss": 0.86245804,
      "memory(GiB)": 19.18,
      "step": 700,
      "train_speed(iter/s)": 0.071194
    },
    {
      "epoch": 2.5225225225225225,
      "eval_acc": 0.6335181079083518,
      "eval_loss": 1.5894646644592285,
      "eval_runtime": 134.225,
      "eval_samples_per_second": 1.125,
      "eval_steps_per_second": 0.566,
      "step": 700
    },
    {
      "acc": 0.76191721,
      "epoch": 2.5405405405405403,
      "grad_norm": 1.71875,
      "learning_rate": 4.325998787840818e-05,
      "loss": 0.85848246,
      "memory(GiB)": 19.14,
      "step": 705,
      "train_speed(iter/s)": 0.070324
    },
    {
      "acc": 0.76571012,
      "epoch": 2.5585585585585586,
      "grad_norm": 1.15625,
      "learning_rate": 4.2322077820284477e-05,
      "loss": 0.85979414,
      "memory(GiB)": 20.01,
      "step": 710,
      "train_speed(iter/s)": 0.070422
    },
    {
      "acc": 0.73852654,
      "epoch": 2.5765765765765765,
      "grad_norm": 1.6484375,
      "learning_rate": 4.138978108592962e-05,
      "loss": 0.90148897,
      "memory(GiB)": 19.05,
      "step": 715,
      "train_speed(iter/s)": 0.070518
    },
    {
      "acc": 0.76960816,
      "epoch": 2.5945945945945947,
      "grad_norm": 3.71875,
      "learning_rate": 4.046330552742053e-05,
      "loss": 0.88053255,
      "memory(GiB)": 19.25,
      "step": 720,
      "train_speed(iter/s)": 0.070616
    },
    {
      "acc": 0.77552128,
      "epoch": 2.6126126126126126,
      "grad_norm": 0.96484375,
      "learning_rate": 3.954285769902474e-05,
      "loss": 0.83608866,
      "memory(GiB)": 19.96,
      "step": 725,
      "train_speed(iter/s)": 0.070707
    },
    {
      "acc": 0.76034231,
      "epoch": 2.6306306306306304,
      "grad_norm": 1.078125,
      "learning_rate": 3.8628642811149894e-05,
      "loss": 0.84258709,
      "memory(GiB)": 19.75,
      "step": 730,
      "train_speed(iter/s)": 0.070796
    },
    {
      "acc": 0.73506665,
      "epoch": 2.6486486486486487,
      "grad_norm": 2.125,
      "learning_rate": 3.772086468459271e-05,
      "loss": 0.96418314,
      "memory(GiB)": 19.94,
      "step": 735,
      "train_speed(iter/s)": 0.070887
    },
    {
      "acc": 0.74339218,
      "epoch": 2.6666666666666665,
      "grad_norm": 1.3359375,
      "learning_rate": 3.6819725705098094e-05,
      "loss": 0.94632616,
      "memory(GiB)": 19.98,
      "step": 740,
      "train_speed(iter/s)": 0.070978
    },
    {
      "acc": 0.75258017,
      "epoch": 2.684684684684685,
      "grad_norm": 1.328125,
      "learning_rate": 3.592542677823787e-05,
      "loss": 0.89630384,
      "memory(GiB)": 19.9,
      "step": 745,
      "train_speed(iter/s)": 0.071065
    },
    {
      "acc": 0.7422905,
      "epoch": 2.7027027027027026,
      "grad_norm": 1.46875,
      "learning_rate": 3.503816728461963e-05,
      "loss": 0.92554636,
      "memory(GiB)": 19.94,
      "step": 750,
      "train_speed(iter/s)": 0.071152
    },
    {
      "epoch": 2.7027027027027026,
      "eval_acc": 0.6360679970436068,
      "eval_loss": 1.577430248260498,
      "eval_runtime": 134.0595,
      "eval_samples_per_second": 1.126,
      "eval_steps_per_second": 0.567,
      "step": 750
    },
    {
      "acc": 0.76009235,
      "epoch": 2.7207207207207205,
      "grad_norm": 1.7265625,
      "learning_rate": 3.415814503543563e-05,
      "loss": 0.89433851,
      "memory(GiB)": 19.38,
      "step": 755,
      "train_speed(iter/s)": 0.070345
    },
    {
      "acc": 0.75049233,
      "epoch": 2.7387387387387387,
      "grad_norm": 1.453125,
      "learning_rate": 3.3285556228361483e-05,
      "loss": 0.90194426,
      "memory(GiB)": 19.78,
      "step": 760,
      "train_speed(iter/s)": 0.070432
    },
    {
      "acc": 0.73652792,
      "epoch": 2.756756756756757,
      "grad_norm": 1.375,
      "learning_rate": 3.2420595403814615e-05,
      "loss": 0.94170513,
      "memory(GiB)": 19.18,
      "step": 765,
      "train_speed(iter/s)": 0.070517
    },
    {
      "acc": 0.74097948,
      "epoch": 2.774774774774775,
      "grad_norm": 1.171875,
      "learning_rate": 3.156345540158226e-05,
      "loss": 0.92526283,
      "memory(GiB)": 19.96,
      "step": 770,
      "train_speed(iter/s)": 0.070603
    },
    {
      "acc": 0.77357135,
      "epoch": 2.7927927927927927,
      "grad_norm": 1.21875,
      "learning_rate": 3.0714327317828445e-05,
      "loss": 0.84344234,
      "memory(GiB)": 19.42,
      "step": 775,
      "train_speed(iter/s)": 0.070681
    },
    {
      "acc": 0.76570077,
      "epoch": 2.810810810810811,
      "grad_norm": 1.4765625,
      "learning_rate": 2.9873400462489982e-05,
      "loss": 0.85261898,
      "memory(GiB)": 19.91,
      "step": 780,
      "train_speed(iter/s)": 0.070768
    },
    {
      "acc": 0.73979292,
      "epoch": 2.828828828828829,
      "grad_norm": 1.375,
      "learning_rate": 2.904086231707032e-05,
      "loss": 0.94777365,
      "memory(GiB)": 19.72,
      "step": 785,
      "train_speed(iter/s)": 0.07085
    },
    {
      "acc": 0.75035534,
      "epoch": 2.846846846846847,
      "grad_norm": 1.1484375,
      "learning_rate": 2.8216898492841355e-05,
      "loss": 0.88380022,
      "memory(GiB)": 19.09,
      "step": 790,
      "train_speed(iter/s)": 0.070936
    },
    {
      "acc": 0.76033754,
      "epoch": 2.864864864864865,
      "grad_norm": 1.078125,
      "learning_rate": 2.7401692689462153e-05,
      "loss": 0.84767551,
      "memory(GiB)": 20.02,
      "step": 795,
      "train_speed(iter/s)": 0.071016
    },
    {
      "acc": 0.74806399,
      "epoch": 2.8828828828828827,
      "grad_norm": 1.53125,
      "learning_rate": 2.6595426654023643e-05,
      "loss": 0.92544088,
      "memory(GiB)": 19.88,
      "step": 800,
      "train_speed(iter/s)": 0.0711
    },
    {
      "epoch": 2.8828828828828827,
      "eval_acc": 0.635920177383592,
      "eval_loss": 1.5869847536087036,
      "eval_runtime": 134.517,
      "eval_samples_per_second": 1.123,
      "eval_steps_per_second": 0.565,
      "step": 800
    }
  ],
  "logging_steps": 5,
  "max_steps": 1108,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 2.3166381763355443e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}