muxue-qwen1.5-7B / trainer_state.json
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 20.0,
"eval_steps": 500,
"global_step": 500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.2,
"grad_norm": 1.559026837348938,
"learning_rate": 4.998766400914329e-05,
"loss": 4.2591,
"step": 5
},
{
"epoch": 0.4,
"grad_norm": 2.0623748302459717,
"learning_rate": 4.995066821070679e-05,
"loss": 4.084,
"step": 10
},
{
"epoch": 0.6,
"grad_norm": 1.7944402694702148,
"learning_rate": 4.9889049115077005e-05,
"loss": 4.0724,
"step": 15
},
{
"epoch": 0.8,
"grad_norm": 2.1178815364837646,
"learning_rate": 4.980286753286195e-05,
"loss": 3.9116,
"step": 20
},
{
"epoch": 1.0,
"grad_norm": 2.5003485679626465,
"learning_rate": 4.9692208514878444e-05,
"loss": 3.7709,
"step": 25
},
{
"epoch": 1.2,
"grad_norm": 1.663772463798523,
"learning_rate": 4.9557181268217227e-05,
"loss": 3.5777,
"step": 30
},
{
"epoch": 1.4,
"grad_norm": 1.7687804698944092,
"learning_rate": 4.939791904846869e-05,
"loss": 3.2731,
"step": 35
},
{
"epoch": 1.6,
"grad_norm": 1.3904050588607788,
"learning_rate": 4.9214579028215776e-05,
"loss": 3.3322,
"step": 40
},
{
"epoch": 1.8,
"grad_norm": 1.6052061319351196,
"learning_rate": 4.900734214192358e-05,
"loss": 3.1392,
"step": 45
},
{
"epoch": 2.0,
"grad_norm": 1.6881974935531616,
"learning_rate": 4.877641290737884e-05,
"loss": 3.0443,
"step": 50
},
{
"epoch": 2.2,
"grad_norm": 1.8101818561553955,
"learning_rate": 4.852201922385564e-05,
"loss": 3.1381,
"step": 55
},
{
"epoch": 2.4,
"grad_norm": 1.9677544832229614,
"learning_rate": 4.8244412147206284e-05,
"loss": 2.907,
"step": 60
},
{
"epoch": 2.6,
"grad_norm": 1.909866213798523,
"learning_rate": 4.794386564209953e-05,
"loss": 2.9507,
"step": 65
},
{
"epoch": 2.8,
"grad_norm": 1.7826056480407715,
"learning_rate": 4.762067631165049e-05,
"loss": 2.9334,
"step": 70
},
{
"epoch": 3.0,
"grad_norm": 2.4618849754333496,
"learning_rate": 4.72751631047092e-05,
"loss": 2.7628,
"step": 75
},
{
"epoch": 3.2,
"grad_norm": 1.7693885564804077,
"learning_rate": 4.690766700109659e-05,
"loss": 2.7438,
"step": 80
},
{
"epoch": 3.4,
"grad_norm": 2.086184024810791,
"learning_rate": 4.65185506750986e-05,
"loss": 2.7949,
"step": 85
},
{
"epoch": 3.6,
"grad_norm": 2.5929203033447266,
"learning_rate": 4.610819813755038e-05,
"loss": 2.7056,
"step": 90
},
{
"epoch": 3.8,
"grad_norm": 2.221397638320923,
"learning_rate": 4.567701435686404e-05,
"loss": 2.5453,
"step": 95
},
{
"epoch": 4.0,
"grad_norm": 2.4999215602874756,
"learning_rate": 4.522542485937369e-05,
"loss": 2.7453,
"step": 100
},
{
"epoch": 4.2,
"grad_norm": 2.4940710067749023,
"learning_rate": 4.4753875309392266e-05,
"loss": 2.5124,
"step": 105
},
{
"epoch": 4.4,
"grad_norm": 3.088253974914551,
"learning_rate": 4.426283106939474e-05,
"loss": 2.5935,
"step": 110
},
{
"epoch": 4.6,
"grad_norm": 2.6688263416290283,
"learning_rate": 4.375277674076149e-05,
"loss": 2.5463,
"step": 115
},
{
"epoch": 4.8,
"grad_norm": 2.6959311962127686,
"learning_rate": 4.3224215685535294e-05,
"loss": 2.6062,
"step": 120
},
{
"epoch": 5.0,
"grad_norm": 3.0782675743103027,
"learning_rate": 4.267766952966369e-05,
"loss": 2.4869,
"step": 125
},
{
"epoch": 5.2,
"grad_norm": 3.190086603164673,
"learning_rate": 4.211367764821722e-05,
"loss": 2.457,
"step": 130
},
{
"epoch": 5.4,
"grad_norm": 3.3880529403686523,
"learning_rate": 4.1532796633091296e-05,
"loss": 2.4223,
"step": 135
},
{
"epoch": 5.6,
"grad_norm": 2.6318459510803223,
"learning_rate": 4.093559974371725e-05,
"loss": 2.385,
"step": 140
},
{
"epoch": 5.8,
"grad_norm": 2.92958402633667,
"learning_rate": 4.0322676341324415e-05,
"loss": 2.2885,
"step": 145
},
{
"epoch": 6.0,
"grad_norm": 3.336378574371338,
"learning_rate": 3.969463130731183e-05,
"loss": 2.4326,
"step": 150
},
{
"epoch": 6.2,
"grad_norm": 3.508898973464966,
"learning_rate": 3.905208444630327e-05,
"loss": 2.1526,
"step": 155
},
{
"epoch": 6.4,
"grad_norm": 4.225854396820068,
"learning_rate": 3.8395669874474915e-05,
"loss": 2.4194,
"step": 160
},
{
"epoch": 6.6,
"grad_norm": 4.112431526184082,
"learning_rate": 3.7726035393759285e-05,
"loss": 2.2839,
"step": 165
},
{
"epoch": 6.8,
"grad_norm": 3.599271774291992,
"learning_rate": 3.704384185254288e-05,
"loss": 2.378,
"step": 170
},
{
"epoch": 7.0,
"grad_norm": 4.479362964630127,
"learning_rate": 3.634976249348867e-05,
"loss": 2.0319,
"step": 175
},
{
"epoch": 7.2,
"grad_norm": 4.234251499176025,
"learning_rate": 3.564448228912682e-05,
"loss": 2.021,
"step": 180
},
{
"epoch": 7.4,
"grad_norm": 4.8591461181640625,
"learning_rate": 3.4928697265869515e-05,
"loss": 1.9893,
"step": 185
},
{
"epoch": 7.6,
"grad_norm": 6.527431488037109,
"learning_rate": 3.4203113817116957e-05,
"loss": 2.1713,
"step": 190
},
{
"epoch": 7.8,
"grad_norm": 5.432151794433594,
"learning_rate": 3.346844800613229e-05,
"loss": 2.2043,
"step": 195
},
{
"epoch": 8.0,
"grad_norm": 4.323482513427734,
"learning_rate": 3.272542485937369e-05,
"loss": 2.2359,
"step": 200
},
{
"epoch": 8.2,
"grad_norm": 3.9754505157470703,
"learning_rate": 3.1974777650980735e-05,
"loss": 2.0855,
"step": 205
},
{
"epoch": 8.4,
"grad_norm": 4.956503391265869,
"learning_rate": 3.121724717912138e-05,
"loss": 1.9709,
"step": 210
},
{
"epoch": 8.6,
"grad_norm": 5.2445969581604,
"learning_rate": 3.045358103491357e-05,
"loss": 2.062,
"step": 215
},
{
"epoch": 8.8,
"grad_norm": 4.901844024658203,
"learning_rate": 2.9684532864643122e-05,
"loss": 2.0368,
"step": 220
},
{
"epoch": 9.0,
"grad_norm": 4.711243629455566,
"learning_rate": 2.8910861626005776e-05,
"loss": 1.8624,
"step": 225
},
{
"epoch": 9.2,
"grad_norm": 4.800491809844971,
"learning_rate": 2.8133330839107608e-05,
"loss": 2.0423,
"step": 230
},
{
"epoch": 9.4,
"grad_norm": 5.560423851013184,
"learning_rate": 2.7352707832962865e-05,
"loss": 1.905,
"step": 235
},
{
"epoch": 9.6,
"grad_norm": 7.123173713684082,
"learning_rate": 2.656976298823284e-05,
"loss": 1.6245,
"step": 240
},
{
"epoch": 9.8,
"grad_norm": 6.563107013702393,
"learning_rate": 2.578526897695321e-05,
"loss": 1.8757,
"step": 245
},
{
"epoch": 10.0,
"grad_norm": 5.408273220062256,
"learning_rate": 2.5e-05,
"loss": 1.9541,
"step": 250
},
{
"epoch": 10.2,
"grad_norm": 5.59449577331543,
"learning_rate": 2.4214731023046793e-05,
"loss": 1.822,
"step": 255
},
{
"epoch": 10.4,
"grad_norm": 7.534347057342529,
"learning_rate": 2.3430237011767167e-05,
"loss": 1.8813,
"step": 260
},
{
"epoch": 10.6,
"grad_norm": 7.906167984008789,
"learning_rate": 2.2647292167037144e-05,
"loss": 1.8534,
"step": 265
},
{
"epoch": 10.8,
"grad_norm": 5.6607184410095215,
"learning_rate": 2.186666916089239e-05,
"loss": 1.7331,
"step": 270
},
{
"epoch": 11.0,
"grad_norm": 6.6628217697143555,
"learning_rate": 2.1089138373994223e-05,
"loss": 1.6813,
"step": 275
},
{
"epoch": 11.2,
"grad_norm": 6.011382579803467,
"learning_rate": 2.031546713535688e-05,
"loss": 1.5243,
"step": 280
},
{
"epoch": 11.4,
"grad_norm": 6.615753173828125,
"learning_rate": 1.9546418965086442e-05,
"loss": 1.7315,
"step": 285
},
{
"epoch": 11.6,
"grad_norm": 6.512957572937012,
"learning_rate": 1.8782752820878634e-05,
"loss": 1.797,
"step": 290
},
{
"epoch": 11.8,
"grad_norm": 6.8899664878845215,
"learning_rate": 1.802522234901927e-05,
"loss": 1.7544,
"step": 295
},
{
"epoch": 12.0,
"grad_norm": 6.717526435852051,
"learning_rate": 1.7274575140626318e-05,
"loss": 1.7081,
"step": 300
},
{
"epoch": 12.2,
"grad_norm": 5.812675952911377,
"learning_rate": 1.6531551993867717e-05,
"loss": 1.6276,
"step": 305
},
{
"epoch": 12.4,
"grad_norm": 6.911491870880127,
"learning_rate": 1.5796886182883053e-05,
"loss": 1.6319,
"step": 310
},
{
"epoch": 12.6,
"grad_norm": 8.3098726272583,
"learning_rate": 1.5071302734130489e-05,
"loss": 1.5262,
"step": 315
},
{
"epoch": 12.8,
"grad_norm": 6.8229756355285645,
"learning_rate": 1.4355517710873184e-05,
"loss": 1.6186,
"step": 320
},
{
"epoch": 13.0,
"grad_norm": 6.8094258308410645,
"learning_rate": 1.3650237506511331e-05,
"loss": 1.6026,
"step": 325
},
{
"epoch": 13.2,
"grad_norm": 6.559398651123047,
"learning_rate": 1.2956158147457115e-05,
"loss": 1.4957,
"step": 330
},
{
"epoch": 13.4,
"grad_norm": 7.712869167327881,
"learning_rate": 1.2273964606240718e-05,
"loss": 1.5775,
"step": 335
},
{
"epoch": 13.6,
"grad_norm": 8.607151985168457,
"learning_rate": 1.1604330125525079e-05,
"loss": 1.6038,
"step": 340
},
{
"epoch": 13.8,
"grad_norm": 7.38192081451416,
"learning_rate": 1.0947915553696742e-05,
"loss": 1.4489,
"step": 345
},
{
"epoch": 14.0,
"grad_norm": 9.552181243896484,
"learning_rate": 1.0305368692688174e-05,
"loss": 1.6264,
"step": 350
},
{
"epoch": 14.2,
"grad_norm": 6.163412094116211,
"learning_rate": 9.677323658675594e-06,
"loss": 1.6046,
"step": 355
},
{
"epoch": 14.4,
"grad_norm": 8.483848571777344,
"learning_rate": 9.064400256282757e-06,
"loss": 1.4736,
"step": 360
},
{
"epoch": 14.6,
"grad_norm": 7.673094749450684,
"learning_rate": 8.467203366908707e-06,
"loss": 1.493,
"step": 365
},
{
"epoch": 14.8,
"grad_norm": 7.977968692779541,
"learning_rate": 7.886322351782783e-06,
"loss": 1.5078,
"step": 370
},
{
"epoch": 15.0,
"grad_norm": 7.63812255859375,
"learning_rate": 7.3223304703363135e-06,
"loss": 1.4198,
"step": 375
},
{
"epoch": 15.2,
"grad_norm": 6.485208034515381,
"learning_rate": 6.775784314464717e-06,
"loss": 1.5416,
"step": 380
},
{
"epoch": 15.4,
"grad_norm": 8.849021911621094,
"learning_rate": 6.247223259238513e-06,
"loss": 1.3864,
"step": 385
},
{
"epoch": 15.6,
"grad_norm": 7.422196388244629,
"learning_rate": 5.737168930605272e-06,
"loss": 1.4536,
"step": 390
},
{
"epoch": 15.8,
"grad_norm": 7.654792785644531,
"learning_rate": 5.24612469060774e-06,
"loss": 1.4819,
"step": 395
},
{
"epoch": 16.0,
"grad_norm": 7.572329044342041,
"learning_rate": 4.7745751406263165e-06,
"loss": 1.4569,
"step": 400
},
{
"epoch": 16.2,
"grad_norm": 7.488677024841309,
"learning_rate": 4.322985643135952e-06,
"loss": 1.4596,
"step": 405
},
{
"epoch": 16.4,
"grad_norm": 7.173449993133545,
"learning_rate": 3.891801862449629e-06,
"loss": 1.2999,
"step": 410
},
{
"epoch": 16.6,
"grad_norm": 7.496935844421387,
"learning_rate": 3.4814493249014116e-06,
"loss": 1.4009,
"step": 415
},
{
"epoch": 16.8,
"grad_norm": 7.768974304199219,
"learning_rate": 3.0923329989034132e-06,
"loss": 1.6062,
"step": 420
},
{
"epoch": 17.0,
"grad_norm": 7.606301784515381,
"learning_rate": 2.7248368952908053e-06,
"loss": 1.3563,
"step": 425
},
{
"epoch": 17.2,
"grad_norm": 9.161933898925781,
"learning_rate": 2.379323688349516e-06,
"loss": 1.2701,
"step": 430
},
{
"epoch": 17.4,
"grad_norm": 9.012870788574219,
"learning_rate": 2.0561343579004715e-06,
"loss": 1.539,
"step": 435
},
{
"epoch": 17.6,
"grad_norm": 7.092794418334961,
"learning_rate": 1.7555878527937164e-06,
"loss": 1.4352,
"step": 440
},
{
"epoch": 17.8,
"grad_norm": 6.655755996704102,
"learning_rate": 1.4779807761443636e-06,
"loss": 1.2718,
"step": 445
},
{
"epoch": 18.0,
"grad_norm": 6.042492866516113,
"learning_rate": 1.2235870926211619e-06,
"loss": 1.5636,
"step": 450
},
{
"epoch": 18.2,
"grad_norm": 7.48867654800415,
"learning_rate": 9.926578580764234e-07,
"loss": 1.6322,
"step": 455
},
{
"epoch": 18.4,
"grad_norm": 7.198723793029785,
"learning_rate": 7.854209717842231e-07,
"loss": 1.3335,
"step": 460
},
{
"epoch": 18.6,
"grad_norm": 8.145171165466309,
"learning_rate": 6.020809515313142e-07,
"loss": 1.2486,
"step": 465
},
{
"epoch": 18.8,
"grad_norm": 8.835026741027832,
"learning_rate": 4.4281873178278475e-07,
"loss": 1.41,
"step": 470
},
{
"epoch": 19.0,
"grad_norm": 8.490466117858887,
"learning_rate": 3.077914851215585e-07,
"loss": 1.3855,
"step": 475
},
{
"epoch": 19.2,
"grad_norm": 6.034891605377197,
"learning_rate": 1.9713246713805588e-07,
"loss": 1.4357,
"step": 480
},
{
"epoch": 19.4,
"grad_norm": 7.8184919357299805,
"learning_rate": 1.109508849230001e-07,
"loss": 1.224,
"step": 485
},
{
"epoch": 19.6,
"grad_norm": 8.390954971313477,
"learning_rate": 4.9331789293211026e-08,
"loss": 1.4121,
"step": 490
},
{
"epoch": 19.8,
"grad_norm": 8.34197998046875,
"learning_rate": 1.233599085671e-08,
"loss": 1.4843,
"step": 495
},
{
"epoch": 20.0,
"grad_norm": 7.5700273513793945,
"learning_rate": 0.0,
"loss": 1.52,
"step": 500
},
{
"epoch": 20.0,
"step": 500,
"total_flos": 1.3524244538720256e+16,
"train_loss": 2.0708214292526246,
"train_runtime": 754.3437,
"train_samples_per_second": 10.605,
"train_steps_per_second": 0.663
}
],
"logging_steps": 5,
"max_steps": 500,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 100,
"total_flos": 1.3524244538720256e+16,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}
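
For reference, a minimal sketch (an assumption, not part of the original file) of how one might load this trainer_state.json in Python and plot the logged training loss and learning-rate schedule against the global step, assuming matplotlib is installed and the file sits in the working directory:

import json
import matplotlib.pyplot as plt

# Load the state exported by the Hugging Face Trainer.
with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only the per-step log entries; the final summary entry has no "loss" key.
logs = [entry for entry in state["log_history"] if "loss" in entry]

steps = [entry["step"] for entry in logs]
losses = [entry["loss"] for entry in logs]
lrs = [entry["learning_rate"] for entry in logs]

fig, ax_loss = plt.subplots()
ax_loss.plot(steps, losses, label="training loss")
ax_loss.set_xlabel("global step")
ax_loss.set_ylabel("loss")

# Second y-axis for the cosine learning-rate schedule logged alongside the loss.
ax_lr = ax_loss.twinx()
ax_lr.plot(steps, lrs, color="tab:orange", label="learning rate")
ax_lr.set_ylabel("learning rate")

fig.tight_layout()
plt.savefig("loss_curve.png")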