zephyr-ds / trainer_state.json
jikaixuan's picture
Model save
8c6b41c verified
raw
history blame
6.82 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9997382884061764,
"eval_steps": 100,
"global_step": 955,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 5.208333333333333e-09,
"logits/chosen": -2.7525930404663086,
"logits/rejected": -2.6732418537139893,
"logps/chosen": -297.177001953125,
"logps/rejected": -236.72621154785156,
"loss": 0.6931,
"pred_label": 0.0,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1,
"use_label": 17.0
},
{
"epoch": 0.1,
"learning_rate": 4.976717112922002e-07,
"logits/chosen": -2.6616106033325195,
"logits/rejected": -2.6597719192504883,
"logps/chosen": -270.4000244140625,
"logps/rejected": -249.33827209472656,
"loss": 0.6829,
"pred_label": 333.43182373046875,
"rewards/accuracies": 0.4965277910232544,
"rewards/chosen": 0.0011782451765611768,
"rewards/margins": 0.001073930412530899,
"rewards/rejected": 0.00010431456030346453,
"step": 100,
"use_label": 1283.5682373046875
},
{
"epoch": 0.21,
"learning_rate": 4.3946449359720607e-07,
"logits/chosen": -2.6945221424102783,
"logits/rejected": -2.678621530532837,
"logps/chosen": -271.6979064941406,
"logps/rejected": -254.37026977539062,
"loss": 0.6799,
"pred_label": 1038.7462158203125,
"rewards/accuracies": 0.5350000262260437,
"rewards/chosen": 0.004888341296464205,
"rewards/margins": 0.007898561656475067,
"rewards/rejected": -0.0030102210585027933,
"step": 200,
"use_label": 3762.253662109375
},
{
"epoch": 0.31,
"learning_rate": 3.812572759022118e-07,
"logits/chosen": -2.6708526611328125,
"logits/rejected": -2.6628105640411377,
"logps/chosen": -272.3077392578125,
"logps/rejected": -253.75027465820312,
"loss": 0.6728,
"pred_label": 1884.596923828125,
"rewards/accuracies": 0.5653125047683716,
"rewards/chosen": 0.010109632275998592,
"rewards/margins": 0.016557401046156883,
"rewards/rejected": -0.006447767838835716,
"step": 300,
"use_label": 6116.4033203125
},
{
"epoch": 0.42,
"learning_rate": 3.230500582072177e-07,
"logits/chosen": -2.668009042739868,
"logits/rejected": -2.650494337081909,
"logps/chosen": -267.6447448730469,
"logps/rejected": -253.59107971191406,
"loss": 0.6616,
"pred_label": 3012.675537109375,
"rewards/accuracies": 0.6193749904632568,
"rewards/chosen": 0.017754318192601204,
"rewards/margins": 0.030351871624588966,
"rewards/rejected": -0.012597555294632912,
"step": 400,
"use_label": 8188.32421875
},
{
"epoch": 0.52,
"learning_rate": 2.648428405122235e-07,
"logits/chosen": -2.6697680950164795,
"logits/rejected": -2.6707708835601807,
"logps/chosen": -271.2095642089844,
"logps/rejected": -247.21224975585938,
"loss": 0.6528,
"pred_label": 4377.916015625,
"rewards/accuracies": 0.625,
"rewards/chosen": 0.024391591548919678,
"rewards/margins": 0.04303843528032303,
"rewards/rejected": -0.01864684373140335,
"step": 500,
"use_label": 10023.083984375
},
{
"epoch": 0.63,
"learning_rate": 2.0663562281722933e-07,
"logits/chosen": -2.659043073654175,
"logits/rejected": -2.6555004119873047,
"logps/chosen": -272.95050048828125,
"logps/rejected": -251.1392364501953,
"loss": 0.6442,
"pred_label": 5962.0673828125,
"rewards/accuracies": 0.6553124785423279,
"rewards/chosen": 0.030743848532438278,
"rewards/margins": 0.0554736964404583,
"rewards/rejected": -0.024729840457439423,
"step": 600,
"use_label": 11638.9326171875
},
{
"epoch": 0.73,
"learning_rate": 1.4842840512223514e-07,
"logits/chosen": -2.6591668128967285,
"logits/rejected": -2.6622869968414307,
"logps/chosen": -269.9889221191406,
"logps/rejected": -245.4040985107422,
"loss": 0.64,
"pred_label": 7640.8505859375,
"rewards/accuracies": 0.6478124856948853,
"rewards/chosen": 0.03263993561267853,
"rewards/margins": 0.061180587857961655,
"rewards/rejected": -0.02854064851999283,
"step": 700,
"use_label": 13160.150390625
},
{
"epoch": 0.84,
"learning_rate": 9.022118742724097e-08,
"logits/chosen": -2.650268793106079,
"logits/rejected": -2.6555473804473877,
"logps/chosen": -272.705322265625,
"logps/rejected": -252.30169677734375,
"loss": 0.6368,
"pred_label": 9366.9609375,
"rewards/accuracies": 0.6415625214576721,
"rewards/chosen": 0.031398553401231766,
"rewards/margins": 0.06083739921450615,
"rewards/rejected": -0.029438842087984085,
"step": 800,
"use_label": 14634.0390625
},
{
"epoch": 0.94,
"learning_rate": 3.20139697322468e-08,
"logits/chosen": -2.6563680171966553,
"logits/rejected": -2.6590001583099365,
"logps/chosen": -269.04559326171875,
"logps/rejected": -253.2301025390625,
"loss": 0.6377,
"pred_label": 11126.677734375,
"rewards/accuracies": 0.6418750286102295,
"rewards/chosen": 0.02964354306459427,
"rewards/margins": 0.05687180534005165,
"rewards/rejected": -0.027228260412812233,
"step": 900,
"use_label": 16074.322265625
},
{
"epoch": 1.0,
"eval_logits/chosen": -2.4939169883728027,
"eval_logits/rejected": -2.495774507522583,
"eval_logps/chosen": -269.28546142578125,
"eval_logps/rejected": -253.23594665527344,
"eval_loss": 0.6354129910469055,
"eval_pred_label": 13234.32421875,
"eval_rewards/accuracies": 0.6259999871253967,
"eval_rewards/chosen": 0.027118388563394547,
"eval_rewards/margins": 0.056793875992298126,
"eval_rewards/rejected": -0.029675481840968132,
"eval_runtime": 1016.337,
"eval_samples_per_second": 1.968,
"eval_steps_per_second": 0.246,
"eval_use_label": 17827.67578125,
"step": 955
},
{
"epoch": 1.0,
"step": 955,
"total_flos": 0.0,
"train_loss": 0.6554346030919339,
"train_runtime": 50166.5495,
"train_samples_per_second": 1.219,
"train_steps_per_second": 0.019
}
],
"logging_steps": 100,
"max_steps": 955,
"num_train_epochs": 1,
"save_steps": 10,
"total_flos": 0.0,
"trial_name": null,
"trial_params": null
}