zephyr-ds / trainer_state.json
jikaixuan's picture
Model save
b039b8d verified
raw
history blame
6.81 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9997382884061764,
"eval_steps": 100,
"global_step": 955,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 5.208333333333333e-09,
"logits/chosen": -2.980285167694092,
"logits/rejected": -2.87275767326355,
"logps/chosen": -313.4390563964844,
"logps/rejected": -236.1754150390625,
"loss": 0.6931,
"pred_label": 0.0,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1,
"use_label": 10.0
},
{
"epoch": 0.1,
"learning_rate": 4.976717112922002e-07,
"logits/chosen": -2.8194870948791504,
"logits/rejected": -2.8288567066192627,
"logps/chosen": -285.2724304199219,
"logps/rejected": -270.956298828125,
"loss": 0.6838,
"pred_label": 150.7020263671875,
"rewards/accuracies": 0.5050504803657532,
"rewards/chosen": 0.000817809603177011,
"rewards/margins": 0.0014873194741085172,
"rewards/rejected": -0.0006695101037621498,
"step": 100,
"use_label": 659.2979736328125
},
{
"epoch": 0.21,
"learning_rate": 4.3946449359720607e-07,
"logits/chosen": -2.828075647354126,
"logits/rejected": -2.816530227661133,
"logps/chosen": -278.7549133300781,
"logps/rejected": -260.5694274902344,
"loss": 0.683,
"pred_label": 452.552490234375,
"rewards/accuracies": 0.534375011920929,
"rewards/chosen": 0.002707230392843485,
"rewards/margins": 0.005774380639195442,
"rewards/rejected": -0.003067150479182601,
"step": 200,
"use_label": 1949.447509765625
},
{
"epoch": 0.31,
"learning_rate": 3.812572759022118e-07,
"logits/chosen": -2.8141846656799316,
"logits/rejected": -2.8159701824188232,
"logps/chosen": -284.0125732421875,
"logps/rejected": -253.9112091064453,
"loss": 0.6807,
"pred_label": 775.85498046875,
"rewards/accuracies": 0.5575000047683716,
"rewards/chosen": 0.005504029802978039,
"rewards/margins": 0.009370613843202591,
"rewards/rejected": -0.0038665838073939085,
"step": 300,
"use_label": 3226.14501953125
},
{
"epoch": 0.42,
"learning_rate": 3.230500582072177e-07,
"logits/chosen": -2.826817512512207,
"logits/rejected": -2.8094358444213867,
"logps/chosen": -284.3566589355469,
"logps/rejected": -262.80731201171875,
"loss": 0.6769,
"pred_label": 1149.0574951171875,
"rewards/accuracies": 0.5774999856948853,
"rewards/chosen": 0.007384983357042074,
"rewards/margins": 0.015422500669956207,
"rewards/rejected": -0.008037514984607697,
"step": 400,
"use_label": 4452.9423828125
},
{
"epoch": 0.52,
"learning_rate": 2.648428405122235e-07,
"logits/chosen": -2.807734966278076,
"logits/rejected": -2.796409845352173,
"logps/chosen": -269.9852600097656,
"logps/rejected": -252.07232666015625,
"loss": 0.6728,
"pred_label": 1592.5675048828125,
"rewards/accuracies": 0.5756250023841858,
"rewards/chosen": 0.006774631794542074,
"rewards/margins": 0.01554279588162899,
"rewards/rejected": -0.008768163621425629,
"step": 500,
"use_label": 5609.4326171875
},
{
"epoch": 0.63,
"learning_rate": 2.0663562281722933e-07,
"logits/chosen": -2.8339650630950928,
"logits/rejected": -2.82075572013855,
"logps/chosen": -285.0927734375,
"logps/rejected": -265.4134826660156,
"loss": 0.6681,
"pred_label": 2111.6650390625,
"rewards/accuracies": 0.6206250190734863,
"rewards/chosen": 0.013815036043524742,
"rewards/margins": 0.0289370846003294,
"rewards/rejected": -0.015122047625482082,
"step": 600,
"use_label": 6690.3349609375
},
{
"epoch": 0.73,
"learning_rate": 1.4842840512223514e-07,
"logits/chosen": -2.827232599258423,
"logits/rejected": -2.811751127243042,
"logps/chosen": -281.4178771972656,
"logps/rejected": -248.81068420410156,
"loss": 0.6659,
"pred_label": 2680.2724609375,
"rewards/accuracies": 0.6200000047683716,
"rewards/chosen": 0.01417633332312107,
"rewards/margins": 0.029135096818208694,
"rewards/rejected": -0.014958759769797325,
"step": 700,
"use_label": 7721.7275390625
},
{
"epoch": 0.84,
"learning_rate": 9.022118742724097e-08,
"logits/chosen": -2.8300516605377197,
"logits/rejected": -2.835542678833008,
"logps/chosen": -288.608642578125,
"logps/rejected": -261.5773010253906,
"loss": 0.6646,
"pred_label": 3286.232421875,
"rewards/accuracies": 0.6200000047683716,
"rewards/chosen": 0.014839441515505314,
"rewards/margins": 0.03136582300066948,
"rewards/rejected": -0.01652638241648674,
"step": 800,
"use_label": 8715.767578125
},
{
"epoch": 0.94,
"learning_rate": 3.20139697322468e-08,
"logits/chosen": -2.8211710453033447,
"logits/rejected": -2.8280835151672363,
"logps/chosen": -277.363525390625,
"logps/rejected": -256.4843444824219,
"loss": 0.6641,
"pred_label": 3882.75244140625,
"rewards/accuracies": 0.6331250071525574,
"rewards/chosen": 0.01446867547929287,
"rewards/margins": 0.02932187356054783,
"rewards/rejected": -0.014853193424642086,
"step": 900,
"use_label": 9719.2470703125
},
{
"epoch": 1.0,
"eval_logits/chosen": -2.842418670654297,
"eval_logits/rejected": -2.846235752105713,
"eval_logps/chosen": -284.122314453125,
"eval_logps/rejected": -259.4594421386719,
"eval_loss": 0.6635700464248657,
"eval_pred_label": 4600.50390625,
"eval_rewards/accuracies": 0.628000020980835,
"eval_rewards/chosen": 0.013506044633686543,
"eval_rewards/margins": 0.029479000717401505,
"eval_rewards/rejected": -0.015972958877682686,
"eval_runtime": 438.8322,
"eval_samples_per_second": 4.558,
"eval_steps_per_second": 0.285,
"eval_use_label": 10931.49609375,
"step": 955
},
{
"epoch": 1.0,
"step": 955,
"total_flos": 0.0,
"train_loss": 0.6728555943953429,
"train_runtime": 24272.064,
"train_samples_per_second": 2.519,
"train_steps_per_second": 0.039
}
],
"logging_steps": 100,
"max_steps": 955,
"num_train_epochs": 1,
"save_steps": 10,
"total_flos": 0.0,
"trial_name": null,
"trial_params": null
}