zephyr-ds / trainer_state.json
jikaixuan's picture
Model save
5b6d4b2 verified
raw
history blame
6.58 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9997382884061764,
"eval_steps": 100,
"global_step": 955,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 5.208333333333333e-09,
"logits/chosen": -2.980285167694092,
"logits/rejected": -2.87275767326355,
"logps/chosen": -313.4390563964844,
"logps/rejected": -236.1754150390625,
"loss": 0.6931,
"pred_label": 0.0,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1,
"use_label": 0.0
},
{
"epoch": 0.1,
"learning_rate": 4.976717112922002e-07,
"logits/chosen": -2.8195502758026123,
"logits/rejected": -2.828876495361328,
"logps/chosen": -285.2825012207031,
"logps/rejected": -270.9394836425781,
"loss": 0.6931,
"pred_label": 0.0,
"rewards/accuracies": 0.4873737394809723,
"rewards/chosen": -0.0001882972428575158,
"rewards/margins": -0.0012037099804729223,
"rewards/rejected": 0.0010154128540307283,
"step": 100,
"use_label": 0.0
},
{
"epoch": 0.21,
"learning_rate": 4.3946449359720607e-07,
"logits/chosen": -2.8279786109924316,
"logits/rejected": -2.8164660930633545,
"logps/chosen": -278.72576904296875,
"logps/rejected": -260.5760498046875,
"loss": 0.6901,
"pred_label": 0.0,
"rewards/accuracies": 0.5506250262260437,
"rewards/chosen": 0.0056201983243227005,
"rewards/margins": 0.009352817200124264,
"rewards/rejected": -0.003732620272785425,
"step": 200,
"use_label": 0.0
},
{
"epoch": 0.31,
"learning_rate": 3.812572759022118e-07,
"logits/chosen": -2.8140082359313965,
"logits/rejected": -2.8158328533172607,
"logps/chosen": -284.0040283203125,
"logps/rejected": -253.93580627441406,
"loss": 0.6875,
"pred_label": 0.0,
"rewards/accuracies": 0.5793750286102295,
"rewards/chosen": 0.006361996755003929,
"rewards/margins": 0.012689676135778427,
"rewards/rejected": -0.006327680312097073,
"step": 300,
"use_label": 0.0
},
{
"epoch": 0.42,
"learning_rate": 3.230500582072177e-07,
"logits/chosen": -2.826840877532959,
"logits/rejected": -2.8091540336608887,
"logps/chosen": -284.3250427246094,
"logps/rejected": -262.8480529785156,
"loss": 0.6842,
"pred_label": 0.0,
"rewards/accuracies": 0.6031249761581421,
"rewards/chosen": 0.010542460717260838,
"rewards/margins": 0.02265419438481331,
"rewards/rejected": -0.012111731804907322,
"step": 400,
"use_label": 0.0
},
{
"epoch": 0.52,
"learning_rate": 2.648428405122235e-07,
"logits/chosen": -2.8074240684509277,
"logits/rejected": -2.795992136001587,
"logps/chosen": -269.97344970703125,
"logps/rejected": -252.08152770996094,
"loss": 0.6821,
"pred_label": 0.0,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": 0.007951202802360058,
"rewards/margins": 0.017641538754105568,
"rewards/rejected": -0.00969033595174551,
"step": 500,
"use_label": 0.0
},
{
"epoch": 0.63,
"learning_rate": 2.0663562281722933e-07,
"logits/chosen": -2.8337247371673584,
"logits/rejected": -2.8203465938568115,
"logps/chosen": -285.0716857910156,
"logps/rejected": -265.4593811035156,
"loss": 0.6782,
"pred_label": 0.0,
"rewards/accuracies": 0.6356250047683716,
"rewards/chosen": 0.015922056511044502,
"rewards/margins": 0.035635706037282944,
"rewards/rejected": -0.01971365138888359,
"step": 600,
"use_label": 0.0
},
{
"epoch": 0.73,
"learning_rate": 1.4842840512223514e-07,
"logits/chosen": -2.8270840644836426,
"logits/rejected": -2.8112306594848633,
"logps/chosen": -281.3733215332031,
"logps/rejected": -248.8465576171875,
"loss": 0.6762,
"pred_label": 0.0,
"rewards/accuracies": 0.6324999928474426,
"rewards/chosen": 0.018638433888554573,
"rewards/margins": 0.03718380257487297,
"rewards/rejected": -0.018545370548963547,
"step": 700,
"use_label": 0.0
},
{
"epoch": 0.84,
"learning_rate": 9.022118742724097e-08,
"logits/chosen": -2.8297488689422607,
"logits/rejected": -2.83492374420166,
"logps/chosen": -288.54058837890625,
"logps/rejected": -261.6487731933594,
"loss": 0.6736,
"pred_label": 0.0,
"rewards/accuracies": 0.6393749713897705,
"rewards/chosen": 0.021646475419402122,
"rewards/margins": 0.04531754553318024,
"rewards/rejected": -0.023671068251132965,
"step": 800,
"use_label": 0.0
},
{
"epoch": 0.94,
"learning_rate": 3.20139697322468e-08,
"logits/chosen": -2.8206725120544434,
"logits/rejected": -2.8274974822998047,
"logps/chosen": -277.310791015625,
"logps/rejected": -256.5542297363281,
"loss": 0.6744,
"pred_label": 0.0,
"rewards/accuracies": 0.6443750262260437,
"rewards/chosen": 0.019743308424949646,
"rewards/margins": 0.041582074016332626,
"rewards/rejected": -0.021838760003447533,
"step": 900,
"use_label": 0.0
},
{
"epoch": 1.0,
"eval_logits/chosen": -2.8419151306152344,
"eval_logits/rejected": -2.845423936843872,
"eval_logps/chosen": -284.0372314453125,
"eval_logps/rejected": -259.5419921875,
"eval_loss": 0.6741092801094055,
"eval_pred_label": 0.0,
"eval_rewards/accuracies": 0.6679999828338623,
"eval_rewards/chosen": 0.02201448194682598,
"eval_rewards/margins": 0.04624143987894058,
"eval_rewards/rejected": -0.02422695979475975,
"eval_runtime": 469.0597,
"eval_samples_per_second": 4.264,
"eval_steps_per_second": 0.266,
"eval_use_label": 0.0,
"step": 955
},
{
"epoch": 1.0,
"step": 955,
"total_flos": 0.0,
"train_loss": 0.6817296707193264,
"train_runtime": 25631.2708,
"train_samples_per_second": 2.385,
"train_steps_per_second": 0.037
}
],
"logging_steps": 100,
"max_steps": 955,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"trial_name": null,
"trial_params": null
}