ironrock's picture
Upload folder using huggingface_hub
4bb424c verified
{
"best_metric": 0.529485821723938,
"best_model_checkpoint": "./mistral/20-04-24-Weni-WeniGPT-Agents-Mistral-1.0.6-SFT-1.0.5-DPO_Experiment on DPO with other hyperparameters and best SFT model of WeniGPT-2_max_steps-366_batch_4_2024-04-20_ppid_9/checkpoint-90",
"epoch": 1.4634146341463414,
"eval_steps": 30,
"global_step": 90,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.16,
"grad_norm": 8.378021240234375,
"learning_rate": 4.0909090909090915e-06,
"logits/chosen": -1.830958604812622,
"logits/rejected": -1.8507845401763916,
"logps/chosen": -28.701984405517578,
"logps/rejected": -54.28569793701172,
"loss": 0.6924,
"rewards/accuracies": 0.20000000298023224,
"rewards/chosen": 0.0008967495523393154,
"rewards/margins": 0.0014666033675894141,
"rewards/rejected": -0.0005698538152500987,
"step": 10
},
{
"epoch": 0.33,
"grad_norm": 5.193418502807617,
"learning_rate": 4.887323943661972e-06,
"logits/chosen": -1.7550897598266602,
"logits/rejected": -1.770708680152893,
"logps/chosen": -47.344207763671875,
"logps/rejected": -64.0368423461914,
"loss": 0.6852,
"rewards/accuracies": 0.4000000059604645,
"rewards/chosen": 0.017231885343790054,
"rewards/margins": 0.01606021076440811,
"rewards/rejected": 0.0011716745793819427,
"step": 20
},
{
"epoch": 0.49,
"grad_norm": 7.308932304382324,
"learning_rate": 4.746478873239437e-06,
"logits/chosen": -1.781267762184143,
"logits/rejected": -1.8114898204803467,
"logps/chosen": -54.274559020996094,
"logps/rejected": -95.20500183105469,
"loss": 0.6635,
"rewards/accuracies": 0.5,
"rewards/chosen": 0.0641159638762474,
"rewards/margins": 0.061691801995038986,
"rewards/rejected": 0.0024241588544100523,
"step": 30
},
{
"epoch": 0.49,
"eval_logits/chosen": -1.7831767797470093,
"eval_logits/rejected": -1.8043663501739502,
"eval_logps/chosen": -55.16960906982422,
"eval_logps/rejected": -97.32585144042969,
"eval_loss": 0.6523757576942444,
"eval_rewards/accuracies": 0.4642857015132904,
"eval_rewards/chosen": 0.09036973863840103,
"eval_rewards/margins": 0.08673857897520065,
"eval_rewards/rejected": 0.0036311547737568617,
"eval_runtime": 8.141,
"eval_samples_per_second": 3.439,
"eval_steps_per_second": 1.72,
"step": 30
},
{
"epoch": 0.65,
"grad_norm": 0.0,
"learning_rate": 4.6056338028169015e-06,
"logits/chosen": -1.889905333518982,
"logits/rejected": -1.9024461507797241,
"logps/chosen": -27.918941497802734,
"logps/rejected": -42.093284606933594,
"loss": 0.668,
"rewards/accuracies": 0.25,
"rewards/chosen": 0.054457180202007294,
"rewards/margins": 0.0539846234023571,
"rewards/rejected": 0.0004725646285805851,
"step": 40
},
{
"epoch": 0.81,
"grad_norm": 8.53225326538086,
"learning_rate": 4.464788732394367e-06,
"logits/chosen": -1.8278567790985107,
"logits/rejected": -1.849957823753357,
"logps/chosen": -43.8238639831543,
"logps/rejected": -68.02179718017578,
"loss": 0.6358,
"rewards/accuracies": 0.3499999940395355,
"rewards/chosen": 0.13941256701946259,
"rewards/margins": 0.13133978843688965,
"rewards/rejected": 0.008072790689766407,
"step": 50
},
{
"epoch": 0.98,
"grad_norm": 9.436968803405762,
"learning_rate": 4.3239436619718315e-06,
"logits/chosen": -1.805991768836975,
"logits/rejected": -1.8437427282333374,
"logps/chosen": -43.8873291015625,
"logps/rejected": -95.2943115234375,
"loss": 0.6026,
"rewards/accuracies": 0.44999998807907104,
"rewards/chosen": 0.18793432414531708,
"rewards/margins": 0.21308371424674988,
"rewards/rejected": -0.025149401277303696,
"step": 60
},
{
"epoch": 0.98,
"eval_logits/chosen": -1.7877694368362427,
"eval_logits/rejected": -1.8098936080932617,
"eval_logps/chosen": -53.567203521728516,
"eval_logps/rejected": -97.33795928955078,
"eval_loss": 0.5890871286392212,
"eval_rewards/accuracies": 0.4642857015132904,
"eval_rewards/chosen": 0.25061002373695374,
"eval_rewards/margins": 0.2481890469789505,
"eval_rewards/rejected": 0.002420984674245119,
"eval_runtime": 8.1404,
"eval_samples_per_second": 3.44,
"eval_steps_per_second": 1.72,
"step": 60
},
{
"epoch": 1.14,
"grad_norm": 0.0,
"learning_rate": 4.183098591549296e-06,
"logits/chosen": -1.8344879150390625,
"logits/rejected": -1.8489716053009033,
"logps/chosen": -40.38930892944336,
"logps/rejected": -60.9084358215332,
"loss": 0.6031,
"rewards/accuracies": 0.375,
"rewards/chosen": 0.19739331305027008,
"rewards/margins": 0.22638121247291565,
"rewards/rejected": -0.028987903147935867,
"step": 70
},
{
"epoch": 1.3,
"grad_norm": 5.49536657333374,
"learning_rate": 4.042253521126761e-06,
"logits/chosen": -1.7903095483779907,
"logits/rejected": -1.8362411260604858,
"logps/chosen": -44.288116455078125,
"logps/rejected": -90.21073913574219,
"loss": 0.5357,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": 0.34061312675476074,
"rewards/margins": 0.40679749846458435,
"rewards/rejected": -0.06618441641330719,
"step": 80
},
{
"epoch": 1.46,
"grad_norm": 13.401692390441895,
"learning_rate": 3.901408450704225e-06,
"logits/chosen": -1.8004281520843506,
"logits/rejected": -1.8247934579849243,
"logps/chosen": -42.32465362548828,
"logps/rejected": -70.9749984741211,
"loss": 0.5387,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": 0.3678433299064636,
"rewards/margins": 0.4186524450778961,
"rewards/rejected": -0.05080908536911011,
"step": 90
},
{
"epoch": 1.46,
"eval_logits/chosen": -1.7943389415740967,
"eval_logits/rejected": -1.8181126117706299,
"eval_logps/chosen": -51.677486419677734,
"eval_logps/rejected": -97.63689422607422,
"eval_loss": 0.529485821723938,
"eval_rewards/accuracies": 0.4642857015132904,
"eval_rewards/chosen": 0.4395819306373596,
"eval_rewards/margins": 0.4670555889606476,
"eval_rewards/rejected": -0.027473628520965576,
"eval_runtime": 8.1412,
"eval_samples_per_second": 3.439,
"eval_steps_per_second": 1.72,
"step": 90
}
],
"logging_steps": 10,
"max_steps": 366,
"num_input_tokens_seen": 0,
"num_train_epochs": 6,
"save_steps": 90,
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}