{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9997382884061764, "eval_steps": 100, "global_step": 955, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 5.208333333333333e-09, "logits/chosen": -2.980285167694092, "logits/rejected": -2.87275767326355, "logps/chosen": -313.4390563964844, "logps/rejected": -236.1754150390625, "loss": 0.6931, "pred_label": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1, "use_label": 0.0 }, { "epoch": 0.1, "learning_rate": 4.976717112922002e-07, "logits/chosen": -2.8195502758026123, "logits/rejected": -2.828876495361328, "logps/chosen": -285.2825012207031, "logps/rejected": -270.9394836425781, "loss": 0.6931, "pred_label": 0.0, "rewards/accuracies": 0.4873737394809723, "rewards/chosen": -0.0001882972428575158, "rewards/margins": -0.0012037099804729223, "rewards/rejected": 0.0010154128540307283, "step": 100, "use_label": 0.0 }, { "epoch": 0.21, "learning_rate": 4.3946449359720607e-07, "logits/chosen": -2.8279786109924316, "logits/rejected": -2.8164660930633545, "logps/chosen": -278.72576904296875, "logps/rejected": -260.5760498046875, "loss": 0.6901, "pred_label": 0.0, "rewards/accuracies": 0.5506250262260437, "rewards/chosen": 0.0056201983243227005, "rewards/margins": 0.009352817200124264, "rewards/rejected": -0.003732620272785425, "step": 200, "use_label": 0.0 }, { "epoch": 0.31, "learning_rate": 3.812572759022118e-07, "logits/chosen": -2.8140082359313965, "logits/rejected": -2.8158328533172607, "logps/chosen": -284.0040283203125, "logps/rejected": -253.93580627441406, "loss": 0.6875, "pred_label": 0.0, "rewards/accuracies": 0.5793750286102295, "rewards/chosen": 0.006361996755003929, "rewards/margins": 0.012689676135778427, "rewards/rejected": -0.006327680312097073, "step": 300, "use_label": 0.0 }, { "epoch": 0.42, "learning_rate": 3.230500582072177e-07, "logits/chosen": -2.826840877532959, "logits/rejected": -2.8091540336608887, "logps/chosen": -284.3250427246094, "logps/rejected": -262.8480529785156, "loss": 0.6842, "pred_label": 0.0, "rewards/accuracies": 0.6031249761581421, "rewards/chosen": 0.010542460717260838, "rewards/margins": 0.02265419438481331, "rewards/rejected": -0.012111731804907322, "step": 400, "use_label": 0.0 }, { "epoch": 0.52, "learning_rate": 2.648428405122235e-07, "logits/chosen": -2.8074240684509277, "logits/rejected": -2.795992136001587, "logps/chosen": -269.97344970703125, "logps/rejected": -252.08152770996094, "loss": 0.6821, "pred_label": 0.0, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": 0.007951202802360058, "rewards/margins": 0.017641538754105568, "rewards/rejected": -0.00969033595174551, "step": 500, "use_label": 0.0 }, { "epoch": 0.63, "learning_rate": 2.0663562281722933e-07, "logits/chosen": -2.8337247371673584, "logits/rejected": -2.8203465938568115, "logps/chosen": -285.0716857910156, "logps/rejected": -265.4593811035156, "loss": 0.6782, "pred_label": 0.0, "rewards/accuracies": 0.6356250047683716, "rewards/chosen": 0.015922056511044502, "rewards/margins": 0.035635706037282944, "rewards/rejected": -0.01971365138888359, "step": 600, "use_label": 0.0 }, { "epoch": 0.73, "learning_rate": 1.4842840512223514e-07, "logits/chosen": -2.8270840644836426, "logits/rejected": -2.8112306594848633, "logps/chosen": -281.3733215332031, "logps/rejected": -248.8465576171875, "loss": 0.6762, "pred_label": 0.0, "rewards/accuracies": 0.6324999928474426, "rewards/chosen": 0.018638433888554573, "rewards/margins": 0.03718380257487297, "rewards/rejected": -0.018545370548963547, "step": 700, "use_label": 0.0 }, { "epoch": 0.84, "learning_rate": 9.022118742724097e-08, "logits/chosen": -2.8297488689422607, "logits/rejected": -2.83492374420166, "logps/chosen": -288.54058837890625, "logps/rejected": -261.6487731933594, "loss": 0.6736, "pred_label": 0.0, "rewards/accuracies": 0.6393749713897705, "rewards/chosen": 0.021646475419402122, "rewards/margins": 0.04531754553318024, "rewards/rejected": -0.023671068251132965, "step": 800, "use_label": 0.0 }, { "epoch": 0.94, "learning_rate": 3.20139697322468e-08, "logits/chosen": -2.8206725120544434, "logits/rejected": -2.8274974822998047, "logps/chosen": -277.310791015625, "logps/rejected": -256.5542297363281, "loss": 0.6744, "pred_label": 0.0, "rewards/accuracies": 0.6443750262260437, "rewards/chosen": 0.019743308424949646, "rewards/margins": 0.041582074016332626, "rewards/rejected": -0.021838760003447533, "step": 900, "use_label": 0.0 }, { "epoch": 1.0, "eval_logits/chosen": -2.8419151306152344, "eval_logits/rejected": -2.845423936843872, "eval_logps/chosen": -284.0372314453125, "eval_logps/rejected": -259.5419921875, "eval_loss": 0.6741092801094055, "eval_pred_label": 0.0, "eval_rewards/accuracies": 0.6679999828338623, "eval_rewards/chosen": 0.02201448194682598, "eval_rewards/margins": 0.04624143987894058, "eval_rewards/rejected": -0.02422695979475975, "eval_runtime": 469.0597, "eval_samples_per_second": 4.264, "eval_steps_per_second": 0.266, "eval_use_label": 0.0, "step": 955 }, { "epoch": 1.0, "step": 955, "total_flos": 0.0, "train_loss": 0.6817296707193264, "train_runtime": 25631.2708, "train_samples_per_second": 2.385, "train_steps_per_second": 0.037 } ], "logging_steps": 100, "max_steps": 955, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "trial_name": null, "trial_params": null }