{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9997382884061764,
  "eval_steps": 100,
  "global_step": 955,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 5.208333333333333e-09,
      "logits/chosen": -2.980285167694092,
      "logits/rejected": -2.87275767326355,
      "logps/chosen": -313.4390563964844,
      "logps/rejected": -236.1754150390625,
      "loss": 0.6931,
      "pred_label": 0.0,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1,
      "use_label": 0.0
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.976717112922002e-07,
      "logits/chosen": -2.8195502758026123,
      "logits/rejected": -2.828876495361328,
      "logps/chosen": -285.2825012207031,
      "logps/rejected": -270.9394836425781,
      "loss": 0.6931,
      "pred_label": 0.0,
      "rewards/accuracies": 0.4873737394809723,
      "rewards/chosen": -0.0001882972428575158,
      "rewards/margins": -0.0012037099804729223,
      "rewards/rejected": 0.0010154128540307283,
      "step": 100,
      "use_label": 0.0
    },
    {
      "epoch": 0.21,
      "learning_rate": 4.3946449359720607e-07,
      "logits/chosen": -2.8279786109924316,
      "logits/rejected": -2.8164660930633545,
      "logps/chosen": -278.72576904296875,
      "logps/rejected": -260.5760498046875,
      "loss": 0.6901,
      "pred_label": 0.0,
      "rewards/accuracies": 0.5506250262260437,
      "rewards/chosen": 0.0056201983243227005,
      "rewards/margins": 0.009352817200124264,
      "rewards/rejected": -0.003732620272785425,
      "step": 200,
      "use_label": 0.0
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.812572759022118e-07,
      "logits/chosen": -2.8140082359313965,
      "logits/rejected": -2.8158328533172607,
      "logps/chosen": -284.0040283203125,
      "logps/rejected": -253.93580627441406,
      "loss": 0.6875,
      "pred_label": 0.0,
      "rewards/accuracies": 0.5793750286102295,
      "rewards/chosen": 0.006361996755003929,
      "rewards/margins": 0.012689676135778427,
      "rewards/rejected": -0.006327680312097073,
      "step": 300,
      "use_label": 0.0
    },
    {
      "epoch": 0.42,
      "learning_rate": 3.230500582072177e-07,
      "logits/chosen": -2.826840877532959,
      "logits/rejected": -2.8091540336608887,
      "logps/chosen": -284.3250427246094,
      "logps/rejected": -262.8480529785156,
      "loss": 0.6842,
      "pred_label": 0.0,
      "rewards/accuracies": 0.6031249761581421,
      "rewards/chosen": 0.010542460717260838,
      "rewards/margins": 0.02265419438481331,
      "rewards/rejected": -0.012111731804907322,
      "step": 400,
      "use_label": 0.0
    },
    {
      "epoch": 0.52,
      "learning_rate": 2.648428405122235e-07,
      "logits/chosen": -2.8074240684509277,
      "logits/rejected": -2.795992136001587,
      "logps/chosen": -269.97344970703125,
      "logps/rejected": -252.08152770996094,
      "loss": 0.6821,
      "pred_label": 0.0,
      "rewards/accuracies": 0.5874999761581421,
      "rewards/chosen": 0.007951202802360058,
      "rewards/margins": 0.017641538754105568,
      "rewards/rejected": -0.00969033595174551,
      "step": 500,
      "use_label": 0.0
    },
    {
      "epoch": 0.63,
      "learning_rate": 2.0663562281722933e-07,
      "logits/chosen": -2.8337247371673584,
      "logits/rejected": -2.8203465938568115,
      "logps/chosen": -285.0716857910156,
      "logps/rejected": -265.4593811035156,
      "loss": 0.6782,
      "pred_label": 0.0,
      "rewards/accuracies": 0.6356250047683716,
      "rewards/chosen": 0.015922056511044502,
      "rewards/margins": 0.035635706037282944,
      "rewards/rejected": -0.01971365138888359,
      "step": 600,
      "use_label": 0.0
    },
    {
      "epoch": 0.73,
      "learning_rate": 1.4842840512223514e-07,
      "logits/chosen": -2.8270840644836426,
      "logits/rejected": -2.8112306594848633,
      "logps/chosen": -281.3733215332031,
      "logps/rejected": -248.8465576171875,
      "loss": 0.6762,
      "pred_label": 0.0,
      "rewards/accuracies": 0.6324999928474426,
      "rewards/chosen": 0.018638433888554573,
      "rewards/margins": 0.03718380257487297,
      "rewards/rejected": -0.018545370548963547,
      "step": 700,
      "use_label": 0.0
    },
    {
      "epoch": 0.84,
      "learning_rate": 9.022118742724097e-08,
      "logits/chosen": -2.8297488689422607,
      "logits/rejected": -2.83492374420166,
      "logps/chosen": -288.54058837890625,
      "logps/rejected": -261.6487731933594,
      "loss": 0.6736,
      "pred_label": 0.0,
      "rewards/accuracies": 0.6393749713897705,
      "rewards/chosen": 0.021646475419402122,
      "rewards/margins": 0.04531754553318024,
      "rewards/rejected": -0.023671068251132965,
      "step": 800,
      "use_label": 0.0
    },
    {
      "epoch": 0.94,
      "learning_rate": 3.20139697322468e-08,
      "logits/chosen": -2.8206725120544434,
      "logits/rejected": -2.8274974822998047,
      "logps/chosen": -277.310791015625,
      "logps/rejected": -256.5542297363281,
      "loss": 0.6744,
      "pred_label": 0.0,
      "rewards/accuracies": 0.6443750262260437,
      "rewards/chosen": 0.019743308424949646,
      "rewards/margins": 0.041582074016332626,
      "rewards/rejected": -0.021838760003447533,
      "step": 900,
      "use_label": 0.0
    },
    {
      "epoch": 1.0,
      "eval_logits/chosen": -2.8419151306152344,
      "eval_logits/rejected": -2.845423936843872,
      "eval_logps/chosen": -284.0372314453125,
      "eval_logps/rejected": -259.5419921875,
      "eval_loss": 0.6741092801094055,
      "eval_pred_label": 0.0,
      "eval_rewards/accuracies": 0.6679999828338623,
      "eval_rewards/chosen": 0.02201448194682598,
      "eval_rewards/margins": 0.04624143987894058,
      "eval_rewards/rejected": -0.02422695979475975,
      "eval_runtime": 469.0597,
      "eval_samples_per_second": 4.264,
      "eval_steps_per_second": 0.266,
      "eval_use_label": 0.0,
      "step": 955
    },
    {
      "epoch": 1.0,
      "step": 955,
      "total_flos": 0.0,
      "train_loss": 0.6817296707193264,
      "train_runtime": 25631.2708,
      "train_samples_per_second": 2.385,
      "train_steps_per_second": 0.037
    }
  ],
  "logging_steps": 100,
  "max_steps": 955,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "trial_name": null,
  "trial_params": null
}