{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.957345971563981,
  "eval_steps": 100,
  "global_step": 156,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02,
      "grad_norm": 1063.9925388773709,
      "learning_rate": 3.125e-08,
      "logits/chosen": 123.11854553222656,
      "logits/rejected": 97.00198364257812,
      "logps/chosen": -425.18585205078125,
      "logps/rejected": -424.1869201660156,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.19,
      "grad_norm": 1254.7036040526557,
      "learning_rate": 3.1249999999999997e-07,
      "logits/chosen": 117.43434143066406,
      "logits/rejected": 136.35675048828125,
      "logps/chosen": -442.97802734375,
      "logps/rejected": -524.6129760742188,
      "loss": 1.564,
      "rewards/accuracies": 0.4583333432674408,
      "rewards/chosen": 0.16415566205978394,
      "rewards/margins": 0.36335471272468567,
      "rewards/rejected": -0.19919908046722412,
      "step": 10
    },
    {
      "epoch": 0.38,
      "grad_norm": 909.9038063820053,
      "learning_rate": 4.989935734988097e-07,
      "logits/chosen": 126.66890716552734,
      "logits/rejected": 134.35414123535156,
      "logps/chosen": -426.7857360839844,
      "logps/rejected": -491.2925720214844,
      "loss": 1.3477,
      "rewards/accuracies": 0.59375,
      "rewards/chosen": -0.2084747850894928,
      "rewards/margins": 1.3043320178985596,
      "rewards/rejected": -1.51280677318573,
      "step": 20
    },
    {
      "epoch": 0.57,
      "grad_norm": 821.3627225843074,
      "learning_rate": 4.877641290737883e-07,
      "logits/chosen": 125.84306335449219,
      "logits/rejected": 129.29446411132812,
      "logps/chosen": -467.2300720214844,
      "logps/rejected": -528.94189453125,
      "loss": 1.4491,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": -4.843996524810791,
      "rewards/margins": 3.846839189529419,
      "rewards/rejected": -8.690834999084473,
      "step": 30
    },
    {
      "epoch": 0.76,
      "grad_norm": 865.3817040985649,
      "learning_rate": 4.646121984004665e-07,
      "logits/chosen": 127.130859375,
      "logits/rejected": 122.1098861694336,
      "logps/chosen": -485.9337463378906,
      "logps/rejected": -506.68548583984375,
      "loss": 1.4982,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": -5.1009392738342285,
      "rewards/margins": 3.6673903465270996,
      "rewards/rejected": -8.768329620361328,
      "step": 40
    },
    {
      "epoch": 0.95,
      "grad_norm": 907.7995009069369,
      "learning_rate": 4.3069871595684787e-07,
      "logits/chosen": 133.64224243164062,
      "logits/rejected": 133.93919372558594,
      "logps/chosen": -493.3519592285156,
      "logps/rejected": -511.6605529785156,
      "loss": 1.3238,
      "rewards/accuracies": 0.6812499761581421,
      "rewards/chosen": -4.211705684661865,
      "rewards/margins": 3.953176975250244,
      "rewards/rejected": -8.164883613586426,
      "step": 50
    },
    {
      "epoch": 1.14,
      "grad_norm": 320.5550768420106,
      "learning_rate": 3.877242453630256e-07,
      "logits/chosen": 133.02821350097656,
      "logits/rejected": 136.251953125,
      "logps/chosen": -460.30291748046875,
      "logps/rejected": -494.0633239746094,
      "loss": 0.5962,
      "rewards/accuracies": 0.893750011920929,
      "rewards/chosen": -1.4149787425994873,
      "rewards/margins": 10.490567207336426,
      "rewards/rejected": -11.905545234680176,
      "step": 60
    },
    {
      "epoch": 1.33,
      "grad_norm": 427.3414833642942,
      "learning_rate": 3.378437060203357e-07,
      "logits/chosen": 129.8929901123047,
      "logits/rejected": 130.46600341796875,
      "logps/chosen": -432.332275390625,
      "logps/rejected": -534.7671508789062,
      "loss": 0.1911,
      "rewards/accuracies": 0.9312499761581421,
      "rewards/chosen": -1.3673985004425049,
      "rewards/margins": 11.68455696105957,
      "rewards/rejected": -13.051956176757812,
      "step": 70
    },
    {
      "epoch": 1.52,
      "grad_norm": 381.35819492011535,
      "learning_rate": 2.8355831645441387e-07,
      "logits/chosen": 135.67372131347656,
      "logits/rejected": 136.30862426757812,
      "logps/chosen": -487.7591247558594,
      "logps/rejected": -561.80712890625,
      "loss": 0.2207,
      "rewards/accuracies": 0.925000011920929,
      "rewards/chosen": 0.5364077091217041,
      "rewards/margins": 14.564852714538574,
      "rewards/rejected": -14.02844524383545,
      "step": 80
    },
    {
      "epoch": 1.71,
      "grad_norm": 263.6170847100913,
      "learning_rate": 2.2759017277414164e-07,
      "logits/chosen": 122.95021057128906,
      "logits/rejected": 125.04380798339844,
      "logps/chosen": -465.0882873535156,
      "logps/rejected": -521.892578125,
      "loss": 0.2059,
      "rewards/accuracies": 0.9375,
      "rewards/chosen": -1.1682957410812378,
      "rewards/margins": 13.839956283569336,
      "rewards/rejected": -15.00825309753418,
      "step": 90
    },
    {
      "epoch": 1.9,
      "grad_norm": 498.9305336886761,
      "learning_rate": 1.7274575140626315e-07,
      "logits/chosen": 137.44198608398438,
      "logits/rejected": 127.8071060180664,
      "logps/chosen": -482.68829345703125,
      "logps/rejected": -564.5560913085938,
      "loss": 0.2569,
      "rewards/accuracies": 0.956250011920929,
      "rewards/chosen": -1.7192827463150024,
      "rewards/margins": 14.991134643554688,
      "rewards/rejected": -16.710416793823242,
      "step": 100
    },
    {
      "epoch": 1.9,
      "eval_logits/chosen": 105.28992462158203,
      "eval_logits/rejected": 99.2330093383789,
      "eval_logps/chosen": -470.2108459472656,
      "eval_logps/rejected": -482.4095153808594,
      "eval_loss": 1.0775203704833984,
      "eval_rewards/accuracies": 0.7395833134651184,
      "eval_rewards/chosen": -6.722555160522461,
      "eval_rewards/margins": 5.621420383453369,
      "eval_rewards/rejected": -12.343975067138672,
      "eval_runtime": 52.5735,
      "eval_samples_per_second": 14.266,
      "eval_steps_per_second": 0.457,
      "step": 100
    },
    {
      "epoch": 2.09,
      "grad_norm": 312.21164489149646,
      "learning_rate": 1.2177518064852348e-07,
      "logits/chosen": 116.9559555053711,
      "logits/rejected": 130.40074157714844,
      "logps/chosen": -501.41314697265625,
      "logps/rejected": -597.8336181640625,
      "loss": 0.1917,
      "rewards/accuracies": 0.949999988079071,
      "rewards/chosen": -2.5299580097198486,
      "rewards/margins": 14.8624906539917,
      "rewards/rejected": -17.3924503326416,
      "step": 110
    },
    {
      "epoch": 2.27,
      "grad_norm": 88.0209774984605,
      "learning_rate": 7.723433775328384e-08,
      "logits/chosen": 128.97409057617188,
      "logits/rejected": 129.96273803710938,
      "logps/chosen": -482.223876953125,
      "logps/rejected": -545.4796752929688,
      "loss": 0.054,
      "rewards/accuracies": 0.9750000238418579,
      "rewards/chosen": -1.9787194728851318,
      "rewards/margins": 14.918545722961426,
      "rewards/rejected": -16.897266387939453,
      "step": 120
    },
    {
      "epoch": 2.46,
      "grad_norm": 195.2044766984358,
      "learning_rate": 4.1356686569674335e-08,
      "logits/chosen": 134.0525665283203,
      "logits/rejected": 139.18789672851562,
      "logps/chosen": -496.6250915527344,
      "logps/rejected": -565.2105712890625,
      "loss": 0.0788,
      "rewards/accuracies": 0.987500011920929,
      "rewards/chosen": -1.1465753316879272,
      "rewards/margins": 14.6506986618042,
      "rewards/rejected": -15.797274589538574,
      "step": 130
    },
    {
      "epoch": 2.65,
      "grad_norm": 133.44761939021552,
      "learning_rate": 1.5941282340065697e-08,
      "logits/chosen": 119.77888488769531,
      "logits/rejected": 119.9384536743164,
      "logps/chosen": -450.76904296875,
      "logps/rejected": -552.9923095703125,
      "loss": 0.0613,
      "rewards/accuracies": 0.987500011920929,
      "rewards/chosen": -2.1010565757751465,
      "rewards/margins": 16.25819206237793,
      "rewards/rejected": -18.359249114990234,
      "step": 140
    },
    {
      "epoch": 2.84,
      "grad_norm": 48.801935911090936,
      "learning_rate": 2.2625595580163247e-09,
      "logits/chosen": 127.36897277832031,
      "logits/rejected": 140.77224731445312,
      "logps/chosen": -477.6751403808594,
      "logps/rejected": -549.9277954101562,
      "loss": 0.0601,
      "rewards/accuracies": 0.9750000238418579,
      "rewards/chosen": -1.1382229328155518,
      "rewards/margins": 15.574376106262207,
      "rewards/rejected": -16.712596893310547,
      "step": 150
    },
    {
      "epoch": 2.96,
      "step": 156,
      "total_flos": 0.0,
      "train_loss": 0.5786063394103295,
      "train_runtime": 1791.9811,
      "train_samples_per_second": 11.3,
      "train_steps_per_second": 0.087
    }
  ],
  "logging_steps": 10,
  "max_steps": 156,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}