|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.971563981042654, |
|
"eval_steps": 100, |
|
"global_step": 104, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.018957345971563982, |
|
"grad_norm": 264.3919199015695, |
|
"learning_rate": 4.545454545454545e-08, |
|
"logits/chosen": 117.53560638427734, |
|
"logits/rejected": 126.8960952758789, |
|
"logps/chosen": -335.40118408203125, |
|
"logps/rejected": -439.16552734375, |
|
"loss": 1.0, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.1895734597156398, |
|
"grad_norm": 268.0892149105686, |
|
"learning_rate": 4.545454545454545e-07, |
|
"logits/chosen": 135.0012969970703, |
|
"logits/rejected": 138.34600830078125, |
|
"logps/chosen": -395.8360595703125, |
|
"logps/rejected": -439.23095703125, |
|
"loss": 0.9693, |
|
"rewards/accuracies": 0.4513888955116272, |
|
"rewards/chosen": 0.008023276925086975, |
|
"rewards/margins": 0.0031940473709255457, |
|
"rewards/rejected": 0.004829231183975935, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.3791469194312796, |
|
"grad_norm": 204.23414477809496, |
|
"learning_rate": 4.885348141000122e-07, |
|
"logits/chosen": 121.4811019897461, |
|
"logits/rejected": 125.18589782714844, |
|
"logps/chosen": -370.91253662109375, |
|
"logps/rejected": -425.2193298339844, |
|
"loss": 0.8003, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.07497303187847137, |
|
"rewards/margins": 0.33636990189552307, |
|
"rewards/rejected": -0.2613968253135681, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.5687203791469194, |
|
"grad_norm": 173.7304699078016, |
|
"learning_rate": 4.5025027361734613e-07, |
|
"logits/chosen": 141.61241149902344, |
|
"logits/rejected": 135.17759704589844, |
|
"logps/chosen": -426.39410400390625, |
|
"logps/rejected": -472.140625, |
|
"loss": 0.7258, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.6964428424835205, |
|
"rewards/margins": 0.9104781150817871, |
|
"rewards/rejected": -2.6069209575653076, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.7582938388625592, |
|
"grad_norm": 189.807566233107, |
|
"learning_rate": 3.893311157806091e-07, |
|
"logits/chosen": 125.9510498046875, |
|
"logits/rejected": 114.42036437988281, |
|
"logps/chosen": -409.6640625, |
|
"logps/rejected": -436.3352966308594, |
|
"loss": 0.6779, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.7734901905059814, |
|
"rewards/margins": 1.1495102643966675, |
|
"rewards/rejected": -3.9230003356933594, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.9478672985781991, |
|
"grad_norm": 208.75706608415575, |
|
"learning_rate": 3.126631330646801e-07, |
|
"logits/chosen": 141.94976806640625, |
|
"logits/rejected": 145.94911193847656, |
|
"logps/chosen": -465.7303161621094, |
|
"logps/rejected": -547.1525268554688, |
|
"loss": 0.5763, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.3293235301971436, |
|
"rewards/margins": 1.2119842767715454, |
|
"rewards/rejected": -3.5413079261779785, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.1374407582938388, |
|
"grad_norm": 137.79002373576745, |
|
"learning_rate": 2.2891223348923882e-07, |
|
"logits/chosen": 134.63436889648438, |
|
"logits/rejected": 138.12661743164062, |
|
"logps/chosen": -450.57525634765625, |
|
"logps/rejected": -530.6395263671875, |
|
"loss": 0.3625, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -2.628477096557617, |
|
"rewards/margins": 2.0852205753326416, |
|
"rewards/rejected": -4.713697910308838, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.3270142180094786, |
|
"grad_norm": 111.85723665841337, |
|
"learning_rate": 1.4754491880085317e-07, |
|
"logits/chosen": 128.9261932373047, |
|
"logits/rejected": 130.80894470214844, |
|
"logps/chosen": -414.4986877441406, |
|
"logps/rejected": -507.4915466308594, |
|
"loss": 0.2163, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.367741823196411, |
|
"rewards/margins": 2.3311409950256348, |
|
"rewards/rejected": -4.698883056640625, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.5165876777251186, |
|
"grad_norm": 115.1507773079353, |
|
"learning_rate": 7.775827023107834e-08, |
|
"logits/chosen": 116.55960845947266, |
|
"logits/rejected": 133.2828369140625, |
|
"logps/chosen": -403.3376770019531, |
|
"logps/rejected": -510.32763671875, |
|
"loss": 0.1811, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -2.4843320846557617, |
|
"rewards/margins": 2.370164394378662, |
|
"rewards/rejected": -4.854496955871582, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.7061611374407581, |
|
"grad_norm": 106.89250321890712, |
|
"learning_rate": 2.7440387297912122e-08, |
|
"logits/chosen": 116.5929183959961, |
|
"logits/rejected": 129.53289794921875, |
|
"logps/chosen": -430.5577087402344, |
|
"logps/rejected": -534.07861328125, |
|
"loss": 0.1604, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.206333637237549, |
|
"rewards/margins": 2.613506555557251, |
|
"rewards/rejected": -4.819840431213379, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.8957345971563981, |
|
"grad_norm": 92.45827774262504, |
|
"learning_rate": 2.27878296044029e-09, |
|
"logits/chosen": 122.9133529663086, |
|
"logits/rejected": 122.8787612915039, |
|
"logps/chosen": -420.15228271484375, |
|
"logps/rejected": -503.9156188964844, |
|
"loss": 0.1516, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.9088605642318726, |
|
"rewards/margins": 2.448718547821045, |
|
"rewards/rejected": -4.357579231262207, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.8957345971563981, |
|
"eval_logits/chosen": 101.81689453125, |
|
"eval_logits/rejected": 95.97393798828125, |
|
"eval_logps/chosen": -416.4397888183594, |
|
"eval_logps/rejected": -440.0738830566406, |
|
"eval_loss": 0.5250489711761475, |
|
"eval_rewards/accuracies": 0.71875, |
|
"eval_rewards/chosen": -2.638577699661255, |
|
"eval_rewards/margins": 1.2615679502487183, |
|
"eval_rewards/rejected": -3.9001457691192627, |
|
"eval_runtime": 123.0399, |
|
"eval_samples_per_second": 6.096, |
|
"eval_steps_per_second": 0.195, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.971563981042654, |
|
"step": 104, |
|
"total_flos": 0.0, |
|
"train_loss": 0.4740314678503917, |
|
"train_runtime": 2291.7376, |
|
"train_samples_per_second": 5.891, |
|
"train_steps_per_second": 0.045 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 104, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|