|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.957345971563981,
  "eval_steps": 100,
  "global_step": 156,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02,
      "grad_norm": 132.97353908293687,
      "learning_rate": 3.125e-08,
      "logits/chosen": 123.11854553222656,
      "logits/rejected": 97.00198364257812,
      "logps/chosen": -425.18585205078125,
      "logps/rejected": -424.1869201660156,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.19,
      "grad_norm": 206.0883100010928,
      "learning_rate": 3.1249999999999997e-07,
      "logits/chosen": 117.39097595214844,
      "logits/rejected": 136.3163299560547,
      "logps/chosen": -442.6399230957031,
      "logps/rejected": -524.91015625,
      "loss": 0.7186,
      "rewards/accuracies": 0.4930555522441864,
      "rewards/chosen": 0.037425246089696884,
      "rewards/margins": 0.07718456536531448,
      "rewards/rejected": -0.0397593155503273,
      "step": 10
    },
    {
      "epoch": 0.38,
      "grad_norm": 114.8435303205146,
      "learning_rate": 4.989935734988097e-07,
      "logits/chosen": 125.3319091796875,
      "logits/rejected": 132.9754638671875,
      "logps/chosen": -422.8042907714844,
      "logps/rejected": -491.63226318359375,
      "loss": 0.6164,
      "rewards/accuracies": 0.637499988079071,
      "rewards/chosen": 0.17301546037197113,
      "rewards/margins": 0.379099041223526,
      "rewards/rejected": -0.20608356595039368,
      "step": 20
    },
    {
      "epoch": 0.57,
      "grad_norm": 99.27143207986335,
      "learning_rate": 4.877641290737883e-07,
      "logits/chosen": 122.47686767578125,
      "logits/rejected": 125.91865539550781,
      "logps/chosen": -466.9618225097656,
      "logps/rejected": -540.3817138671875,
      "loss": 0.5813,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": -0.5920838117599487,
      "rewards/margins": 1.0662639141082764,
      "rewards/rejected": -1.658347725868225,
      "step": 30
    },
    {
      "epoch": 0.76,
      "grad_norm": 109.14521515462766,
      "learning_rate": 4.646121984004665e-07,
      "logits/chosen": 124.97059631347656,
      "logits/rejected": 119.9173583984375,
      "logps/chosen": -497.7147521972656,
      "logps/rejected": -527.3887939453125,
      "loss": 0.5426,
      "rewards/accuracies": 0.7562500238418579,
      "rewards/chosen": -1.2266700267791748,
      "rewards/margins": 0.9045358896255493,
      "rewards/rejected": -2.1312055587768555,
      "step": 40
    },
    {
      "epoch": 0.95,
      "grad_norm": 115.6113002085735,
      "learning_rate": 4.3069871595684787e-07,
      "logits/chosen": 132.8910369873047,
      "logits/rejected": 133.22190856933594,
      "logps/chosen": -520.63037109375,
      "logps/rejected": -549.1149291992188,
      "loss": 0.5202,
      "rewards/accuracies": 0.6812499761581421,
      "rewards/chosen": -1.8903814554214478,
      "rewards/margins": 1.0029468536376953,
      "rewards/rejected": -2.8933284282684326,
      "step": 50
    },
    {
      "epoch": 1.14,
      "grad_norm": 46.9650475313439,
      "learning_rate": 3.877242453630256e-07,
      "logits/chosen": 131.47854614257812,
      "logits/rejected": 134.71681213378906,
      "logps/chosen": -481.8072814941406,
      "logps/rejected": -534.0516357421875,
      "loss": 0.2837,
      "rewards/accuracies": 0.893750011920929,
      "rewards/chosen": -1.2520891427993774,
      "rewards/margins": 2.2355263233184814,
      "rewards/rejected": -3.4876155853271484,
      "step": 60
    },
    {
      "epoch": 1.33,
      "grad_norm": 47.26485069523079,
      "learning_rate": 3.378437060203357e-07,
      "logits/chosen": 126.1490707397461,
      "logits/rejected": 126.75111389160156,
      "logps/chosen": -452.6795349121094,
      "logps/rejected": -579.5133056640625,
      "loss": 0.1756,
      "rewards/accuracies": 0.9437500238418579,
      "rewards/chosen": -1.188291072845459,
      "rewards/margins": 2.6805100440979004,
      "rewards/rejected": -3.868800640106201,
      "step": 70
    },
    {
      "epoch": 1.52,
      "grad_norm": 46.43874254029814,
      "learning_rate": 2.8355831645441387e-07,
      "logits/chosen": 127.46858978271484,
      "logits/rejected": 128.4056396484375,
      "logps/chosen": -514.4637451171875,
      "logps/rejected": -621.2301635742188,
      "loss": 0.1711,
      "rewards/accuracies": 0.9624999761581421,
      "rewards/chosen": -1.268179178237915,
      "rewards/margins": 3.4565296173095703,
      "rewards/rejected": -4.724708557128906,
      "step": 80
    },
    {
      "epoch": 1.71,
      "grad_norm": 49.11808633093636,
      "learning_rate": 2.2759017277414164e-07,
      "logits/chosen": 112.5447998046875,
      "logits/rejected": 114.98893737792969,
      "logps/chosen": -497.70001220703125,
      "logps/rejected": -589.730224609375,
      "loss": 0.1524,
      "rewards/accuracies": 0.987500011920929,
      "rewards/chosen": -1.7766218185424805,
      "rewards/margins": 3.491291046142578,
      "rewards/rejected": -5.2679123878479,
      "step": 90
    },
    {
      "epoch": 1.9,
      "grad_norm": 47.47224806448749,
      "learning_rate": 1.7274575140626315e-07,
      "logits/chosen": 124.581787109375,
      "logits/rejected": 115.68563079833984,
      "logps/chosen": -516.1900634765625,
      "logps/rejected": -632.6817626953125,
      "loss": 0.1623,
      "rewards/accuracies": 0.987500011920929,
      "rewards/chosen": -1.8899990320205688,
      "rewards/margins": 3.6050896644592285,
      "rewards/rejected": -5.495089054107666,
      "step": 100
    },
    {
      "epoch": 1.9,
      "eval_logits/chosen": 93.8502197265625,
      "eval_logits/rejected": 87.7247543334961,
      "eval_logps/chosen": -512.8825073242188,
      "eval_logps/rejected": -541.5043334960938,
      "eval_loss": 0.48611319065093994,
      "eval_rewards/accuracies": 0.6770833134651184,
      "eval_rewards/chosen": -2.9739017486572266,
      "eval_rewards/margins": 1.5238369703292847,
      "eval_rewards/rejected": -4.497739315032959,
      "eval_runtime": 53.4905,
      "eval_samples_per_second": 14.021,
      "eval_steps_per_second": 0.449,
      "step": 100
    },
    {
      "epoch": 2.09,
      "grad_norm": 25.494387206609645,
      "learning_rate": 1.2177518064852348e-07,
      "logits/chosen": 102.986083984375,
      "logits/rejected": 116.60546875,
      "logps/chosen": -538.074951171875,
      "logps/rejected": -667.3218383789062,
      "loss": 0.1318,
      "rewards/accuracies": 0.96875,
      "rewards/chosen": -2.1493353843688965,
      "rewards/margins": 3.4991326332092285,
      "rewards/rejected": -5.648468017578125,
      "step": 110
    },
    {
      "epoch": 2.27,
      "grad_norm": 24.60483354043265,
      "learning_rate": 7.723433775328384e-08,
      "logits/chosen": 113.220703125,
      "logits/rejected": 114.29705810546875,
      "logps/chosen": -522.1823120117188,
      "logps/rejected": -628.0721435546875,
      "loss": 0.0837,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -2.2452592849731445,
      "rewards/margins": 3.996518611907959,
      "rewards/rejected": -6.2417778968811035,
      "step": 120
    },
    {
      "epoch": 2.46,
      "grad_norm": 32.75955455536007,
      "learning_rate": 4.1356686569674335e-08,
      "logits/chosen": 115.95035552978516,
      "logits/rejected": 120.65645599365234,
      "logps/chosen": -537.8087158203125,
      "logps/rejected": -653.7862548828125,
      "loss": 0.0781,
      "rewards/accuracies": 0.987500011920929,
      "rewards/chosen": -2.2025058269500732,
      "rewards/margins": 4.200939178466797,
      "rewards/rejected": -6.403443813323975,
      "step": 130
    },
    {
      "epoch": 2.65,
      "grad_norm": 23.375967561613557,
      "learning_rate": 1.5941282340065697e-08,
      "logits/chosen": 101.51383972167969,
      "logits/rejected": 102.2659683227539,
      "logps/chosen": -499.16229248046875,
      "logps/rejected": -645.9388427734375,
      "loss": 0.0791,
      "rewards/accuracies": 0.981249988079071,
      "rewards/chosen": -2.6822938919067383,
      "rewards/margins": 4.259942054748535,
      "rewards/rejected": -6.942234992980957,
      "step": 140
    },
    {
      "epoch": 2.84,
      "grad_norm": 27.725216044545164,
      "learning_rate": 2.2625595580163247e-09,
      "logits/chosen": 108.08512878417969,
      "logits/rejected": 121.6434097290039,
      "logps/chosen": -524.5687866210938,
      "logps/rejected": -647.0615844726562,
      "loss": 0.079,
      "rewards/accuracies": 0.9750000238418579,
      "rewards/chosen": -2.486959457397461,
      "rewards/margins": 4.458805084228516,
      "rewards/rejected": -6.945765018463135,
      "step": 150
    },
    {
      "epoch": 2.96,
      "step": 156,
      "total_flos": 0.0,
      "train_loss": 0.28389122929328525,
      "train_runtime": 1811.0132,
      "train_samples_per_second": 11.182,
      "train_steps_per_second": 0.086
    }
  ],
  "logging_steps": 10,
  "max_steps": 156,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}
|
|