|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 3179, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00031456432840515884, |
|
"grad_norm": 0.04916731268167496, |
|
"learning_rate": 1.5723270440251573e-08, |
|
"logits/chosen": -1.942791223526001, |
|
"logits/rejected": -1.9583369493484497, |
|
"logps/chosen": -37.350425720214844, |
|
"logps/rejected": -35.84906005859375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0031456432840515887, |
|
"grad_norm": 0.050606776028871536, |
|
"learning_rate": 1.5723270440251575e-07, |
|
"logits/chosen": -1.8411260843276978, |
|
"logits/rejected": -1.9121019840240479, |
|
"logps/chosen": -33.17695617675781, |
|
"logps/rejected": -35.013282775878906, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.4652777910232544, |
|
"rewards/chosen": -0.000264569855062291, |
|
"rewards/margins": 5.910781055717962e-06, |
|
"rewards/rejected": -0.00027048063930124044, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0062912865681031774, |
|
"grad_norm": 0.045017264783382416, |
|
"learning_rate": 3.144654088050315e-07, |
|
"logits/chosen": -1.8443002700805664, |
|
"logits/rejected": -1.8706448078155518, |
|
"logps/chosen": -32.37808609008789, |
|
"logps/rejected": -35.20783233642578, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.00013874513388145715, |
|
"rewards/margins": 0.0003704810806084424, |
|
"rewards/rejected": -0.00023173594672698528, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.009436929852154765, |
|
"grad_norm": 0.0454835444688797, |
|
"learning_rate": 4.716981132075472e-07, |
|
"logits/chosen": -1.8223037719726562, |
|
"logits/rejected": -1.8500335216522217, |
|
"logps/chosen": -32.97340774536133, |
|
"logps/rejected": -34.259952545166016, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 1.1447194083302747e-05, |
|
"rewards/margins": 0.00015726670972071588, |
|
"rewards/rejected": -0.00014581947471015155, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.012582573136206355, |
|
"grad_norm": 0.04719178006052971, |
|
"learning_rate": 6.28930817610063e-07, |
|
"logits/chosen": -1.8486772775650024, |
|
"logits/rejected": -1.8450359106063843, |
|
"logps/chosen": -33.700775146484375, |
|
"logps/rejected": -37.18444061279297, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.00020007109560538083, |
|
"rewards/margins": 0.00011606378393480554, |
|
"rewards/rejected": -0.0003161348286084831, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.015728216420257943, |
|
"grad_norm": 0.0442265048623085, |
|
"learning_rate": 7.861635220125787e-07, |
|
"logits/chosen": -1.8833131790161133, |
|
"logits/rejected": -1.913037657737732, |
|
"logps/chosen": -33.82078170776367, |
|
"logps/rejected": -34.307395935058594, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.00041835257434286177, |
|
"rewards/margins": 0.00020049764134455472, |
|
"rewards/rejected": -0.0006188501720316708, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.01887385970430953, |
|
"grad_norm": 0.05058182030916214, |
|
"learning_rate": 9.433962264150944e-07, |
|
"logits/chosen": -1.7616949081420898, |
|
"logits/rejected": -1.8223813772201538, |
|
"logps/chosen": -33.48662567138672, |
|
"logps/rejected": -36.167518615722656, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 6.629432027693838e-05, |
|
"rewards/margins": 0.001108249882236123, |
|
"rewards/rejected": -0.0010419555474072695, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.02201950298836112, |
|
"grad_norm": 0.04823003336787224, |
|
"learning_rate": 1.1006289308176102e-06, |
|
"logits/chosen": -1.782994270324707, |
|
"logits/rejected": -1.8296699523925781, |
|
"logps/chosen": -33.87117004394531, |
|
"logps/rejected": -36.236976623535156, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.00032349638058803976, |
|
"rewards/margins": 0.0015318433288484812, |
|
"rewards/rejected": -0.0018553396221250296, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.02516514627241271, |
|
"grad_norm": 0.05422540009021759, |
|
"learning_rate": 1.257861635220126e-06, |
|
"logits/chosen": -1.7576888799667358, |
|
"logits/rejected": -1.7962068319320679, |
|
"logps/chosen": -32.26203155517578, |
|
"logps/rejected": -34.199798583984375, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -6.839539855718613e-05, |
|
"rewards/margins": 0.0013875927543267608, |
|
"rewards/rejected": -0.0014559882692992687, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.028310789556464298, |
|
"grad_norm": 0.055276062339544296, |
|
"learning_rate": 1.4150943396226415e-06, |
|
"logits/chosen": -1.8102718591690063, |
|
"logits/rejected": -1.844451904296875, |
|
"logps/chosen": -31.800710678100586, |
|
"logps/rejected": -34.06111526489258, |
|
"loss": 0.6918, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.0007771268719807267, |
|
"rewards/margins": 0.0020528400782495737, |
|
"rewards/rejected": -0.0028299668338149786, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.031456432840515886, |
|
"grad_norm": 0.04816555231809616, |
|
"learning_rate": 1.5723270440251573e-06, |
|
"logits/chosen": -1.801669716835022, |
|
"logits/rejected": -1.8445507287979126, |
|
"logps/chosen": -34.190731048583984, |
|
"logps/rejected": -35.301910400390625, |
|
"loss": 0.6913, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.0014222835889086127, |
|
"rewards/margins": 0.004616752732545137, |
|
"rewards/rejected": -0.003194468794390559, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.031456432840515886, |
|
"eval_logits/chosen": -1.6348028182983398, |
|
"eval_logits/rejected": -1.6823533773422241, |
|
"eval_logps/chosen": -32.72854995727539, |
|
"eval_logps/rejected": -36.27182388305664, |
|
"eval_loss": 0.6911075115203857, |
|
"eval_rewards/accuracies": 0.6220149397850037, |
|
"eval_rewards/chosen": 0.0006819414556957781, |
|
"eval_rewards/margins": 0.00424056826159358, |
|
"eval_rewards/rejected": -0.0035586270969361067, |
|
"eval_runtime": 220.3412, |
|
"eval_samples_per_second": 97.199, |
|
"eval_steps_per_second": 1.52, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03460207612456748, |
|
"grad_norm": 0.0516941212117672, |
|
"learning_rate": 1.7295597484276729e-06, |
|
"logits/chosen": -1.7992357015609741, |
|
"logits/rejected": -1.819049596786499, |
|
"logps/chosen": -33.700836181640625, |
|
"logps/rejected": -37.1832275390625, |
|
"loss": 0.6907, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.0007088495185598731, |
|
"rewards/margins": 0.0035199751146137714, |
|
"rewards/rejected": -0.004228824749588966, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.03774771940861906, |
|
"grad_norm": 0.05639781430363655, |
|
"learning_rate": 1.8867924528301889e-06, |
|
"logits/chosen": -1.7982286214828491, |
|
"logits/rejected": -1.8285636901855469, |
|
"logps/chosen": -32.256858825683594, |
|
"logps/rejected": -34.39281463623047, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.0023287234362214804, |
|
"rewards/margins": 0.009236618876457214, |
|
"rewards/rejected": -0.00690789520740509, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.04089336269267065, |
|
"grad_norm": 0.052410390228033066, |
|
"learning_rate": 2.044025157232705e-06, |
|
"logits/chosen": -1.8025617599487305, |
|
"logits/rejected": -1.8154237270355225, |
|
"logps/chosen": -32.6540641784668, |
|
"logps/rejected": -35.91321563720703, |
|
"loss": 0.6902, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.0039227548986673355, |
|
"rewards/margins": 0.011436911299824715, |
|
"rewards/rejected": -0.007514156401157379, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.04403900597672224, |
|
"grad_norm": 0.05888905003666878, |
|
"learning_rate": 2.2012578616352204e-06, |
|
"logits/chosen": -1.7658809423446655, |
|
"logits/rejected": -1.8309358358383179, |
|
"logps/chosen": -32.095314025878906, |
|
"logps/rejected": -38.73040771484375, |
|
"loss": 0.6876, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.004927013069391251, |
|
"rewards/margins": 0.016417894512414932, |
|
"rewards/rejected": -0.011490881443023682, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.04718464926077383, |
|
"grad_norm": 0.06400807201862335, |
|
"learning_rate": 2.358490566037736e-06, |
|
"logits/chosen": -1.7594534158706665, |
|
"logits/rejected": -1.798018455505371, |
|
"logps/chosen": -34.921791076660156, |
|
"logps/rejected": -36.6182861328125, |
|
"loss": 0.6881, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.0009333366760984063, |
|
"rewards/margins": 0.010404362343251705, |
|
"rewards/rejected": -0.011337699368596077, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.05033029254482542, |
|
"grad_norm": 0.06766606122255325, |
|
"learning_rate": 2.515723270440252e-06, |
|
"logits/chosen": -1.730738878250122, |
|
"logits/rejected": -1.795153260231018, |
|
"logps/chosen": -30.81954002380371, |
|
"logps/rejected": -36.08769607543945, |
|
"loss": 0.684, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.009594108909368515, |
|
"rewards/margins": 0.0193181075155735, |
|
"rewards/rejected": -0.009723997674882412, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.053475935828877004, |
|
"grad_norm": 0.07456081360578537, |
|
"learning_rate": 2.6729559748427675e-06, |
|
"logits/chosen": -1.7231314182281494, |
|
"logits/rejected": -1.7290071249008179, |
|
"logps/chosen": -31.769222259521484, |
|
"logps/rejected": -36.441688537597656, |
|
"loss": 0.6827, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.014062667265534401, |
|
"rewards/margins": 0.020307175815105438, |
|
"rewards/rejected": -0.006244509480893612, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.056621579112928595, |
|
"grad_norm": 0.06826143711805344, |
|
"learning_rate": 2.830188679245283e-06, |
|
"logits/chosen": -1.739983320236206, |
|
"logits/rejected": -1.7577593326568604, |
|
"logps/chosen": -31.265216827392578, |
|
"logps/rejected": -37.26996994018555, |
|
"loss": 0.6828, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.016319114714860916, |
|
"rewards/margins": 0.030752727761864662, |
|
"rewards/rejected": -0.014433610253036022, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.05976722239698018, |
|
"grad_norm": 0.0847531110048294, |
|
"learning_rate": 2.987421383647799e-06, |
|
"logits/chosen": -1.7099155187606812, |
|
"logits/rejected": -1.7371807098388672, |
|
"logps/chosen": -30.013824462890625, |
|
"logps/rejected": -37.95855712890625, |
|
"loss": 0.6785, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.009831647388637066, |
|
"rewards/margins": 0.03985407203435898, |
|
"rewards/rejected": -0.030022427439689636, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.06291286568103177, |
|
"grad_norm": 0.0835421085357666, |
|
"learning_rate": 3.1446540880503146e-06, |
|
"logits/chosen": -1.6245222091674805, |
|
"logits/rejected": -1.711726427078247, |
|
"logps/chosen": -27.14463233947754, |
|
"logps/rejected": -36.3680305480957, |
|
"loss": 0.6742, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.023222357034683228, |
|
"rewards/margins": 0.03531500697135925, |
|
"rewards/rejected": -0.01209265273064375, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.06291286568103177, |
|
"eval_logits/chosen": -1.4586008787155151, |
|
"eval_logits/rejected": -1.5097368955612183, |
|
"eval_logps/chosen": -32.76309585571289, |
|
"eval_logps/rejected": -40.45964050292969, |
|
"eval_loss": 0.6750917434692383, |
|
"eval_rewards/accuracies": 0.6276119351387024, |
|
"eval_rewards/chosen": 0.0003364614094607532, |
|
"eval_rewards/margins": 0.0457732118666172, |
|
"eval_rewards/rejected": -0.04543674364686012, |
|
"eval_runtime": 216.3229, |
|
"eval_samples_per_second": 99.005, |
|
"eval_steps_per_second": 1.549, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.06605850896508336, |
|
"grad_norm": 0.09109613299369812, |
|
"learning_rate": 3.30188679245283e-06, |
|
"logits/chosen": -1.6076081991195679, |
|
"logits/rejected": -1.6447021961212158, |
|
"logps/chosen": -36.27522277832031, |
|
"logps/rejected": -38.27228546142578, |
|
"loss": 0.678, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.015005774796009064, |
|
"rewards/margins": 0.01694519817829132, |
|
"rewards/rejected": -0.031950972974300385, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.06920415224913495, |
|
"grad_norm": 0.1037890687584877, |
|
"learning_rate": 3.4591194968553458e-06, |
|
"logits/chosen": -1.5653865337371826, |
|
"logits/rejected": -1.602521300315857, |
|
"logps/chosen": -35.69440460205078, |
|
"logps/rejected": -40.78470230102539, |
|
"loss": 0.6718, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.025828268378973007, |
|
"rewards/margins": 0.031778041273355484, |
|
"rewards/rejected": -0.057606302201747894, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.07234979553318653, |
|
"grad_norm": 0.11334498226642609, |
|
"learning_rate": 3.6163522012578618e-06, |
|
"logits/chosen": -1.6311982870101929, |
|
"logits/rejected": -1.6453990936279297, |
|
"logps/chosen": -37.407859802246094, |
|
"logps/rejected": -43.464874267578125, |
|
"loss": 0.6706, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.04616266116499901, |
|
"rewards/margins": 0.025804489850997925, |
|
"rewards/rejected": -0.07196714729070663, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.07549543881723812, |
|
"grad_norm": 0.157041534781456, |
|
"learning_rate": 3.7735849056603777e-06, |
|
"logits/chosen": -1.5806419849395752, |
|
"logits/rejected": -1.6178245544433594, |
|
"logps/chosen": -37.05290985107422, |
|
"logps/rejected": -45.771514892578125, |
|
"loss": 0.6665, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.033209070563316345, |
|
"rewards/margins": 0.0657048299908638, |
|
"rewards/rejected": -0.09891389310359955, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.07864108210128971, |
|
"grad_norm": 0.16409841179847717, |
|
"learning_rate": 3.930817610062894e-06, |
|
"logits/chosen": -1.6633002758026123, |
|
"logits/rejected": -1.6584806442260742, |
|
"logps/chosen": -43.37720489501953, |
|
"logps/rejected": -45.945579528808594, |
|
"loss": 0.6619, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.1028851717710495, |
|
"rewards/margins": 0.014506662264466286, |
|
"rewards/rejected": -0.11739183962345123, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.0817867253853413, |
|
"grad_norm": 0.2012060135602951, |
|
"learning_rate": 4.08805031446541e-06, |
|
"logits/chosen": -1.6090081930160522, |
|
"logits/rejected": -1.6804996728897095, |
|
"logps/chosen": -44.958030700683594, |
|
"logps/rejected": -51.669647216796875, |
|
"loss": 0.6527, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.11712668091058731, |
|
"rewards/margins": 0.04562956839799881, |
|
"rewards/rejected": -0.16275624930858612, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.0849323686693929, |
|
"grad_norm": 0.20246672630310059, |
|
"learning_rate": 4.245283018867925e-06, |
|
"logits/chosen": -1.430743932723999, |
|
"logits/rejected": -1.520498514175415, |
|
"logps/chosen": -43.613075256347656, |
|
"logps/rejected": -59.78533172607422, |
|
"loss": 0.6418, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.10202650725841522, |
|
"rewards/margins": 0.14727506041526794, |
|
"rewards/rejected": -0.24930159747600555, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.08807801195344447, |
|
"grad_norm": 0.30764126777648926, |
|
"learning_rate": 4.402515723270441e-06, |
|
"logits/chosen": -1.369045615196228, |
|
"logits/rejected": -1.4158138036727905, |
|
"logps/chosen": -56.09346389770508, |
|
"logps/rejected": -61.998687744140625, |
|
"loss": 0.6408, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.2292211800813675, |
|
"rewards/margins": 0.048012204468250275, |
|
"rewards/rejected": -0.2772333323955536, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.09122365523749607, |
|
"grad_norm": 0.529365062713623, |
|
"learning_rate": 4.559748427672957e-06, |
|
"logits/chosen": -1.0204923152923584, |
|
"logits/rejected": -1.1075626611709595, |
|
"logps/chosen": -75.31793212890625, |
|
"logps/rejected": -101.33333587646484, |
|
"loss": 0.6085, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.4275882840156555, |
|
"rewards/margins": 0.2434498369693756, |
|
"rewards/rejected": -0.6710380911827087, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.09436929852154766, |
|
"grad_norm": 0.5780752301216125, |
|
"learning_rate": 4.716981132075472e-06, |
|
"logits/chosen": -0.8171870112419128, |
|
"logits/rejected": -0.8328253626823425, |
|
"logps/chosen": -90.27127838134766, |
|
"logps/rejected": -113.9675521850586, |
|
"loss": 0.6081, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.5765252709388733, |
|
"rewards/margins": 0.2026272714138031, |
|
"rewards/rejected": -0.779152512550354, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.09436929852154766, |
|
"eval_logits/chosen": -0.38301563262939453, |
|
"eval_logits/rejected": -0.47008848190307617, |
|
"eval_logps/chosen": -84.73033142089844, |
|
"eval_logps/rejected": -122.3552474975586, |
|
"eval_loss": 0.5872128009796143, |
|
"eval_rewards/accuracies": 0.6619402766227722, |
|
"eval_rewards/chosen": -0.5193358659744263, |
|
"eval_rewards/margins": 0.34505695104599, |
|
"eval_rewards/rejected": -0.8643926978111267, |
|
"eval_runtime": 216.4352, |
|
"eval_samples_per_second": 98.953, |
|
"eval_steps_per_second": 1.548, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.09751494180559925, |
|
"grad_norm": 0.7804479002952576, |
|
"learning_rate": 4.874213836477988e-06, |
|
"logits/chosen": -0.6313827037811279, |
|
"logits/rejected": -0.6968099474906921, |
|
"logps/chosen": -96.16786193847656, |
|
"logps/rejected": -143.1085968017578, |
|
"loss": 0.5923, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.6421507596969604, |
|
"rewards/margins": 0.423184335231781, |
|
"rewards/rejected": -1.0653350353240967, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.10066058508965084, |
|
"grad_norm": 0.9274519085884094, |
|
"learning_rate": 4.999993971158594e-06, |
|
"logits/chosen": -0.6999994516372681, |
|
"logits/rejected": -0.7624176144599915, |
|
"logps/chosen": -130.17660522460938, |
|
"logps/rejected": -174.801513671875, |
|
"loss": 0.5817, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.9600201845169067, |
|
"rewards/margins": 0.4198983609676361, |
|
"rewards/rejected": -1.3799186944961548, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.10380622837370242, |
|
"grad_norm": 0.7980369925498962, |
|
"learning_rate": 4.9997829647624885e-06, |
|
"logits/chosen": -0.5264394879341125, |
|
"logits/rejected": -0.5980736017227173, |
|
"logps/chosen": -137.11154174804688, |
|
"logps/rejected": -191.84661865234375, |
|
"loss": 0.5692, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -1.0387710332870483, |
|
"rewards/margins": 0.5226823091506958, |
|
"rewards/rejected": -1.5614532232284546, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.10695187165775401, |
|
"grad_norm": 0.898904025554657, |
|
"learning_rate": 4.999270545372964e-06, |
|
"logits/chosen": -0.8326930999755859, |
|
"logits/rejected": -0.9346593618392944, |
|
"logps/chosen": -126.531005859375, |
|
"logps/rejected": -181.36660766601562, |
|
"loss": 0.5452, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.9337248802185059, |
|
"rewards/margins": 0.5069459676742554, |
|
"rewards/rejected": -1.4406708478927612, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.1100975149418056, |
|
"grad_norm": 1.0713428258895874, |
|
"learning_rate": 4.998456774775329e-06, |
|
"logits/chosen": -0.9333699941635132, |
|
"logits/rejected": -0.9694175720214844, |
|
"logps/chosen": -173.23915100097656, |
|
"logps/rejected": -227.34555053710938, |
|
"loss": 0.5273, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.3962385654449463, |
|
"rewards/margins": 0.5004255175590515, |
|
"rewards/rejected": -1.896664023399353, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.11324315822585719, |
|
"grad_norm": 0.9318549036979675, |
|
"learning_rate": 4.997341751090515e-06, |
|
"logits/chosen": -0.9799184799194336, |
|
"logits/rejected": -1.085832118988037, |
|
"logps/chosen": -178.21102905273438, |
|
"logps/rejected": -254.60488891601562, |
|
"loss": 0.4851, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.4430134296417236, |
|
"rewards/margins": 0.741083562374115, |
|
"rewards/rejected": -2.1840968132019043, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.11638880150990878, |
|
"grad_norm": 1.1828168630599976, |
|
"learning_rate": 4.995925608763244e-06, |
|
"logits/chosen": -1.0449360609054565, |
|
"logits/rejected": -1.1332659721374512, |
|
"logps/chosen": -176.7970428466797, |
|
"logps/rejected": -264.9158020019531, |
|
"loss": 0.5227, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.4418004751205444, |
|
"rewards/margins": 0.8605502843856812, |
|
"rewards/rejected": -2.3023507595062256, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.11953444479396036, |
|
"grad_norm": 1.250595211982727, |
|
"learning_rate": 4.994208518545819e-06, |
|
"logits/chosen": -1.1048619747161865, |
|
"logits/rejected": -1.2548190355300903, |
|
"logps/chosen": -191.09576416015625, |
|
"logps/rejected": -273.36431884765625, |
|
"loss": 0.4691, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.585456371307373, |
|
"rewards/margins": 0.7954282164573669, |
|
"rewards/rejected": -2.3808846473693848, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.12268008807801195, |
|
"grad_norm": 1.1175463199615479, |
|
"learning_rate": 4.992190687477535e-06, |
|
"logits/chosen": -1.1619799137115479, |
|
"logits/rejected": -1.284158706665039, |
|
"logps/chosen": -190.9915008544922, |
|
"logps/rejected": -292.0174865722656, |
|
"loss": 0.4549, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.5870094299316406, |
|
"rewards/margins": 0.9745893478393555, |
|
"rewards/rejected": -2.561598539352417, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.12582573136206354, |
|
"grad_norm": 1.3056316375732422, |
|
"learning_rate": 4.989872358859716e-06, |
|
"logits/chosen": -0.7161710858345032, |
|
"logits/rejected": -0.9287319183349609, |
|
"logps/chosen": -227.08779907226562, |
|
"logps/rejected": -343.8471374511719, |
|
"loss": 0.4463, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.938988447189331, |
|
"rewards/margins": 1.1392673254013062, |
|
"rewards/rejected": -3.078256130218506, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.12582573136206354, |
|
"eval_logits/chosen": -0.21009895205497742, |
|
"eval_logits/rejected": -0.36726853251457214, |
|
"eval_logps/chosen": -235.92169189453125, |
|
"eval_logps/rejected": -358.0407409667969, |
|
"eval_loss": 0.39779898524284363, |
|
"eval_rewards/accuracies": 0.7190298438072205, |
|
"eval_rewards/chosen": -2.0312490463256836, |
|
"eval_rewards/margins": 1.1899985074996948, |
|
"eval_rewards/rejected": -3.221247911453247, |
|
"eval_runtime": 216.5347, |
|
"eval_samples_per_second": 98.908, |
|
"eval_steps_per_second": 1.547, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.12897137464611513, |
|
"grad_norm": 1.398200273513794, |
|
"learning_rate": 4.987253812226373e-06, |
|
"logits/chosen": -0.9023457765579224, |
|
"logits/rejected": -1.0780017375946045, |
|
"logps/chosen": -238.050048828125, |
|
"logps/rejected": -369.2995300292969, |
|
"loss": 0.4253, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.0568206310272217, |
|
"rewards/margins": 1.2894903421401978, |
|
"rewards/rejected": -3.346311092376709, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.13211701793016672, |
|
"grad_norm": 1.451423168182373, |
|
"learning_rate": 4.984335363310513e-06, |
|
"logits/chosen": -0.7545775771141052, |
|
"logits/rejected": -0.9126666784286499, |
|
"logps/chosen": -201.9867706298828, |
|
"logps/rejected": -319.11279296875, |
|
"loss": 0.4364, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.7043613195419312, |
|
"rewards/margins": 1.1467878818511963, |
|
"rewards/rejected": -2.851149082183838, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.13526266121421832, |
|
"grad_norm": 1.5594825744628906, |
|
"learning_rate": 4.9811173640060516e-06, |
|
"logits/chosen": -0.8629263043403625, |
|
"logits/rejected": -0.8639553189277649, |
|
"logps/chosen": -271.3802795410156, |
|
"logps/rejected": -379.85430908203125, |
|
"loss": 0.4211, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -2.378561019897461, |
|
"rewards/margins": 1.0667835474014282, |
|
"rewards/rejected": -3.4453444480895996, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.1384083044982699, |
|
"grad_norm": 1.3122005462646484, |
|
"learning_rate": 4.977600202325396e-06, |
|
"logits/chosen": -1.0775721073150635, |
|
"logits/rejected": -1.1867996454238892, |
|
"logps/chosen": -253.1031036376953, |
|
"logps/rejected": -368.8877868652344, |
|
"loss": 0.3914, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.1909329891204834, |
|
"rewards/margins": 1.1237446069717407, |
|
"rewards/rejected": -3.3146774768829346, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.14155394778232147, |
|
"grad_norm": 1.3066484928131104, |
|
"learning_rate": 4.973784302352654e-06, |
|
"logits/chosen": -0.8042716979980469, |
|
"logits/rejected": -0.9650095105171204, |
|
"logps/chosen": -248.71920776367188, |
|
"logps/rejected": -361.9927673339844, |
|
"loss": 0.3995, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.164215564727783, |
|
"rewards/margins": 1.123422622680664, |
|
"rewards/rejected": -3.2876381874084473, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.14469959106637306, |
|
"grad_norm": 1.8094184398651123, |
|
"learning_rate": 4.969670124192504e-06, |
|
"logits/chosen": -0.4726603627204895, |
|
"logits/rejected": -0.5924113988876343, |
|
"logps/chosen": -238.7507781982422, |
|
"logps/rejected": -370.9023742675781, |
|
"loss": 0.3829, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.0481951236724854, |
|
"rewards/margins": 1.3083794116973877, |
|
"rewards/rejected": -3.356574296951294, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.14784523435042465, |
|
"grad_norm": 1.7695521116256714, |
|
"learning_rate": 4.965258163914713e-06, |
|
"logits/chosen": -0.552959144115448, |
|
"logits/rejected": -0.5529184341430664, |
|
"logps/chosen": -275.89312744140625, |
|
"logps/rejected": -398.515380859375, |
|
"loss": 0.3778, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.421806812286377, |
|
"rewards/margins": 1.2187397480010986, |
|
"rewards/rejected": -3.6405467987060547, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.15099087763447624, |
|
"grad_norm": 1.5262759923934937, |
|
"learning_rate": 4.960548953494325e-06, |
|
"logits/chosen": -0.6184431314468384, |
|
"logits/rejected": -0.6488745212554932, |
|
"logps/chosen": -279.20599365234375, |
|
"logps/rejected": -406.8487548828125, |
|
"loss": 0.3824, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -2.461134672164917, |
|
"rewards/margins": 1.263789415359497, |
|
"rewards/rejected": -3.724924087524414, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.15413652091852784, |
|
"grad_norm": 1.7929073572158813, |
|
"learning_rate": 4.9555430607475194e-06, |
|
"logits/chosen": -0.2868812382221222, |
|
"logits/rejected": -0.37936121225357056, |
|
"logps/chosen": -271.26873779296875, |
|
"logps/rejected": -406.4844665527344, |
|
"loss": 0.3623, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.3893320560455322, |
|
"rewards/margins": 1.3300443887710571, |
|
"rewards/rejected": -3.7193763256073, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.15728216420257943, |
|
"grad_norm": 2.1217312812805176, |
|
"learning_rate": 4.9502410892631426e-06, |
|
"logits/chosen": -0.17514923214912415, |
|
"logits/rejected": -0.22318892180919647, |
|
"logps/chosen": -285.77001953125, |
|
"logps/rejected": -431.67822265625, |
|
"loss": 0.3548, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.51606822013855, |
|
"rewards/margins": 1.4269219636917114, |
|
"rewards/rejected": -3.9429900646209717, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.15728216420257943, |
|
"eval_logits/chosen": 0.6032934188842773, |
|
"eval_logits/rejected": 0.4417119324207306, |
|
"eval_logps/chosen": -284.2136535644531, |
|
"eval_logps/rejected": -451.9689025878906, |
|
"eval_loss": 0.3047660291194916, |
|
"eval_rewards/accuracies": 0.7697761058807373, |
|
"eval_rewards/chosen": -2.514169216156006, |
|
"eval_rewards/margins": 1.6463606357574463, |
|
"eval_rewards/rejected": -4.160529613494873, |
|
"eval_runtime": 216.4531, |
|
"eval_samples_per_second": 98.945, |
|
"eval_steps_per_second": 1.548, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.16042780748663102, |
|
"grad_norm": 2.802795648574829, |
|
"learning_rate": 4.9446436783299315e-06, |
|
"logits/chosen": -0.009161519818007946, |
|
"logits/rejected": -0.1047726422548294, |
|
"logps/chosen": -297.81243896484375, |
|
"logps/rejected": -442.32080078125, |
|
"loss": 0.3762, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.6446642875671387, |
|
"rewards/margins": 1.4199548959732056, |
|
"rewards/rejected": -4.064619064331055, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.1635734507706826, |
|
"grad_norm": 1.8657071590423584, |
|
"learning_rate": 4.938751502859433e-06, |
|
"logits/chosen": -0.21130414307117462, |
|
"logits/rejected": -0.31424680352211, |
|
"logps/chosen": -301.1555480957031, |
|
"logps/rejected": -447.023193359375, |
|
"loss": 0.3961, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.681652069091797, |
|
"rewards/margins": 1.4277719259262085, |
|
"rewards/rejected": -4.109424114227295, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.1667190940547342, |
|
"grad_norm": 1.8475096225738525, |
|
"learning_rate": 4.932565273304623e-06, |
|
"logits/chosen": -0.23862802982330322, |
|
"logits/rejected": -0.27317532896995544, |
|
"logps/chosen": -288.6937561035156, |
|
"logps/rejected": -411.35443115234375, |
|
"loss": 0.3354, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.549204111099243, |
|
"rewards/margins": 1.2228602170944214, |
|
"rewards/rejected": -3.772064685821533, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.1698647373387858, |
|
"grad_norm": 2.7880358695983887, |
|
"learning_rate": 4.926085735574244e-06, |
|
"logits/chosen": -0.12997403740882874, |
|
"logits/rejected": -0.3530030846595764, |
|
"logps/chosen": -315.6742248535156, |
|
"logps/rejected": -505.62261962890625, |
|
"loss": 0.3532, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.8122756481170654, |
|
"rewards/margins": 1.881052017211914, |
|
"rewards/rejected": -4.693327903747559, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.17301038062283736, |
|
"grad_norm": 2.3125181198120117, |
|
"learning_rate": 4.9193136709428666e-06, |
|
"logits/chosen": 0.041652340441942215, |
|
"logits/rejected": -0.007311803288757801, |
|
"logps/chosen": -301.4400634765625, |
|
"logps/rejected": -451.60888671875, |
|
"loss": 0.3412, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.6784582138061523, |
|
"rewards/margins": 1.4726378917694092, |
|
"rewards/rejected": -4.151095867156982, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.17615602390688895, |
|
"grad_norm": 2.2698020935058594, |
|
"learning_rate": 4.912249895956687e-06, |
|
"logits/chosen": 0.3050948977470398, |
|
"logits/rejected": 0.16047334671020508, |
|
"logps/chosen": -294.1412353515625, |
|
"logps/rejected": -494.5099182128906, |
|
"loss": 0.3382, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -2.614988327026367, |
|
"rewards/margins": 1.9638454914093018, |
|
"rewards/rejected": -4.57883358001709, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.17930166719094054, |
|
"grad_norm": 2.4010682106018066, |
|
"learning_rate": 4.904895262335072e-06, |
|
"logits/chosen": 0.34259656071662903, |
|
"logits/rejected": 0.16924947500228882, |
|
"logps/chosen": -317.7947692871094, |
|
"logps/rejected": -521.7095336914062, |
|
"loss": 0.3234, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.8498454093933105, |
|
"rewards/margins": 2.0226728916168213, |
|
"rewards/rejected": -4.872518062591553, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.18244731047499213, |
|
"grad_norm": 2.444711208343506, |
|
"learning_rate": 4.897250656867863e-06, |
|
"logits/chosen": 0.4256654381752014, |
|
"logits/rejected": 0.26078343391418457, |
|
"logps/chosen": -328.4891052246094, |
|
"logps/rejected": -486.4072265625, |
|
"loss": 0.3038, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.9353702068328857, |
|
"rewards/margins": 1.5686007738113403, |
|
"rewards/rejected": -4.503971576690674, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.18559295375904372, |
|
"grad_norm": 2.8254048824310303, |
|
"learning_rate": 4.889317001308447e-06, |
|
"logits/chosen": 0.36937472224235535, |
|
"logits/rejected": 0.22195684909820557, |
|
"logps/chosen": -374.08203125, |
|
"logps/rejected": -554.9903564453125, |
|
"loss": 0.3311, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -3.4078540802001953, |
|
"rewards/margins": 1.8045555353164673, |
|
"rewards/rejected": -5.212409496307373, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.1887385970430953, |
|
"grad_norm": 2.11535382270813, |
|
"learning_rate": 4.881095252262619e-06, |
|
"logits/chosen": 0.2527967095375061, |
|
"logits/rejected": 0.26717156171798706, |
|
"logps/chosen": -343.63336181640625, |
|
"logps/rejected": -514.2552490234375, |
|
"loss": 0.3014, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -3.1175334453582764, |
|
"rewards/margins": 1.6867754459381104, |
|
"rewards/rejected": -4.804308891296387, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.1887385970430953, |
|
"eval_logits/chosen": 1.1670206785202026, |
|
"eval_logits/rejected": 1.002642273902893, |
|
"eval_logps/chosen": -309.413818359375, |
|
"eval_logps/rejected": -516.2450561523438, |
|
"eval_loss": 0.2395239621400833, |
|
"eval_rewards/accuracies": 0.7962686419487, |
|
"eval_rewards/chosen": -2.7661707401275635, |
|
"eval_rewards/margins": 2.0371201038360596, |
|
"eval_rewards/rejected": -4.803291320800781, |
|
"eval_runtime": 216.5737, |
|
"eval_samples_per_second": 98.89, |
|
"eval_steps_per_second": 1.547, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.1918842403271469, |
|
"grad_norm": 1.8349930047988892, |
|
"learning_rate": 4.872586401073238e-06, |
|
"logits/chosen": 0.5030576586723328, |
|
"logits/rejected": 0.5036323070526123, |
|
"logps/chosen": -293.0291442871094, |
|
"logps/rejected": -525.871826171875, |
|
"loss": 0.302, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.590064525604248, |
|
"rewards/margins": 2.304649829864502, |
|
"rewards/rejected": -4.89471435546875, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.1950298836111985, |
|
"grad_norm": 2.018960475921631, |
|
"learning_rate": 4.863791473700695e-06, |
|
"logits/chosen": 0.6549355983734131, |
|
"logits/rejected": 0.6538249254226685, |
|
"logps/chosen": -299.74774169921875, |
|
"logps/rejected": -482.9994201660156, |
|
"loss": 0.3224, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.6672844886779785, |
|
"rewards/margins": 1.806138277053833, |
|
"rewards/rejected": -4.473422050476074, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.1981755268952501, |
|
"grad_norm": 2.5601086616516113, |
|
"learning_rate": 4.854711530599207e-06, |
|
"logits/chosen": 0.4226033091545105, |
|
"logits/rejected": 0.4217701852321625, |
|
"logps/chosen": -349.31597900390625, |
|
"logps/rejected": -569.6148681640625, |
|
"loss": 0.3058, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -3.159015417098999, |
|
"rewards/margins": 2.176971435546875, |
|
"rewards/rejected": -5.335987091064453, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.20132117017930168, |
|
"grad_norm": 2.710289478302002, |
|
"learning_rate": 4.845347666588952e-06, |
|
"logits/chosen": 0.6457022428512573, |
|
"logits/rejected": 0.5944028496742249, |
|
"logps/chosen": -350.6747131347656, |
|
"logps/rejected": -578.0604858398438, |
|
"loss": 0.2957, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -3.1813666820526123, |
|
"rewards/margins": 2.2217352390289307, |
|
"rewards/rejected": -5.403101921081543, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.20446681346335324, |
|
"grad_norm": 2.493267059326172, |
|
"learning_rate": 4.835701010724061e-06, |
|
"logits/chosen": 0.6264899373054504, |
|
"logits/rejected": 0.43023762106895447, |
|
"logps/chosen": -328.0311279296875, |
|
"logps/rejected": -580.6969604492188, |
|
"loss": 0.287, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.9558825492858887, |
|
"rewards/margins": 2.48372483253479, |
|
"rewards/rejected": -5.4396071434021, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.20761245674740483, |
|
"grad_norm": 2.2407076358795166, |
|
"learning_rate": 4.825772726156479e-06, |
|
"logits/chosen": 0.6440389752388, |
|
"logits/rejected": 0.4974190294742584, |
|
"logps/chosen": -372.54498291015625, |
|
"logps/rejected": -562.779541015625, |
|
"loss": 0.2556, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -3.39410662651062, |
|
"rewards/margins": 1.8999735116958618, |
|
"rewards/rejected": -5.294079780578613, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.21075810003145642, |
|
"grad_norm": 2.0860157012939453, |
|
"learning_rate": 4.8155640099957206e-06, |
|
"logits/chosen": 0.5455148816108704, |
|
"logits/rejected": 0.4411854147911072, |
|
"logps/chosen": -334.1847229003906, |
|
"logps/rejected": -556.5390625, |
|
"loss": 0.265, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -3.0214390754699707, |
|
"rewards/margins": 2.2172813415527344, |
|
"rewards/rejected": -5.238720893859863, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.21390374331550802, |
|
"grad_norm": 2.7552173137664795, |
|
"learning_rate": 4.805076093164527e-06, |
|
"logits/chosen": 0.6582053899765015, |
|
"logits/rejected": 0.5497349500656128, |
|
"logps/chosen": -365.2132873535156, |
|
"logps/rejected": -603.5557250976562, |
|
"loss": 0.2555, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -3.3236758708953857, |
|
"rewards/margins": 2.3519177436828613, |
|
"rewards/rejected": -5.675593376159668, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.2170493865995596, |
|
"grad_norm": 2.692828893661499, |
|
"learning_rate": 4.794310240250444e-06, |
|
"logits/chosen": 0.671535849571228, |
|
"logits/rejected": 0.7082802653312683, |
|
"logps/chosen": -412.1849060058594, |
|
"logps/rejected": -631.167724609375, |
|
"loss": 0.2636, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -3.7698707580566406, |
|
"rewards/margins": 2.1822402477264404, |
|
"rewards/rejected": -5.95211124420166, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.2201950298836112, |
|
"grad_norm": 3.005434274673462, |
|
"learning_rate": 4.783267749353346e-06, |
|
"logits/chosen": 1.0438460111618042, |
|
"logits/rejected": 0.8579059839248657, |
|
"logps/chosen": -337.77716064453125, |
|
"logps/rejected": -573.6937255859375, |
|
"loss": 0.25, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -3.0438902378082275, |
|
"rewards/margins": 2.341344118118286, |
|
"rewards/rejected": -5.385234355926514, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.2201950298836112, |
|
"eval_logits/chosen": 1.505146861076355, |
|
"eval_logits/rejected": 1.3420602083206177, |
|
"eval_logps/chosen": -343.1828308105469, |
|
"eval_logps/rejected": -577.8538208007812, |
|
"eval_loss": 0.19892141222953796, |
|
"eval_rewards/accuracies": 0.8235074877738953, |
|
"eval_rewards/chosen": -3.103861093521118, |
|
"eval_rewards/margins": 2.3155174255371094, |
|
"eval_rewards/rejected": -5.41937780380249, |
|
"eval_runtime": 216.5153, |
|
"eval_samples_per_second": 98.917, |
|
"eval_steps_per_second": 1.547, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.2233406731676628, |
|
"grad_norm": 2.0825021266937256, |
|
"learning_rate": 4.771949951928918e-06, |
|
"logits/chosen": 0.7122886776924133, |
|
"logits/rejected": 0.5764984488487244, |
|
"logps/chosen": -368.8441162109375, |
|
"logps/rejected": -645.1354370117188, |
|
"loss": 0.2539, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -3.350966215133667, |
|
"rewards/margins": 2.7285890579223633, |
|
"rewards/rejected": -6.079554557800293, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.22648631645171438, |
|
"grad_norm": 2.4290201663970947, |
|
"learning_rate": 4.76035821262811e-06, |
|
"logits/chosen": 0.9211057424545288, |
|
"logits/rejected": 0.761319637298584, |
|
"logps/chosen": -340.1125183105469, |
|
"logps/rejected": -624.1261596679688, |
|
"loss": 0.2178, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -3.0774872303009033, |
|
"rewards/margins": 2.7953667640686035, |
|
"rewards/rejected": -5.872854232788086, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.22963195973576597, |
|
"grad_norm": 2.927579879760742, |
|
"learning_rate": 4.748493929132599e-06, |
|
"logits/chosen": 0.9934293031692505, |
|
"logits/rejected": 0.7011948227882385, |
|
"logps/chosen": -343.99798583984375, |
|
"logps/rejected": -611.0202026367188, |
|
"loss": 0.2574, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -3.108729839324951, |
|
"rewards/margins": 2.6548655033111572, |
|
"rewards/rejected": -5.763595104217529, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.23277760301981756, |
|
"grad_norm": 2.413174629211426, |
|
"learning_rate": 4.7363585319862535e-06, |
|
"logits/chosen": 0.8890258073806763, |
|
"logits/rejected": 0.8161128759384155, |
|
"logps/chosen": -297.27734375, |
|
"logps/rejected": -534.4481201171875, |
|
"loss": 0.2185, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.6455435752868652, |
|
"rewards/margins": 2.361142158508301, |
|
"rewards/rejected": -5.006685256958008, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.23592324630386913, |
|
"grad_norm": 1.7433052062988281, |
|
"learning_rate": 4.7239534844226595e-06, |
|
"logits/chosen": 1.1142632961273193, |
|
"logits/rejected": 1.041259527206421, |
|
"logps/chosen": -389.9774169921875, |
|
"logps/rejected": -677.3299560546875, |
|
"loss": 0.2308, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -3.550323009490967, |
|
"rewards/margins": 2.8546836376190186, |
|
"rewards/rejected": -6.405006408691406, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.23906888958792072, |
|
"grad_norm": 2.963017225265503, |
|
"learning_rate": 4.711280282188674e-06, |
|
"logits/chosen": 1.2721364498138428, |
|
"logits/rejected": 1.072861909866333, |
|
"logps/chosen": -407.93896484375, |
|
"logps/rejected": -637.9373779296875, |
|
"loss": 0.2364, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -3.7379825115203857, |
|
"rewards/margins": 2.2759547233581543, |
|
"rewards/rejected": -6.013937950134277, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.2422145328719723, |
|
"grad_norm": 2.531205654144287, |
|
"learning_rate": 4.698340453364087e-06, |
|
"logits/chosen": 1.2041015625, |
|
"logits/rejected": 0.9627124667167664, |
|
"logps/chosen": -381.7981262207031, |
|
"logps/rejected": -627.6697998046875, |
|
"loss": 0.225, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -3.4880454540252686, |
|
"rewards/margins": 2.4269821643829346, |
|
"rewards/rejected": -5.915027618408203, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.2453601761560239, |
|
"grad_norm": 2.7467708587646484, |
|
"learning_rate": 4.685135558177361e-06, |
|
"logits/chosen": 1.0979419946670532, |
|
"logits/rejected": 0.9988969564437866, |
|
"logps/chosen": -381.86737060546875, |
|
"logps/rejected": -638.2721557617188, |
|
"loss": 0.2466, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -3.4817283153533936, |
|
"rewards/margins": 2.545217990875244, |
|
"rewards/rejected": -6.026947021484375, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.2485058194400755, |
|
"grad_norm": 2.4694933891296387, |
|
"learning_rate": 4.671667188817516e-06, |
|
"logits/chosen": 0.9596866369247437, |
|
"logits/rejected": 0.988632321357727, |
|
"logps/chosen": -394.10980224609375, |
|
"logps/rejected": -659.1544799804688, |
|
"loss": 0.2089, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -3.600590944290161, |
|
"rewards/margins": 2.62831711769104, |
|
"rewards/rejected": -6.228908538818359, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.2516514627241271, |
|
"grad_norm": 3.156921863555908, |
|
"learning_rate": 4.657936969242146e-06, |
|
"logits/chosen": 1.0584884881973267, |
|
"logits/rejected": 1.056359052658081, |
|
"logps/chosen": -367.99749755859375, |
|
"logps/rejected": -622.4884033203125, |
|
"loss": 0.2163, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -3.3479487895965576, |
|
"rewards/margins": 2.537045955657959, |
|
"rewards/rejected": -5.8849945068359375, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.2516514627241271, |
|
"eval_logits/chosen": 1.969736933708191, |
|
"eval_logits/rejected": 1.8083586692810059, |
|
"eval_logps/chosen": -378.15106201171875, |
|
"eval_logps/rejected": -674.7254638671875, |
|
"eval_loss": 0.1564020812511444, |
|
"eval_rewards/accuracies": 0.8369402885437012, |
|
"eval_rewards/chosen": -3.453542947769165, |
|
"eval_rewards/margins": 2.9345521926879883, |
|
"eval_rewards/rejected": -6.388094902038574, |
|
"eval_runtime": 216.5645, |
|
"eval_samples_per_second": 98.894, |
|
"eval_steps_per_second": 1.547, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.2547971060081787, |
|
"grad_norm": 2.260498523712158, |
|
"learning_rate": 4.643946554981607e-06, |
|
"logits/chosen": 1.3992656469345093, |
|
"logits/rejected": 1.3807123899459839, |
|
"logps/chosen": -410.2210388183594, |
|
"logps/rejected": -693.8885498046875, |
|
"loss": 0.2184, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -3.762162685394287, |
|
"rewards/margins": 2.8245062828063965, |
|
"rewards/rejected": -6.586669921875, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.25794274929223027, |
|
"grad_norm": 2.421734571456909, |
|
"learning_rate": 4.629697632939402e-06, |
|
"logits/chosen": 0.9631272554397583, |
|
"logits/rejected": 0.9137741923332214, |
|
"logps/chosen": -403.3345642089844, |
|
"logps/rejected": -690.8912963867188, |
|
"loss": 0.1978, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -3.6943416595458984, |
|
"rewards/margins": 2.8497314453125, |
|
"rewards/rejected": -6.544073581695557, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.26108839257628186, |
|
"grad_norm": 2.466891050338745, |
|
"learning_rate": 4.615191921188782e-06, |
|
"logits/chosen": 1.216644287109375, |
|
"logits/rejected": 1.0390194654464722, |
|
"logps/chosen": -462.14080810546875, |
|
"logps/rejected": -755.9129028320312, |
|
"loss": 0.1893, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -4.278713226318359, |
|
"rewards/margins": 2.9179956912994385, |
|
"rewards/rejected": -7.196709632873535, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.26423403586033345, |
|
"grad_norm": 2.796055793762207, |
|
"learning_rate": 4.600431168765588e-06, |
|
"logits/chosen": 1.23114013671875, |
|
"logits/rejected": 1.243263840675354, |
|
"logps/chosen": -384.63330078125, |
|
"logps/rejected": -629.2211303710938, |
|
"loss": 0.2038, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -3.522122859954834, |
|
"rewards/margins": 2.426643133163452, |
|
"rewards/rejected": -5.948765754699707, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.26737967914438504, |
|
"grad_norm": 3.352532148361206, |
|
"learning_rate": 4.58541715545736e-06, |
|
"logits/chosen": 1.1673449277877808, |
|
"logits/rejected": 1.1369606256484985, |
|
"logps/chosen": -424.13214111328125, |
|
"logps/rejected": -703.1793212890625, |
|
"loss": 0.2075, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -3.89024019241333, |
|
"rewards/margins": 2.7663023471832275, |
|
"rewards/rejected": -6.656541347503662, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.27052532242843663, |
|
"grad_norm": 2.815277338027954, |
|
"learning_rate": 4.570151691588739e-06, |
|
"logits/chosen": 1.4445253610610962, |
|
"logits/rejected": 1.3472189903259277, |
|
"logps/chosen": -411.64410400390625, |
|
"logps/rejected": -683.1400756835938, |
|
"loss": 0.2071, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -3.766723155975342, |
|
"rewards/margins": 2.7148866653442383, |
|
"rewards/rejected": -6.481610298156738, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.2736709657124882, |
|
"grad_norm": 2.5210723876953125, |
|
"learning_rate": 4.554636617803182e-06, |
|
"logits/chosen": 1.489781141281128, |
|
"logits/rejected": 1.1546220779418945, |
|
"logps/chosen": -364.3757019042969, |
|
"logps/rejected": -662.89990234375, |
|
"loss": 0.1936, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -3.301492691040039, |
|
"rewards/margins": 2.953282117843628, |
|
"rewards/rejected": -6.254774570465088, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.2768166089965398, |
|
"grad_norm": 2.1922731399536133, |
|
"learning_rate": 4.538873804841028e-06, |
|
"logits/chosen": 1.289786696434021, |
|
"logits/rejected": 1.0749728679656982, |
|
"logps/chosen": -441.4300842285156, |
|
"logps/rejected": -771.0155029296875, |
|
"loss": 0.1595, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -4.090059757232666, |
|
"rewards/margins": 3.2505059242248535, |
|
"rewards/rejected": -7.3405656814575195, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.2799622522805914, |
|
"grad_norm": 3.1142098903656006, |
|
"learning_rate": 4.522865153313932e-06, |
|
"logits/chosen": 1.5812687873840332, |
|
"logits/rejected": 1.544809103012085, |
|
"logps/chosen": -445.30047607421875, |
|
"logps/rejected": -758.8344116210938, |
|
"loss": 0.1982, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -4.112375259399414, |
|
"rewards/margins": 3.127253770828247, |
|
"rewards/rejected": -7.239628791809082, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.28310789556464294, |
|
"grad_norm": 3.9424490928649902, |
|
"learning_rate": 4.506612593475701e-06, |
|
"logits/chosen": 1.2013763189315796, |
|
"logits/rejected": 1.0818957090377808, |
|
"logps/chosen": -384.22625732421875, |
|
"logps/rejected": -698.1456909179688, |
|
"loss": 0.178, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -3.513953447341919, |
|
"rewards/margins": 3.0984597206115723, |
|
"rewards/rejected": -6.612412929534912, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.28310789556464294, |
|
"eval_logits/chosen": 1.9492088556289673, |
|
"eval_logits/rejected": 1.768805742263794, |
|
"eval_logps/chosen": -376.3503112792969, |
|
"eval_logps/rejected": -690.027587890625, |
|
"eval_loss": 0.13491272926330566, |
|
"eval_rewards/accuracies": 0.858582079410553, |
|
"eval_rewards/chosen": -3.4355356693267822, |
|
"eval_rewards/margins": 3.1055805683135986, |
|
"eval_rewards/rejected": -6.541116237640381, |
|
"eval_runtime": 216.5072, |
|
"eval_samples_per_second": 98.92, |
|
"eval_steps_per_second": 1.547, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.28625353884869453, |
|
"grad_norm": 2.6901655197143555, |
|
"learning_rate": 4.490118084989544e-06, |
|
"logits/chosen": 1.189724326133728, |
|
"logits/rejected": 1.1710891723632812, |
|
"logps/chosen": -429.6070251464844, |
|
"logps/rejected": -719.17236328125, |
|
"loss": 0.2161, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -3.961094379425049, |
|
"rewards/margins": 2.875734329223633, |
|
"rewards/rejected": -6.836828708648682, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.2893991821327461, |
|
"grad_norm": 2.5591769218444824, |
|
"learning_rate": 4.473383616691792e-06, |
|
"logits/chosen": 1.467657446861267, |
|
"logits/rejected": 1.3278765678405762, |
|
"logps/chosen": -419.4930114746094, |
|
"logps/rejected": -711.3831787109375, |
|
"loss": 0.193, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -3.870819091796875, |
|
"rewards/margins": 2.892895221710205, |
|
"rewards/rejected": -6.763714790344238, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.2925448254167977, |
|
"grad_norm": 2.1629209518432617, |
|
"learning_rate": 4.456411206352088e-06, |
|
"logits/chosen": 1.140165090560913, |
|
"logits/rejected": 1.0872342586517334, |
|
"logps/chosen": -381.86505126953125, |
|
"logps/rejected": -660.9152221679688, |
|
"loss": 0.1876, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -3.495915651321411, |
|
"rewards/margins": 2.757035493850708, |
|
"rewards/rejected": -6.252951145172119, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.2956904687008493, |
|
"grad_norm": 3.120842933654785, |
|
"learning_rate": 4.439202900430098e-06, |
|
"logits/chosen": 1.248095154762268, |
|
"logits/rejected": 1.0922152996063232, |
|
"logps/chosen": -402.4951171875, |
|
"logps/rejected": -733.9520263671875, |
|
"loss": 0.2028, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -3.690582275390625, |
|
"rewards/margins": 3.286928176879883, |
|
"rewards/rejected": -6.97750997543335, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.2988361119849009, |
|
"grad_norm": 3.5598092079162598, |
|
"learning_rate": 4.421760773828749e-06, |
|
"logits/chosen": 1.1661925315856934, |
|
"logits/rejected": 1.0520888566970825, |
|
"logps/chosen": -386.32763671875, |
|
"logps/rejected": -687.5399169921875, |
|
"loss": 0.227, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -3.5252959728240967, |
|
"rewards/margins": 3.0088343620300293, |
|
"rewards/rejected": -6.534131050109863, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.3019817552689525, |
|
"grad_norm": 2.8260598182678223, |
|
"learning_rate": 4.4040869296440595e-06, |
|
"logits/chosen": 1.0562385320663452, |
|
"logits/rejected": 1.0342681407928467, |
|
"logps/chosen": -418.41790771484375, |
|
"logps/rejected": -755.587158203125, |
|
"loss": 0.1787, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -3.8422763347625732, |
|
"rewards/margins": 3.352571964263916, |
|
"rewards/rejected": -7.19484806060791, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.3051273985530041, |
|
"grad_norm": 2.8546972274780273, |
|
"learning_rate": 4.3861834989115435e-06, |
|
"logits/chosen": 1.0627679824829102, |
|
"logits/rejected": 1.1002824306488037, |
|
"logps/chosen": -389.6894226074219, |
|
"logps/rejected": -745.2379760742188, |
|
"loss": 0.1601, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -3.560076951980591, |
|
"rewards/margins": 3.527824878692627, |
|
"rewards/rejected": -7.0879011154174805, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.3082730418370557, |
|
"grad_norm": 3.4414689540863037, |
|
"learning_rate": 4.368052640349269e-06, |
|
"logits/chosen": 1.4899530410766602, |
|
"logits/rejected": 1.3966710567474365, |
|
"logps/chosen": -402.5296630859375, |
|
"logps/rejected": -745.9603271484375, |
|
"loss": 0.1528, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -3.698240280151367, |
|
"rewards/margins": 3.4171810150146484, |
|
"rewards/rejected": -7.115421295166016, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.31141868512110726, |
|
"grad_norm": 3.587970018386841, |
|
"learning_rate": 4.349696540097564e-06, |
|
"logits/chosen": 1.3497339487075806, |
|
"logits/rejected": 1.1979681253433228, |
|
"logps/chosen": -443.26507568359375, |
|
"logps/rejected": -760.2127685546875, |
|
"loss": 0.1554, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -4.1059465408325195, |
|
"rewards/margins": 3.154737949371338, |
|
"rewards/rejected": -7.260683536529541, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.31456432840515886, |
|
"grad_norm": 2.6379456520080566, |
|
"learning_rate": 4.331117411455425e-06, |
|
"logits/chosen": 1.482219934463501, |
|
"logits/rejected": 1.5086220502853394, |
|
"logps/chosen": -444.00848388671875, |
|
"logps/rejected": -734.5386962890625, |
|
"loss": 0.1736, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -4.107657432556152, |
|
"rewards/margins": 2.892225980758667, |
|
"rewards/rejected": -6.99988317489624, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.31456432840515886, |
|
"eval_logits/chosen": 2.243989944458008, |
|
"eval_logits/rejected": 2.084845542907715, |
|
"eval_logps/chosen": -387.5068664550781, |
|
"eval_logps/rejected": -731.9054565429688, |
|
"eval_loss": 0.11270873993635178, |
|
"eval_rewards/accuracies": 0.8667910695075989, |
|
"eval_rewards/chosen": -3.5471012592315674, |
|
"eval_rewards/margins": 3.4127936363220215, |
|
"eval_rewards/rejected": -6.95989465713501, |
|
"eval_runtime": 216.499, |
|
"eval_samples_per_second": 98.924, |
|
"eval_steps_per_second": 1.547, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.31770997168921045, |
|
"grad_norm": 2.6611509323120117, |
|
"learning_rate": 4.312317494613642e-06, |
|
"logits/chosen": 1.5053398609161377, |
|
"logits/rejected": 1.3990230560302734, |
|
"logps/chosen": -433.1009826660156, |
|
"logps/rejected": -780.1348266601562, |
|
"loss": 0.1505, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -3.9901912212371826, |
|
"rewards/margins": 3.4618980884552, |
|
"rewards/rejected": -7.452090263366699, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.32085561497326204, |
|
"grad_norm": 3.133005380630493, |
|
"learning_rate": 4.293299056384692e-06, |
|
"logits/chosen": 1.581313133239746, |
|
"logits/rejected": 1.3655680418014526, |
|
"logps/chosen": -428.97039794921875, |
|
"logps/rejected": -712.6785278320312, |
|
"loss": 0.1824, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -3.9461722373962402, |
|
"rewards/margins": 2.822526454925537, |
|
"rewards/rejected": -6.768698215484619, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.32400125825731363, |
|
"grad_norm": 2.3489255905151367, |
|
"learning_rate": 4.274064389929412e-06, |
|
"logits/chosen": 1.619148850440979, |
|
"logits/rejected": 1.3803061246871948, |
|
"logps/chosen": -380.7969055175781, |
|
"logps/rejected": -708.0203857421875, |
|
"loss": 0.1684, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -3.4878249168395996, |
|
"rewards/margins": 3.2536826133728027, |
|
"rewards/rejected": -6.741507530212402, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.3271469015413652, |
|
"grad_norm": 4.2949042320251465, |
|
"learning_rate": 4.254615814480501e-06, |
|
"logits/chosen": 1.4520130157470703, |
|
"logits/rejected": 1.3369412422180176, |
|
"logps/chosen": -459.8309631347656, |
|
"logps/rejected": -775.1959838867188, |
|
"loss": 0.165, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -4.2733917236328125, |
|
"rewards/margins": 3.1295056343078613, |
|
"rewards/rejected": -7.402897834777832, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.3302925448254168, |
|
"grad_norm": 3.195559024810791, |
|
"learning_rate": 4.234955675062881e-06, |
|
"logits/chosen": 1.5306510925292969, |
|
"logits/rejected": 1.4327778816223145, |
|
"logps/chosen": -418.94842529296875, |
|
"logps/rejected": -751.5892944335938, |
|
"loss": 0.1749, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -3.8681347370147705, |
|
"rewards/margins": 3.3033840656280518, |
|
"rewards/rejected": -7.171518802642822, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.3334381881094684, |
|
"grad_norm": 2.2908482551574707, |
|
"learning_rate": 4.215086342210932e-06, |
|
"logits/chosen": 1.6077830791473389, |
|
"logits/rejected": 1.3920700550079346, |
|
"logps/chosen": -367.6110534667969, |
|
"logps/rejected": -708.1036376953125, |
|
"loss": 0.1536, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -3.358769655227661, |
|
"rewards/margins": 3.355515718460083, |
|
"rewards/rejected": -6.714285373687744, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.33658383139352, |
|
"grad_norm": 3.3649628162384033, |
|
"learning_rate": 4.19501021168268e-06, |
|
"logits/chosen": 1.5467596054077148, |
|
"logits/rejected": 1.4269657135009766, |
|
"logps/chosen": -465.55694580078125, |
|
"logps/rejected": -830.7658081054688, |
|
"loss": 0.176, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -4.3319597244262695, |
|
"rewards/margins": 3.633349657058716, |
|
"rewards/rejected": -7.965310096740723, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.3397294746775716, |
|
"grad_norm": 2.973006248474121, |
|
"learning_rate": 4.174729704170914e-06, |
|
"logits/chosen": 1.5441627502441406, |
|
"logits/rejected": 1.4771041870117188, |
|
"logps/chosen": -426.13592529296875, |
|
"logps/rejected": -808.2399291992188, |
|
"loss": 0.1639, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -3.9298815727233887, |
|
"rewards/margins": 3.805147171020508, |
|
"rewards/rejected": -7.7350287437438965, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.3428751179616232, |
|
"grad_norm": 3.8005123138427734, |
|
"learning_rate": 4.154247265011313e-06, |
|
"logits/chosen": 1.4085278511047363, |
|
"logits/rejected": 1.1824581623077393, |
|
"logps/chosen": -415.9977111816406, |
|
"logps/rejected": -807.1639404296875, |
|
"loss": 0.1373, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -3.8302745819091797, |
|
"rewards/margins": 3.8619492053985596, |
|
"rewards/rejected": -7.692224025726318, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.3460207612456747, |
|
"grad_norm": 2.478180170059204, |
|
"learning_rate": 4.133565363887602e-06, |
|
"logits/chosen": 1.4384472370147705, |
|
"logits/rejected": 1.2546355724334717, |
|
"logps/chosen": -395.85516357421875, |
|
"logps/rejected": -749.6239013671875, |
|
"loss": 0.1474, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -3.635516405105591, |
|
"rewards/margins": 3.4953994750976562, |
|
"rewards/rejected": -7.130915641784668, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.3460207612456747, |
|
"eval_logits/chosen": 2.0075626373291016, |
|
"eval_logits/rejected": 1.8279625177383423, |
|
"eval_logps/chosen": -394.5699768066406, |
|
"eval_logps/rejected": -759.1402587890625, |
|
"eval_loss": 0.09821247309446335, |
|
"eval_rewards/accuracies": 0.8798507452011108, |
|
"eval_rewards/chosen": -3.617732286453247, |
|
"eval_rewards/margins": 3.6145107746124268, |
|
"eval_rewards/rejected": -7.232241630554199, |
|
"eval_runtime": 216.4313, |
|
"eval_samples_per_second": 98.955, |
|
"eval_steps_per_second": 1.548, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.3491664045297263, |
|
"grad_norm": 3.5338029861450195, |
|
"learning_rate": 4.112686494533762e-06, |
|
"logits/chosen": 1.5566942691802979, |
|
"logits/rejected": 1.3428875207901, |
|
"logps/chosen": -441.243896484375, |
|
"logps/rejected": -809.8138427734375, |
|
"loss": 0.1096, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -4.087360858917236, |
|
"rewards/margins": 3.6627678871154785, |
|
"rewards/rejected": -7.750128746032715, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.3523120478137779, |
|
"grad_norm": 3.721736431121826, |
|
"learning_rate": 4.091613174433351e-06, |
|
"logits/chosen": 1.1858751773834229, |
|
"logits/rejected": 1.2289283275604248, |
|
"logps/chosen": -492.10211181640625, |
|
"logps/rejected": -857.7195434570312, |
|
"loss": 0.1577, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -4.586886405944824, |
|
"rewards/margins": 3.6530747413635254, |
|
"rewards/rejected": -8.239961624145508, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.3554576910978295, |
|
"grad_norm": 2.7472167015075684, |
|
"learning_rate": 4.070347944515955e-06, |
|
"logits/chosen": 1.0944794416427612, |
|
"logits/rejected": 0.9718171954154968, |
|
"logps/chosen": -467.4380798339844, |
|
"logps/rejected": -860.9664916992188, |
|
"loss": 0.1574, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -4.340346813201904, |
|
"rewards/margins": 3.920635223388672, |
|
"rewards/rejected": -8.260981559753418, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.3586033343818811, |
|
"grad_norm": 3.2876129150390625, |
|
"learning_rate": 4.048893368850812e-06, |
|
"logits/chosen": 1.5173089504241943, |
|
"logits/rejected": 1.392333745956421, |
|
"logps/chosen": -432.60760498046875, |
|
"logps/rejected": -770.94384765625, |
|
"loss": 0.1599, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -4.002845287322998, |
|
"rewards/margins": 3.364281415939331, |
|
"rewards/rejected": -7.36712646484375, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.36174897766593267, |
|
"grad_norm": 3.355199098587036, |
|
"learning_rate": 4.027252034337653e-06, |
|
"logits/chosen": 1.4846436977386475, |
|
"logits/rejected": 1.3371527194976807, |
|
"logps/chosen": -445.65972900390625, |
|
"logps/rejected": -809.9754638671875, |
|
"loss": 0.1504, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -4.108954429626465, |
|
"rewards/margins": 3.627216339111328, |
|
"rewards/rejected": -7.736170291900635, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.36489462094998426, |
|
"grad_norm": 2.369302749633789, |
|
"learning_rate": 4.005426550394777e-06, |
|
"logits/chosen": 1.3805218935012817, |
|
"logits/rejected": 1.427558422088623, |
|
"logps/chosen": -457.25701904296875, |
|
"logps/rejected": -851.77783203125, |
|
"loss": 0.1477, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -4.240313529968262, |
|
"rewards/margins": 3.947707414627075, |
|
"rewards/rejected": -8.188020706176758, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.36804026423403585, |
|
"grad_norm": 2.904909133911133, |
|
"learning_rate": 3.983419548644427e-06, |
|
"logits/chosen": 1.5399326086044312, |
|
"logits/rejected": 1.3559725284576416, |
|
"logps/chosen": -400.92474365234375, |
|
"logps/rejected": -741.2730712890625, |
|
"loss": 0.1555, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -3.684882640838623, |
|
"rewards/margins": 3.3871047496795654, |
|
"rewards/rejected": -7.071987152099609, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.37118590751808744, |
|
"grad_norm": 2.247695207595825, |
|
"learning_rate": 3.961233682595474e-06, |
|
"logits/chosen": 1.6838607788085938, |
|
"logits/rejected": 1.5416187047958374, |
|
"logps/chosen": -453.5838928222656, |
|
"logps/rejected": -840.7737426757812, |
|
"loss": 0.161, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -4.202359199523926, |
|
"rewards/margins": 3.8470425605773926, |
|
"rewards/rejected": -8.049402236938477, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.37433155080213903, |
|
"grad_norm": 1.9283421039581299, |
|
"learning_rate": 3.93887162732347e-06, |
|
"logits/chosen": 1.4460527896881104, |
|
"logits/rejected": 1.4111425876617432, |
|
"logps/chosen": -418.48614501953125, |
|
"logps/rejected": -816.6475219726562, |
|
"loss": 0.1523, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -3.866959810256958, |
|
"rewards/margins": 3.951749801635742, |
|
"rewards/rejected": -7.818709373474121, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.3774771940861906, |
|
"grad_norm": 3.37664532661438, |
|
"learning_rate": 3.916336079148102e-06, |
|
"logits/chosen": 1.838653802871704, |
|
"logits/rejected": 1.687483549118042, |
|
"logps/chosen": -466.60052490234375, |
|
"logps/rejected": -841.2741088867188, |
|
"loss": 0.1382, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -4.350468635559082, |
|
"rewards/margins": 3.6922600269317627, |
|
"rewards/rejected": -8.04272747039795, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.3774771940861906, |
|
"eval_logits/chosen": 2.2832651138305664, |
|
"eval_logits/rejected": 2.0966413021087646, |
|
"eval_logps/chosen": -464.0286865234375, |
|
"eval_logps/rejected": -871.9454956054688, |
|
"eval_loss": 0.08190137147903442, |
|
"eval_rewards/accuracies": 0.8861940503120422, |
|
"eval_rewards/chosen": -4.312319278717041, |
|
"eval_rewards/margins": 4.047975063323975, |
|
"eval_rewards/rejected": -8.360294342041016, |
|
"eval_runtime": 216.3914, |
|
"eval_samples_per_second": 98.973, |
|
"eval_steps_per_second": 1.548, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.3806228373702422, |
|
"grad_norm": 2.987619400024414, |
|
"learning_rate": 3.893629755308078e-06, |
|
"logits/chosen": 1.2712478637695312, |
|
"logits/rejected": 1.2399590015411377, |
|
"logps/chosen": -473.53546142578125, |
|
"logps/rejected": -809.8004150390625, |
|
"loss": 0.1397, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -4.39472770690918, |
|
"rewards/margins": 3.3684539794921875, |
|
"rewards/rejected": -7.763182163238525, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.3837684806542938, |
|
"grad_norm": 2.556084394454956, |
|
"learning_rate": 3.870755393633495e-06, |
|
"logits/chosen": 1.4189751148223877, |
|
"logits/rejected": 1.2690774202346802, |
|
"logps/chosen": -449.70166015625, |
|
"logps/rejected": -821.1251831054688, |
|
"loss": 0.1232, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -4.166979789733887, |
|
"rewards/margins": 3.6986324787139893, |
|
"rewards/rejected": -7.8656110763549805, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.3869141239383454, |
|
"grad_norm": 3.653494358062744, |
|
"learning_rate": 3.847715752215725e-06, |
|
"logits/chosen": 1.3309882879257202, |
|
"logits/rejected": 1.269213080406189, |
|
"logps/chosen": -459.6454162597656, |
|
"logps/rejected": -866.3443603515625, |
|
"loss": 0.1229, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -4.266015529632568, |
|
"rewards/margins": 4.048908710479736, |
|
"rewards/rejected": -8.314924240112305, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.390059767222397, |
|
"grad_norm": 3.7244603633880615, |
|
"learning_rate": 3.824513609074853e-06, |
|
"logits/chosen": 1.4364420175552368, |
|
"logits/rejected": 1.3250478506088257, |
|
"logps/chosen": -494.782470703125, |
|
"logps/rejected": -882.2566528320312, |
|
"loss": 0.1304, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -4.621702194213867, |
|
"rewards/margins": 3.8449578285217285, |
|
"rewards/rejected": -8.466659545898438, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.3932054105064486, |
|
"grad_norm": 2.3005125522613525, |
|
"learning_rate": 3.8011517618247208e-06, |
|
"logits/chosen": 1.616355299949646, |
|
"logits/rejected": 1.4450652599334717, |
|
"logps/chosen": -475.3401794433594, |
|
"logps/rejected": -832.5036010742188, |
|
"loss": 0.1201, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -4.421032905578613, |
|
"rewards/margins": 3.5559539794921875, |
|
"rewards/rejected": -7.976986885070801, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.3963510537905002, |
|
"grad_norm": 3.406710624694824, |
|
"learning_rate": 3.777633027335594e-06, |
|
"logits/chosen": 1.4149603843688965, |
|
"logits/rejected": 1.2735809087753296, |
|
"logps/chosen": -464.1859436035156, |
|
"logps/rejected": -824.88818359375, |
|
"loss": 0.1362, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -4.316610336303711, |
|
"rewards/margins": 3.572819471359253, |
|
"rewards/rejected": -7.889430046081543, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.39949669707455177, |
|
"grad_norm": 2.030447006225586, |
|
"learning_rate": 3.7539602413945264e-06, |
|
"logits/chosen": 1.3235461711883545, |
|
"logits/rejected": 1.1515737771987915, |
|
"logps/chosen": -474.85174560546875, |
|
"logps/rejected": -877.0484619140625, |
|
"loss": 0.1061, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -4.426663398742676, |
|
"rewards/margins": 4.007308006286621, |
|
"rewards/rejected": -8.433971405029297, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.40264234035860336, |
|
"grad_norm": 2.5130529403686523, |
|
"learning_rate": 3.7301362583634255e-06, |
|
"logits/chosen": 1.4249566793441772, |
|
"logits/rejected": 1.328649640083313, |
|
"logps/chosen": -446.62164306640625, |
|
"logps/rejected": -855.87939453125, |
|
"loss": 0.1303, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -4.137589454650879, |
|
"rewards/margins": 4.056654930114746, |
|
"rewards/rejected": -8.194245338439941, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.40578798364265495, |
|
"grad_norm": 2.781508684158325, |
|
"learning_rate": 3.7061639508348883e-06, |
|
"logits/chosen": 1.2699975967407227, |
|
"logits/rejected": 1.1878561973571777, |
|
"logps/chosen": -464.4150390625, |
|
"logps/rejected": -912.4680786132812, |
|
"loss": 0.1063, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -4.31067419052124, |
|
"rewards/margins": 4.446977138519287, |
|
"rewards/rejected": -8.757651329040527, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.4089336269267065, |
|
"grad_norm": 3.4436986446380615, |
|
"learning_rate": 3.6820462092858388e-06, |
|
"logits/chosen": 1.3347591161727905, |
|
"logits/rejected": 1.1586949825286865, |
|
"logps/chosen": -493.66229248046875, |
|
"logps/rejected": -909.53076171875, |
|
"loss": 0.1133, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -4.606060028076172, |
|
"rewards/margins": 4.134642124176025, |
|
"rewards/rejected": -8.740701675415039, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.4089336269267065, |
|
"eval_logits/chosen": 2.1043541431427, |
|
"eval_logits/rejected": 1.9082162380218506, |
|
"eval_logps/chosen": -439.5054626464844, |
|
"eval_logps/rejected": -869.0028686523438, |
|
"eval_loss": 0.07139625400304794, |
|
"eval_rewards/accuracies": 0.89552241563797, |
|
"eval_rewards/chosen": -4.067087173461914, |
|
"eval_rewards/margins": 4.263782501220703, |
|
"eval_rewards/rejected": -8.330869674682617, |
|
"eval_runtime": 215.9237, |
|
"eval_samples_per_second": 99.188, |
|
"eval_steps_per_second": 1.551, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.4120792702107581, |
|
"grad_norm": 1.8959569931030273, |
|
"learning_rate": 3.6577859417290036e-06, |
|
"logits/chosen": 1.2066175937652588, |
|
"logits/rejected": 1.1854221820831299, |
|
"logps/chosen": -471.34088134765625, |
|
"logps/rejected": -895.9601440429688, |
|
"loss": 0.1126, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -4.3812336921691895, |
|
"rewards/margins": 4.223142623901367, |
|
"rewards/rejected": -8.604375839233398, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.41522491349480967, |
|
"grad_norm": 4.2024030685424805, |
|
"learning_rate": 3.633386073362275e-06, |
|
"logits/chosen": 1.405761480331421, |
|
"logits/rejected": 1.1724328994750977, |
|
"logps/chosen": -487.13116455078125, |
|
"logps/rejected": -914.7552490234375, |
|
"loss": 0.1049, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -4.5441741943359375, |
|
"rewards/margins": 4.243566036224365, |
|
"rewards/rejected": -8.787739753723145, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.41837055677886126, |
|
"grad_norm": 2.757516622543335, |
|
"learning_rate": 3.6088495462160108e-06, |
|
"logits/chosen": 1.2204030752182007, |
|
"logits/rejected": 1.1215746402740479, |
|
"logps/chosen": -494.38232421875, |
|
"logps/rejected": -946.015625, |
|
"loss": 0.107, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -4.630240440368652, |
|
"rewards/margins": 4.480798244476318, |
|
"rewards/rejected": -9.111038208007812, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.42151620006291285, |
|
"grad_norm": 3.821265459060669, |
|
"learning_rate": 3.584179318798287e-06, |
|
"logits/chosen": 1.4361203908920288, |
|
"logits/rejected": 1.1379070281982422, |
|
"logps/chosen": -462.8627014160156, |
|
"logps/rejected": -889.4700927734375, |
|
"loss": 0.1151, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -4.30374002456665, |
|
"rewards/margins": 4.2384233474731445, |
|
"rewards/rejected": -8.542162895202637, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.42466184334696444, |
|
"grad_norm": 2.9233007431030273, |
|
"learning_rate": 3.5593783657381832e-06, |
|
"logits/chosen": 1.4705383777618408, |
|
"logits/rejected": 1.309003472328186, |
|
"logps/chosen": -431.55181884765625, |
|
"logps/rejected": -872.4307861328125, |
|
"loss": 0.1165, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -3.9765896797180176, |
|
"rewards/margins": 4.378879070281982, |
|
"rewards/rejected": -8.35546875, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.42780748663101603, |
|
"grad_norm": 2.001502752304077, |
|
"learning_rate": 3.534449677427106e-06, |
|
"logits/chosen": 1.4111711978912354, |
|
"logits/rejected": 1.179564118385315, |
|
"logps/chosen": -390.43218994140625, |
|
"logps/rejected": -836.7662963867188, |
|
"loss": 0.1152, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -3.5844473838806152, |
|
"rewards/margins": 4.41940975189209, |
|
"rewards/rejected": -8.003857612609863, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.4309531299150676, |
|
"grad_norm": 1.7398028373718262, |
|
"learning_rate": 3.5093962596582288e-06, |
|
"logits/chosen": 1.4407987594604492, |
|
"logits/rejected": 1.372896432876587, |
|
"logps/chosen": -444.43994140625, |
|
"logps/rejected": -889.0983276367188, |
|
"loss": 0.1256, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -4.119224548339844, |
|
"rewards/margins": 4.428411483764648, |
|
"rewards/rejected": -8.547636032104492, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.4340987731991192, |
|
"grad_norm": 2.6257755756378174, |
|
"learning_rate": 3.4842211332640595e-06, |
|
"logits/chosen": 1.5364018678665161, |
|
"logits/rejected": 1.21645188331604, |
|
"logps/chosen": -453.30401611328125, |
|
"logps/rejected": -896.1051025390625, |
|
"loss": 0.1219, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -4.197838306427002, |
|
"rewards/margins": 4.3818278312683105, |
|
"rewards/rejected": -8.579666137695312, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.4372444164831708, |
|
"grad_norm": 2.9230072498321533, |
|
"learning_rate": 3.4589273337522055e-06, |
|
"logits/chosen": 1.8644187450408936, |
|
"logits/rejected": 1.5944981575012207, |
|
"logps/chosen": -427.46533203125, |
|
"logps/rejected": -885.3250122070312, |
|
"loss": 0.1007, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -3.9393608570098877, |
|
"rewards/margins": 4.555096626281738, |
|
"rewards/rejected": -8.494457244873047, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.4403900597672224, |
|
"grad_norm": 2.494887351989746, |
|
"learning_rate": 3.433517910939364e-06, |
|
"logits/chosen": 1.6030244827270508, |
|
"logits/rejected": 1.3681625127792358, |
|
"logps/chosen": -540.4693603515625, |
|
"logps/rejected": -944.8883666992188, |
|
"loss": 0.1209, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -5.091638565063477, |
|
"rewards/margins": 3.998020887374878, |
|
"rewards/rejected": -9.089658737182617, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.4403900597672224, |
|
"eval_logits/chosen": 2.2677738666534424, |
|
"eval_logits/rejected": 2.057406187057495, |
|
"eval_logps/chosen": -516.4533081054688, |
|
"eval_logps/rejected": -983.30810546875, |
|
"eval_loss": 0.06340682506561279, |
|
"eval_rewards/accuracies": 0.8932836055755615, |
|
"eval_rewards/chosen": -4.8365654945373535, |
|
"eval_rewards/margins": 4.637356281280518, |
|
"eval_rewards/rejected": -9.473921775817871, |
|
"eval_runtime": 216.4046, |
|
"eval_samples_per_second": 98.967, |
|
"eval_steps_per_second": 1.548, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.443535703051274, |
|
"grad_norm": 3.8545944690704346, |
|
"learning_rate": 3.4079959285835895e-06, |
|
"logits/chosen": 1.2882033586502075, |
|
"logits/rejected": 1.2805908918380737, |
|
"logps/chosen": -486.8324279785156, |
|
"logps/rejected": -901.2437744140625, |
|
"loss": 0.1161, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -4.533527851104736, |
|
"rewards/margins": 4.132518291473389, |
|
"rewards/rejected": -8.666045188903809, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.4466813463353256, |
|
"grad_norm": 2.2411046028137207, |
|
"learning_rate": 3.3823644640148767e-06, |
|
"logits/chosen": 1.302230715751648, |
|
"logits/rejected": 1.0823272466659546, |
|
"logps/chosen": -426.0066833496094, |
|
"logps/rejected": -929.7628173828125, |
|
"loss": 0.1074, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -3.933643341064453, |
|
"rewards/margins": 5.0114426612854, |
|
"rewards/rejected": -8.945085525512695, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.44982698961937717, |
|
"grad_norm": 4.858677864074707, |
|
"learning_rate": 3.356626607764113e-06, |
|
"logits/chosen": 1.5180631875991821, |
|
"logits/rejected": 1.3998795747756958, |
|
"logps/chosen": -523.0291137695312, |
|
"logps/rejected": -951.4177856445312, |
|
"loss": 0.1093, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -4.899743556976318, |
|
"rewards/margins": 4.265069484710693, |
|
"rewards/rejected": -9.164813041687012, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.45297263290342876, |
|
"grad_norm": 3.5026304721832275, |
|
"learning_rate": 3.3307854631904315e-06, |
|
"logits/chosen": 1.148923635482788, |
|
"logits/rejected": 1.0456730127334595, |
|
"logps/chosen": -508.77935791015625, |
|
"logps/rejected": -951.7108154296875, |
|
"loss": 0.1224, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -4.749743461608887, |
|
"rewards/margins": 4.407544136047363, |
|
"rewards/rejected": -9.157288551330566, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.45611827618748035, |
|
"grad_norm": 2.1828246116638184, |
|
"learning_rate": 3.3048441461070234e-06, |
|
"logits/chosen": 1.3276925086975098, |
|
"logits/rejected": 1.2130540609359741, |
|
"logps/chosen": -421.02197265625, |
|
"logps/rejected": -818.87939453125, |
|
"loss": 0.1112, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -3.8857734203338623, |
|
"rewards/margins": 3.9558587074279785, |
|
"rewards/rejected": -7.841631889343262, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.45926391947153195, |
|
"grad_norm": 2.359039783477783, |
|
"learning_rate": 3.278805784405451e-06, |
|
"logits/chosen": 1.4966380596160889, |
|
"logits/rejected": 1.2815477848052979, |
|
"logps/chosen": -455.6396484375, |
|
"logps/rejected": -913.0255737304688, |
|
"loss": 0.1069, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -4.213668346405029, |
|
"rewards/margins": 4.552624702453613, |
|
"rewards/rejected": -8.7662935256958, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.46240956275558354, |
|
"grad_norm": 3.10565447807312, |
|
"learning_rate": 3.2526735176784897e-06, |
|
"logits/chosen": 1.8660796880722046, |
|
"logits/rejected": 1.6580556631088257, |
|
"logps/chosen": -490.677001953125, |
|
"logps/rejected": -909.7364501953125, |
|
"loss": 0.1092, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -4.582307815551758, |
|
"rewards/margins": 4.146181583404541, |
|
"rewards/rejected": -8.728489875793457, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.4655552060396351, |
|
"grad_norm": 2.580960750579834, |
|
"learning_rate": 3.2264504968415805e-06, |
|
"logits/chosen": 1.4424006938934326, |
|
"logits/rejected": 1.2268205881118774, |
|
"logps/chosen": -466.53887939453125, |
|
"logps/rejected": -913.9548950195312, |
|
"loss": 0.1048, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -4.335131645202637, |
|
"rewards/margins": 4.450761795043945, |
|
"rewards/rejected": -8.785893440246582, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.4687008493236867, |
|
"grad_norm": 2.8086788654327393, |
|
"learning_rate": 3.2001398837529e-06, |
|
"logits/chosen": 1.4587178230285645, |
|
"logits/rejected": 1.2750834226608276, |
|
"logps/chosen": -459.68896484375, |
|
"logps/rejected": -923.9265747070312, |
|
"loss": 0.0928, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -4.283493995666504, |
|
"rewards/margins": 4.605031490325928, |
|
"rewards/rejected": -8.88852596282959, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.47184649260773825, |
|
"grad_norm": 4.19993257522583, |
|
"learning_rate": 3.1737448508321176e-06, |
|
"logits/chosen": 1.593569040298462, |
|
"logits/rejected": 1.276476502418518, |
|
"logps/chosen": -481.2831115722656, |
|
"logps/rejected": -942.7236328125, |
|
"loss": 0.1057, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -4.496027946472168, |
|
"rewards/margins": 4.565060615539551, |
|
"rewards/rejected": -9.061088562011719, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.47184649260773825, |
|
"eval_logits/chosen": 2.277968406677246, |
|
"eval_logits/rejected": 2.0906763076782227, |
|
"eval_logps/chosen": -451.1487731933594, |
|
"eval_logps/rejected": -921.7240600585938, |
|
"eval_loss": 0.05749654024839401, |
|
"eval_rewards/accuracies": 0.9018656611442566, |
|
"eval_rewards/chosen": -4.1835198402404785, |
|
"eval_rewards/margins": 4.674561023712158, |
|
"eval_rewards/rejected": -8.858080863952637, |
|
"eval_runtime": 216.5654, |
|
"eval_samples_per_second": 98.894, |
|
"eval_steps_per_second": 1.547, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.47499213589178985, |
|
"grad_norm": 3.436180591583252, |
|
"learning_rate": 3.1472685806778837e-06, |
|
"logits/chosen": 1.5782949924468994, |
|
"logits/rejected": 1.247202754020691, |
|
"logps/chosen": -464.5965270996094, |
|
"logps/rejected": -940.6214599609375, |
|
"loss": 0.0983, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -4.317359447479248, |
|
"rewards/margins": 4.72481632232666, |
|
"rewards/rejected": -9.042176246643066, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.47813777917584144, |
|
"grad_norm": 2.2029030323028564, |
|
"learning_rate": 3.1207142656840782e-06, |
|
"logits/chosen": 1.4659521579742432, |
|
"logits/rejected": 1.255765676498413, |
|
"logps/chosen": -502.91094970703125, |
|
"logps/rejected": -932.7726440429688, |
|
"loss": 0.0947, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -4.687650203704834, |
|
"rewards/margins": 4.2687153816223145, |
|
"rewards/rejected": -8.956365585327148, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.48128342245989303, |
|
"grad_norm": 2.461082935333252, |
|
"learning_rate": 3.094085107654891e-06, |
|
"logits/chosen": 1.3395097255706787, |
|
"logits/rejected": 1.0628923177719116, |
|
"logps/chosen": -543.1881103515625, |
|
"logps/rejected": -978.9011840820312, |
|
"loss": 0.102, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -5.116451263427734, |
|
"rewards/margins": 4.314597129821777, |
|
"rewards/rejected": -9.431048393249512, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.4844290657439446, |
|
"grad_norm": 2.2011494636535645, |
|
"learning_rate": 3.067384317418761e-06, |
|
"logits/chosen": 1.6539561748504639, |
|
"logits/rejected": 1.5385477542877197, |
|
"logps/chosen": -459.3910217285156, |
|
"logps/rejected": -853.07177734375, |
|
"loss": 0.1091, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -4.261325836181641, |
|
"rewards/margins": 3.9113669395446777, |
|
"rewards/rejected": -8.172693252563477, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.4875747090279962, |
|
"grad_norm": 4.575362682342529, |
|
"learning_rate": 3.0406151144412277e-06, |
|
"logits/chosen": 1.5317912101745605, |
|
"logits/rejected": 1.4458856582641602, |
|
"logps/chosen": -465.4632263183594, |
|
"logps/rejected": -876.3185424804688, |
|
"loss": 0.1009, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -4.327389717102051, |
|
"rewards/margins": 4.079252243041992, |
|
"rewards/rejected": -8.406641960144043, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.4907203523120478, |
|
"grad_norm": 2.7277722358703613, |
|
"learning_rate": 3.013780726436743e-06, |
|
"logits/chosen": 1.4253225326538086, |
|
"logits/rejected": 1.1030299663543701, |
|
"logps/chosen": -470.4295959472656, |
|
"logps/rejected": -924.5455322265625, |
|
"loss": 0.0854, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -4.38973331451416, |
|
"rewards/margins": 4.496077537536621, |
|
"rewards/rejected": -8.885812759399414, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.4938659955960994, |
|
"grad_norm": 2.7925002574920654, |
|
"learning_rate": 2.9868843889794867e-06, |
|
"logits/chosen": 1.7626025676727295, |
|
"logits/rejected": 1.603830099105835, |
|
"logps/chosen": -491.6067810058594, |
|
"logps/rejected": -946.78076171875, |
|
"loss": 0.0865, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -4.5813679695129395, |
|
"rewards/margins": 4.5411176681518555, |
|
"rewards/rejected": -9.122485160827637, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.497011638880151, |
|
"grad_norm": 3.2050352096557617, |
|
"learning_rate": 2.9599293451132338e-06, |
|
"logits/chosen": 1.5611720085144043, |
|
"logits/rejected": 1.3016364574432373, |
|
"logps/chosen": -469.46661376953125, |
|
"logps/rejected": -943.2091674804688, |
|
"loss": 0.0895, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -4.3585710525512695, |
|
"rewards/margins": 4.71451997756958, |
|
"rewards/rejected": -9.073091506958008, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.5001572821642026, |
|
"grad_norm": 2.416383743286133, |
|
"learning_rate": 2.9329188449603245e-06, |
|
"logits/chosen": 1.4920631647109985, |
|
"logits/rejected": 1.465451955795288, |
|
"logps/chosen": -469.51873779296875, |
|
"logps/rejected": -898.5816650390625, |
|
"loss": 0.0846, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -4.3615593910217285, |
|
"rewards/margins": 4.273259162902832, |
|
"rewards/rejected": -8.634818077087402, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.5033029254482542, |
|
"grad_norm": 3.518874168395996, |
|
"learning_rate": 2.9058561453297783e-06, |
|
"logits/chosen": 1.5393750667572021, |
|
"logits/rejected": 1.3930766582489014, |
|
"logps/chosen": -449.69879150390625, |
|
"logps/rejected": -957.0568237304688, |
|
"loss": 0.1057, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -4.184264183044434, |
|
"rewards/margins": 5.050030708312988, |
|
"rewards/rejected": -9.234294891357422, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.5033029254482542, |
|
"eval_logits/chosen": 2.213601589202881, |
|
"eval_logits/rejected": 2.019822120666504, |
|
"eval_logps/chosen": -453.72308349609375, |
|
"eval_logps/rejected": -928.4155883789062, |
|
"eval_loss": 0.053593434393405914, |
|
"eval_rewards/accuracies": 0.9130597114562988, |
|
"eval_rewards/chosen": -4.209263324737549, |
|
"eval_rewards/margins": 4.715731620788574, |
|
"eval_rewards/rejected": -8.924994468688965, |
|
"eval_runtime": 216.4803, |
|
"eval_samples_per_second": 98.933, |
|
"eval_steps_per_second": 1.547, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.5064485687323057, |
|
"grad_norm": 4.083642482757568, |
|
"learning_rate": 2.8787445093246004e-06, |
|
"logits/chosen": 1.4022510051727295, |
|
"logits/rejected": 1.2157288789749146, |
|
"logps/chosen": -457.09552001953125, |
|
"logps/rejected": -900.7742309570312, |
|
"loss": 0.1105, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -4.262914180755615, |
|
"rewards/margins": 4.395590782165527, |
|
"rewards/rejected": -8.6585054397583, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.5095942120163574, |
|
"grad_norm": 3.1871087551116943, |
|
"learning_rate": 2.8515872059483326e-06, |
|
"logits/chosen": 1.3273845911026, |
|
"logits/rejected": 1.1355845928192139, |
|
"logps/chosen": -478.5135803222656, |
|
"logps/rejected": -951.0009765625, |
|
"loss": 0.1045, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -4.429346561431885, |
|
"rewards/margins": 4.699108600616455, |
|
"rewards/rejected": -9.12845516204834, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.5127398553004089, |
|
"grad_norm": 1.756052851676941, |
|
"learning_rate": 2.8243875097108897e-06, |
|
"logits/chosen": 1.482096552848816, |
|
"logits/rejected": 1.359783411026001, |
|
"logps/chosen": -482.2904357910156, |
|
"logps/rejected": -976.9202270507812, |
|
"loss": 0.0854, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -4.482015609741211, |
|
"rewards/margins": 4.936542510986328, |
|
"rewards/rejected": -9.418558120727539, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.5158854985844605, |
|
"grad_norm": 2.7499189376831055, |
|
"learning_rate": 2.7971487002337344e-06, |
|
"logits/chosen": 1.6604124307632446, |
|
"logits/rejected": 1.5121276378631592, |
|
"logps/chosen": -536.4432373046875, |
|
"logps/rejected": -991.6915893554688, |
|
"loss": 0.0835, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -5.0419921875, |
|
"rewards/margins": 4.537399768829346, |
|
"rewards/rejected": -9.579391479492188, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.5190311418685121, |
|
"grad_norm": 2.7009730339050293, |
|
"learning_rate": 2.769874061854434e-06, |
|
"logits/chosen": 1.630128264427185, |
|
"logits/rejected": 1.43096923828125, |
|
"logps/chosen": -473.77838134765625, |
|
"logps/rejected": -935.79931640625, |
|
"loss": 0.0789, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -4.406306743621826, |
|
"rewards/margins": 4.593745708465576, |
|
"rewards/rejected": -9.000051498413086, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.5221767851525637, |
|
"grad_norm": 2.960386037826538, |
|
"learning_rate": 2.74256688323065e-06, |
|
"logits/chosen": 1.2850834131240845, |
|
"logits/rejected": 1.1641004085540771, |
|
"logps/chosen": -516.1502685546875, |
|
"logps/rejected": -943.5867309570312, |
|
"loss": 0.0922, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -4.829821586608887, |
|
"rewards/margins": 4.256933689117432, |
|
"rewards/rejected": -9.086755752563477, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.5253224284366153, |
|
"grad_norm": 3.012941360473633, |
|
"learning_rate": 2.7152304569436055e-06, |
|
"logits/chosen": 1.3663134574890137, |
|
"logits/rejected": 1.1525830030441284, |
|
"logps/chosen": -484.38641357421875, |
|
"logps/rejected": -991.9713134765625, |
|
"loss": 0.0713, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -4.521730899810791, |
|
"rewards/margins": 5.04987907409668, |
|
"rewards/rejected": -9.571609497070312, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.5284680717206669, |
|
"grad_norm": 3.185410976409912, |
|
"learning_rate": 2.6878680791010786e-06, |
|
"logits/chosen": 1.3727288246154785, |
|
"logits/rejected": 1.32237708568573, |
|
"logps/chosen": -528.0745849609375, |
|
"logps/rejected": -955.1348876953125, |
|
"loss": 0.0918, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -4.948831558227539, |
|
"rewards/margins": 4.25364351272583, |
|
"rewards/rejected": -9.202474594116211, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.5316137150047184, |
|
"grad_norm": 4.06403923034668, |
|
"learning_rate": 2.6604830489399763e-06, |
|
"logits/chosen": 1.203892707824707, |
|
"logits/rejected": 1.0114085674285889, |
|
"logps/chosen": -481.3412170410156, |
|
"logps/rejected": -961.1526489257812, |
|
"loss": 0.1006, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -4.478043556213379, |
|
"rewards/margins": 4.7761616706848145, |
|
"rewards/rejected": -9.254205703735352, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.5347593582887701, |
|
"grad_norm": 4.09527587890625, |
|
"learning_rate": 2.6330786684285203e-06, |
|
"logits/chosen": 1.3332659006118774, |
|
"logits/rejected": 1.002687692642212, |
|
"logps/chosen": -484.726318359375, |
|
"logps/rejected": -1045.978271484375, |
|
"loss": 0.0881, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -4.514087677001953, |
|
"rewards/margins": 5.59476900100708, |
|
"rewards/rejected": -10.108857154846191, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.5347593582887701, |
|
"eval_logits/chosen": 2.0804264545440674, |
|
"eval_logits/rejected": 1.876003384590149, |
|
"eval_logps/chosen": -478.5644226074219, |
|
"eval_logps/rejected": -972.8605346679688, |
|
"eval_loss": 0.0490301214158535, |
|
"eval_rewards/accuracies": 0.9100746512413025, |
|
"eval_rewards/chosen": -4.457676410675049, |
|
"eval_rewards/margins": 4.911769866943359, |
|
"eval_rewards/rejected": -9.369446754455566, |
|
"eval_runtime": 215.8813, |
|
"eval_samples_per_second": 99.207, |
|
"eval_steps_per_second": 1.552, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.5379050015728216, |
|
"grad_norm": 2.8809056282043457, |
|
"learning_rate": 2.6056582418681164e-06, |
|
"logits/chosen": 1.3827991485595703, |
|
"logits/rejected": 1.080010175704956, |
|
"logps/chosen": -460.39697265625, |
|
"logps/rejected": -988.3931884765625, |
|
"loss": 0.083, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -4.2614946365356445, |
|
"rewards/margins": 5.26639986038208, |
|
"rewards/rejected": -9.527894973754883, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.5410506448568733, |
|
"grad_norm": 3.1663739681243896, |
|
"learning_rate": 2.5782250754949334e-06, |
|
"logits/chosen": 1.3737657070159912, |
|
"logits/rejected": 1.1691869497299194, |
|
"logps/chosen": -492.1012268066406, |
|
"logps/rejected": -988.88623046875, |
|
"loss": 0.081, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -4.593070030212402, |
|
"rewards/margins": 4.955358028411865, |
|
"rewards/rejected": -9.548428535461426, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.5441962881409248, |
|
"grad_norm": 3.5924928188323975, |
|
"learning_rate": 2.55078247708125e-06, |
|
"logits/chosen": 1.6769235134124756, |
|
"logits/rejected": 1.4500882625579834, |
|
"logps/chosen": -518.3870849609375, |
|
"logps/rejected": -1004.2853393554688, |
|
"loss": 0.081, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -4.87607479095459, |
|
"rewards/margins": 4.8361406326293945, |
|
"rewards/rejected": -9.712215423583984, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.5473419314249764, |
|
"grad_norm": 4.5862908363342285, |
|
"learning_rate": 2.5233337555366206e-06, |
|
"logits/chosen": 1.373237133026123, |
|
"logits/rejected": 1.3128149509429932, |
|
"logps/chosen": -525.4005737304688, |
|
"logps/rejected": -1018.37646484375, |
|
"loss": 0.1082, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -4.924373626708984, |
|
"rewards/margins": 4.894657135009766, |
|
"rewards/rejected": -9.81903076171875, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.550487574709028, |
|
"grad_norm": 2.2820427417755127, |
|
"learning_rate": 2.4958822205089e-06, |
|
"logits/chosen": 1.3243175745010376, |
|
"logits/rejected": 1.246760606765747, |
|
"logps/chosen": -448.7105407714844, |
|
"logps/rejected": -906.8756103515625, |
|
"loss": 0.0957, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -4.1411871910095215, |
|
"rewards/margins": 4.5750932693481445, |
|
"rewards/rejected": -8.716279983520508, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.5536332179930796, |
|
"grad_norm": 3.8977651596069336, |
|
"learning_rate": 2.468431181985179e-06, |
|
"logits/chosen": 1.4766485691070557, |
|
"logits/rejected": 1.2319351434707642, |
|
"logps/chosen": -504.727294921875, |
|
"logps/rejected": -1006.2886962890625, |
|
"loss": 0.0752, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -4.717135906219482, |
|
"rewards/margins": 4.985989093780518, |
|
"rewards/rejected": -9.703125953674316, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.5567788612771312, |
|
"grad_norm": 2.3041610717773438, |
|
"learning_rate": 2.4409839498926848e-06, |
|
"logits/chosen": 1.6048414707183838, |
|
"logits/rejected": 1.3524333238601685, |
|
"logps/chosen": -490.019287109375, |
|
"logps/rejected": -1014.9865112304688, |
|
"loss": 0.0925, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -4.56765079498291, |
|
"rewards/margins": 5.2225165367126465, |
|
"rewards/rejected": -9.790167808532715, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.5599245045611828, |
|
"grad_norm": 1.7596114873886108, |
|
"learning_rate": 2.41354383369968e-06, |
|
"logits/chosen": 1.3451988697052002, |
|
"logits/rejected": 1.2149244546890259, |
|
"logps/chosen": -526.4452514648438, |
|
"logps/rejected": -1049.12646484375, |
|
"loss": 0.0762, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -4.929293155670166, |
|
"rewards/margins": 5.181774616241455, |
|
"rewards/rejected": -10.111066818237305, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.5630701478452343, |
|
"grad_norm": 2.1974737644195557, |
|
"learning_rate": 2.3861141420164246e-06, |
|
"logits/chosen": 1.6341116428375244, |
|
"logits/rejected": 1.4171388149261475, |
|
"logps/chosen": -478.0091247558594, |
|
"logps/rejected": -1021.9977416992188, |
|
"loss": 0.0787, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -4.472123146057129, |
|
"rewards/margins": 5.393218517303467, |
|
"rewards/rejected": -9.865342140197754, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.5662157911292859, |
|
"grad_norm": 2.651939630508423, |
|
"learning_rate": 2.3586981821962325e-06, |
|
"logits/chosen": 1.4779160022735596, |
|
"logits/rejected": 1.3249865770339966, |
|
"logps/chosen": -514.2837524414062, |
|
"logps/rejected": -986.8079833984375, |
|
"loss": 0.0847, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -4.807987213134766, |
|
"rewards/margins": 4.707097053527832, |
|
"rewards/rejected": -9.515085220336914, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.5662157911292859, |
|
"eval_logits/chosen": 2.2903590202331543, |
|
"eval_logits/rejected": 2.0999255180358887, |
|
"eval_logps/chosen": -458.1053771972656, |
|
"eval_logps/rejected": -977.0005493164062, |
|
"eval_loss": 0.044130466878414154, |
|
"eval_rewards/accuracies": 0.9130597114562988, |
|
"eval_rewards/chosen": -4.253086566925049, |
|
"eval_rewards/margins": 5.157759666442871, |
|
"eval_rewards/rejected": -9.410846710205078, |
|
"eval_runtime": 216.0337, |
|
"eval_samples_per_second": 99.137, |
|
"eval_steps_per_second": 1.551, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.5693614344133375, |
|
"grad_norm": 4.547900676727295, |
|
"learning_rate": 2.3312992599366922e-06, |
|
"logits/chosen": 1.3972145318984985, |
|
"logits/rejected": 1.2784581184387207, |
|
"logps/chosen": -507.777587890625, |
|
"logps/rejected": -977.9973754882812, |
|
"loss": 0.0925, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -4.732403755187988, |
|
"rewards/margins": 4.675986289978027, |
|
"rewards/rejected": -9.4083890914917, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.5725070776973891, |
|
"grad_norm": 3.3379406929016113, |
|
"learning_rate": 2.3039206788810772e-06, |
|
"logits/chosen": 1.3549931049346924, |
|
"logits/rejected": 1.1878349781036377, |
|
"logps/chosen": -511.08306884765625, |
|
"logps/rejected": -1061.042236328125, |
|
"loss": 0.0734, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -4.774610996246338, |
|
"rewards/margins": 5.468084812164307, |
|
"rewards/rejected": -10.242693901062012, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.5756527209814407, |
|
"grad_norm": 3.089817523956299, |
|
"learning_rate": 2.276565740220006e-06, |
|
"logits/chosen": 1.3616137504577637, |
|
"logits/rejected": 1.2165549993515015, |
|
"logps/chosen": -518.4862060546875, |
|
"logps/rejected": -1029.1578369140625, |
|
"loss": 0.088, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -4.843867778778076, |
|
"rewards/margins": 5.105698108673096, |
|
"rewards/rejected": -9.949565887451172, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.5787983642654922, |
|
"grad_norm": 3.8508617877960205, |
|
"learning_rate": 2.249237742293399e-06, |
|
"logits/chosen": 1.7054471969604492, |
|
"logits/rejected": 1.4460281133651733, |
|
"logps/chosen": -489.3634338378906, |
|
"logps/rejected": -1021.7463989257812, |
|
"loss": 0.0793, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -4.548079013824463, |
|
"rewards/margins": 5.296806812286377, |
|
"rewards/rejected": -9.844886779785156, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.5819440075495439, |
|
"grad_norm": 3.4135677814483643, |
|
"learning_rate": 2.2219399801927818e-06, |
|
"logits/chosen": 1.3625301122665405, |
|
"logits/rejected": 1.4042856693267822, |
|
"logps/chosen": -479.4251403808594, |
|
"logps/rejected": -991.6087036132812, |
|
"loss": 0.0736, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -4.46974515914917, |
|
"rewards/margins": 5.104647636413574, |
|
"rewards/rejected": -9.574393272399902, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.5850896508335954, |
|
"grad_norm": 1.2604960203170776, |
|
"learning_rate": 2.194675745363971e-06, |
|
"logits/chosen": 1.2001352310180664, |
|
"logits/rejected": 1.0285674333572388, |
|
"logps/chosen": -497.36669921875, |
|
"logps/rejected": -994.6357421875, |
|
"loss": 0.0587, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -4.651559829711914, |
|
"rewards/margins": 4.947619915008545, |
|
"rewards/rejected": -9.599178314208984, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.5882352941176471, |
|
"grad_norm": 1.9666972160339355, |
|
"learning_rate": 2.167448325210214e-06, |
|
"logits/chosen": 1.406222939491272, |
|
"logits/rejected": 1.2748348712921143, |
|
"logps/chosen": -512.0443115234375, |
|
"logps/rejected": -981.5230712890625, |
|
"loss": 0.0841, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -4.797163486480713, |
|
"rewards/margins": 4.677144527435303, |
|
"rewards/rejected": -9.474308967590332, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.5913809374016986, |
|
"grad_norm": 2.399761915206909, |
|
"learning_rate": 2.140261002695804e-06, |
|
"logits/chosen": 1.6316009759902954, |
|
"logits/rejected": 1.5087294578552246, |
|
"logps/chosen": -472.5790100097656, |
|
"logps/rejected": -953.4318237304688, |
|
"loss": 0.0648, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -4.410122394561768, |
|
"rewards/margins": 4.779942512512207, |
|
"rewards/rejected": -9.190065383911133, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.5945265806857503, |
|
"grad_norm": 4.341762065887451, |
|
"learning_rate": 2.1131170559502328e-06, |
|
"logits/chosen": 1.6851389408111572, |
|
"logits/rejected": 1.484548807144165, |
|
"logps/chosen": -491.853271484375, |
|
"logps/rejected": -999.4378051757812, |
|
"loss": 0.0846, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -4.5716047286987305, |
|
"rewards/margins": 5.062628269195557, |
|
"rewards/rejected": -9.634233474731445, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.5976722239698018, |
|
"grad_norm": 3.187537431716919, |
|
"learning_rate": 2.0860197578729306e-06, |
|
"logits/chosen": 1.5915769338607788, |
|
"logits/rejected": 1.439206838607788, |
|
"logps/chosen": -475.848876953125, |
|
"logps/rejected": -1013.7513427734375, |
|
"loss": 0.0713, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -4.451183795928955, |
|
"rewards/margins": 5.336948871612549, |
|
"rewards/rejected": -9.788132667541504, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.5976722239698018, |
|
"eval_logits/chosen": 2.286052942276001, |
|
"eval_logits/rejected": 2.0887067317962646, |
|
"eval_logps/chosen": -473.8065185546875, |
|
"eval_logps/rejected": -1001.3447875976562, |
|
"eval_loss": 0.04112754389643669, |
|
"eval_rewards/accuracies": 0.9167910218238831, |
|
"eval_rewards/chosen": -4.410098075866699, |
|
"eval_rewards/margins": 5.244190692901611, |
|
"eval_rewards/rejected": -9.654288291931152, |
|
"eval_runtime": 216.0279, |
|
"eval_samples_per_second": 99.14, |
|
"eval_steps_per_second": 1.551, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.6008178672538534, |
|
"grad_norm": 1.875487208366394, |
|
"learning_rate": 2.058972375738635e-06, |
|
"logits/chosen": 1.528555154800415, |
|
"logits/rejected": 1.322067379951477, |
|
"logps/chosen": -459.99029541015625, |
|
"logps/rejected": -968.6251831054688, |
|
"loss": 0.0775, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -4.283050537109375, |
|
"rewards/margins": 5.045687198638916, |
|
"rewards/rejected": -9.328737258911133, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.603963510537905, |
|
"grad_norm": 2.8932604789733887, |
|
"learning_rate": 2.031978170803433e-06, |
|
"logits/chosen": 1.578962802886963, |
|
"logits/rejected": 1.275937795639038, |
|
"logps/chosen": -466.869873046875, |
|
"logps/rejected": -997.1063232421875, |
|
"loss": 0.0638, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -4.332566261291504, |
|
"rewards/margins": 5.287723541259766, |
|
"rewards/rejected": -9.62028980255127, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.6071091538219566, |
|
"grad_norm": 3.8268966674804688, |
|
"learning_rate": 2.0050403979115372e-06, |
|
"logits/chosen": 1.6024353504180908, |
|
"logits/rejected": 1.4667185544967651, |
|
"logps/chosen": -519.0797119140625, |
|
"logps/rejected": -1030.8382568359375, |
|
"loss": 0.0863, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -4.873663902282715, |
|
"rewards/margins": 5.082743167877197, |
|
"rewards/rejected": -9.956406593322754, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.6102547971060082, |
|
"grad_norm": 1.616674542427063, |
|
"learning_rate": 1.978162305102828e-06, |
|
"logits/chosen": 1.3861491680145264, |
|
"logits/rejected": 1.261344313621521, |
|
"logps/chosen": -475.9474182128906, |
|
"logps/rejected": -974.275390625, |
|
"loss": 0.0714, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -4.4340996742248535, |
|
"rewards/margins": 4.956498146057129, |
|
"rewards/rejected": -9.390597343444824, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.6134004403900598, |
|
"grad_norm": 2.2611825466156006, |
|
"learning_rate": 1.9513471332212218e-06, |
|
"logits/chosen": 1.4959056377410889, |
|
"logits/rejected": 1.239583969116211, |
|
"logps/chosen": -499.00299072265625, |
|
"logps/rejected": -1015.1644287109375, |
|
"loss": 0.0668, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -4.661846160888672, |
|
"rewards/margins": 5.131340503692627, |
|
"rewards/rejected": -9.793185234069824, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.6165460836741113, |
|
"grad_norm": 2.701421022415161, |
|
"learning_rate": 1.9245981155239003e-06, |
|
"logits/chosen": 1.2852675914764404, |
|
"logits/rejected": 1.0676230192184448, |
|
"logps/chosen": -492.07159423828125, |
|
"logps/rejected": -1062.6197509765625, |
|
"loss": 0.0787, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -4.603121757507324, |
|
"rewards/margins": 5.659279823303223, |
|
"rewards/rejected": -10.262402534484863, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.619691726958163, |
|
"grad_norm": 1.3949973583221436, |
|
"learning_rate": 1.8979184772914626e-06, |
|
"logits/chosen": 1.3694791793823242, |
|
"logits/rejected": 1.3277966976165771, |
|
"logps/chosen": -443.76300048828125, |
|
"logps/rejected": -968.4646606445312, |
|
"loss": 0.0792, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -4.119488716125488, |
|
"rewards/margins": 5.219561576843262, |
|
"rewards/rejected": -9.339049339294434, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.6228373702422145, |
|
"grad_norm": 2.097567081451416, |
|
"learning_rate": 1.8713114354390302e-06, |
|
"logits/chosen": 1.3356621265411377, |
|
"logits/rejected": 1.1214429140090942, |
|
"logps/chosen": -444.4170837402344, |
|
"logps/rejected": -960.7073974609375, |
|
"loss": 0.0787, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -4.113885402679443, |
|
"rewards/margins": 5.141117095947266, |
|
"rewards/rejected": -9.255002975463867, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.6259830135262662, |
|
"grad_norm": 3.7821717262268066, |
|
"learning_rate": 1.8447801981283692e-06, |
|
"logits/chosen": 1.2892686128616333, |
|
"logits/rejected": 1.1204421520233154, |
|
"logps/chosen": -492.14544677734375, |
|
"logps/rejected": -1007.2326049804688, |
|
"loss": 0.0763, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -4.580545425415039, |
|
"rewards/margins": 5.139875411987305, |
|
"rewards/rejected": -9.720420837402344, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.6291286568103177, |
|
"grad_norm": 2.3394644260406494, |
|
"learning_rate": 1.818327964381063e-06, |
|
"logits/chosen": 1.5386545658111572, |
|
"logits/rejected": 1.2143795490264893, |
|
"logps/chosen": -512.078125, |
|
"logps/rejected": -1119.013916015625, |
|
"loss": 0.0553, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -4.799826622009277, |
|
"rewards/margins": 6.028892993927002, |
|
"rewards/rejected": -10.828720092773438, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.6291286568103177, |
|
"eval_logits/chosen": 2.2607874870300293, |
|
"eval_logits/rejected": 2.046884775161743, |
|
"eval_logps/chosen": -529.6686401367188, |
|
"eval_logps/rejected": -1093.740234375, |
|
"eval_loss": 0.037754353135824203, |
|
"eval_rewards/accuracies": 0.9123134613037109, |
|
"eval_rewards/chosen": -4.9687180519104, |
|
"eval_rewards/margins": 5.609524250030518, |
|
"eval_rewards/rejected": -10.578243255615234, |
|
"eval_runtime": 216.3701, |
|
"eval_samples_per_second": 98.983, |
|
"eval_steps_per_second": 1.548, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.6322743000943692, |
|
"grad_norm": 4.657315731048584, |
|
"learning_rate": 1.7919579236927873e-06, |
|
"logits/chosen": 1.5727989673614502, |
|
"logits/rejected": 1.5137349367141724, |
|
"logps/chosen": -547.173828125, |
|
"logps/rejected": -1079.36181640625, |
|
"loss": 0.0835, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -5.141914367675781, |
|
"rewards/margins": 5.302892684936523, |
|
"rewards/rejected": -10.444807052612305, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.6354199433784209, |
|
"grad_norm": 3.0012001991271973, |
|
"learning_rate": 1.7656732556487349e-06, |
|
"logits/chosen": 1.6588795185089111, |
|
"logits/rejected": 1.4154224395751953, |
|
"logps/chosen": -508.59136962890625, |
|
"logps/rejected": -1022.4967651367188, |
|
"loss": 0.0759, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -4.74569845199585, |
|
"rewards/margins": 5.124057292938232, |
|
"rewards/rejected": -9.869755744934082, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.6385655866624724, |
|
"grad_norm": 2.0503461360931396, |
|
"learning_rate": 1.7394771295402357e-06, |
|
"logits/chosen": 1.388684868812561, |
|
"logits/rejected": 1.3237318992614746, |
|
"logps/chosen": -420.70574951171875, |
|
"logps/rejected": -906.8675537109375, |
|
"loss": 0.0695, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.863591432571411, |
|
"rewards/margins": 4.836082458496094, |
|
"rewards/rejected": -8.699673652648926, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.6417112299465241, |
|
"grad_norm": 1.5963932275772095, |
|
"learning_rate": 1.713372703982616e-06, |
|
"logits/chosen": 1.6106688976287842, |
|
"logits/rejected": 1.378296136856079, |
|
"logps/chosen": -454.54437255859375, |
|
"logps/rejected": -1000.5890502929688, |
|
"loss": 0.0672, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -4.217381477355957, |
|
"rewards/margins": 5.434484958648682, |
|
"rewards/rejected": -9.65186595916748, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.6448568732305756, |
|
"grad_norm": 2.611083745956421, |
|
"learning_rate": 1.6873631265343482e-06, |
|
"logits/chosen": 1.5027741193771362, |
|
"logits/rejected": 1.4583556652069092, |
|
"logps/chosen": -517.8651123046875, |
|
"logps/rejected": -1013.5433349609375, |
|
"loss": 0.0746, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -4.838639736175537, |
|
"rewards/margins": 4.938787460327148, |
|
"rewards/rejected": -9.777427673339844, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.6480025165146273, |
|
"grad_norm": 3.0739071369171143, |
|
"learning_rate": 1.6614515333175301e-06, |
|
"logits/chosen": 1.4629807472229004, |
|
"logits/rejected": 1.3686448335647583, |
|
"logps/chosen": -518.0137939453125, |
|
"logps/rejected": -1042.031494140625, |
|
"loss": 0.0836, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -4.832829475402832, |
|
"rewards/margins": 5.245598793029785, |
|
"rewards/rejected": -10.0784273147583, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.6511481597986788, |
|
"grad_norm": 2.08311128616333, |
|
"learning_rate": 1.6356410486397465e-06, |
|
"logits/chosen": 1.7262866497039795, |
|
"logits/rejected": 1.4022530317306519, |
|
"logps/chosen": -440.1412658691406, |
|
"logps/rejected": -1064.7659912109375, |
|
"loss": 0.0969, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -4.076186180114746, |
|
"rewards/margins": 6.208160400390625, |
|
"rewards/rejected": -10.284345626831055, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.6542938030827304, |
|
"grad_norm": 2.8899753093719482, |
|
"learning_rate": 1.6099347846173515e-06, |
|
"logits/chosen": 1.777707815170288, |
|
"logits/rejected": 1.5722458362579346, |
|
"logps/chosen": -450.6910095214844, |
|
"logps/rejected": -995.6285400390625, |
|
"loss": 0.066, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -4.187132835388184, |
|
"rewards/margins": 5.425961494445801, |
|
"rewards/rejected": -9.613094329833984, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.657439446366782, |
|
"grad_norm": 4.856938362121582, |
|
"learning_rate": 1.5843358408002263e-06, |
|
"logits/chosen": 1.6731908321380615, |
|
"logits/rejected": 1.480979323387146, |
|
"logps/chosen": -527.1090087890625, |
|
"logps/rejected": -1042.3140869140625, |
|
"loss": 0.0809, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -4.945659637451172, |
|
"rewards/margins": 5.120595932006836, |
|
"rewards/rejected": -10.066255569458008, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.6605850896508336, |
|
"grad_norm": 2.9849703311920166, |
|
"learning_rate": 1.5588473037980448e-06, |
|
"logits/chosen": 1.7262375354766846, |
|
"logits/rejected": 1.497453212738037, |
|
"logps/chosen": -525.8902587890625, |
|
"logps/rejected": -1072.6090087890625, |
|
"loss": 0.0668, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -4.941816806793213, |
|
"rewards/margins": 5.42348051071167, |
|
"rewards/rejected": -10.365297317504883, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.6605850896508336, |
|
"eval_logits/chosen": 2.3367607593536377, |
|
"eval_logits/rejected": 2.1354455947875977, |
|
"eval_logps/chosen": -507.6487731933594, |
|
"eval_logps/rejected": -1068.1822509765625, |
|
"eval_loss": 0.0362231507897377, |
|
"eval_rewards/accuracies": 0.9190298318862915, |
|
"eval_rewards/chosen": -4.7485198974609375, |
|
"eval_rewards/margins": 5.574143886566162, |
|
"eval_rewards/rejected": -10.322663307189941, |
|
"eval_runtime": 216.5807, |
|
"eval_samples_per_second": 98.887, |
|
"eval_steps_per_second": 1.547, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.6637307329348852, |
|
"grad_norm": 1.8077006340026855, |
|
"learning_rate": 1.5334722469081071e-06, |
|
"logits/chosen": 1.7123782634735107, |
|
"logits/rejected": 1.5008488893508911, |
|
"logps/chosen": -503.697265625, |
|
"logps/rejected": -1033.975341796875, |
|
"loss": 0.0767, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -4.715047359466553, |
|
"rewards/margins": 5.269798278808594, |
|
"rewards/rejected": -9.984844207763672, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.6668763762189368, |
|
"grad_norm": 3.5828003883361816, |
|
"learning_rate": 1.508213729744773e-06, |
|
"logits/chosen": 1.653031587600708, |
|
"logits/rejected": 1.3827488422393799, |
|
"logps/chosen": -505.1139221191406, |
|
"logps/rejected": -1056.74951171875, |
|
"loss": 0.0823, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -4.713560581207275, |
|
"rewards/margins": 5.496426582336426, |
|
"rewards/rejected": -10.209986686706543, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.6700220195029883, |
|
"grad_norm": 2.5747244358062744, |
|
"learning_rate": 1.483074797870547e-06, |
|
"logits/chosen": 1.361879587173462, |
|
"logits/rejected": 1.1789066791534424, |
|
"logps/chosen": -501.65203857421875, |
|
"logps/rejected": -1063.2545166015625, |
|
"loss": 0.0678, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -4.677037715911865, |
|
"rewards/margins": 5.596593856811523, |
|
"rewards/rejected": -10.273633003234863, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.67316766278704, |
|
"grad_norm": 4.103507041931152, |
|
"learning_rate": 1.4580584824288585e-06, |
|
"logits/chosen": 1.5563806295394897, |
|
"logits/rejected": 1.3796488046646118, |
|
"logps/chosen": -461.9947814941406, |
|
"logps/rejected": -1061.818603515625, |
|
"loss": 0.0681, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -4.2779340744018555, |
|
"rewards/margins": 5.980292797088623, |
|
"rewards/rejected": -10.258227348327637, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.6763133060710915, |
|
"grad_norm": 3.899409770965576, |
|
"learning_rate": 1.4331677997785786e-06, |
|
"logits/chosen": 1.3618905544281006, |
|
"logits/rejected": 1.26822829246521, |
|
"logps/chosen": -495.64288330078125, |
|
"logps/rejected": -1018.1676025390625, |
|
"loss": 0.0746, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -4.625652313232422, |
|
"rewards/margins": 5.221669673919678, |
|
"rewards/rejected": -9.847322463989258, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.6794589493551432, |
|
"grad_norm": 2.615987539291382, |
|
"learning_rate": 1.4084057511303212e-06, |
|
"logits/chosen": 1.3500601053237915, |
|
"logits/rejected": 1.1144622564315796, |
|
"logps/chosen": -491.9159240722656, |
|
"logps/rejected": -1018.6404418945312, |
|
"loss": 0.078, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -4.590536594390869, |
|
"rewards/margins": 5.236490726470947, |
|
"rewards/rejected": -9.827028274536133, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.6826045926391947, |
|
"grad_norm": 2.499993085861206, |
|
"learning_rate": 1.383775322184569e-06, |
|
"logits/chosen": 1.3666261434555054, |
|
"logits/rejected": 1.204685926437378, |
|
"logps/chosen": -495.419677734375, |
|
"logps/rejected": -962.2130737304688, |
|
"loss": 0.0757, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -4.617236614227295, |
|
"rewards/margins": 4.661618232727051, |
|
"rewards/rejected": -9.278854370117188, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.6857502359232464, |
|
"grad_norm": 4.112603187561035, |
|
"learning_rate": 1.3592794827716726e-06, |
|
"logits/chosen": 1.4919835329055786, |
|
"logits/rejected": 1.2371511459350586, |
|
"logps/chosen": -509.6441955566406, |
|
"logps/rejected": -1004.6423950195312, |
|
"loss": 0.0766, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -4.761294364929199, |
|
"rewards/margins": 4.93589973449707, |
|
"rewards/rejected": -9.697192192077637, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.6888958792072979, |
|
"grad_norm": 2.9656105041503906, |
|
"learning_rate": 1.33492118649376e-06, |
|
"logits/chosen": 1.4110748767852783, |
|
"logits/rejected": 1.158238172531128, |
|
"logps/chosen": -515.8118286132812, |
|
"logps/rejected": -1045.42431640625, |
|
"loss": 0.0695, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -4.818012237548828, |
|
"rewards/margins": 5.288434028625488, |
|
"rewards/rejected": -10.106447219848633, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.6920415224913494, |
|
"grad_norm": 1.8345441818237305, |
|
"learning_rate": 1.310703370368605e-06, |
|
"logits/chosen": 1.3728022575378418, |
|
"logits/rejected": 1.2915928363800049, |
|
"logps/chosen": -491.84503173828125, |
|
"logps/rejected": -1084.5633544921875, |
|
"loss": 0.0528, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -4.574017524719238, |
|
"rewards/margins": 5.912226676940918, |
|
"rewards/rejected": -10.486245155334473, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.6920415224913494, |
|
"eval_logits/chosen": 2.159437417984009, |
|
"eval_logits/rejected": 1.9571863412857056, |
|
"eval_logps/chosen": -500.4605407714844, |
|
"eval_logps/rejected": -1057.6173095703125, |
|
"eval_loss": 0.03563934564590454, |
|
"eval_rewards/accuracies": 0.9175373315811157, |
|
"eval_rewards/chosen": -4.676637649536133, |
|
"eval_rewards/margins": 5.540375232696533, |
|
"eval_rewards/rejected": -10.217013359069824, |
|
"eval_runtime": 216.1143, |
|
"eval_samples_per_second": 99.1, |
|
"eval_steps_per_second": 1.55, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.6951871657754011, |
|
"grad_norm": 2.712627649307251, |
|
"learning_rate": 1.28662895447549e-06, |
|
"logits/chosen": 1.4879462718963623, |
|
"logits/rejected": 1.3425077199935913, |
|
"logps/chosen": -491.4945373535156, |
|
"logps/rejected": -1018.0159912109375, |
|
"loss": 0.0729, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -4.580411911010742, |
|
"rewards/margins": 5.243422031402588, |
|
"rewards/rejected": -9.823833465576172, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.6983328090594526, |
|
"grad_norm": 1.8771477937698364, |
|
"learning_rate": 1.2627008416031234e-06, |
|
"logits/chosen": 1.2122427225112915, |
|
"logits/rejected": 1.1126110553741455, |
|
"logps/chosen": -515.5506591796875, |
|
"logps/rejected": -1040.5745849609375, |
|
"loss": 0.0591, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -4.816196441650391, |
|
"rewards/margins": 5.2558979988098145, |
|
"rewards/rejected": -10.072093963623047, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.7014784523435043, |
|
"grad_norm": 3.229480743408203, |
|
"learning_rate": 1.2389219168996275e-06, |
|
"logits/chosen": 1.109668254852295, |
|
"logits/rejected": 0.8329025506973267, |
|
"logps/chosen": -496.52435302734375, |
|
"logps/rejected": -1035.8121337890625, |
|
"loss": 0.0648, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -4.638185024261475, |
|
"rewards/margins": 5.377082347869873, |
|
"rewards/rejected": -10.015268325805664, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.7046240956275558, |
|
"grad_norm": 2.889782667160034, |
|
"learning_rate": 1.2152950475246621e-06, |
|
"logits/chosen": 1.315767526626587, |
|
"logits/rejected": 1.145149827003479, |
|
"logps/chosen": -474.19769287109375, |
|
"logps/rejected": -1005.68701171875, |
|
"loss": 0.0715, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -4.41811990737915, |
|
"rewards/margins": 5.282149314880371, |
|
"rewards/rejected": -9.70026969909668, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.7077697389116074, |
|
"grad_norm": 4.062774658203125, |
|
"learning_rate": 1.191823082303715e-06, |
|
"logits/chosen": 1.4250564575195312, |
|
"logits/rejected": 1.3247438669204712, |
|
"logps/chosen": -444.0696716308594, |
|
"logps/rejected": -981.8221435546875, |
|
"loss": 0.0712, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -4.09854793548584, |
|
"rewards/margins": 5.370059013366699, |
|
"rewards/rejected": -9.468606948852539, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.710915382195659, |
|
"grad_norm": 4.499232292175293, |
|
"learning_rate": 1.1685088513846022e-06, |
|
"logits/chosen": 1.277414321899414, |
|
"logits/rejected": 1.2257994413375854, |
|
"logps/chosen": -500.9100646972656, |
|
"logps/rejected": -1016.2658081054688, |
|
"loss": 0.069, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -4.664144515991211, |
|
"rewards/margins": 5.134991645812988, |
|
"rewards/rejected": -9.7991361618042, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.7140610254797106, |
|
"grad_norm": 3.174175262451172, |
|
"learning_rate": 1.1453551658962216e-06, |
|
"logits/chosen": 1.4755274057388306, |
|
"logits/rejected": 1.213168740272522, |
|
"logps/chosen": -507.1438903808594, |
|
"logps/rejected": -1052.708984375, |
|
"loss": 0.0527, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -4.751412391662598, |
|
"rewards/margins": 5.42425012588501, |
|
"rewards/rejected": -10.175663948059082, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.7172066687637622, |
|
"grad_norm": 2.1683666706085205, |
|
"learning_rate": 1.1223648176095992e-06, |
|
"logits/chosen": 1.091498613357544, |
|
"logits/rejected": 1.0004950761795044, |
|
"logps/chosen": -503.81689453125, |
|
"logps/rejected": -1073.2960205078125, |
|
"loss": 0.0599, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -4.70639181137085, |
|
"rewards/margins": 5.677199840545654, |
|
"rewards/rejected": -10.38359260559082, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.7203523120478138, |
|
"grad_norm": 2.7001304626464844, |
|
"learning_rate": 1.0995405786012687e-06, |
|
"logits/chosen": 1.4330816268920898, |
|
"logits/rejected": 1.1723562479019165, |
|
"logps/chosen": -503.40789794921875, |
|
"logps/rejected": -1042.260986328125, |
|
"loss": 0.0602, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -4.702376365661621, |
|
"rewards/margins": 5.370804309844971, |
|
"rewards/rejected": -10.07318115234375, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.7234979553318653, |
|
"grad_norm": 2.845198392868042, |
|
"learning_rate": 1.0768852009190275e-06, |
|
"logits/chosen": 1.3931114673614502, |
|
"logits/rejected": 1.3433005809783936, |
|
"logps/chosen": -490.6017150878906, |
|
"logps/rejected": -1066.7279052734375, |
|
"loss": 0.0596, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -4.5713019371032715, |
|
"rewards/margins": 5.7634968757629395, |
|
"rewards/rejected": -10.334797859191895, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.7234979553318653, |
|
"eval_logits/chosen": 2.211730480194092, |
|
"eval_logits/rejected": 2.0040743350982666, |
|
"eval_logps/chosen": -494.5928955078125, |
|
"eval_logps/rejected": -1057.1298828125, |
|
"eval_loss": 0.033993348479270935, |
|
"eval_rewards/accuracies": 0.9235074520111084, |
|
"eval_rewards/chosen": -4.617961406707764, |
|
"eval_rewards/margins": 5.594176769256592, |
|
"eval_rewards/rejected": -10.212138175964355, |
|
"eval_runtime": 216.3884, |
|
"eval_samples_per_second": 98.975, |
|
"eval_steps_per_second": 1.548, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.726643598615917, |
|
"grad_norm": 1.401442289352417, |
|
"learning_rate": 1.0544014162501065e-06, |
|
"logits/chosen": 1.4220385551452637, |
|
"logits/rejected": 1.2480995655059814, |
|
"logps/chosen": -494.851318359375, |
|
"logps/rejected": -1024.6070556640625, |
|
"loss": 0.0659, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -4.607827186584473, |
|
"rewards/margins": 5.269275665283203, |
|
"rewards/rejected": -9.877102851867676, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.7297892418999685, |
|
"grad_norm": 2.1367287635803223, |
|
"learning_rate": 1.0320919355917951e-06, |
|
"logits/chosen": 1.483291506767273, |
|
"logits/rejected": 1.3624471426010132, |
|
"logps/chosen": -475.54974365234375, |
|
"logps/rejected": -980.1355590820312, |
|
"loss": 0.0703, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -4.424760341644287, |
|
"rewards/margins": 5.013537406921387, |
|
"rewards/rejected": -9.438297271728516, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.7329348851840202, |
|
"grad_norm": 3.646291732788086, |
|
"learning_rate": 1.0099594489245593e-06, |
|
"logits/chosen": 1.7658379077911377, |
|
"logits/rejected": 1.5886876583099365, |
|
"logps/chosen": -502.6063537597656, |
|
"logps/rejected": -1023.0255737304688, |
|
"loss": 0.0757, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -4.701047420501709, |
|
"rewards/margins": 5.172977924346924, |
|
"rewards/rejected": -9.874025344848633, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.7360805284680717, |
|
"grad_norm": 2.939910888671875, |
|
"learning_rate": 9.880066248876977e-07, |
|
"logits/chosen": 1.554218053817749, |
|
"logits/rejected": 1.3716495037078857, |
|
"logps/chosen": -507.82977294921875, |
|
"logps/rejected": -1061.218505859375, |
|
"loss": 0.0576, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -4.761552333831787, |
|
"rewards/margins": 5.506254196166992, |
|
"rewards/rejected": -10.267806053161621, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.7392261717521234, |
|
"grad_norm": 1.9353808164596558, |
|
"learning_rate": 9.662361104575688e-07, |
|
"logits/chosen": 1.6388695240020752, |
|
"logits/rejected": 1.5070264339447021, |
|
"logps/chosen": -507.5098571777344, |
|
"logps/rejected": -1031.0897216796875, |
|
"loss": 0.0679, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -4.739305019378662, |
|
"rewards/margins": 5.227367401123047, |
|
"rewards/rejected": -9.966673851013184, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.7423718150361749, |
|
"grad_norm": 2.6814684867858887, |
|
"learning_rate": 9.446505306284276e-07, |
|
"logits/chosen": 1.3904848098754883, |
|
"logits/rejected": 1.2182855606079102, |
|
"logps/chosen": -494.0530700683594, |
|
"logps/rejected": -1040.4232177734375, |
|
"loss": 0.0705, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -4.611108303070068, |
|
"rewards/margins": 5.45358943939209, |
|
"rewards/rejected": -10.064697265625, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.7455174583202265, |
|
"grad_norm": 2.1408207416534424, |
|
"learning_rate": 9.232524880959173e-07, |
|
"logits/chosen": 1.6318490505218506, |
|
"logits/rejected": 1.3836729526519775, |
|
"logps/chosen": -533.8260498046875, |
|
"logps/rejected": -1014.0780029296875, |
|
"loss": 0.0694, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -4.986173152923584, |
|
"rewards/margins": 4.802958011627197, |
|
"rewards/rejected": -9.789131164550781, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.7486631016042781, |
|
"grad_norm": 1.7775107622146606, |
|
"learning_rate": 9.02044562943247e-07, |
|
"logits/chosen": 1.561553716659546, |
|
"logits/rejected": 1.3704016208648682, |
|
"logps/chosen": -532.5505981445312, |
|
"logps/rejected": -1075.1712646484375, |
|
"loss": 0.0614, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -4.986807346343994, |
|
"rewards/margins": 5.408608436584473, |
|
"rewards/rejected": -10.395415306091309, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.7518087448883297, |
|
"grad_norm": 3.8989508152008057, |
|
"learning_rate": 8.810293123300956e-07, |
|
"logits/chosen": 1.4359630346298218, |
|
"logits/rejected": 1.3191871643066406, |
|
"logps/chosen": -510.1795959472656, |
|
"logps/rejected": -1077.068359375, |
|
"loss": 0.0715, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -4.7562456130981445, |
|
"rewards/margins": 5.643407344818115, |
|
"rewards/rejected": -10.399652481079102, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.7549543881723813, |
|
"grad_norm": 1.785208821296692, |
|
"learning_rate": 8.602092701842821e-07, |
|
"logits/chosen": 1.6097145080566406, |
|
"logits/rejected": 1.4199837446212769, |
|
"logps/chosen": -485.87200927734375, |
|
"logps/rejected": -1022.1697998046875, |
|
"loss": 0.063, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -4.5243449211120605, |
|
"rewards/margins": 5.349452018737793, |
|
"rewards/rejected": -9.873797416687012, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.7549543881723813, |
|
"eval_logits/chosen": 2.3488142490386963, |
|
"eval_logits/rejected": 2.149275541305542, |
|
"eval_logps/chosen": -486.36529541015625, |
|
"eval_logps/rejected": -1054.6712646484375, |
|
"eval_loss": 0.03283367305994034, |
|
"eval_rewards/accuracies": 0.9257462620735168, |
|
"eval_rewards/chosen": -4.5356855392456055, |
|
"eval_rewards/margins": 5.651867866516113, |
|
"eval_rewards/rejected": -10.187552452087402, |
|
"eval_runtime": 216.3869, |
|
"eval_samples_per_second": 98.976, |
|
"eval_steps_per_second": 1.548, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.7581000314564328, |
|
"grad_norm": 2.3619818687438965, |
|
"learning_rate": 8.395869468962337e-07, |
|
"logits/chosen": 1.6147444248199463, |
|
"logits/rejected": 1.395455241203308, |
|
"logps/chosen": -495.06939697265625, |
|
"logps/rejected": -1095.788330078125, |
|
"loss": 0.0587, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -4.632855415344238, |
|
"rewards/margins": 5.956536293029785, |
|
"rewards/rejected": -10.589391708374023, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.7612456747404844, |
|
"grad_norm": 1.6136853694915771, |
|
"learning_rate": 8.191648290162957e-07, |
|
"logits/chosen": 1.7205461263656616, |
|
"logits/rejected": 1.6001455783843994, |
|
"logps/chosen": -488.3507385253906, |
|
"logps/rejected": -1074.3258056640625, |
|
"loss": 0.0606, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -4.576030731201172, |
|
"rewards/margins": 5.819344520568848, |
|
"rewards/rejected": -10.39537525177002, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.764391318024536, |
|
"grad_norm": 4.33929443359375, |
|
"learning_rate": 7.989453789549131e-07, |
|
"logits/chosen": 1.5811232328414917, |
|
"logits/rejected": 1.5288398265838623, |
|
"logps/chosen": -530.9954223632812, |
|
"logps/rejected": -1079.800537109375, |
|
"loss": 0.0758, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -4.9601640701293945, |
|
"rewards/margins": 5.483851432800293, |
|
"rewards/rejected": -10.444014549255371, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.7675369613085876, |
|
"grad_norm": 1.9559452533721924, |
|
"learning_rate": 7.789310346857243e-07, |
|
"logits/chosen": 1.7974998950958252, |
|
"logits/rejected": 1.6878284215927124, |
|
"logps/chosen": -516.4877319335938, |
|
"logps/rejected": -997.9284057617188, |
|
"loss": 0.0624, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -4.846479415893555, |
|
"rewards/margins": 4.782151222229004, |
|
"rewards/rejected": -9.628629684448242, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.7706826045926392, |
|
"grad_norm": 2.785346031188965, |
|
"learning_rate": 7.591242094515983e-07, |
|
"logits/chosen": 1.5722239017486572, |
|
"logits/rejected": 1.4374796152114868, |
|
"logps/chosen": -528.6546630859375, |
|
"logps/rejected": -994.2796630859375, |
|
"loss": 0.0662, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -4.938473701477051, |
|
"rewards/margins": 4.6353349685668945, |
|
"rewards/rejected": -9.573808670043945, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.7738282478766908, |
|
"grad_norm": 2.5824944972991943, |
|
"learning_rate": 7.395272914736604e-07, |
|
"logits/chosen": 1.4237116575241089, |
|
"logits/rejected": 1.2312453985214233, |
|
"logps/chosen": -511.4111328125, |
|
"logps/rejected": -1094.482177734375, |
|
"loss": 0.0636, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -4.7887115478515625, |
|
"rewards/margins": 5.799224376678467, |
|
"rewards/rejected": -10.587934494018555, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.7769738911607423, |
|
"grad_norm": 3.5525786876678467, |
|
"learning_rate": 7.201426436633289e-07, |
|
"logits/chosen": 1.383684754371643, |
|
"logits/rejected": 1.2983518838882446, |
|
"logps/chosen": -495.83428955078125, |
|
"logps/rejected": -1077.6273193359375, |
|
"loss": 0.0627, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -4.624549865722656, |
|
"rewards/margins": 5.804167747497559, |
|
"rewards/rejected": -10.428717613220215, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.780119534444794, |
|
"grad_norm": 1.6180325746536255, |
|
"learning_rate": 7.009726033374045e-07, |
|
"logits/chosen": 1.5975977182388306, |
|
"logits/rejected": 1.3688011169433594, |
|
"logps/chosen": -537.0506591796875, |
|
"logps/rejected": -1072.892333984375, |
|
"loss": 0.0647, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -5.023386478424072, |
|
"rewards/margins": 5.3509063720703125, |
|
"rewards/rejected": -10.374292373657227, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.7832651777288455, |
|
"grad_norm": 2.176626443862915, |
|
"learning_rate": 6.820194819362477e-07, |
|
"logits/chosen": 1.496756672859192, |
|
"logits/rejected": 1.3709968328475952, |
|
"logps/chosen": -487.99371337890625, |
|
"logps/rejected": -1085.553466796875, |
|
"loss": 0.0616, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -4.553643226623535, |
|
"rewards/margins": 5.951053619384766, |
|
"rewards/rejected": -10.5046968460083, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.7864108210128972, |
|
"grad_norm": 2.5447304248809814, |
|
"learning_rate": 6.632855647450784e-07, |
|
"logits/chosen": 1.41390860080719, |
|
"logits/rejected": 1.2979408502578735, |
|
"logps/chosen": -501.66461181640625, |
|
"logps/rejected": -1036.599365234375, |
|
"loss": 0.0558, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -4.6812262535095215, |
|
"rewards/margins": 5.344993591308594, |
|
"rewards/rejected": -10.026220321655273, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.7864108210128972, |
|
"eval_logits/chosen": 2.3275063037872314, |
|
"eval_logits/rejected": 2.120762586593628, |
|
"eval_logps/chosen": -504.3434753417969, |
|
"eval_logps/rejected": -1092.718505859375, |
|
"eval_loss": 0.031053731217980385, |
|
"eval_rewards/accuracies": 0.9261193871498108, |
|
"eval_rewards/chosen": -4.71546745300293, |
|
"eval_rewards/margins": 5.852557182312012, |
|
"eval_rewards/rejected": -10.568025588989258, |
|
"eval_runtime": 216.5885, |
|
"eval_samples_per_second": 98.883, |
|
"eval_steps_per_second": 1.547, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.7895564642969487, |
|
"grad_norm": 3.3239963054656982, |
|
"learning_rate": 6.447731106184183e-07, |
|
"logits/chosen": 1.494217872619629, |
|
"logits/rejected": 1.3212921619415283, |
|
"logps/chosen": -518.6863403320312, |
|
"logps/rejected": -1040.155517578125, |
|
"loss": 0.0492, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -4.850244522094727, |
|
"rewards/margins": 5.21368932723999, |
|
"rewards/rejected": -10.063933372497559, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.7927021075810003, |
|
"grad_norm": 1.9487500190734863, |
|
"learning_rate": 6.264843517077355e-07, |
|
"logits/chosen": 1.693529486656189, |
|
"logits/rejected": 1.6498600244522095, |
|
"logps/chosen": -487.17913818359375, |
|
"logps/rejected": -1024.142822265625, |
|
"loss": 0.0597, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -4.552428245544434, |
|
"rewards/margins": 5.328381538391113, |
|
"rewards/rejected": -9.880809783935547, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.7958477508650519, |
|
"grad_norm": 2.9930226802825928, |
|
"learning_rate": 6.084214931922988e-07, |
|
"logits/chosen": 1.6899865865707397, |
|
"logits/rejected": 1.5415594577789307, |
|
"logps/chosen": -488.88018798828125, |
|
"logps/rejected": -1136.771484375, |
|
"loss": 0.0621, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -4.568345546722412, |
|
"rewards/margins": 6.412973880767822, |
|
"rewards/rejected": -10.981319427490234, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.7989933941491035, |
|
"grad_norm": 2.1499834060668945, |
|
"learning_rate": 5.905867130132858e-07, |
|
"logits/chosen": 1.594299554824829, |
|
"logits/rejected": 1.3576323986053467, |
|
"logps/chosen": -532.1160278320312, |
|
"logps/rejected": -1078.0628662109375, |
|
"loss": 0.0652, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -4.983504295349121, |
|
"rewards/margins": 5.434745788574219, |
|
"rewards/rejected": -10.41825008392334, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.8021390374331551, |
|
"grad_norm": 1.3630064725875854, |
|
"learning_rate": 5.729821616111777e-07, |
|
"logits/chosen": 1.3776555061340332, |
|
"logits/rejected": 1.2333744764328003, |
|
"logps/chosen": -500.73834228515625, |
|
"logps/rejected": -1054.62841796875, |
|
"loss": 0.0573, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -4.675589561462402, |
|
"rewards/margins": 5.534988880157471, |
|
"rewards/rejected": -10.210577011108398, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.8052846807172067, |
|
"grad_norm": 2.679340124130249, |
|
"learning_rate": 5.556099616664678e-07, |
|
"logits/chosen": 1.6116464138031006, |
|
"logits/rejected": 1.4320402145385742, |
|
"logps/chosen": -498.9954528808594, |
|
"logps/rejected": -1082.236328125, |
|
"loss": 0.0653, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -4.666613578796387, |
|
"rewards/margins": 5.797518253326416, |
|
"rewards/rejected": -10.464132308959961, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.8084303240012582, |
|
"grad_norm": 2.050906181335449, |
|
"learning_rate": 5.384722078437163e-07, |
|
"logits/chosen": 1.3231595754623413, |
|
"logits/rejected": 1.0914764404296875, |
|
"logps/chosen": -481.60302734375, |
|
"logps/rejected": -1077.0623779296875, |
|
"loss": 0.0558, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -4.489951133728027, |
|
"rewards/margins": 5.934098720550537, |
|
"rewards/rejected": -10.424050331115723, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.8115759672853099, |
|
"grad_norm": 2.0678343772888184, |
|
"learning_rate": 5.215709665389884e-07, |
|
"logits/chosen": 1.6653324365615845, |
|
"logits/rejected": 1.2603800296783447, |
|
"logps/chosen": -480.2110290527344, |
|
"logps/rejected": -1065.371337890625, |
|
"loss": 0.0603, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -4.470128536224365, |
|
"rewards/margins": 5.816575527191162, |
|
"rewards/rejected": -10.286703109741211, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.8147216105693614, |
|
"grad_norm": 4.421222686767578, |
|
"learning_rate": 5.049082756306933e-07, |
|
"logits/chosen": 1.2575275897979736, |
|
"logits/rejected": 1.1939196586608887, |
|
"logps/chosen": -500.1796875, |
|
"logps/rejected": -1039.970703125, |
|
"loss": 0.0656, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -4.680001258850098, |
|
"rewards/margins": 5.361957550048828, |
|
"rewards/rejected": -10.04195785522461, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.817867253853413, |
|
"grad_norm": 2.2859609127044678, |
|
"learning_rate": 4.884861442338703e-07, |
|
"logits/chosen": 1.4497255086898804, |
|
"logits/rejected": 1.3707365989685059, |
|
"logps/chosen": -507.47845458984375, |
|
"logps/rejected": -1101.427734375, |
|
"loss": 0.0552, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -4.734853744506836, |
|
"rewards/margins": 5.897332191467285, |
|
"rewards/rejected": -10.632184982299805, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.817867253853413, |
|
"eval_logits/chosen": 2.259222984313965, |
|
"eval_logits/rejected": 2.054378032684326, |
|
"eval_logps/chosen": -498.5399169921875, |
|
"eval_logps/rejected": -1072.4942626953125, |
|
"eval_loss": 0.031242508441209793, |
|
"eval_rewards/accuracies": 0.9253731369972229, |
|
"eval_rewards/chosen": -4.657431125640869, |
|
"eval_rewards/margins": 5.708352088928223, |
|
"eval_rewards/rejected": -10.36578369140625, |
|
"eval_runtime": 216.4474, |
|
"eval_samples_per_second": 98.948, |
|
"eval_steps_per_second": 1.548, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.8210128971374646, |
|
"grad_norm": 3.060898542404175, |
|
"learning_rate": 4.7230655245793286e-07, |
|
"logits/chosen": 1.5630236864089966, |
|
"logits/rejected": 1.4133121967315674, |
|
"logps/chosen": -493.82373046875, |
|
"logps/rejected": -1063.783203125, |
|
"loss": 0.0572, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -4.611185550689697, |
|
"rewards/margins": 5.6810383796691895, |
|
"rewards/rejected": -10.292223930358887, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.8241585404215162, |
|
"grad_norm": 5.270165920257568, |
|
"learning_rate": 4.563714511679201e-07, |
|
"logits/chosen": 1.2811131477355957, |
|
"logits/rejected": 1.0420016050338745, |
|
"logps/chosen": -521.6427612304688, |
|
"logps/rejected": -1078.5167236328125, |
|
"loss": 0.0636, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -4.879427909851074, |
|
"rewards/margins": 5.563521385192871, |
|
"rewards/rejected": -10.442949295043945, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.8273041837055678, |
|
"grad_norm": 1.9632947444915771, |
|
"learning_rate": 4.4068276174926624e-07, |
|
"logits/chosen": 1.5258196592330933, |
|
"logits/rejected": 1.4023991823196411, |
|
"logps/chosen": -515.6566162109375, |
|
"logps/rejected": -1040.244384765625, |
|
"loss": 0.0692, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -4.814579010009766, |
|
"rewards/margins": 5.215153694152832, |
|
"rewards/rejected": -10.029733657836914, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.8304498269896193, |
|
"grad_norm": 1.8762720823287964, |
|
"learning_rate": 4.25242375876132e-07, |
|
"logits/chosen": 1.8149995803833008, |
|
"logits/rejected": 1.559918999671936, |
|
"logps/chosen": -512.7207641601562, |
|
"logps/rejected": -1104.062744140625, |
|
"loss": 0.0588, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -4.773519039154053, |
|
"rewards/margins": 5.890836715698242, |
|
"rewards/rejected": -10.66435718536377, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.833595470273671, |
|
"grad_norm": 2.6767687797546387, |
|
"learning_rate": 4.1005215528331254e-07, |
|
"logits/chosen": 1.7480008602142334, |
|
"logits/rejected": 1.5533208847045898, |
|
"logps/chosen": -519.8841552734375, |
|
"logps/rejected": -1083.5428466796875, |
|
"loss": 0.0641, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -4.862178802490234, |
|
"rewards/margins": 5.612443923950195, |
|
"rewards/rejected": -10.474621772766113, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.8367411135577225, |
|
"grad_norm": 1.4044471979141235, |
|
"learning_rate": 3.9511393154175795e-07, |
|
"logits/chosen": 1.734551191329956, |
|
"logits/rejected": 1.43448805809021, |
|
"logps/chosen": -519.4636840820312, |
|
"logps/rejected": -1096.210205078125, |
|
"loss": 0.0504, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -4.848996162414551, |
|
"rewards/margins": 5.752721309661865, |
|
"rewards/rejected": -10.601716995239258, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.8398867568417742, |
|
"grad_norm": 2.7965497970581055, |
|
"learning_rate": 3.8042950583773054e-07, |
|
"logits/chosen": 1.638389229774475, |
|
"logits/rejected": 1.3333652019500732, |
|
"logps/chosen": -516.922607421875, |
|
"logps/rejected": -1048.8199462890625, |
|
"loss": 0.0616, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -4.830017566680908, |
|
"rewards/margins": 5.2858991622924805, |
|
"rewards/rejected": -10.115918159484863, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.8430324001258257, |
|
"grad_norm": 2.85134220123291, |
|
"learning_rate": 3.660006487556245e-07, |
|
"logits/chosen": 1.612595796585083, |
|
"logits/rejected": 1.4408342838287354, |
|
"logps/chosen": -476.4307556152344, |
|
"logps/rejected": -1083.2095947265625, |
|
"loss": 0.0506, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -4.453230857849121, |
|
"rewards/margins": 6.040000915527344, |
|
"rewards/rejected": -10.493230819702148, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.8461780434098773, |
|
"grad_norm": 1.8909926414489746, |
|
"learning_rate": 3.5182910006447775e-07, |
|
"logits/chosen": 1.4525648355484009, |
|
"logits/rejected": 1.2461488246917725, |
|
"logps/chosen": -502.4288635253906, |
|
"logps/rejected": -1073.686767578125, |
|
"loss": 0.0545, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -4.705319404602051, |
|
"rewards/margins": 5.682326793670654, |
|
"rewards/rejected": -10.38764762878418, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.8493236866939289, |
|
"grad_norm": 2.196668863296509, |
|
"learning_rate": 3.3791656850819975e-07, |
|
"logits/chosen": 1.5441794395446777, |
|
"logits/rejected": 1.3334633111953735, |
|
"logps/chosen": -505.63177490234375, |
|
"logps/rejected": -1055.5289306640625, |
|
"loss": 0.066, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -4.721020221710205, |
|
"rewards/margins": 5.478200912475586, |
|
"rewards/rejected": -10.19922161102295, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.8493236866939289, |
|
"eval_logits/chosen": 2.296788215637207, |
|
"eval_logits/rejected": 2.0913844108581543, |
|
"eval_logps/chosen": -497.8610534667969, |
|
"eval_logps/rejected": -1083.573974609375, |
|
"eval_loss": 0.03053486905992031, |
|
"eval_rewards/accuracies": 0.9287313222885132, |
|
"eval_rewards/chosen": -4.650643348693848, |
|
"eval_rewards/margins": 5.825936317443848, |
|
"eval_rewards/rejected": -10.476579666137695, |
|
"eval_runtime": 216.4192, |
|
"eval_samples_per_second": 98.961, |
|
"eval_steps_per_second": 1.548, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.8524693299779805, |
|
"grad_norm": 2.1117446422576904, |
|
"learning_rate": 3.2426473159953455e-07, |
|
"logits/chosen": 1.5046918392181396, |
|
"logits/rejected": 1.335351586341858, |
|
"logps/chosen": -514.4112548828125, |
|
"logps/rejected": -1087.9561767578125, |
|
"loss": 0.0614, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -4.799644470214844, |
|
"rewards/margins": 5.715089321136475, |
|
"rewards/rejected": -10.514734268188477, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.8556149732620321, |
|
"grad_norm": 2.354701519012451, |
|
"learning_rate": 3.108752354177963e-07, |
|
"logits/chosen": 1.6507476568222046, |
|
"logits/rejected": 1.3912304639816284, |
|
"logps/chosen": -515.09326171875, |
|
"logps/rejected": -1063.3089599609375, |
|
"loss": 0.0575, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -4.812552452087402, |
|
"rewards/margins": 5.467874050140381, |
|
"rewards/rejected": -10.280427932739258, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.8587606165460837, |
|
"grad_norm": 3.7214064598083496, |
|
"learning_rate": 2.9774969441039247e-07, |
|
"logits/chosen": 1.3230955600738525, |
|
"logits/rejected": 1.1357682943344116, |
|
"logps/chosen": -505.62921142578125, |
|
"logps/rejected": -1043.6890869140625, |
|
"loss": 0.0689, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -4.736936092376709, |
|
"rewards/margins": 5.339118003845215, |
|
"rewards/rejected": -10.076053619384766, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.8619062598301352, |
|
"grad_norm": 2.939478874206543, |
|
"learning_rate": 2.848896911981575e-07, |
|
"logits/chosen": 1.6969833374023438, |
|
"logits/rejected": 1.3960387706756592, |
|
"logps/chosen": -471.97393798828125, |
|
"logps/rejected": -1061.218505859375, |
|
"loss": 0.072, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -4.4052605628967285, |
|
"rewards/margins": 5.870527267456055, |
|
"rewards/rejected": -10.275788307189941, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.8650519031141869, |
|
"grad_norm": 1.170915961265564, |
|
"learning_rate": 2.722967763845316e-07, |
|
"logits/chosen": 1.487412691116333, |
|
"logits/rejected": 1.3554164171218872, |
|
"logps/chosen": -488.6513671875, |
|
"logps/rejected": -1064.09912109375, |
|
"loss": 0.0444, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -4.568634986877441, |
|
"rewards/margins": 5.726897239685059, |
|
"rewards/rejected": -10.2955322265625, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.8681975463982384, |
|
"grad_norm": 3.7928836345672607, |
|
"learning_rate": 2.5997246836859335e-07, |
|
"logits/chosen": 1.6719999313354492, |
|
"logits/rejected": 1.3611785173416138, |
|
"logps/chosen": -537.3580322265625, |
|
"logps/rejected": -1134.085205078125, |
|
"loss": 0.0707, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -5.030951976776123, |
|
"rewards/margins": 5.935522556304932, |
|
"rewards/rejected": -10.966474533081055, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.8713431896822901, |
|
"grad_norm": 4.211234092712402, |
|
"learning_rate": 2.479182531619778e-07, |
|
"logits/chosen": 1.5656145811080933, |
|
"logits/rejected": 1.356136679649353, |
|
"logps/chosen": -540.0008544921875, |
|
"logps/rejected": -1038.9478759765625, |
|
"loss": 0.0582, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -5.057316780090332, |
|
"rewards/margins": 4.969836235046387, |
|
"rewards/rejected": -10.027152061462402, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.8744888329663416, |
|
"grad_norm": 3.4313180446624756, |
|
"learning_rate": 2.3613558420969988e-07, |
|
"logits/chosen": 1.5074255466461182, |
|
"logits/rejected": 1.2785370349884033, |
|
"logps/chosen": -519.2490844726562, |
|
"logps/rejected": -1096.0401611328125, |
|
"loss": 0.055, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -4.868134021759033, |
|
"rewards/margins": 5.725502967834473, |
|
"rewards/rejected": -10.593636512756348, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.8776344762503933, |
|
"grad_norm": 3.3754262924194336, |
|
"learning_rate": 2.2462588221490445e-07, |
|
"logits/chosen": 1.7104518413543701, |
|
"logits/rejected": 1.401397466659546, |
|
"logps/chosen": -519.754150390625, |
|
"logps/rejected": -1093.9193115234375, |
|
"loss": 0.0621, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -4.866362571716309, |
|
"rewards/margins": 5.7011823654174805, |
|
"rewards/rejected": -10.567545890808105, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.8807801195344448, |
|
"grad_norm": 2.195533275604248, |
|
"learning_rate": 2.1339053496756413e-07, |
|
"logits/chosen": 1.8926490545272827, |
|
"logits/rejected": 1.602657675743103, |
|
"logps/chosen": -496.7962951660156, |
|
"logps/rejected": -1042.2293701171875, |
|
"loss": 0.0568, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -4.636801719665527, |
|
"rewards/margins": 5.415767192840576, |
|
"rewards/rejected": -10.052568435668945, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.8807801195344448, |
|
"eval_logits/chosen": 2.3025965690612793, |
|
"eval_logits/rejected": 2.095693349838257, |
|
"eval_logps/chosen": -497.02655029296875, |
|
"eval_logps/rejected": -1082.205078125, |
|
"eval_loss": 0.030157454311847687, |
|
"eval_rewards/accuracies": 0.9302238821983337, |
|
"eval_rewards/chosen": -4.642297744750977, |
|
"eval_rewards/margins": 5.820593357086182, |
|
"eval_rewards/rejected": -10.462892532348633, |
|
"eval_runtime": 216.4848, |
|
"eval_samples_per_second": 98.931, |
|
"eval_steps_per_second": 1.547, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.8839257628184963, |
|
"grad_norm": 2.889522075653076, |
|
"learning_rate": 2.0243089717714465e-07, |
|
"logits/chosen": 1.807885766029358, |
|
"logits/rejected": 1.4938302040100098, |
|
"logps/chosen": -484.04443359375, |
|
"logps/rejected": -1089.961181640625, |
|
"loss": 0.0552, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -4.520063877105713, |
|
"rewards/margins": 6.023884296417236, |
|
"rewards/rejected": -10.543947219848633, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.887071406102548, |
|
"grad_norm": 3.5756685733795166, |
|
"learning_rate": 1.9174829030926157e-07, |
|
"logits/chosen": 1.5275074243545532, |
|
"logits/rejected": 1.4837501049041748, |
|
"logps/chosen": -543.8445434570312, |
|
"logps/rejected": -1033.0228271484375, |
|
"loss": 0.048, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -5.1024169921875, |
|
"rewards/margins": 4.863530158996582, |
|
"rewards/rejected": -9.965947151184082, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.8902170493865995, |
|
"grad_norm": 1.6179348230361938, |
|
"learning_rate": 1.8134400242634214e-07, |
|
"logits/chosen": 1.504921793937683, |
|
"logits/rejected": 1.4333007335662842, |
|
"logps/chosen": -552.3648681640625, |
|
"logps/rejected": -1058.1396484375, |
|
"loss": 0.0736, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -5.192325592041016, |
|
"rewards/margins": 5.051383972167969, |
|
"rewards/rejected": -10.243708610534668, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.8933626926706512, |
|
"grad_norm": 2.699253797531128, |
|
"learning_rate": 1.7121928803231714e-07, |
|
"logits/chosen": 1.7584819793701172, |
|
"logits/rejected": 1.4663532972335815, |
|
"logps/chosen": -492.44598388671875, |
|
"logps/rejected": -1074.4373779296875, |
|
"loss": 0.0559, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -4.594998836517334, |
|
"rewards/margins": 5.777581214904785, |
|
"rewards/rejected": -10.372581481933594, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.8965083359547027, |
|
"grad_norm": 1.8945022821426392, |
|
"learning_rate": 1.613753679213581e-07, |
|
"logits/chosen": 1.57563054561615, |
|
"logits/rejected": 1.482763409614563, |
|
"logps/chosen": -539.2989501953125, |
|
"logps/rejected": -1020.1134033203125, |
|
"loss": 0.0574, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -5.049017906188965, |
|
"rewards/margins": 4.8051300048828125, |
|
"rewards/rejected": -9.854147911071777, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.8996539792387543, |
|
"grad_norm": 1.4570543766021729, |
|
"learning_rate": 1.5181342903067803e-07, |
|
"logits/chosen": 1.693933129310608, |
|
"logits/rejected": 1.528136968612671, |
|
"logps/chosen": -460.54095458984375, |
|
"logps/rejected": -1023.5963745117188, |
|
"loss": 0.0594, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -4.285604953765869, |
|
"rewards/margins": 5.596875190734863, |
|
"rewards/rejected": -9.882479667663574, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.9027996225228059, |
|
"grad_norm": 3.3452985286712646, |
|
"learning_rate": 1.4253462429741877e-07, |
|
"logits/chosen": 1.5829732418060303, |
|
"logits/rejected": 1.3568916320800781, |
|
"logps/chosen": -491.80145263671875, |
|
"logps/rejected": -1098.6715087890625, |
|
"loss": 0.0513, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -4.593815803527832, |
|
"rewards/margins": 6.039064407348633, |
|
"rewards/rejected": -10.632879257202148, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.9059452658068575, |
|
"grad_norm": 4.80794620513916, |
|
"learning_rate": 1.335400725196309e-07, |
|
"logits/chosen": 1.5622551441192627, |
|
"logits/rejected": 1.4623697996139526, |
|
"logps/chosen": -530.6185302734375, |
|
"logps/rejected": -1116.3253173828125, |
|
"loss": 0.0756, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -4.968831539154053, |
|
"rewards/margins": 5.823412895202637, |
|
"rewards/rejected": -10.792244911193848, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.9090909090909091, |
|
"grad_norm": 2.9681992530822754, |
|
"learning_rate": 1.2483085822137752e-07, |
|
"logits/chosen": 1.4609724283218384, |
|
"logits/rejected": 1.4121100902557373, |
|
"logps/chosen": -513.2243041992188, |
|
"logps/rejected": -1081.070068359375, |
|
"loss": 0.0569, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -4.783768653869629, |
|
"rewards/margins": 5.678202152252197, |
|
"rewards/rejected": -10.4619722366333, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.9122365523749607, |
|
"grad_norm": 2.235750913619995, |
|
"learning_rate": 1.16408031521964e-07, |
|
"logits/chosen": 1.5177078247070312, |
|
"logits/rejected": 1.3542033433914185, |
|
"logps/chosen": -497.5135192871094, |
|
"logps/rejected": -1059.788818359375, |
|
"loss": 0.0602, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -4.6491899490356445, |
|
"rewards/margins": 5.587696075439453, |
|
"rewards/rejected": -10.236886024475098, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.9122365523749607, |
|
"eval_logits/chosen": 2.2910573482513428, |
|
"eval_logits/rejected": 2.086066961288452, |
|
"eval_logps/chosen": -495.3988952636719, |
|
"eval_logps/rejected": -1081.995849609375, |
|
"eval_loss": 0.029906345531344414, |
|
"eval_rewards/accuracies": 0.9298507571220398, |
|
"eval_rewards/chosen": -4.626021385192871, |
|
"eval_rewards/margins": 5.83477783203125, |
|
"eval_rewards/rejected": -10.460798263549805, |
|
"eval_runtime": 216.6391, |
|
"eval_samples_per_second": 98.86, |
|
"eval_steps_per_second": 1.546, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.9153821956590122, |
|
"grad_norm": 3.275084972381592, |
|
"learning_rate": 1.0827260800932132e-07, |
|
"logits/chosen": 1.3776516914367676, |
|
"logits/rejected": 1.2710825204849243, |
|
"logps/chosen": -530.8807373046875, |
|
"logps/rejected": -1062.57763671875, |
|
"loss": 0.0566, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -4.976878643035889, |
|
"rewards/margins": 5.322489261627197, |
|
"rewards/rejected": -10.299367904663086, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.9185278389430639, |
|
"grad_norm": 1.3798928260803223, |
|
"learning_rate": 1.0042556861754981e-07, |
|
"logits/chosen": 1.4723575115203857, |
|
"logits/rejected": 1.3601932525634766, |
|
"logps/chosen": -520.2330322265625, |
|
"logps/rejected": -1072.7567138671875, |
|
"loss": 0.0463, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -4.895315647125244, |
|
"rewards/margins": 5.485459327697754, |
|
"rewards/rejected": -10.380775451660156, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.9216734822271154, |
|
"grad_norm": 3.591219425201416, |
|
"learning_rate": 9.286785950864297e-08, |
|
"logits/chosen": 1.5991261005401611, |
|
"logits/rejected": 1.4333770275115967, |
|
"logps/chosen": -497.31903076171875, |
|
"logps/rejected": -1074.6182861328125, |
|
"loss": 0.0597, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -4.641297817230225, |
|
"rewards/margins": 5.764665126800537, |
|
"rewards/rejected": -10.405962944030762, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.9248191255111671, |
|
"grad_norm": 1.934587836265564, |
|
"learning_rate": 8.560039195840226e-08, |
|
"logits/chosen": 1.61993408203125, |
|
"logits/rejected": 1.257753610610962, |
|
"logps/chosen": -511.5843811035156, |
|
"logps/rejected": -1043.6748046875, |
|
"loss": 0.0618, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -4.776283264160156, |
|
"rewards/margins": 5.305155277252197, |
|
"rewards/rejected": -10.081439018249512, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.9279647687952186, |
|
"grad_norm": 1.1722760200500488, |
|
"learning_rate": 7.86240422465609e-08, |
|
"logits/chosen": 1.4890453815460205, |
|
"logits/rejected": 1.4258759021759033, |
|
"logps/chosen": -507.92279052734375, |
|
"logps/rejected": -1072.976318359375, |
|
"loss": 0.061, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -4.746781826019287, |
|
"rewards/margins": 5.61569881439209, |
|
"rewards/rejected": -10.362482070922852, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.9311104120792703, |
|
"grad_norm": 2.363994598388672, |
|
"learning_rate": 7.193965155112475e-08, |
|
"logits/chosen": 1.4762569665908813, |
|
"logits/rejected": 1.2930829524993896, |
|
"logps/chosen": -531.8678588867188, |
|
"logps/rejected": -1051.727783203125, |
|
"loss": 0.063, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -4.999019622802734, |
|
"rewards/margins": 5.162339687347412, |
|
"rewards/rejected": -10.161359786987305, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.9342560553633218, |
|
"grad_norm": 2.2596049308776855, |
|
"learning_rate": 6.554802584694791e-08, |
|
"logits/chosen": 1.6314208507537842, |
|
"logits/rejected": 1.5024511814117432, |
|
"logps/chosen": -449.2713317871094, |
|
"logps/rejected": -1065.9703369140625, |
|
"loss": 0.0606, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -4.171161651611328, |
|
"rewards/margins": 6.13657808303833, |
|
"rewards/rejected": -10.3077392578125, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.9374016986473734, |
|
"grad_norm": 3.5321264266967773, |
|
"learning_rate": 5.9449935808549576e-08, |
|
"logits/chosen": 1.5491445064544678, |
|
"logits/rejected": 1.3002042770385742, |
|
"logps/chosen": -510.8706970214844, |
|
"logps/rejected": -1102.7340087890625, |
|
"loss": 0.0614, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -4.760753154754639, |
|
"rewards/margins": 5.892817974090576, |
|
"rewards/rejected": -10.653572082519531, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.940547341931425, |
|
"grad_norm": 1.3359121084213257, |
|
"learning_rate": 5.3646116717191723e-08, |
|
"logits/chosen": 1.860054612159729, |
|
"logits/rejected": 1.563511610031128, |
|
"logps/chosen": -489.28070068359375, |
|
"logps/rejected": -1061.9932861328125, |
|
"loss": 0.0525, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -4.577395439147949, |
|
"rewards/margins": 5.680055618286133, |
|
"rewards/rejected": -10.257450103759766, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.9436929852154765, |
|
"grad_norm": 2.009251832962036, |
|
"learning_rate": 4.813726837222116e-08, |
|
"logits/chosen": 1.6448522806167603, |
|
"logits/rejected": 1.6830604076385498, |
|
"logps/chosen": -513.0660400390625, |
|
"logps/rejected": -1033.781494140625, |
|
"loss": 0.0634, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -4.795180320739746, |
|
"rewards/margins": 5.193611145019531, |
|
"rewards/rejected": -9.988792419433594, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.9436929852154765, |
|
"eval_logits/chosen": 2.2738683223724365, |
|
"eval_logits/rejected": 2.065464735031128, |
|
"eval_logps/chosen": -497.3409423828125, |
|
"eval_logps/rejected": -1084.345458984375, |
|
"eval_loss": 0.029775429517030716, |
|
"eval_rewards/accuracies": 0.9313432574272156, |
|
"eval_rewards/chosen": -4.645442008972168, |
|
"eval_rewards/margins": 5.838852405548096, |
|
"eval_rewards/rejected": -10.484294891357422, |
|
"eval_runtime": 216.5306, |
|
"eval_samples_per_second": 98.91, |
|
"eval_steps_per_second": 1.547, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.9468386284995282, |
|
"grad_norm": 3.1543209552764893, |
|
"learning_rate": 4.292405500669061e-08, |
|
"logits/chosen": 1.5579684972763062, |
|
"logits/rejected": 1.275383710861206, |
|
"logps/chosen": -507.2293395996094, |
|
"logps/rejected": -1104.159912109375, |
|
"loss": 0.0674, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -4.741163730621338, |
|
"rewards/margins": 5.949568748474121, |
|
"rewards/rejected": -10.6907320022583, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.9499842717835797, |
|
"grad_norm": 2.6585705280303955, |
|
"learning_rate": 3.8007105207268355e-08, |
|
"logits/chosen": 1.6319191455841064, |
|
"logits/rejected": 1.3634922504425049, |
|
"logps/chosen": -508.10174560546875, |
|
"logps/rejected": -1092.864501953125, |
|
"loss": 0.0605, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -4.735014915466309, |
|
"rewards/margins": 5.825577735900879, |
|
"rewards/rejected": -10.560592651367188, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.9531299150676313, |
|
"grad_norm": 2.6183907985687256, |
|
"learning_rate": 3.338701183844689e-08, |
|
"logits/chosen": 1.4023044109344482, |
|
"logits/rejected": 1.3029506206512451, |
|
"logps/chosen": -472.97216796875, |
|
"logps/rejected": -1090.064697265625, |
|
"loss": 0.0564, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -4.41116189956665, |
|
"rewards/margins": 6.135444164276123, |
|
"rewards/rejected": -10.546606063842773, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.9562755583516829, |
|
"grad_norm": 2.2757019996643066, |
|
"learning_rate": 2.9064331971056515e-08, |
|
"logits/chosen": 1.5462950468063354, |
|
"logits/rejected": 1.5350978374481201, |
|
"logps/chosen": -504.5541076660156, |
|
"logps/rejected": -1036.603759765625, |
|
"loss": 0.0627, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -4.7133469581604, |
|
"rewards/margins": 5.325845718383789, |
|
"rewards/rejected": -10.039191246032715, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.9594212016357345, |
|
"grad_norm": 1.553912878036499, |
|
"learning_rate": 2.503958681509683e-08, |
|
"logits/chosen": 1.6635456085205078, |
|
"logits/rejected": 1.4628212451934814, |
|
"logps/chosen": -515.1585083007812, |
|
"logps/rejected": -1074.81103515625, |
|
"loss": 0.0487, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -4.806754112243652, |
|
"rewards/margins": 5.593099594116211, |
|
"rewards/rejected": -10.399853706359863, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.9625668449197861, |
|
"grad_norm": 3.9352211952209473, |
|
"learning_rate": 2.1313261656891737e-08, |
|
"logits/chosen": 1.6192388534545898, |
|
"logits/rejected": 1.528223991394043, |
|
"logps/chosen": -551.916748046875, |
|
"logps/rejected": -1081.7154541015625, |
|
"loss": 0.0645, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -5.1936187744140625, |
|
"rewards/margins": 5.288642883300781, |
|
"rewards/rejected": -10.48226261138916, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.9657124882038377, |
|
"grad_norm": 2.4550108909606934, |
|
"learning_rate": 1.788580580057514e-08, |
|
"logits/chosen": 1.5395796298980713, |
|
"logits/rejected": 1.3465406894683838, |
|
"logps/chosen": -556.5447387695312, |
|
"logps/rejected": -1052.707275390625, |
|
"loss": 0.0695, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -5.232929229736328, |
|
"rewards/margins": 4.9423723220825195, |
|
"rewards/rejected": -10.175302505493164, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.9688581314878892, |
|
"grad_norm": 3.4560182094573975, |
|
"learning_rate": 1.4757632513916764e-08, |
|
"logits/chosen": 1.5373961925506592, |
|
"logits/rejected": 1.3364956378936768, |
|
"logps/chosen": -464.5713806152344, |
|
"logps/rejected": -1062.9267578125, |
|
"loss": 0.0592, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -4.311025619506836, |
|
"rewards/margins": 5.964101791381836, |
|
"rewards/rejected": -10.275126457214355, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.9720037747719409, |
|
"grad_norm": 2.193678140640259, |
|
"learning_rate": 1.1929118978490361e-08, |
|
"logits/chosen": 1.5750865936279297, |
|
"logits/rejected": 1.4524461030960083, |
|
"logps/chosen": -484.18890380859375, |
|
"logps/rejected": -1047.558349609375, |
|
"loss": 0.0547, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -4.513777732849121, |
|
"rewards/margins": 5.6092610359191895, |
|
"rewards/rejected": -10.123039245605469, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.9751494180559924, |
|
"grad_norm": 3.110525608062744, |
|
"learning_rate": 9.400606244196753e-09, |
|
"logits/chosen": 1.6422191858291626, |
|
"logits/rejected": 1.2251650094985962, |
|
"logps/chosen": -517.6271362304688, |
|
"logps/rejected": -1077.4622802734375, |
|
"loss": 0.0602, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -4.850308895111084, |
|
"rewards/margins": 5.554625034332275, |
|
"rewards/rejected": -10.404932975769043, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.9751494180559924, |
|
"eval_logits/chosen": 2.2623343467712402, |
|
"eval_logits/rejected": 2.0536904335021973, |
|
"eval_logps/chosen": -495.68597412109375, |
|
"eval_logps/rejected": -1079.9603271484375, |
|
"eval_loss": 0.029893433675169945, |
|
"eval_rewards/accuracies": 0.9302238821983337, |
|
"eval_rewards/chosen": -4.628891468048096, |
|
"eval_rewards/margins": 5.811552047729492, |
|
"eval_rewards/rejected": -10.440443992614746, |
|
"eval_runtime": 216.5239, |
|
"eval_samples_per_second": 98.913, |
|
"eval_steps_per_second": 1.547, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.9782950613400441, |
|
"grad_norm": 3.7997848987579346, |
|
"learning_rate": 7.172399188140611e-09, |
|
"logits/chosen": 1.757672667503357, |
|
"logits/rejected": 1.3846279382705688, |
|
"logps/chosen": -524.0980834960938, |
|
"logps/rejected": -1114.28271484375, |
|
"loss": 0.0582, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -4.909502983093262, |
|
"rewards/margins": 5.87128210067749, |
|
"rewards/rejected": -10.78078556060791, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.9814407046240956, |
|
"grad_norm": 1.7551467418670654, |
|
"learning_rate": 5.244766477869034e-09, |
|
"logits/chosen": 1.533881664276123, |
|
"logits/rejected": 1.2340834140777588, |
|
"logps/chosen": -509.0604553222656, |
|
"logps/rejected": -1127.41943359375, |
|
"loss": 0.0557, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -4.765114784240723, |
|
"rewards/margins": 6.15649938583374, |
|
"rewards/rejected": -10.921614646911621, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.9845863479081473, |
|
"grad_norm": 1.941530704498291, |
|
"learning_rate": 3.617940538978848e-09, |
|
"logits/chosen": 1.4696052074432373, |
|
"logits/rejected": 1.2560389041900635, |
|
"logps/chosen": -507.57843017578125, |
|
"logps/rejected": -1006.82470703125, |
|
"loss": 0.0475, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -4.7429070472717285, |
|
"rewards/margins": 4.967124938964844, |
|
"rewards/rejected": -9.71003246307373, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.9877319911921988, |
|
"grad_norm": 2.3873629570007324, |
|
"learning_rate": 2.2921175270890217e-09, |
|
"logits/chosen": 1.7028379440307617, |
|
"logits/rejected": 1.3064117431640625, |
|
"logps/chosen": -509.66363525390625, |
|
"logps/rejected": -1081.079833984375, |
|
"loss": 0.0633, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -4.765096664428711, |
|
"rewards/margins": 5.672849655151367, |
|
"rewards/rejected": -10.437946319580078, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.9908776344762504, |
|
"grad_norm": 2.30230450630188, |
|
"learning_rate": 1.2674573041909776e-09, |
|
"logits/chosen": 1.3582854270935059, |
|
"logits/rejected": 1.3528727293014526, |
|
"logps/chosen": -506.40081787109375, |
|
"logps/rejected": -1082.99560546875, |
|
"loss": 0.0522, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -4.720505714416504, |
|
"rewards/margins": 5.780804634094238, |
|
"rewards/rejected": -10.501310348510742, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.994023277760302, |
|
"grad_norm": 3.560915231704712, |
|
"learning_rate": 5.440834193726208e-10, |
|
"logits/chosen": 1.7087326049804688, |
|
"logits/rejected": 1.488204002380371, |
|
"logps/chosen": -502.759033203125, |
|
"logps/rejected": -1079.620849609375, |
|
"loss": 0.0549, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -4.718785285949707, |
|
"rewards/margins": 5.726217269897461, |
|
"rewards/rejected": -10.445002555847168, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.9971689210443536, |
|
"grad_norm": 3.5719094276428223, |
|
"learning_rate": 1.2208309392081064e-10, |
|
"logits/chosen": 1.3343207836151123, |
|
"logits/rejected": 1.0999490022659302, |
|
"logps/chosen": -544.6361083984375, |
|
"logps/rejected": -1093.305419921875, |
|
"loss": 0.0604, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -5.107422828674316, |
|
"rewards/margins": 5.456589698791504, |
|
"rewards/rejected": -10.56401252746582, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 3179, |
|
"total_flos": 0.0, |
|
"train_loss": 0.18858218038370866, |
|
"train_runtime": 16104.4918, |
|
"train_samples_per_second": 25.267, |
|
"train_steps_per_second": 0.197 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 3179, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|