|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9988571428571429, |
|
"eval_steps": 50, |
|
"global_step": 437, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.022857142857142857, |
|
"grad_norm": 5.131386476065513, |
|
"learning_rate": 1.1363636363636363e-07, |
|
"logits/chosen": -2.7012476921081543, |
|
"logits/rejected": -2.6254587173461914, |
|
"logps/chosen": -301.2864685058594, |
|
"logps/rejected": -281.73876953125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.36250001192092896, |
|
"rewards/chosen": -3.3517978863528697e-06, |
|
"rewards/margins": -0.00011216916755074635, |
|
"rewards/rejected": 0.0001088173157768324, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.045714285714285714, |
|
"grad_norm": 4.052727080189218, |
|
"learning_rate": 2.2727272727272726e-07, |
|
"logits/chosen": -2.640902042388916, |
|
"logits/rejected": -2.6057076454162598, |
|
"logps/chosen": -278.9025573730469, |
|
"logps/rejected": -254.7306671142578, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.0026862886734306812, |
|
"rewards/margins": 0.0023018636275082827, |
|
"rewards/rejected": 0.00038442533696070313, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06857142857142857, |
|
"grad_norm": 4.434624613429657, |
|
"learning_rate": 3.4090909090909085e-07, |
|
"logits/chosen": -2.6378769874572754, |
|
"logits/rejected": -2.6170475482940674, |
|
"logps/chosen": -263.394287109375, |
|
"logps/rejected": -263.47857666015625, |
|
"loss": 0.6898, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.012888330034911633, |
|
"rewards/margins": 0.007743468042463064, |
|
"rewards/rejected": 0.005144862923771143, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.09142857142857143, |
|
"grad_norm": 5.043723997595032, |
|
"learning_rate": 4.545454545454545e-07, |
|
"logits/chosen": -2.648149013519287, |
|
"logits/rejected": -2.5857646465301514, |
|
"logps/chosen": -290.42108154296875, |
|
"logps/rejected": -268.315185546875, |
|
"loss": 0.6832, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.03576214984059334, |
|
"rewards/margins": 0.042601048946380615, |
|
"rewards/rejected": -0.006838902831077576, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.11428571428571428, |
|
"grad_norm": 7.751230449827184, |
|
"learning_rate": 4.997124959943201e-07, |
|
"logits/chosen": -2.675534725189209, |
|
"logits/rejected": -2.5954620838165283, |
|
"logps/chosen": -294.5313720703125, |
|
"logps/rejected": -254.864013671875, |
|
"loss": 0.6745, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.017789244651794434, |
|
"rewards/margins": 0.09778620302677155, |
|
"rewards/rejected": -0.07999695837497711, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.11428571428571428, |
|
"eval_logits/chosen": -2.537318468093872, |
|
"eval_logits/rejected": -2.435882806777954, |
|
"eval_logps/chosen": -278.02972412109375, |
|
"eval_logps/rejected": -236.3332977294922, |
|
"eval_loss": 0.6651818156242371, |
|
"eval_rewards/accuracies": 0.6853448152542114, |
|
"eval_rewards/chosen": -0.024257637560367584, |
|
"eval_rewards/margins": 0.14831387996673584, |
|
"eval_rewards/rejected": -0.17257152497768402, |
|
"eval_runtime": 90.3676, |
|
"eval_samples_per_second": 20.262, |
|
"eval_steps_per_second": 0.321, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.13714285714285715, |
|
"grad_norm": 6.305194820143998, |
|
"learning_rate": 4.979579212164186e-07, |
|
"logits/chosen": -2.5774006843566895, |
|
"logits/rejected": -2.4738996028900146, |
|
"logps/chosen": -293.5943908691406, |
|
"logps/rejected": -274.21575927734375, |
|
"loss": 0.6582, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.13088412582874298, |
|
"rewards/margins": 0.12575200200080872, |
|
"rewards/rejected": -0.2566361129283905, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 7.496714062889331, |
|
"learning_rate": 4.946196886175515e-07, |
|
"logits/chosen": -2.6025655269622803, |
|
"logits/rejected": -2.5487170219421387, |
|
"logps/chosen": -289.283203125, |
|
"logps/rejected": -295.7733459472656, |
|
"loss": 0.6391, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.13504940271377563, |
|
"rewards/margins": 0.22641614079475403, |
|
"rewards/rejected": -0.36146557331085205, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.18285714285714286, |
|
"grad_norm": 9.182158909337614, |
|
"learning_rate": 4.897191188239667e-07, |
|
"logits/chosen": -2.635709047317505, |
|
"logits/rejected": -2.6006340980529785, |
|
"logps/chosen": -301.3983459472656, |
|
"logps/rejected": -321.3275451660156, |
|
"loss": 0.6321, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.3436935245990753, |
|
"rewards/margins": 0.3164929151535034, |
|
"rewards/rejected": -0.6601864099502563, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.2057142857142857, |
|
"grad_norm": 12.866627758013413, |
|
"learning_rate": 4.832875107981763e-07, |
|
"logits/chosen": -2.6577916145324707, |
|
"logits/rejected": -2.6026382446289062, |
|
"logps/chosen": -306.7483215332031, |
|
"logps/rejected": -331.4389343261719, |
|
"loss": 0.6272, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.30463024973869324, |
|
"rewards/margins": 0.45427924394607544, |
|
"rewards/rejected": -0.7589095830917358, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.22857142857142856, |
|
"grad_norm": 10.120425097316348, |
|
"learning_rate": 4.753659419387223e-07, |
|
"logits/chosen": -2.6471400260925293, |
|
"logits/rejected": -2.5577075481414795, |
|
"logps/chosen": -336.1173095703125, |
|
"logps/rejected": -324.4599609375, |
|
"loss": 0.6243, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.49730944633483887, |
|
"rewards/margins": 0.41277560591697693, |
|
"rewards/rejected": -0.9100850820541382, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.22857142857142856, |
|
"eval_logits/chosen": -2.496720790863037, |
|
"eval_logits/rejected": -2.3884377479553223, |
|
"eval_logps/chosen": -324.6850280761719, |
|
"eval_logps/rejected": -321.733154296875, |
|
"eval_loss": 0.6148081421852112, |
|
"eval_rewards/accuracies": 0.7025862336158752, |
|
"eval_rewards/chosen": -0.49081093072891235, |
|
"eval_rewards/margins": 0.5357595682144165, |
|
"eval_rewards/rejected": -1.0265703201293945, |
|
"eval_runtime": 90.5627, |
|
"eval_samples_per_second": 20.218, |
|
"eval_steps_per_second": 0.32, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.25142857142857145, |
|
"grad_norm": 13.934592452644212, |
|
"learning_rate": 4.660050057270191e-07, |
|
"logits/chosen": -2.161785364151001, |
|
"logits/rejected": -2.060573101043701, |
|
"logps/chosen": -390.38104248046875, |
|
"logps/rejected": -416.17913818359375, |
|
"loss": 0.6043, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.7407739758491516, |
|
"rewards/margins": 0.4419216215610504, |
|
"rewards/rejected": -1.1826956272125244, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.2742857142857143, |
|
"grad_norm": 18.04191794745382, |
|
"learning_rate": 4.5526448859687144e-07, |
|
"logits/chosen": -0.9994190335273743, |
|
"logits/rejected": -0.5663596391677856, |
|
"logps/chosen": -400.4964904785156, |
|
"logps/rejected": -379.3451232910156, |
|
"loss": 0.5895, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.8959201574325562, |
|
"rewards/margins": 0.580389142036438, |
|
"rewards/rejected": -1.4763094186782837, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.29714285714285715, |
|
"grad_norm": 23.569070805460544, |
|
"learning_rate": 4.432129880904388e-07, |
|
"logits/chosen": 0.06917886435985565, |
|
"logits/rejected": 0.525992751121521, |
|
"logps/chosen": -440.16839599609375, |
|
"logps/rejected": -436.06341552734375, |
|
"loss": 0.5759, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.350124716758728, |
|
"rewards/margins": 0.505806565284729, |
|
"rewards/rejected": -1.8559315204620361, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 17.135407563954015, |
|
"learning_rate": 4.299274747394055e-07, |
|
"logits/chosen": -0.5247443914413452, |
|
"logits/rejected": -0.054386675357818604, |
|
"logps/chosen": -396.3623352050781, |
|
"logps/rejected": -422.71234130859375, |
|
"loss": 0.5878, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.809146523475647, |
|
"rewards/margins": 0.6988624930381775, |
|
"rewards/rejected": -1.5080091953277588, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.34285714285714286, |
|
"grad_norm": 21.61040590906815, |
|
"learning_rate": 4.1549280046953653e-07, |
|
"logits/chosen": -0.23892001807689667, |
|
"logits/rejected": 0.5097410082817078, |
|
"logps/chosen": -382.4234924316406, |
|
"logps/rejected": -453.66033935546875, |
|
"loss": 0.5613, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.9543758630752563, |
|
"rewards/margins": 0.8192380666732788, |
|
"rewards/rejected": -1.7736135721206665, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.34285714285714286, |
|
"eval_logits/chosen": 0.7831615805625916, |
|
"eval_logits/rejected": 1.843852162361145, |
|
"eval_logps/chosen": -431.0166015625, |
|
"eval_logps/rejected": -483.92266845703125, |
|
"eval_loss": 0.5827990770339966, |
|
"eval_rewards/accuracies": 0.7241379022598267, |
|
"eval_rewards/chosen": -1.5541267395019531, |
|
"eval_rewards/margins": 1.0943388938903809, |
|
"eval_rewards/rejected": -2.648465394973755, |
|
"eval_runtime": 90.809, |
|
"eval_samples_per_second": 20.163, |
|
"eval_steps_per_second": 0.319, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3657142857142857, |
|
"grad_norm": 21.503001868471035, |
|
"learning_rate": 4.000011566683401e-07, |
|
"logits/chosen": 0.6514826416969299, |
|
"logits/rejected": 1.4523377418518066, |
|
"logps/chosen": -458.98016357421875, |
|
"logps/rejected": -519.3821411132812, |
|
"loss": 0.5709, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.5963990688323975, |
|
"rewards/margins": 1.04099440574646, |
|
"rewards/rejected": -2.6373934745788574, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.38857142857142857, |
|
"grad_norm": 22.124021346867316, |
|
"learning_rate": 3.8355148537705047e-07, |
|
"logits/chosen": 0.3275122046470642, |
|
"logits/rejected": 1.0541610717773438, |
|
"logps/chosen": -431.19708251953125, |
|
"logps/rejected": -465.676025390625, |
|
"loss": 0.5719, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.2709215879440308, |
|
"rewards/margins": 0.7285287976264954, |
|
"rewards/rejected": -1.999450445175171, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.4114285714285714, |
|
"grad_norm": 27.446979249488912, |
|
"learning_rate": 3.662488473675315e-07, |
|
"logits/chosen": -0.1673279106616974, |
|
"logits/rejected": 0.9503982663154602, |
|
"logps/chosen": -435.6683654785156, |
|
"logps/rejected": -467.34991455078125, |
|
"loss": 0.5746, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.054885745048523, |
|
"rewards/margins": 1.0191079378128052, |
|
"rewards/rejected": -2.073993682861328, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.4342857142857143, |
|
"grad_norm": 21.261318648058893, |
|
"learning_rate": 3.48203751140067e-07, |
|
"logits/chosen": 0.8843255043029785, |
|
"logits/rejected": 1.7267229557037354, |
|
"logps/chosen": -421.56427001953125, |
|
"logps/rejected": -454.6175842285156, |
|
"loss": 0.5709, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.4530667066574097, |
|
"rewards/margins": 0.7481273412704468, |
|
"rewards/rejected": -2.2011942863464355, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.45714285714285713, |
|
"grad_norm": 18.016130637147523, |
|
"learning_rate": 3.2953144712759537e-07, |
|
"logits/chosen": 0.8405307531356812, |
|
"logits/rejected": 1.9338849782943726, |
|
"logps/chosen": -396.523193359375, |
|
"logps/rejected": -462.48724365234375, |
|
"loss": 0.5618, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.2818048000335693, |
|
"rewards/margins": 1.0542625188827515, |
|
"rewards/rejected": -2.336066961288452, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.45714285714285713, |
|
"eval_logits/chosen": 1.353513240814209, |
|
"eval_logits/rejected": 2.633384943008423, |
|
"eval_logps/chosen": -407.1748962402344, |
|
"eval_logps/rejected": -473.4366149902344, |
|
"eval_loss": 0.5593964457511902, |
|
"eval_rewards/accuracies": 0.7456896305084229, |
|
"eval_rewards/chosen": -1.3157094717025757, |
|
"eval_rewards/margins": 1.2278952598571777, |
|
"eval_rewards/rejected": -2.543604850769043, |
|
"eval_runtime": 90.6049, |
|
"eval_samples_per_second": 20.209, |
|
"eval_steps_per_second": 0.32, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 13.884449533300678, |
|
"learning_rate": 3.103511916141658e-07, |
|
"logits/chosen": 1.2088024616241455, |
|
"logits/rejected": 2.1989169120788574, |
|
"logps/chosen": -401.5423278808594, |
|
"logps/rejected": -482.7976989746094, |
|
"loss": 0.5543, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.380887746810913, |
|
"rewards/margins": 0.9942899942398071, |
|
"rewards/rejected": -2.3751778602600098, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.5028571428571429, |
|
"grad_norm": 22.105068043395047, |
|
"learning_rate": 2.9078548506882117e-07, |
|
"logits/chosen": 1.1457306146621704, |
|
"logits/rejected": 2.113302707672119, |
|
"logps/chosen": -436.2627868652344, |
|
"logps/rejected": -482.525146484375, |
|
"loss": 0.573, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.5119678974151611, |
|
"rewards/margins": 0.8514491319656372, |
|
"rewards/rejected": -2.363417387008667, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.5257142857142857, |
|
"grad_norm": 22.834936526556735, |
|
"learning_rate": 2.709592897595191e-07, |
|
"logits/chosen": 1.5853362083435059, |
|
"logits/rejected": 2.6565065383911133, |
|
"logps/chosen": -436.3871154785156, |
|
"logps/rejected": -490.07977294921875, |
|
"loss": 0.5539, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.553279995918274, |
|
"rewards/margins": 0.9546509981155396, |
|
"rewards/rejected": -2.5079312324523926, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5485714285714286, |
|
"grad_norm": 24.160129408331898, |
|
"learning_rate": 2.509992316440332e-07, |
|
"logits/chosen": 1.073169469833374, |
|
"logits/rejected": 2.0948684215545654, |
|
"logps/chosen": -435.13104248046875, |
|
"logps/rejected": -528.5726318359375, |
|
"loss": 0.5519, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.3642125129699707, |
|
"rewards/margins": 1.2072954177856445, |
|
"rewards/rejected": -2.5715081691741943, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.5714285714285714, |
|
"grad_norm": 20.623735608734524, |
|
"learning_rate": 2.3103279163519918e-07, |
|
"logits/chosen": 1.123517394065857, |
|
"logits/rejected": 1.916009545326233, |
|
"logps/chosen": -412.7406311035156, |
|
"logps/rejected": -501.85394287109375, |
|
"loss": 0.558, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.3211438655853271, |
|
"rewards/margins": 0.9953910112380981, |
|
"rewards/rejected": -2.316534996032715, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5714285714285714, |
|
"eval_logits/chosen": 1.5315722227096558, |
|
"eval_logits/rejected": 3.1078054904937744, |
|
"eval_logps/chosen": -408.7300720214844, |
|
"eval_logps/rejected": -478.3948059082031, |
|
"eval_loss": 0.5526223182678223, |
|
"eval_rewards/accuracies": 0.7629310488700867, |
|
"eval_rewards/chosen": -1.3312608003616333, |
|
"eval_rewards/margins": 1.2619256973266602, |
|
"eval_rewards/rejected": -2.593186378479004, |
|
"eval_runtime": 90.8269, |
|
"eval_samples_per_second": 20.159, |
|
"eval_steps_per_second": 0.319, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5942857142857143, |
|
"grad_norm": 18.937383972416836, |
|
"learning_rate": 2.1118749140573358e-07, |
|
"logits/chosen": 2.365237236022949, |
|
"logits/rejected": 2.8070249557495117, |
|
"logps/chosen": -467.107666015625, |
|
"logps/rejected": -548.4678344726562, |
|
"loss": 0.5559, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.8732095956802368, |
|
"rewards/margins": 0.8780391812324524, |
|
"rewards/rejected": -2.751248598098755, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.6171428571428571, |
|
"grad_norm": 21.474579443049855, |
|
"learning_rate": 1.9159007893272703e-07, |
|
"logits/chosen": 2.5897929668426514, |
|
"logits/rejected": 3.8603546619415283, |
|
"logps/chosen": -472.37420654296875, |
|
"logps/rejected": -535.6754150390625, |
|
"loss": 0.5468, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.9984840154647827, |
|
"rewards/margins": 1.0531952381134033, |
|
"rewards/rejected": -3.0516793727874756, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 26.380689829844055, |
|
"learning_rate": 1.7236571898357766e-07, |
|
"logits/chosen": 2.368163824081421, |
|
"logits/rejected": 3.2206153869628906, |
|
"logps/chosen": -457.65478515625, |
|
"logps/rejected": -559.1119384765625, |
|
"loss": 0.5472, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.8461668491363525, |
|
"rewards/margins": 1.113884687423706, |
|
"rewards/rejected": -2.9600515365600586, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.6628571428571428, |
|
"grad_norm": 21.888196354392417, |
|
"learning_rate": 1.5363719371356882e-07, |
|
"logits/chosen": 2.094879627227783, |
|
"logits/rejected": 2.8882927894592285, |
|
"logps/chosen": -477.3121643066406, |
|
"logps/rejected": -541.1532592773438, |
|
"loss": 0.5476, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.764062523841858, |
|
"rewards/margins": 0.991985023021698, |
|
"rewards/rejected": -2.7560477256774902, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6857142857142857, |
|
"grad_norm": 16.876283045841564, |
|
"learning_rate": 1.3552411848071565e-07, |
|
"logits/chosen": 2.240018844604492, |
|
"logits/rejected": 3.6286048889160156, |
|
"logps/chosen": -489.7718200683594, |
|
"logps/rejected": -560.1759033203125, |
|
"loss": 0.5399, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.8786265850067139, |
|
"rewards/margins": 1.196711778640747, |
|
"rewards/rejected": -3.075338363647461, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6857142857142857, |
|
"eval_logits/chosen": 2.3192012310028076, |
|
"eval_logits/rejected": 3.8099868297576904, |
|
"eval_logps/chosen": -449.146484375, |
|
"eval_logps/rejected": -527.6614990234375, |
|
"eval_loss": 0.5464984178543091, |
|
"eval_rewards/accuracies": 0.75, |
|
"eval_rewards/chosen": -1.7354251146316528, |
|
"eval_rewards/margins": 1.3504279851913452, |
|
"eval_rewards/rejected": -3.0858535766601562, |
|
"eval_runtime": 89.7327, |
|
"eval_samples_per_second": 20.405, |
|
"eval_steps_per_second": 0.323, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.7085714285714285, |
|
"grad_norm": 17.86356014365164, |
|
"learning_rate": 1.1814217788631473e-07, |
|
"logits/chosen": 2.34942626953125, |
|
"logits/rejected": 3.1517879962921143, |
|
"logps/chosen": -443.734375, |
|
"logps/rejected": -516.211669921875, |
|
"loss": 0.5491, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.8743022680282593, |
|
"rewards/margins": 0.9158605337142944, |
|
"rewards/rejected": -2.790163040161133, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.7314285714285714, |
|
"grad_norm": 18.11439951327569, |
|
"learning_rate": 1.0160238692045331e-07, |
|
"logits/chosen": 2.115510940551758, |
|
"logits/rejected": 2.9685635566711426, |
|
"logps/chosen": -417.16412353515625, |
|
"logps/rejected": -496.8562927246094, |
|
"loss": 0.5474, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.728817343711853, |
|
"rewards/margins": 0.8298628926277161, |
|
"rewards/rejected": -2.558680295944214, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.7542857142857143, |
|
"grad_norm": 23.20251472261412, |
|
"learning_rate": 8.601038193139438e-08, |
|
"logits/chosen": 1.6642992496490479, |
|
"logits/rejected": 2.858081579208374, |
|
"logps/chosen": -455.21453857421875, |
|
"logps/rejected": -516.3800048828125, |
|
"loss": 0.5498, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.5599634647369385, |
|
"rewards/margins": 1.1259580850601196, |
|
"rewards/rejected": -2.6859214305877686, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7771428571428571, |
|
"grad_norm": 17.97386136540539, |
|
"learning_rate": 7.146574594727572e-08, |
|
"logits/chosen": 2.353365659713745, |
|
"logits/rejected": 3.1233067512512207, |
|
"logps/chosen": -446.12493896484375, |
|
"logps/rejected": -536.3157958984375, |
|
"loss": 0.5373, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.801740050315857, |
|
"rewards/margins": 1.141181230545044, |
|
"rewards/rejected": -2.9429211616516113, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 21.871959889194457, |
|
"learning_rate": 5.8061372659157306e-08, |
|
"logits/chosen": 2.052710771560669, |
|
"logits/rejected": 3.2841498851776123, |
|
"logps/chosen": -481.0086975097656, |
|
"logps/rejected": -538.4657592773438, |
|
"loss": 0.5536, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.9130144119262695, |
|
"rewards/margins": 0.9286670684814453, |
|
"rewards/rejected": -2.8416812419891357, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_logits/chosen": 2.5496232509613037, |
|
"eval_logits/rejected": 4.100791931152344, |
|
"eval_logps/chosen": -459.9457702636719, |
|
"eval_logps/rejected": -537.3768310546875, |
|
"eval_loss": 0.5457741618156433, |
|
"eval_rewards/accuracies": 0.767241358757019, |
|
"eval_rewards/chosen": -1.8434182405471802, |
|
"eval_rewards/margins": 1.3395885229110718, |
|
"eval_rewards/rejected": -3.183006525039673, |
|
"eval_runtime": 90.4314, |
|
"eval_samples_per_second": 20.247, |
|
"eval_steps_per_second": 0.321, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.8228571428571428, |
|
"grad_norm": 18.303747174951123, |
|
"learning_rate": 4.5882873127531614e-08, |
|
"logits/chosen": 2.120243549346924, |
|
"logits/rejected": 3.347181797027588, |
|
"logps/chosen": -475.18572998046875, |
|
"logps/rejected": -556.6296997070312, |
|
"loss": 0.5378, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.8978074789047241, |
|
"rewards/margins": 1.1576240062713623, |
|
"rewards/rejected": -3.055431604385376, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.8457142857142858, |
|
"grad_norm": 18.37433316843813, |
|
"learning_rate": 3.500802900154412e-08, |
|
"logits/chosen": 2.2264809608459473, |
|
"logits/rejected": 3.606698513031006, |
|
"logps/chosen": -450.2330627441406, |
|
"logps/rejected": -542.2530517578125, |
|
"loss": 0.5435, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.7917168140411377, |
|
"rewards/margins": 1.2466588020324707, |
|
"rewards/rejected": -3.0383753776550293, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.8685714285714285, |
|
"grad_norm": 21.62972776738107, |
|
"learning_rate": 2.550629574310309e-08, |
|
"logits/chosen": 1.8507616519927979, |
|
"logits/rejected": 3.276202440261841, |
|
"logps/chosen": -521.0817260742188, |
|
"logps/rejected": -551.79736328125, |
|
"loss": 0.543, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.0302536487579346, |
|
"rewards/margins": 0.913725733757019, |
|
"rewards/rejected": -2.943979501724243, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.8914285714285715, |
|
"grad_norm": 21.37326667405599, |
|
"learning_rate": 1.7438359028687983e-08, |
|
"logits/chosen": 2.1451709270477295, |
|
"logits/rejected": 2.937721014022827, |
|
"logps/chosen": -487.7579040527344, |
|
"logps/rejected": -573.7957763671875, |
|
"loss": 0.5485, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.7638496160507202, |
|
"rewards/margins": 1.0263822078704834, |
|
"rewards/rejected": -2.7902321815490723, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.9142857142857143, |
|
"grad_norm": 36.14381504781887, |
|
"learning_rate": 1.0855747162029361e-08, |
|
"logits/chosen": 2.2515556812286377, |
|
"logits/rejected": 2.859483480453491, |
|
"logps/chosen": -467.8270568847656, |
|
"logps/rejected": -549.77392578125, |
|
"loss": 0.5612, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.89093816280365, |
|
"rewards/margins": 0.9410650134086609, |
|
"rewards/rejected": -2.832003116607666, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.9142857142857143, |
|
"eval_logits/chosen": 2.319566249847412, |
|
"eval_logits/rejected": 3.925558090209961, |
|
"eval_logps/chosen": -443.83612060546875, |
|
"eval_logps/rejected": -527.0015258789062, |
|
"eval_loss": 0.5425659418106079, |
|
"eval_rewards/accuracies": 0.7543103694915771, |
|
"eval_rewards/chosen": -1.6823216676712036, |
|
"eval_rewards/margins": 1.3969323635101318, |
|
"eval_rewards/rejected": -3.079254150390625, |
|
"eval_runtime": 90.5136, |
|
"eval_samples_per_second": 20.229, |
|
"eval_steps_per_second": 0.32, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.9371428571428572, |
|
"grad_norm": 20.661658798051754, |
|
"learning_rate": 5.8005019731033615e-09, |
|
"logits/chosen": 2.158614158630371, |
|
"logits/rejected": 3.273141384124756, |
|
"logps/chosen": -486.8451232910156, |
|
"logps/rejected": -554.2020263671875, |
|
"loss": 0.5334, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.9866514205932617, |
|
"rewards/margins": 0.9580680727958679, |
|
"rewards/rejected": -2.9447195529937744, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 20.631088560363143, |
|
"learning_rate": 2.3049103053431886e-09, |
|
"logits/chosen": 2.0465073585510254, |
|
"logits/rejected": 3.5103249549865723, |
|
"logps/chosen": -440.74224853515625, |
|
"logps/rejected": -527.11376953125, |
|
"loss": 0.5445, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.5799415111541748, |
|
"rewards/margins": 1.351327657699585, |
|
"rewards/rejected": -2.9312691688537598, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9828571428571429, |
|
"grad_norm": 21.077395280178123, |
|
"learning_rate": 3.9129780600541397e-10, |
|
"logits/chosen": 2.380171298980713, |
|
"logits/rejected": 3.3738162517547607, |
|
"logps/chosen": -453.9419860839844, |
|
"logps/rejected": -550.107421875, |
|
"loss": 0.5377, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.7171010971069336, |
|
"rewards/margins": 1.1472762823104858, |
|
"rewards/rejected": -2.864377498626709, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.9988571428571429, |
|
"step": 437, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5805802214336614, |
|
"train_runtime": 11351.5205, |
|
"train_samples_per_second": 4.933, |
|
"train_steps_per_second": 0.038 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 437, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|