|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.0, |
|
"eval_steps": 100, |
|
"global_step": 1556, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.0706638115631692e-09, |
|
"logits/chosen": -3.0633435249328613, |
|
"logits/rejected": -3.0370049476623535, |
|
"logps/chosen": -237.29315185546875, |
|
"logps/rejected": -251.69747924804688, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.070663811563169e-08, |
|
"logits/chosen": -2.990461826324463, |
|
"logits/rejected": -3.0024797916412354, |
|
"logps/chosen": -356.6201171875, |
|
"logps/rejected": -390.87042236328125, |
|
"loss": 0.6911, |
|
"rewards/accuracies": 0.5555555820465088, |
|
"rewards/chosen": -0.004924382548779249, |
|
"rewards/margins": 0.009135871194303036, |
|
"rewards/rejected": -0.014060255140066147, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.141327623126338e-08, |
|
"logits/chosen": -3.002528429031372, |
|
"logits/rejected": -3.0017483234405518, |
|
"logps/chosen": -350.7555847167969, |
|
"logps/rejected": -393.46014404296875, |
|
"loss": 0.6801, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.050124846398830414, |
|
"rewards/margins": 0.028588850051164627, |
|
"rewards/rejected": 0.02153599075973034, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.2119914346895076e-08, |
|
"logits/chosen": -2.975447416305542, |
|
"logits/rejected": -3.0126380920410156, |
|
"logps/chosen": -375.95391845703125, |
|
"logps/rejected": -432.83587646484375, |
|
"loss": 0.6435, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.2032477855682373, |
|
"rewards/margins": 0.1010356992483139, |
|
"rewards/rejected": 0.1022120863199234, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.282655246252676e-08, |
|
"logits/chosen": -3.0026869773864746, |
|
"logits/rejected": -2.9945485591888428, |
|
"logps/chosen": -383.3456115722656, |
|
"logps/rejected": -392.7911376953125, |
|
"loss": 0.5784, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.4273909628391266, |
|
"rewards/margins": 0.30088725686073303, |
|
"rewards/rejected": 0.12650372087955475, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 5.353319057815846e-08, |
|
"logits/chosen": -2.989891529083252, |
|
"logits/rejected": -2.996675968170166, |
|
"logps/chosen": -339.07513427734375, |
|
"logps/rejected": -373.727783203125, |
|
"loss": 0.5345, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.6149066686630249, |
|
"rewards/margins": 0.39920732378959656, |
|
"rewards/rejected": 0.21569931507110596, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 6.423982869379015e-08, |
|
"logits/chosen": -3.026094913482666, |
|
"logits/rejected": -2.9982128143310547, |
|
"logps/chosen": -327.8692321777344, |
|
"logps/rejected": -375.9877624511719, |
|
"loss": 0.4485, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.6913961172103882, |
|
"rewards/margins": 0.7190420031547546, |
|
"rewards/rejected": -0.02764584682881832, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 7.494646680942184e-08, |
|
"logits/chosen": -2.974823474884033, |
|
"logits/rejected": -2.980032444000244, |
|
"logps/chosen": -351.2728576660156, |
|
"logps/rejected": -395.68609619140625, |
|
"loss": 0.3966, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.8828132748603821, |
|
"rewards/margins": 0.9640719294548035, |
|
"rewards/rejected": -0.08125858008861542, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 8.565310492505352e-08, |
|
"logits/chosen": -2.977529287338257, |
|
"logits/rejected": -2.9725558757781982, |
|
"logps/chosen": -359.2842712402344, |
|
"logps/rejected": -405.7890625, |
|
"loss": 0.3519, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 1.091180682182312, |
|
"rewards/margins": 1.2520115375518799, |
|
"rewards/rejected": -0.16083075106143951, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.635974304068522e-08, |
|
"logits/chosen": -2.979015827178955, |
|
"logits/rejected": -2.9813497066497803, |
|
"logps/chosen": -309.3511047363281, |
|
"logps/rejected": -358.91607666015625, |
|
"loss": 0.3201, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.2030521631240845, |
|
"rewards/margins": 1.6773903369903564, |
|
"rewards/rejected": -0.4743381440639496, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.0706638115631692e-07, |
|
"logits/chosen": -2.941194534301758, |
|
"logits/rejected": -2.9548678398132324, |
|
"logps/chosen": -343.6178894042969, |
|
"logps/rejected": -463.1512145996094, |
|
"loss": 0.2696, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 1.2106283903121948, |
|
"rewards/margins": 1.9713561534881592, |
|
"rewards/rejected": -0.7607278823852539, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_logits/chosen": -2.977161169052124, |
|
"eval_logits/rejected": -2.957442045211792, |
|
"eval_logps/chosen": -296.8330383300781, |
|
"eval_logps/rejected": -349.66558837890625, |
|
"eval_loss": 0.2511790990829468, |
|
"eval_rewards/accuracies": 0.921875, |
|
"eval_rewards/chosen": 1.1878268718719482, |
|
"eval_rewards/margins": 1.8798556327819824, |
|
"eval_rewards/rejected": -0.6920287609100342, |
|
"eval_runtime": 38.7534, |
|
"eval_samples_per_second": 12.902, |
|
"eval_steps_per_second": 0.413, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.177730192719486e-07, |
|
"logits/chosen": -2.9442899227142334, |
|
"logits/rejected": -2.9481866359710693, |
|
"logps/chosen": -346.63873291015625, |
|
"logps/rejected": -406.31964111328125, |
|
"loss": 0.2493, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.2421057224273682, |
|
"rewards/margins": 2.18147873878479, |
|
"rewards/rejected": -0.9393728971481323, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.284796573875803e-07, |
|
"logits/chosen": -2.94069242477417, |
|
"logits/rejected": -2.9417574405670166, |
|
"logps/chosen": -351.788330078125, |
|
"logps/rejected": -379.61065673828125, |
|
"loss": 0.2406, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.7772680521011353, |
|
"rewards/margins": 1.8036502599716187, |
|
"rewards/rejected": -1.0263820886611938, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.3918629550321198e-07, |
|
"logits/chosen": -2.926699638366699, |
|
"logits/rejected": -2.911668300628662, |
|
"logps/chosen": -327.4112548828125, |
|
"logps/rejected": -408.2745361328125, |
|
"loss": 0.2073, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.5646601915359497, |
|
"rewards/margins": 2.2064461708068848, |
|
"rewards/rejected": -1.6417862176895142, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.4989293361884367e-07, |
|
"logits/chosen": -2.904219150543213, |
|
"logits/rejected": -2.921232223510742, |
|
"logps/chosen": -311.6190185546875, |
|
"logps/rejected": -411.2701110839844, |
|
"loss": 0.1967, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.46902722120285034, |
|
"rewards/margins": 2.7694640159606934, |
|
"rewards/rejected": -2.3004367351531982, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.6059957173447535e-07, |
|
"logits/chosen": -2.901981830596924, |
|
"logits/rejected": -2.9112467765808105, |
|
"logps/chosen": -301.6145324707031, |
|
"logps/rejected": -391.1957092285156, |
|
"loss": 0.1723, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.218230202794075, |
|
"rewards/margins": 3.2492637634277344, |
|
"rewards/rejected": -3.031033992767334, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.7130620985010704e-07, |
|
"logits/chosen": -2.8996052742004395, |
|
"logits/rejected": -2.8838694095611572, |
|
"logps/chosen": -312.6499938964844, |
|
"logps/rejected": -447.8002014160156, |
|
"loss": 0.1554, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.3209637701511383, |
|
"rewards/margins": 4.501524925231934, |
|
"rewards/rejected": -4.180561065673828, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.8201284796573874e-07, |
|
"logits/chosen": -2.8928513526916504, |
|
"logits/rejected": -2.9001543521881104, |
|
"logps/chosen": -329.20953369140625, |
|
"logps/rejected": -423.6446228027344, |
|
"loss": 0.1566, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.1461164504289627, |
|
"rewards/margins": 4.050145626068115, |
|
"rewards/rejected": -3.904029130935669, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.9271948608137044e-07, |
|
"logits/chosen": -2.8557610511779785, |
|
"logits/rejected": -2.855731725692749, |
|
"logps/chosen": -338.60076904296875, |
|
"logps/rejected": -448.8922424316406, |
|
"loss": 0.1421, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.21355919539928436, |
|
"rewards/margins": 4.191808223724365, |
|
"rewards/rejected": -3.9782490730285645, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 2.0342612419700214e-07, |
|
"logits/chosen": -2.8638434410095215, |
|
"logits/rejected": -2.877293825149536, |
|
"logps/chosen": -347.19573974609375, |
|
"logps/rejected": -469.17755126953125, |
|
"loss": 0.1381, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.14256651699543, |
|
"rewards/margins": 3.96684193611145, |
|
"rewards/rejected": -4.109408378601074, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 2.1413276231263384e-07, |
|
"logits/chosen": -2.829555034637451, |
|
"logits/rejected": -2.85453462600708, |
|
"logps/chosen": -364.0372009277344, |
|
"logps/rejected": -442.7489318847656, |
|
"loss": 0.1427, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.18970072269439697, |
|
"rewards/margins": 5.029218673706055, |
|
"rewards/rejected": -4.839517593383789, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_logits/chosen": -2.8512933254241943, |
|
"eval_logits/rejected": -2.8302505016326904, |
|
"eval_logps/chosen": -305.8147888183594, |
|
"eval_logps/rejected": -387.1728210449219, |
|
"eval_loss": 0.12157174944877625, |
|
"eval_rewards/accuracies": 0.96875, |
|
"eval_rewards/chosen": 0.28965064883232117, |
|
"eval_rewards/margins": 4.73240327835083, |
|
"eval_rewards/rejected": -4.442752361297607, |
|
"eval_runtime": 38.702, |
|
"eval_samples_per_second": 12.919, |
|
"eval_steps_per_second": 0.413, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.248394004282655e-07, |
|
"logits/chosen": -2.817666530609131, |
|
"logits/rejected": -2.8465371131896973, |
|
"logps/chosen": -325.3854675292969, |
|
"logps/rejected": -439.5003356933594, |
|
"loss": 0.1413, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.21560493111610413, |
|
"rewards/margins": 4.514598369598389, |
|
"rewards/rejected": -4.298993110656738, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 2.355460385438972e-07, |
|
"logits/chosen": -2.7650692462921143, |
|
"logits/rejected": -2.7801504135131836, |
|
"logps/chosen": -326.321533203125, |
|
"logps/rejected": -456.98663330078125, |
|
"loss": 0.1332, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.027444612234830856, |
|
"rewards/margins": 5.077801704406738, |
|
"rewards/rejected": -5.050357818603516, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.462526766595289e-07, |
|
"logits/chosen": -2.788020610809326, |
|
"logits/rejected": -2.7895946502685547, |
|
"logps/chosen": -324.4822998046875, |
|
"logps/rejected": -439.76397705078125, |
|
"loss": 0.1356, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.29427874088287354, |
|
"rewards/margins": 5.166212558746338, |
|
"rewards/rejected": -4.871933460235596, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 2.569593147751606e-07, |
|
"logits/chosen": -2.6995949745178223, |
|
"logits/rejected": -2.7345399856567383, |
|
"logps/chosen": -356.4814758300781, |
|
"logps/rejected": -490.60931396484375, |
|
"loss": 0.1074, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.35535210371017456, |
|
"rewards/margins": 6.1955766677856445, |
|
"rewards/rejected": -6.550928592681885, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.676659528907923e-07, |
|
"logits/chosen": -2.6892549991607666, |
|
"logits/rejected": -2.694087505340576, |
|
"logps/chosen": -305.6263122558594, |
|
"logps/rejected": -387.88543701171875, |
|
"loss": 0.0979, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.6652821898460388, |
|
"rewards/margins": 4.929129600524902, |
|
"rewards/rejected": -5.5944108963012695, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.7837259100642395e-07, |
|
"logits/chosen": -2.73167085647583, |
|
"logits/rejected": -2.7620654106140137, |
|
"logps/chosen": -408.2175598144531, |
|
"logps/rejected": -449.8201599121094, |
|
"loss": 0.1298, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.9623678922653198, |
|
"rewards/margins": 5.48039436340332, |
|
"rewards/rejected": -6.4427618980407715, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 2.890792291220557e-07, |
|
"logits/chosen": -2.7657806873321533, |
|
"logits/rejected": -2.802060604095459, |
|
"logps/chosen": -384.2090148925781, |
|
"logps/rejected": -481.82696533203125, |
|
"loss": 0.1181, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.16663847863674164, |
|
"rewards/margins": 5.502591133117676, |
|
"rewards/rejected": -5.335952281951904, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.9978586723768735e-07, |
|
"logits/chosen": -2.673283815383911, |
|
"logits/rejected": -2.707296848297119, |
|
"logps/chosen": -312.5271911621094, |
|
"logps/rejected": -411.64031982421875, |
|
"loss": 0.0947, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.9499552845954895, |
|
"rewards/margins": 4.852605819702148, |
|
"rewards/rejected": -5.802561283111572, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.1049250535331905e-07, |
|
"logits/chosen": -2.623725175857544, |
|
"logits/rejected": -2.7073614597320557, |
|
"logps/chosen": -391.2462158203125, |
|
"logps/rejected": -474.2684631347656, |
|
"loss": 0.1168, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.0791637897491455, |
|
"rewards/margins": 7.065374851226807, |
|
"rewards/rejected": -8.144537925720215, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.211991434689507e-07, |
|
"logits/chosen": -2.6202073097229004, |
|
"logits/rejected": -2.652608633041382, |
|
"logps/chosen": -341.9140319824219, |
|
"logps/rejected": -462.9012145996094, |
|
"loss": 0.0944, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.275757372379303, |
|
"rewards/margins": 5.93372106552124, |
|
"rewards/rejected": -6.20947790145874, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_logits/chosen": -2.6932637691497803, |
|
"eval_logits/rejected": -2.6872053146362305, |
|
"eval_logps/chosen": -311.619873046875, |
|
"eval_logps/rejected": -409.2980041503906, |
|
"eval_loss": 0.11095032095909119, |
|
"eval_rewards/accuracies": 0.90625, |
|
"eval_rewards/chosen": -0.29085665941238403, |
|
"eval_rewards/margins": 6.364411354064941, |
|
"eval_rewards/rejected": -6.65526819229126, |
|
"eval_runtime": 38.7504, |
|
"eval_samples_per_second": 12.903, |
|
"eval_steps_per_second": 0.413, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.3190578158458244e-07, |
|
"logits/chosen": -2.6386542320251465, |
|
"logits/rejected": -2.7159385681152344, |
|
"logps/chosen": -368.5979919433594, |
|
"logps/rejected": -466.84783935546875, |
|
"loss": 0.131, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.8093490600585938, |
|
"rewards/margins": 7.050684452056885, |
|
"rewards/rejected": -7.8600335121154785, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.426124197002141e-07, |
|
"logits/chosen": -2.571882486343384, |
|
"logits/rejected": -2.6551308631896973, |
|
"logps/chosen": -361.48394775390625, |
|
"logps/rejected": -489.70989990234375, |
|
"loss": 0.0905, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.7510203123092651, |
|
"rewards/margins": 8.015697479248047, |
|
"rewards/rejected": -8.766717910766602, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.533190578158458e-07, |
|
"logits/chosen": -2.5930895805358887, |
|
"logits/rejected": -2.6723227500915527, |
|
"logps/chosen": -384.87664794921875, |
|
"logps/rejected": -509.010986328125, |
|
"loss": 0.1232, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.44466814398765564, |
|
"rewards/margins": 6.863368988037109, |
|
"rewards/rejected": -7.308036804199219, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.640256959314775e-07, |
|
"logits/chosen": -2.5658717155456543, |
|
"logits/rejected": -2.62716007232666, |
|
"logps/chosen": -304.2865295410156, |
|
"logps/rejected": -435.2959899902344, |
|
"loss": 0.0874, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.5686666369438171, |
|
"rewards/margins": 6.669247627258301, |
|
"rewards/rejected": -7.237914085388184, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.747323340471092e-07, |
|
"logits/chosen": -2.584165096282959, |
|
"logits/rejected": -2.70393967628479, |
|
"logps/chosen": -364.13262939453125, |
|
"logps/rejected": -477.5604553222656, |
|
"loss": 0.1015, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.5823951363563538, |
|
"rewards/margins": 7.484101295471191, |
|
"rewards/rejected": -8.066494941711426, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.854389721627409e-07, |
|
"logits/chosen": -2.5895907878875732, |
|
"logits/rejected": -2.646876573562622, |
|
"logps/chosen": -355.0018005371094, |
|
"logps/rejected": -442.65948486328125, |
|
"loss": 0.0896, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.8058759570121765, |
|
"rewards/margins": 8.065896987915039, |
|
"rewards/rejected": -8.871771812438965, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.961456102783726e-07, |
|
"logits/chosen": -2.615499973297119, |
|
"logits/rejected": -2.6612184047698975, |
|
"logps/chosen": -308.342041015625, |
|
"logps/rejected": -432.08319091796875, |
|
"loss": 0.0821, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.5296161770820618, |
|
"rewards/margins": 7.243483066558838, |
|
"rewards/rejected": -7.773098945617676, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.068522483940043e-07, |
|
"logits/chosen": -2.6956448554992676, |
|
"logits/rejected": -2.7061805725097656, |
|
"logps/chosen": -346.4541931152344, |
|
"logps/rejected": -481.19989013671875, |
|
"loss": 0.1104, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.6448992490768433, |
|
"rewards/margins": 7.711002349853516, |
|
"rewards/rejected": -8.355902671813965, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.175588865096359e-07, |
|
"logits/chosen": -2.6077234745025635, |
|
"logits/rejected": -2.6278557777404785, |
|
"logps/chosen": -353.8262634277344, |
|
"logps/rejected": -447.3440856933594, |
|
"loss": 0.0958, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.5978514552116394, |
|
"rewards/margins": 7.370479583740234, |
|
"rewards/rejected": -7.968331336975098, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.282655246252677e-07, |
|
"logits/chosen": -2.603065252304077, |
|
"logits/rejected": -2.675497531890869, |
|
"logps/chosen": -355.2611999511719, |
|
"logps/rejected": -411.75732421875, |
|
"loss": 0.1039, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.2750840187072754, |
|
"rewards/margins": 7.0222907066345215, |
|
"rewards/rejected": -7.2973737716674805, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_logits/chosen": -2.6301259994506836, |
|
"eval_logits/rejected": -2.6286230087280273, |
|
"eval_logps/chosen": -315.64288330078125, |
|
"eval_logps/rejected": -421.1318359375, |
|
"eval_loss": 0.07803654670715332, |
|
"eval_rewards/accuracies": 0.984375, |
|
"eval_rewards/chosen": -0.6931607723236084, |
|
"eval_rewards/margins": 7.145491600036621, |
|
"eval_rewards/rejected": -7.83865213394165, |
|
"eval_runtime": 38.7861, |
|
"eval_samples_per_second": 12.891, |
|
"eval_steps_per_second": 0.413, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.389721627408993e-07, |
|
"logits/chosen": -2.5576305389404297, |
|
"logits/rejected": -2.602813243865967, |
|
"logps/chosen": -361.10797119140625, |
|
"logps/rejected": -468.213134765625, |
|
"loss": 0.1042, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.9836179614067078, |
|
"rewards/margins": 6.6080522537231445, |
|
"rewards/rejected": -7.591670989990234, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.49678800856531e-07, |
|
"logits/chosen": -2.521080732345581, |
|
"logits/rejected": -2.5644307136535645, |
|
"logps/chosen": -325.7511901855469, |
|
"logps/rejected": -407.7994384765625, |
|
"loss": 0.1057, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.45771685242652893, |
|
"rewards/margins": 7.0977678298950195, |
|
"rewards/rejected": -7.555483818054199, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.603854389721627e-07, |
|
"logits/chosen": -2.5245959758758545, |
|
"logits/rejected": -2.559770107269287, |
|
"logps/chosen": -340.15087890625, |
|
"logps/rejected": -485.052490234375, |
|
"loss": 0.084, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.994246780872345, |
|
"rewards/margins": 7.357940673828125, |
|
"rewards/rejected": -8.35218620300293, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.710920770877944e-07, |
|
"logits/chosen": -2.401303768157959, |
|
"logits/rejected": -2.548125743865967, |
|
"logps/chosen": -358.9648742675781, |
|
"logps/rejected": -462.87890625, |
|
"loss": 0.1172, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.293526530265808, |
|
"rewards/margins": 7.095101833343506, |
|
"rewards/rejected": -8.388628005981445, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.817987152034261e-07, |
|
"logits/chosen": -2.4654183387756348, |
|
"logits/rejected": -2.560048818588257, |
|
"logps/chosen": -291.2701721191406, |
|
"logps/rejected": -362.7830505371094, |
|
"loss": 0.0959, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.4040035009384155, |
|
"rewards/margins": 5.726696968078613, |
|
"rewards/rejected": -7.130700588226318, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.925053533190578e-07, |
|
"logits/chosen": -2.489262104034424, |
|
"logits/rejected": -2.5457305908203125, |
|
"logps/chosen": -356.9480285644531, |
|
"logps/rejected": -435.594970703125, |
|
"loss": 0.1132, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -1.3584586381912231, |
|
"rewards/margins": 6.3141889572143555, |
|
"rewards/rejected": -7.672647953033447, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.996429421566293e-07, |
|
"logits/chosen": -2.5229034423828125, |
|
"logits/rejected": -2.565725326538086, |
|
"logps/chosen": -326.0317077636719, |
|
"logps/rejected": -448.7723083496094, |
|
"loss": 0.1051, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.5931789875030518, |
|
"rewards/margins": 7.049294471740723, |
|
"rewards/rejected": -8.642473220825195, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.98452749345394e-07, |
|
"logits/chosen": -2.5022709369659424, |
|
"logits/rejected": -2.555453062057495, |
|
"logps/chosen": -361.46563720703125, |
|
"logps/rejected": -498.7660217285156, |
|
"loss": 0.1386, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.8986074924468994, |
|
"rewards/margins": 6.340726375579834, |
|
"rewards/rejected": -8.239333152770996, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.972625565341585e-07, |
|
"logits/chosen": -2.4549243450164795, |
|
"logits/rejected": -2.5045337677001953, |
|
"logps/chosen": -320.4005432128906, |
|
"logps/rejected": -437.33612060546875, |
|
"loss": 0.0958, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -1.8746875524520874, |
|
"rewards/margins": 6.6805620193481445, |
|
"rewards/rejected": -8.555249214172363, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.960723637229232e-07, |
|
"logits/chosen": -2.448908567428589, |
|
"logits/rejected": -2.458101272583008, |
|
"logps/chosen": -355.0153503417969, |
|
"logps/rejected": -504.32330322265625, |
|
"loss": 0.0762, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.4554470777511597, |
|
"rewards/margins": 8.177068710327148, |
|
"rewards/rejected": -9.632516860961914, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_logits/chosen": -2.504735231399536, |
|
"eval_logits/rejected": -2.5092720985412598, |
|
"eval_logps/chosen": -323.16851806640625, |
|
"eval_logps/rejected": -433.9158020019531, |
|
"eval_loss": 0.08059512078762054, |
|
"eval_rewards/accuracies": 0.953125, |
|
"eval_rewards/chosen": -1.4457205533981323, |
|
"eval_rewards/margins": 7.671328544616699, |
|
"eval_rewards/rejected": -9.117048263549805, |
|
"eval_runtime": 38.7512, |
|
"eval_samples_per_second": 12.903, |
|
"eval_steps_per_second": 0.413, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.948821709116876e-07, |
|
"logits/chosen": -2.376183032989502, |
|
"logits/rejected": -2.455298900604248, |
|
"logps/chosen": -485.12603759765625, |
|
"logps/rejected": -551.7554931640625, |
|
"loss": 0.1056, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.5901005268096924, |
|
"rewards/margins": 8.771623611450195, |
|
"rewards/rejected": -10.361722946166992, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.936919781004522e-07, |
|
"logits/chosen": -2.470151424407959, |
|
"logits/rejected": -2.5587172508239746, |
|
"logps/chosen": -377.3062438964844, |
|
"logps/rejected": -507.6141052246094, |
|
"loss": 0.0955, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.233569860458374, |
|
"rewards/margins": 8.123286247253418, |
|
"rewards/rejected": -9.356857299804688, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.925017852892168e-07, |
|
"logits/chosen": -2.5230183601379395, |
|
"logits/rejected": -2.603940725326538, |
|
"logps/chosen": -362.92333984375, |
|
"logps/rejected": -481.7613220214844, |
|
"loss": 0.0683, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -1.9068357944488525, |
|
"rewards/margins": 7.921334743499756, |
|
"rewards/rejected": -9.828168869018555, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.913115924779814e-07, |
|
"logits/chosen": -2.438596248626709, |
|
"logits/rejected": -2.562830924987793, |
|
"logps/chosen": -386.5306701660156, |
|
"logps/rejected": -499.86444091796875, |
|
"loss": 0.0677, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.2040196657180786, |
|
"rewards/margins": 8.880427360534668, |
|
"rewards/rejected": -10.084446907043457, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.90121399666746e-07, |
|
"logits/chosen": -2.4589312076568604, |
|
"logits/rejected": -2.524345874786377, |
|
"logps/chosen": -332.1251220703125, |
|
"logps/rejected": -433.63787841796875, |
|
"loss": 0.1309, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.1124681234359741, |
|
"rewards/margins": 7.2715253829956055, |
|
"rewards/rejected": -8.383993148803711, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.889312068555106e-07, |
|
"logits/chosen": -2.58622407913208, |
|
"logits/rejected": -2.60271954536438, |
|
"logps/chosen": -271.59014892578125, |
|
"logps/rejected": -417.29833984375, |
|
"loss": 0.1275, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -1.1244533061981201, |
|
"rewards/margins": 6.384497165679932, |
|
"rewards/rejected": -7.508950710296631, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.877410140442752e-07, |
|
"logits/chosen": -2.4364261627197266, |
|
"logits/rejected": -2.4858317375183105, |
|
"logps/chosen": -350.3711853027344, |
|
"logps/rejected": -449.4051818847656, |
|
"loss": 0.0982, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.0685746669769287, |
|
"rewards/margins": 7.424908638000488, |
|
"rewards/rejected": -8.49348258972168, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.865508212330398e-07, |
|
"logits/chosen": -2.441240072250366, |
|
"logits/rejected": -2.527020215988159, |
|
"logps/chosen": -366.98150634765625, |
|
"logps/rejected": -525.4156494140625, |
|
"loss": 0.0867, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9126319885253906, |
|
"rewards/margins": 9.27831745147705, |
|
"rewards/rejected": -10.190949440002441, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.853606284218044e-07, |
|
"logits/chosen": -2.3090662956237793, |
|
"logits/rejected": -2.3255538940429688, |
|
"logps/chosen": -371.3923034667969, |
|
"logps/rejected": -526.1776123046875, |
|
"loss": 0.1095, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -2.190389633178711, |
|
"rewards/margins": 9.64104175567627, |
|
"rewards/rejected": -11.831432342529297, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.841704356105689e-07, |
|
"logits/chosen": -2.334197521209717, |
|
"logits/rejected": -2.423285484313965, |
|
"logps/chosen": -369.0033264160156, |
|
"logps/rejected": -506.4518127441406, |
|
"loss": 0.0959, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.7217298746109009, |
|
"rewards/margins": 8.07056999206543, |
|
"rewards/rejected": -9.7923002243042, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_logits/chosen": -2.467820405960083, |
|
"eval_logits/rejected": -2.440288782119751, |
|
"eval_logps/chosen": -318.6737060546875, |
|
"eval_logps/rejected": -428.9326171875, |
|
"eval_loss": 0.07413332909345627, |
|
"eval_rewards/accuracies": 0.984375, |
|
"eval_rewards/chosen": -0.9962404370307922, |
|
"eval_rewards/margins": 7.622487545013428, |
|
"eval_rewards/rejected": -8.618727684020996, |
|
"eval_runtime": 38.7439, |
|
"eval_samples_per_second": 12.905, |
|
"eval_steps_per_second": 0.413, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.829802427993334e-07, |
|
"logits/chosen": -2.3268227577209473, |
|
"logits/rejected": -2.3746628761291504, |
|
"logps/chosen": -404.0111083984375, |
|
"logps/rejected": -492.5167541503906, |
|
"loss": 0.0859, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.8345616459846497, |
|
"rewards/margins": 8.07560920715332, |
|
"rewards/rejected": -8.910171508789062, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.81790049988098e-07, |
|
"logits/chosen": -2.415301561355591, |
|
"logits/rejected": -2.4919333457946777, |
|
"logps/chosen": -388.5622253417969, |
|
"logps/rejected": -531.6051025390625, |
|
"loss": 0.0631, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.3940558433532715, |
|
"rewards/margins": 7.842892646789551, |
|
"rewards/rejected": -9.23694896697998, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.805998571768626e-07, |
|
"logits/chosen": -2.310925245285034, |
|
"logits/rejected": -2.42446231842041, |
|
"logps/chosen": -342.0956115722656, |
|
"logps/rejected": -516.9351196289062, |
|
"loss": 0.1142, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.3800750970840454, |
|
"rewards/margins": 8.762998580932617, |
|
"rewards/rejected": -10.143075942993164, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.794096643656272e-07, |
|
"logits/chosen": -2.280027151107788, |
|
"logits/rejected": -2.31703782081604, |
|
"logps/chosen": -409.70379638671875, |
|
"logps/rejected": -529.5406494140625, |
|
"loss": 0.0723, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.3363559246063232, |
|
"rewards/margins": 10.320574760437012, |
|
"rewards/rejected": -11.656930923461914, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.782194715543918e-07, |
|
"logits/chosen": -2.276779890060425, |
|
"logits/rejected": -2.343441963195801, |
|
"logps/chosen": -348.50531005859375, |
|
"logps/rejected": -521.2000122070312, |
|
"loss": 0.0902, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -1.5391457080841064, |
|
"rewards/margins": 9.673690795898438, |
|
"rewards/rejected": -11.212836265563965, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 4.770292787431564e-07, |
|
"logits/chosen": -2.3436553478240967, |
|
"logits/rejected": -2.3175175189971924, |
|
"logps/chosen": -386.4251403808594, |
|
"logps/rejected": -530.1958618164062, |
|
"loss": 0.0787, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -2.0843443870544434, |
|
"rewards/margins": 9.766562461853027, |
|
"rewards/rejected": -11.850906372070312, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.7583908593192097e-07, |
|
"logits/chosen": -2.2515616416931152, |
|
"logits/rejected": -2.2762718200683594, |
|
"logps/chosen": -396.88751220703125, |
|
"logps/rejected": -541.3609619140625, |
|
"loss": 0.0841, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -2.3059911727905273, |
|
"rewards/margins": 9.442736625671387, |
|
"rewards/rejected": -11.748727798461914, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.746488931206855e-07, |
|
"logits/chosen": -2.304055690765381, |
|
"logits/rejected": -2.3429813385009766, |
|
"logps/chosen": -353.8645935058594, |
|
"logps/rejected": -520.8157348632812, |
|
"loss": 0.0793, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -2.6302597522735596, |
|
"rewards/margins": 10.208868980407715, |
|
"rewards/rejected": -12.839129447937012, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 4.734587003094501e-07, |
|
"logits/chosen": -2.326953887939453, |
|
"logits/rejected": -2.4166040420532227, |
|
"logps/chosen": -377.34356689453125, |
|
"logps/rejected": -494.58782958984375, |
|
"loss": 0.1041, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -2.1931746006011963, |
|
"rewards/margins": 9.344148635864258, |
|
"rewards/rejected": -11.537323951721191, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.722685074982147e-07, |
|
"logits/chosen": -2.3279807567596436, |
|
"logits/rejected": -2.38569974899292, |
|
"logps/chosen": -320.0870056152344, |
|
"logps/rejected": -498.17706298828125, |
|
"loss": 0.0814, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.5494163036346436, |
|
"rewards/margins": 10.105340957641602, |
|
"rewards/rejected": -11.654756546020508, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_logits/chosen": -2.498293399810791, |
|
"eval_logits/rejected": -2.4712274074554443, |
|
"eval_logps/chosen": -323.183837890625, |
|
"eval_logps/rejected": -441.4797058105469, |
|
"eval_loss": 0.055789634585380554, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -1.4472523927688599, |
|
"eval_rewards/margins": 8.426188468933105, |
|
"eval_rewards/rejected": -9.87343978881836, |
|
"eval_runtime": 38.7758, |
|
"eval_samples_per_second": 12.895, |
|
"eval_steps_per_second": 0.413, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.710783146869793e-07, |
|
"logits/chosen": -2.3991808891296387, |
|
"logits/rejected": -2.4218363761901855, |
|
"logps/chosen": -314.1746520996094, |
|
"logps/rejected": -519.7462768554688, |
|
"loss": 0.0819, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.8268877267837524, |
|
"rewards/margins": 10.331625938415527, |
|
"rewards/rejected": -12.158514022827148, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 4.698881218757438e-07, |
|
"logits/chosen": -2.363438606262207, |
|
"logits/rejected": -2.3997836112976074, |
|
"logps/chosen": -305.2399597167969, |
|
"logps/rejected": -481.65582275390625, |
|
"loss": 0.0786, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.5438249111175537, |
|
"rewards/margins": 8.62690544128418, |
|
"rewards/rejected": -10.17072868347168, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.6869792906450845e-07, |
|
"logits/chosen": -2.3670878410339355, |
|
"logits/rejected": -2.4363322257995605, |
|
"logps/chosen": -342.06622314453125, |
|
"logps/rejected": -468.9805603027344, |
|
"loss": 0.0719, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.5114291906356812, |
|
"rewards/margins": 8.608851432800293, |
|
"rewards/rejected": -10.120282173156738, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.67507736253273e-07, |
|
"logits/chosen": -2.2785589694976807, |
|
"logits/rejected": -2.3089492321014404, |
|
"logps/chosen": -407.75048828125, |
|
"logps/rejected": -557.4127197265625, |
|
"loss": 0.0903, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -1.8981235027313232, |
|
"rewards/margins": 10.704629898071289, |
|
"rewards/rejected": -12.602753639221191, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.6631754344203763e-07, |
|
"logits/chosen": -2.3073747158050537, |
|
"logits/rejected": -2.383291244506836, |
|
"logps/chosen": -357.61492919921875, |
|
"logps/rejected": -522.1990356445312, |
|
"loss": 0.1043, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.5501503944396973, |
|
"rewards/margins": 8.703204154968262, |
|
"rewards/rejected": -11.253355026245117, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.6512735063080217e-07, |
|
"logits/chosen": -2.492027521133423, |
|
"logits/rejected": -2.534536361694336, |
|
"logps/chosen": -430.7220764160156, |
|
"logps/rejected": -559.482666015625, |
|
"loss": 0.0971, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.385508418083191, |
|
"rewards/margins": 9.584807395935059, |
|
"rewards/rejected": -10.970315933227539, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.6393715781956676e-07, |
|
"logits/chosen": -2.3780312538146973, |
|
"logits/rejected": -2.37473201751709, |
|
"logps/chosen": -326.2506103515625, |
|
"logps/rejected": -496.7969665527344, |
|
"loss": 0.0865, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -1.0338951349258423, |
|
"rewards/margins": 9.33600902557373, |
|
"rewards/rejected": -10.369903564453125, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.6274696500833135e-07, |
|
"logits/chosen": -2.4264612197875977, |
|
"logits/rejected": -2.45288348197937, |
|
"logps/chosen": -368.6007385253906, |
|
"logps/rejected": -534.6527709960938, |
|
"loss": 0.0645, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.9812146425247192, |
|
"rewards/margins": 9.224861145019531, |
|
"rewards/rejected": -10.206075668334961, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.6155677219709594e-07, |
|
"logits/chosen": -2.383737087249756, |
|
"logits/rejected": -2.4557416439056396, |
|
"logps/chosen": -401.9710388183594, |
|
"logps/rejected": -555.4797973632812, |
|
"loss": 0.0216, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.5994548797607422, |
|
"rewards/margins": 12.170892715454102, |
|
"rewards/rejected": -13.770347595214844, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 4.603665793858605e-07, |
|
"logits/chosen": -2.4060428142547607, |
|
"logits/rejected": -2.4426844120025635, |
|
"logps/chosen": -366.8950500488281, |
|
"logps/rejected": -558.5940551757812, |
|
"loss": 0.0164, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.232177972793579, |
|
"rewards/margins": 12.297248840332031, |
|
"rewards/rejected": -13.529426574707031, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"eval_logits/chosen": -2.492385149002075, |
|
"eval_logits/rejected": -2.468630313873291, |
|
"eval_logps/chosen": -324.3902587890625, |
|
"eval_logps/rejected": -453.6976623535156, |
|
"eval_loss": 0.06341304630041122, |
|
"eval_rewards/accuracies": 0.984375, |
|
"eval_rewards/chosen": -1.5678963661193848, |
|
"eval_rewards/margins": 9.527338981628418, |
|
"eval_rewards/rejected": -11.095235824584961, |
|
"eval_runtime": 38.5408, |
|
"eval_samples_per_second": 12.973, |
|
"eval_steps_per_second": 0.415, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 4.5917638657462507e-07, |
|
"logits/chosen": -2.33616042137146, |
|
"logits/rejected": -2.3640098571777344, |
|
"logps/chosen": -373.46905517578125, |
|
"logps/rejected": -514.2394409179688, |
|
"loss": 0.0209, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.4009530544281006, |
|
"rewards/margins": 10.871899604797363, |
|
"rewards/rejected": -12.272851943969727, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 4.5798619376338966e-07, |
|
"logits/chosen": -2.4044508934020996, |
|
"logits/rejected": -2.420480966567993, |
|
"logps/chosen": -347.3623962402344, |
|
"logps/rejected": -556.5758056640625, |
|
"loss": 0.0227, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.055593490600586, |
|
"rewards/margins": 11.897196769714355, |
|
"rewards/rejected": -12.952789306640625, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 4.567960009521542e-07, |
|
"logits/chosen": -2.359771490097046, |
|
"logits/rejected": -2.4249939918518066, |
|
"logps/chosen": -370.0980529785156, |
|
"logps/rejected": -567.7897338867188, |
|
"loss": 0.0131, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -2.0004942417144775, |
|
"rewards/margins": 12.14315414428711, |
|
"rewards/rejected": -14.143648147583008, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 4.5560580814091884e-07, |
|
"logits/chosen": -2.3424394130706787, |
|
"logits/rejected": -2.342963457107544, |
|
"logps/chosen": -385.192626953125, |
|
"logps/rejected": -510.11749267578125, |
|
"loss": 0.0098, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.1284375190734863, |
|
"rewards/margins": 11.841325759887695, |
|
"rewards/rejected": -13.969762802124023, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 4.5441561532968337e-07, |
|
"logits/chosen": -2.3772830963134766, |
|
"logits/rejected": -2.414663791656494, |
|
"logps/chosen": -375.8727722167969, |
|
"logps/rejected": -580.7897338867188, |
|
"loss": 0.0093, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.0805163383483887, |
|
"rewards/margins": 12.892430305480957, |
|
"rewards/rejected": -15.972944259643555, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 4.5322542251844796e-07, |
|
"logits/chosen": -2.3776564598083496, |
|
"logits/rejected": -2.409484386444092, |
|
"logps/chosen": -331.92431640625, |
|
"logps/rejected": -500.89739990234375, |
|
"loss": 0.0143, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.3345754146575928, |
|
"rewards/margins": 11.422739028930664, |
|
"rewards/rejected": -13.757314682006836, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 4.5203522970721255e-07, |
|
"logits/chosen": -2.3700737953186035, |
|
"logits/rejected": -2.397162914276123, |
|
"logps/chosen": -340.53094482421875, |
|
"logps/rejected": -506.8477478027344, |
|
"loss": 0.0146, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.4118890166282654, |
|
"rewards/margins": 12.948440551757812, |
|
"rewards/rejected": -13.360328674316406, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 4.5084503689597714e-07, |
|
"logits/chosen": -2.41035795211792, |
|
"logits/rejected": -2.4271979331970215, |
|
"logps/chosen": -329.87933349609375, |
|
"logps/rejected": -537.0123291015625, |
|
"loss": 0.0135, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.4996788501739502, |
|
"rewards/margins": 11.888396263122559, |
|
"rewards/rejected": -13.388073921203613, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 4.496548440847417e-07, |
|
"logits/chosen": -2.401721477508545, |
|
"logits/rejected": -2.447669506072998, |
|
"logps/chosen": -366.2709045410156, |
|
"logps/rejected": -519.80224609375, |
|
"loss": 0.0139, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.3627954721450806, |
|
"rewards/margins": 12.356982231140137, |
|
"rewards/rejected": -13.71977710723877, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 4.484646512735063e-07, |
|
"logits/chosen": -2.4436986446380615, |
|
"logits/rejected": -2.5449397563934326, |
|
"logps/chosen": -384.5765686035156, |
|
"logps/rejected": -555.2340087890625, |
|
"loss": 0.0172, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.7870714068412781, |
|
"rewards/margins": 11.903576850891113, |
|
"rewards/rejected": -12.690648078918457, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"eval_logits/chosen": -2.5417840480804443, |
|
"eval_logits/rejected": -2.5121681690216064, |
|
"eval_logps/chosen": -326.2882080078125, |
|
"eval_logps/rejected": -464.37054443359375, |
|
"eval_loss": 0.06124735251069069, |
|
"eval_rewards/accuracies": 0.984375, |
|
"eval_rewards/chosen": -1.7576879262924194, |
|
"eval_rewards/margins": 10.404834747314453, |
|
"eval_rewards/rejected": -12.162521362304688, |
|
"eval_runtime": 38.6563, |
|
"eval_samples_per_second": 12.934, |
|
"eval_steps_per_second": 0.414, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 4.4727445846227086e-07, |
|
"logits/chosen": -2.438345432281494, |
|
"logits/rejected": -2.4737024307250977, |
|
"logps/chosen": -369.38397216796875, |
|
"logps/rejected": -519.6220703125, |
|
"loss": 0.011, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.9280792474746704, |
|
"rewards/margins": 12.675816535949707, |
|
"rewards/rejected": -14.60389518737793, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 4.4608426565103545e-07, |
|
"logits/chosen": -2.450275182723999, |
|
"logits/rejected": -2.462500810623169, |
|
"logps/chosen": -343.4928283691406, |
|
"logps/rejected": -515.9462280273438, |
|
"loss": 0.0221, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -2.3710033893585205, |
|
"rewards/margins": 13.644805908203125, |
|
"rewards/rejected": -16.015810012817383, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 4.4489407283980004e-07, |
|
"logits/chosen": -2.423760414123535, |
|
"logits/rejected": -2.385545253753662, |
|
"logps/chosen": -370.15985107421875, |
|
"logps/rejected": -515.8549194335938, |
|
"loss": 0.0097, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.4730286598205566, |
|
"rewards/margins": 12.967801094055176, |
|
"rewards/rejected": -15.440831184387207, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 4.437038800285646e-07, |
|
"logits/chosen": -2.399423360824585, |
|
"logits/rejected": -2.418363094329834, |
|
"logps/chosen": -384.27984619140625, |
|
"logps/rejected": -549.5245971679688, |
|
"loss": 0.0156, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -3.72330904006958, |
|
"rewards/margins": 12.818862915039062, |
|
"rewards/rejected": -16.542171478271484, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 4.4251368721732916e-07, |
|
"logits/chosen": -2.5278353691101074, |
|
"logits/rejected": -2.5364837646484375, |
|
"logps/chosen": -329.5386657714844, |
|
"logps/rejected": -519.6696166992188, |
|
"loss": 0.0223, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.4351348876953125, |
|
"rewards/margins": 11.446606636047363, |
|
"rewards/rejected": -12.881741523742676, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 4.413234944060938e-07, |
|
"logits/chosen": -2.527299165725708, |
|
"logits/rejected": -2.5759024620056152, |
|
"logps/chosen": -403.71063232421875, |
|
"logps/rejected": -589.4862670898438, |
|
"loss": 0.0147, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.19414202868938446, |
|
"rewards/margins": 12.035063743591309, |
|
"rewards/rejected": -12.229207038879395, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 4.4013330159485834e-07, |
|
"logits/chosen": -2.4672398567199707, |
|
"logits/rejected": -2.4999210834503174, |
|
"logps/chosen": -334.6300048828125, |
|
"logps/rejected": -534.4932250976562, |
|
"loss": 0.0255, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.7743580341339111, |
|
"rewards/margins": 12.416712760925293, |
|
"rewards/rejected": -14.191072463989258, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 4.3894310878362293e-07, |
|
"logits/chosen": -2.447817087173462, |
|
"logits/rejected": -2.5005249977111816, |
|
"logps/chosen": -338.5157470703125, |
|
"logps/rejected": -544.09423828125, |
|
"loss": 0.0229, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.8672630786895752, |
|
"rewards/margins": 12.040175437927246, |
|
"rewards/rejected": -13.907438278198242, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 4.377529159723875e-07, |
|
"logits/chosen": -2.4685416221618652, |
|
"logits/rejected": -2.49491548538208, |
|
"logps/chosen": -366.1611022949219, |
|
"logps/rejected": -518.9093627929688, |
|
"loss": 0.0079, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.9218127727508545, |
|
"rewards/margins": 11.573265075683594, |
|
"rewards/rejected": -13.495076179504395, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 4.365627231611521e-07, |
|
"logits/chosen": -2.470853805541992, |
|
"logits/rejected": -2.497331380844116, |
|
"logps/chosen": -405.1899719238281, |
|
"logps/rejected": -591.7445068359375, |
|
"loss": 0.0057, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.9404414892196655, |
|
"rewards/margins": 13.470489501953125, |
|
"rewards/rejected": -15.410931587219238, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"eval_logits/chosen": -2.5345709323883057, |
|
"eval_logits/rejected": -2.507004737854004, |
|
"eval_logps/chosen": -336.10919189453125, |
|
"eval_logps/rejected": -476.1966552734375, |
|
"eval_loss": 0.0556936077773571, |
|
"eval_rewards/accuracies": 0.984375, |
|
"eval_rewards/chosen": -2.7397918701171875, |
|
"eval_rewards/margins": 10.605344772338867, |
|
"eval_rewards/rejected": -13.345136642456055, |
|
"eval_runtime": 38.7118, |
|
"eval_samples_per_second": 12.916, |
|
"eval_steps_per_second": 0.413, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 4.3537253034991665e-07, |
|
"logits/chosen": -2.441990852355957, |
|
"logits/rejected": -2.4507715702056885, |
|
"logps/chosen": -329.62542724609375, |
|
"logps/rejected": -574.9547729492188, |
|
"loss": 0.0214, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -3.120880603790283, |
|
"rewards/margins": 13.88032054901123, |
|
"rewards/rejected": -17.001201629638672, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 4.3418233753868124e-07, |
|
"logits/chosen": -2.3679394721984863, |
|
"logits/rejected": -2.410681962966919, |
|
"logps/chosen": -341.8808898925781, |
|
"logps/rejected": -532.3084106445312, |
|
"loss": 0.0303, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -2.787487030029297, |
|
"rewards/margins": 11.951956748962402, |
|
"rewards/rejected": -14.739442825317383, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 4.3299214472744583e-07, |
|
"logits/chosen": -2.4356143474578857, |
|
"logits/rejected": -2.484920024871826, |
|
"logps/chosen": -378.17376708984375, |
|
"logps/rejected": -561.7147216796875, |
|
"loss": 0.0212, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.4539060592651367, |
|
"rewards/margins": 12.572771072387695, |
|
"rewards/rejected": -15.026677131652832, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 4.3180195191621036e-07, |
|
"logits/chosen": -2.4165291786193848, |
|
"logits/rejected": -2.3931941986083984, |
|
"logps/chosen": -377.8540344238281, |
|
"logps/rejected": -555.7592163085938, |
|
"loss": 0.0254, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.2512832880020142, |
|
"rewards/margins": 12.33320426940918, |
|
"rewards/rejected": -13.58448600769043, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 4.30611759104975e-07, |
|
"logits/chosen": -2.3533992767333984, |
|
"logits/rejected": -2.3296687602996826, |
|
"logps/chosen": -418.5027770996094, |
|
"logps/rejected": -600.8396606445312, |
|
"loss": 0.0201, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.496593952178955, |
|
"rewards/margins": 13.320207595825195, |
|
"rewards/rejected": -15.816801071166992, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 4.2942156629373954e-07, |
|
"logits/chosen": -2.246854782104492, |
|
"logits/rejected": -2.3130173683166504, |
|
"logps/chosen": -396.1013488769531, |
|
"logps/rejected": -553.8746337890625, |
|
"loss": 0.0209, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.86772084236145, |
|
"rewards/margins": 13.22656536102295, |
|
"rewards/rejected": -16.094287872314453, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 4.2823137348250413e-07, |
|
"logits/chosen": -2.1099252700805664, |
|
"logits/rejected": -2.1625306606292725, |
|
"logps/chosen": -439.188232421875, |
|
"logps/rejected": -567.4981689453125, |
|
"loss": 0.0195, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -3.4562058448791504, |
|
"rewards/margins": 11.824674606323242, |
|
"rewards/rejected": -15.280881881713867, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 4.270411806712687e-07, |
|
"logits/chosen": -2.182868480682373, |
|
"logits/rejected": -2.140045642852783, |
|
"logps/chosen": -414.1625061035156, |
|
"logps/rejected": -590.7791748046875, |
|
"loss": 0.0203, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.0633182525634766, |
|
"rewards/margins": 13.505340576171875, |
|
"rewards/rejected": -15.568659782409668, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 4.258509878600333e-07, |
|
"logits/chosen": -2.301701068878174, |
|
"logits/rejected": -2.3724331855773926, |
|
"logps/chosen": -318.6136779785156, |
|
"logps/rejected": -549.11572265625, |
|
"loss": 0.0162, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.7360296249389648, |
|
"rewards/margins": 12.463074684143066, |
|
"rewards/rejected": -14.199106216430664, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 4.2466079504879785e-07, |
|
"logits/chosen": -2.3375637531280518, |
|
"logits/rejected": -2.371568202972412, |
|
"logps/chosen": -355.43218994140625, |
|
"logps/rejected": -497.6923828125, |
|
"loss": 0.0296, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -1.8382488489151, |
|
"rewards/margins": 11.133204460144043, |
|
"rewards/rejected": -12.971455574035645, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"eval_logits/chosen": -2.422253131866455, |
|
"eval_logits/rejected": -2.3856472969055176, |
|
"eval_logps/chosen": -327.49688720703125, |
|
"eval_logps/rejected": -458.99761962890625, |
|
"eval_loss": 0.0712868794798851, |
|
"eval_rewards/accuracies": 0.953125, |
|
"eval_rewards/chosen": -1.8785579204559326, |
|
"eval_rewards/margins": 9.746674537658691, |
|
"eval_rewards/rejected": -11.625232696533203, |
|
"eval_runtime": 38.5688, |
|
"eval_samples_per_second": 12.964, |
|
"eval_steps_per_second": 0.415, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 4.234706022375625e-07, |
|
"logits/chosen": -2.335549831390381, |
|
"logits/rejected": -2.3633885383605957, |
|
"logps/chosen": -334.0445251464844, |
|
"logps/rejected": -532.0367431640625, |
|
"loss": 0.0173, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.1568909883499146, |
|
"rewards/margins": 12.151830673217773, |
|
"rewards/rejected": -13.308721542358398, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 4.2228040942632703e-07, |
|
"logits/chosen": -2.2730376720428467, |
|
"logits/rejected": -2.279794931411743, |
|
"logps/chosen": -372.47711181640625, |
|
"logps/rejected": -565.377197265625, |
|
"loss": 0.0135, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.948188066482544, |
|
"rewards/margins": 12.926470756530762, |
|
"rewards/rejected": -14.874661445617676, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 4.210902166150916e-07, |
|
"logits/chosen": -2.1850171089172363, |
|
"logits/rejected": -2.2554237842559814, |
|
"logps/chosen": -330.89398193359375, |
|
"logps/rejected": -572.4408569335938, |
|
"loss": 0.0152, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -2.2409050464630127, |
|
"rewards/margins": 15.152783393859863, |
|
"rewards/rejected": -17.393688201904297, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 4.199000238038562e-07, |
|
"logits/chosen": -2.2348155975341797, |
|
"logits/rejected": -2.276552200317383, |
|
"logps/chosen": -391.0440673828125, |
|
"logps/rejected": -562.8758544921875, |
|
"loss": 0.0083, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -2.518620014190674, |
|
"rewards/margins": 13.422780990600586, |
|
"rewards/rejected": -15.941401481628418, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 4.187098309926208e-07, |
|
"logits/chosen": -2.234314441680908, |
|
"logits/rejected": -2.273665428161621, |
|
"logps/chosen": -379.77752685546875, |
|
"logps/rejected": -609.7650146484375, |
|
"loss": 0.0167, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -2.2126364707946777, |
|
"rewards/margins": 14.33509635925293, |
|
"rewards/rejected": -16.547733306884766, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 4.1751963818138534e-07, |
|
"logits/chosen": -2.2460713386535645, |
|
"logits/rejected": -2.28529953956604, |
|
"logps/chosen": -391.7981872558594, |
|
"logps/rejected": -584.82373046875, |
|
"loss": 0.0106, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.583667278289795, |
|
"rewards/margins": 13.928072929382324, |
|
"rewards/rejected": -16.511741638183594, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 4.1632944537015e-07, |
|
"logits/chosen": -2.312187671661377, |
|
"logits/rejected": -2.313152313232422, |
|
"logps/chosen": -332.22418212890625, |
|
"logps/rejected": -550.9510498046875, |
|
"loss": 0.0151, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -2.8060202598571777, |
|
"rewards/margins": 13.428415298461914, |
|
"rewards/rejected": -16.23443603515625, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 4.151392525589145e-07, |
|
"logits/chosen": -2.269207715988159, |
|
"logits/rejected": -2.2718236446380615, |
|
"logps/chosen": -332.3182067871094, |
|
"logps/rejected": -509.44085693359375, |
|
"loss": 0.0267, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -3.1180636882781982, |
|
"rewards/margins": 12.261663436889648, |
|
"rewards/rejected": -15.379727363586426, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 4.139490597476791e-07, |
|
"logits/chosen": -2.2478084564208984, |
|
"logits/rejected": -2.3000128269195557, |
|
"logps/chosen": -337.1382141113281, |
|
"logps/rejected": -537.2418212890625, |
|
"loss": 0.0108, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.009748935699463, |
|
"rewards/margins": 12.527368545532227, |
|
"rewards/rejected": -14.537118911743164, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 4.127588669364437e-07, |
|
"logits/chosen": -2.313680648803711, |
|
"logits/rejected": -2.327012538909912, |
|
"logps/chosen": -291.6064758300781, |
|
"logps/rejected": -546.3372802734375, |
|
"loss": 0.0148, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -3.4880402088165283, |
|
"rewards/margins": 14.52784252166748, |
|
"rewards/rejected": -18.01588249206543, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"eval_logits/chosen": -2.3877577781677246, |
|
"eval_logits/rejected": -2.35882568359375, |
|
"eval_logps/chosen": -347.202880859375, |
|
"eval_logps/rejected": -496.7171325683594, |
|
"eval_loss": 0.07778895646333694, |
|
"eval_rewards/accuracies": 0.953125, |
|
"eval_rewards/chosen": -3.8491578102111816, |
|
"eval_rewards/margins": 11.548023223876953, |
|
"eval_rewards/rejected": -15.397181510925293, |
|
"eval_runtime": 38.6215, |
|
"eval_samples_per_second": 12.946, |
|
"eval_steps_per_second": 0.414, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 4.115686741252083e-07, |
|
"logits/chosen": -2.292132616043091, |
|
"logits/rejected": -2.347907781600952, |
|
"logps/chosen": -362.74481201171875, |
|
"logps/rejected": -558.0933837890625, |
|
"loss": 0.0163, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.724060297012329, |
|
"rewards/margins": 13.57036304473877, |
|
"rewards/rejected": -16.294422149658203, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 4.103784813139728e-07, |
|
"logits/chosen": -2.3167264461517334, |
|
"logits/rejected": -2.3449079990386963, |
|
"logps/chosen": -369.4256591796875, |
|
"logps/rejected": -566.0360107421875, |
|
"loss": 0.0155, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -3.4625415802001953, |
|
"rewards/margins": 13.401751518249512, |
|
"rewards/rejected": -16.86429214477539, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 4.091882885027374e-07, |
|
"logits/chosen": -2.3674325942993164, |
|
"logits/rejected": -2.455508232116699, |
|
"logps/chosen": -381.26068115234375, |
|
"logps/rejected": -550.90625, |
|
"loss": 0.0244, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -3.13775897026062, |
|
"rewards/margins": 13.795980453491211, |
|
"rewards/rejected": -16.933740615844727, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 4.07998095691502e-07, |
|
"logits/chosen": -2.3083391189575195, |
|
"logits/rejected": -2.330939769744873, |
|
"logps/chosen": -362.44171142578125, |
|
"logps/rejected": -523.51171875, |
|
"loss": 0.0124, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.1269755363464355, |
|
"rewards/margins": 12.574740409851074, |
|
"rewards/rejected": -15.701716423034668, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 4.0680790288026654e-07, |
|
"logits/chosen": -2.3918001651763916, |
|
"logits/rejected": -2.426542282104492, |
|
"logps/chosen": -420.2566833496094, |
|
"logps/rejected": -605.3551025390625, |
|
"loss": 0.0202, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -2.9122402667999268, |
|
"rewards/margins": 13.67309856414795, |
|
"rewards/rejected": -16.585338592529297, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 4.056177100690312e-07, |
|
"logits/chosen": -2.2674708366394043, |
|
"logits/rejected": -2.2906508445739746, |
|
"logps/chosen": -390.3266296386719, |
|
"logps/rejected": -587.2613525390625, |
|
"loss": 0.011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.047337532043457, |
|
"rewards/margins": 13.966493606567383, |
|
"rewards/rejected": -18.013832092285156, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 4.044275172577957e-07, |
|
"logits/chosen": -2.256685733795166, |
|
"logits/rejected": -2.283980131149292, |
|
"logps/chosen": -307.6758728027344, |
|
"logps/rejected": -536.929931640625, |
|
"loss": 0.0251, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -2.8640975952148438, |
|
"rewards/margins": 14.663250923156738, |
|
"rewards/rejected": -17.527347564697266, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 4.0323732444656036e-07, |
|
"logits/chosen": -2.2302117347717285, |
|
"logits/rejected": -2.319187641143799, |
|
"logps/chosen": -377.6014099121094, |
|
"logps/rejected": -592.4954223632812, |
|
"loss": 0.0208, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -2.1300606727600098, |
|
"rewards/margins": 14.494562149047852, |
|
"rewards/rejected": -16.624622344970703, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 4.020471316353249e-07, |
|
"logits/chosen": -2.3077661991119385, |
|
"logits/rejected": -2.34450364112854, |
|
"logps/chosen": -384.89007568359375, |
|
"logps/rejected": -577.9298095703125, |
|
"loss": 0.0126, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.4994373321533203, |
|
"rewards/margins": 12.733844757080078, |
|
"rewards/rejected": -14.233282089233398, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 4.008569388240895e-07, |
|
"logits/chosen": -2.230447292327881, |
|
"logits/rejected": -2.283294677734375, |
|
"logps/chosen": -346.1694641113281, |
|
"logps/rejected": -534.3992919921875, |
|
"loss": 0.019, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.3882415294647217, |
|
"rewards/margins": 14.5983247756958, |
|
"rewards/rejected": -15.986566543579102, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"eval_logits/chosen": -2.403440475463867, |
|
"eval_logits/rejected": -2.378675699234009, |
|
"eval_logps/chosen": -332.9962463378906, |
|
"eval_logps/rejected": -477.9118957519531, |
|
"eval_loss": 0.07047431915998459, |
|
"eval_rewards/accuracies": 0.9375, |
|
"eval_rewards/chosen": -2.4284939765930176, |
|
"eval_rewards/margins": 11.088165283203125, |
|
"eval_rewards/rejected": -13.516657829284668, |
|
"eval_runtime": 38.6695, |
|
"eval_samples_per_second": 12.93, |
|
"eval_steps_per_second": 0.414, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 3.996667460128541e-07, |
|
"logits/chosen": -2.31799578666687, |
|
"logits/rejected": -2.3302206993103027, |
|
"logps/chosen": -333.87261962890625, |
|
"logps/rejected": -506.0113220214844, |
|
"loss": 0.0166, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.8233000040054321, |
|
"rewards/margins": 13.524618148803711, |
|
"rewards/rejected": -15.347920417785645, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 3.9847655320161867e-07, |
|
"logits/chosen": -2.3380274772644043, |
|
"logits/rejected": -2.3655543327331543, |
|
"logps/chosen": -330.939453125, |
|
"logps/rejected": -566.5387573242188, |
|
"loss": 0.0211, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.293247938156128, |
|
"rewards/margins": 13.109285354614258, |
|
"rewards/rejected": -16.402530670166016, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 3.972863603903832e-07, |
|
"logits/chosen": -2.4296791553497314, |
|
"logits/rejected": -2.395019054412842, |
|
"logps/chosen": -368.58843994140625, |
|
"logps/rejected": -550.57177734375, |
|
"loss": 0.0147, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.1659107208251953, |
|
"rewards/margins": 14.171772956848145, |
|
"rewards/rejected": -16.337684631347656, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 3.9609616757914784e-07, |
|
"logits/chosen": -2.386429786682129, |
|
"logits/rejected": -2.401638984680176, |
|
"logps/chosen": -347.26214599609375, |
|
"logps/rejected": -538.3074951171875, |
|
"loss": 0.0162, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -2.806589126586914, |
|
"rewards/margins": 12.520380973815918, |
|
"rewards/rejected": -15.326970100402832, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 3.949059747679124e-07, |
|
"logits/chosen": -2.3784899711608887, |
|
"logits/rejected": -2.42669939994812, |
|
"logps/chosen": -364.69512939453125, |
|
"logps/rejected": -592.1053466796875, |
|
"loss": 0.0159, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -3.1103992462158203, |
|
"rewards/margins": 15.538830757141113, |
|
"rewards/rejected": -18.649229049682617, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 3.9371578195667697e-07, |
|
"logits/chosen": -2.4179718494415283, |
|
"logits/rejected": -2.4337425231933594, |
|
"logps/chosen": -338.0289001464844, |
|
"logps/rejected": -522.47412109375, |
|
"loss": 0.0343, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -2.291858196258545, |
|
"rewards/margins": 14.658024787902832, |
|
"rewards/rejected": -16.949880599975586, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 3.9252558914544156e-07, |
|
"logits/chosen": -2.37274169921875, |
|
"logits/rejected": -2.376906633377075, |
|
"logps/chosen": -371.0089111328125, |
|
"logps/rejected": -562.0587158203125, |
|
"loss": 0.0236, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -2.699599504470825, |
|
"rewards/margins": 14.066309928894043, |
|
"rewards/rejected": -16.76590919494629, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 3.9133539633420615e-07, |
|
"logits/chosen": -2.3570303916931152, |
|
"logits/rejected": -2.4414098262786865, |
|
"logps/chosen": -347.50531005859375, |
|
"logps/rejected": -606.2113647460938, |
|
"loss": 0.015, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.3379924297332764, |
|
"rewards/margins": 14.867982864379883, |
|
"rewards/rejected": -17.205974578857422, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 3.901452035229707e-07, |
|
"logits/chosen": -2.373347043991089, |
|
"logits/rejected": -2.4218459129333496, |
|
"logps/chosen": -421.48187255859375, |
|
"logps/rejected": -606.8762817382812, |
|
"loss": 0.0132, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.6556594371795654, |
|
"rewards/margins": 14.492483139038086, |
|
"rewards/rejected": -17.148143768310547, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 3.8895501071173533e-07, |
|
"logits/chosen": -2.3142504692077637, |
|
"logits/rejected": -2.3538806438446045, |
|
"logps/chosen": -325.9708557128906, |
|
"logps/rejected": -511.67449951171875, |
|
"loss": 0.0214, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -2.7798726558685303, |
|
"rewards/margins": 13.109631538391113, |
|
"rewards/rejected": -15.889503479003906, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_logits/chosen": -2.3960964679718018, |
|
"eval_logits/rejected": -2.3517098426818848, |
|
"eval_logps/chosen": -346.35821533203125, |
|
"eval_logps/rejected": -495.85186767578125, |
|
"eval_loss": 0.07910314947366714, |
|
"eval_rewards/accuracies": 0.96875, |
|
"eval_rewards/chosen": -3.7646918296813965, |
|
"eval_rewards/margins": 11.545960426330566, |
|
"eval_rewards/rejected": -15.310651779174805, |
|
"eval_runtime": 38.7173, |
|
"eval_samples_per_second": 12.914, |
|
"eval_steps_per_second": 0.413, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 3.8776481790049987e-07, |
|
"logits/chosen": -2.3062312602996826, |
|
"logits/rejected": -2.3327383995056152, |
|
"logps/chosen": -354.59381103515625, |
|
"logps/rejected": -503.6541442871094, |
|
"loss": 0.0196, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -3.5312271118164062, |
|
"rewards/margins": 13.276026725769043, |
|
"rewards/rejected": -16.807254791259766, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 3.865746250892644e-07, |
|
"logits/chosen": -2.396146774291992, |
|
"logits/rejected": -2.3744444847106934, |
|
"logps/chosen": -397.74609375, |
|
"logps/rejected": -583.1174926757812, |
|
"loss": 0.0162, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.2743606567382812, |
|
"rewards/margins": 15.37347412109375, |
|
"rewards/rejected": -18.647836685180664, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 3.8538443227802905e-07, |
|
"logits/chosen": -2.3621578216552734, |
|
"logits/rejected": -2.3470935821533203, |
|
"logps/chosen": -374.19757080078125, |
|
"logps/rejected": -564.0121459960938, |
|
"loss": 0.022, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -4.340083122253418, |
|
"rewards/margins": 13.78313159942627, |
|
"rewards/rejected": -18.123212814331055, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 3.841942394667936e-07, |
|
"logits/chosen": -2.304884672164917, |
|
"logits/rejected": -2.4029793739318848, |
|
"logps/chosen": -369.39898681640625, |
|
"logps/rejected": -578.387451171875, |
|
"loss": 0.0146, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -2.7867379188537598, |
|
"rewards/margins": 14.443509101867676, |
|
"rewards/rejected": -17.23024559020996, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 3.8300404665555817e-07, |
|
"logits/chosen": -2.2816107273101807, |
|
"logits/rejected": -2.2829480171203613, |
|
"logps/chosen": -374.7585144042969, |
|
"logps/rejected": -540.5015869140625, |
|
"loss": 0.0164, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -2.205556869506836, |
|
"rewards/margins": 14.464788436889648, |
|
"rewards/rejected": -16.670345306396484, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 3.8181385384432276e-07, |
|
"logits/chosen": -2.282743453979492, |
|
"logits/rejected": -2.2942354679107666, |
|
"logps/chosen": -394.46502685546875, |
|
"logps/rejected": -594.6571044921875, |
|
"loss": 0.0112, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.892620325088501, |
|
"rewards/margins": 14.386013984680176, |
|
"rewards/rejected": -17.27863311767578, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 3.8062366103308735e-07, |
|
"logits/chosen": -2.2720725536346436, |
|
"logits/rejected": -2.245262622833252, |
|
"logps/chosen": -342.9836730957031, |
|
"logps/rejected": -546.7418212890625, |
|
"loss": 0.0365, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.7557284832000732, |
|
"rewards/margins": 14.667689323425293, |
|
"rewards/rejected": -17.423416137695312, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 3.794334682218519e-07, |
|
"logits/chosen": -2.295213222503662, |
|
"logits/rejected": -2.3375067710876465, |
|
"logps/chosen": -370.61798095703125, |
|
"logps/rejected": -474.4059143066406, |
|
"loss": 0.0237, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -3.196665048599243, |
|
"rewards/margins": 12.084269523620605, |
|
"rewards/rejected": -15.28093433380127, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 3.7824327541061653e-07, |
|
"logits/chosen": -2.4100170135498047, |
|
"logits/rejected": -2.4586007595062256, |
|
"logps/chosen": -358.7035217285156, |
|
"logps/rejected": -547.9478149414062, |
|
"loss": 0.0184, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.731393337249756, |
|
"rewards/margins": 12.000238418579102, |
|
"rewards/rejected": -15.73162841796875, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 3.7705308259938107e-07, |
|
"logits/chosen": -2.432584047317505, |
|
"logits/rejected": -2.430572032928467, |
|
"logps/chosen": -400.4476318359375, |
|
"logps/rejected": -589.388427734375, |
|
"loss": 0.0124, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.0731418132781982, |
|
"rewards/margins": 13.324457168579102, |
|
"rewards/rejected": -16.397600173950195, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"eval_logits/chosen": -2.4233508110046387, |
|
"eval_logits/rejected": -2.3732004165649414, |
|
"eval_logps/chosen": -345.49517822265625, |
|
"eval_logps/rejected": -491.72662353515625, |
|
"eval_loss": 0.08803335577249527, |
|
"eval_rewards/accuracies": 0.9375, |
|
"eval_rewards/chosen": -3.678384304046631, |
|
"eval_rewards/margins": 11.219746589660645, |
|
"eval_rewards/rejected": -14.89813232421875, |
|
"eval_runtime": 38.608, |
|
"eval_samples_per_second": 12.951, |
|
"eval_steps_per_second": 0.414, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 3.7586288978814566e-07, |
|
"logits/chosen": -2.335282564163208, |
|
"logits/rejected": -2.330732583999634, |
|
"logps/chosen": -388.20806884765625, |
|
"logps/rejected": -580.2225341796875, |
|
"loss": 0.0118, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.1078379154205322, |
|
"rewards/margins": 13.242405891418457, |
|
"rewards/rejected": -16.350242614746094, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 3.7467269697691025e-07, |
|
"logits/chosen": -2.3464579582214355, |
|
"logits/rejected": -2.3436694145202637, |
|
"logps/chosen": -335.885986328125, |
|
"logps/rejected": -532.0635986328125, |
|
"loss": 0.0328, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.1488466262817383, |
|
"rewards/margins": 13.591397285461426, |
|
"rewards/rejected": -16.740243911743164, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 3.7348250416567484e-07, |
|
"logits/chosen": -2.2621750831604004, |
|
"logits/rejected": -2.2600533962249756, |
|
"logps/chosen": -415.00982666015625, |
|
"logps/rejected": -549.5345458984375, |
|
"loss": 0.0264, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -2.42472767829895, |
|
"rewards/margins": 13.469167709350586, |
|
"rewards/rejected": -15.893896102905273, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 3.722923113544394e-07, |
|
"logits/chosen": -2.361262559890747, |
|
"logits/rejected": -2.315338611602783, |
|
"logps/chosen": -394.708740234375, |
|
"logps/rejected": -578.1019287109375, |
|
"loss": 0.0251, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -2.082348585128784, |
|
"rewards/margins": 15.23118782043457, |
|
"rewards/rejected": -17.31353759765625, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 3.71102118543204e-07, |
|
"logits/chosen": -2.315455913543701, |
|
"logits/rejected": -2.284585952758789, |
|
"logps/chosen": -367.0815734863281, |
|
"logps/rejected": -577.2198486328125, |
|
"loss": 0.0113, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.139265537261963, |
|
"rewards/margins": 14.051069259643555, |
|
"rewards/rejected": -16.19033432006836, |
|
"step": 1550 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 4668, |
|
"num_train_epochs": 6, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|