|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 50, |
|
"global_step": 478, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02092050209205021, |
|
"grad_norm": 6.4084427221095295, |
|
"learning_rate": 1.0416666666666667e-07, |
|
"logits/chosen": -2.630444049835205, |
|
"logits/rejected": -2.576719045639038, |
|
"logps/chosen": -288.65911865234375, |
|
"logps/rejected": -275.90252685546875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": 0.00032657841802574694, |
|
"rewards/margins": 0.0008425033884122968, |
|
"rewards/rejected": -0.0005159247666597366, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04184100418410042, |
|
"grad_norm": 5.7047484713043755, |
|
"learning_rate": 2.0833333333333333e-07, |
|
"logits/chosen": -2.644824981689453, |
|
"logits/rejected": -2.6137185096740723, |
|
"logps/chosen": -293.5597839355469, |
|
"logps/rejected": -259.2336730957031, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.002616675803437829, |
|
"rewards/margins": 0.001477306941524148, |
|
"rewards/rejected": 0.0011393685126677155, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06276150627615062, |
|
"grad_norm": 5.335002677652247, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -2.666045665740967, |
|
"logits/rejected": -2.5890631675720215, |
|
"logps/chosen": -294.64007568359375, |
|
"logps/rejected": -287.18695068359375, |
|
"loss": 0.6901, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.015856895595788956, |
|
"rewards/margins": 0.008159220218658447, |
|
"rewards/rejected": 0.0076976739801466465, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.08368200836820083, |
|
"grad_norm": 5.361991577232885, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": -2.6365890502929688, |
|
"logits/rejected": -2.5537407398223877, |
|
"logps/chosen": -270.41375732421875, |
|
"logps/rejected": -240.17959594726562, |
|
"loss": 0.6825, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.03482341766357422, |
|
"rewards/margins": 0.02616509422659874, |
|
"rewards/rejected": 0.008658323436975479, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.10460251046025104, |
|
"grad_norm": 6.225003725839748, |
|
"learning_rate": 4.999733114418725e-07, |
|
"logits/chosen": -2.5800583362579346, |
|
"logits/rejected": -2.571406364440918, |
|
"logps/chosen": -264.10205078125, |
|
"logps/rejected": -246.74868774414062, |
|
"loss": 0.6687, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.013260206207633018, |
|
"rewards/margins": 0.07922474294900894, |
|
"rewards/rejected": -0.06596453487873077, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.10460251046025104, |
|
"eval_logits/chosen": -2.615793228149414, |
|
"eval_logits/rejected": -2.5782690048217773, |
|
"eval_logps/chosen": -260.0484313964844, |
|
"eval_logps/rejected": -271.3634033203125, |
|
"eval_loss": 0.649348258972168, |
|
"eval_rewards/accuracies": 0.70703125, |
|
"eval_rewards/chosen": 0.025814848020672798, |
|
"eval_rewards/margins": 0.11282772570848465, |
|
"eval_rewards/rejected": -0.0870128720998764, |
|
"eval_runtime": 103.335, |
|
"eval_samples_per_second": 19.355, |
|
"eval_steps_per_second": 0.31, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.12552301255230125, |
|
"grad_norm": 9.898033994957384, |
|
"learning_rate": 4.990398100856366e-07, |
|
"logits/chosen": -2.543462038040161, |
|
"logits/rejected": -2.50410795211792, |
|
"logps/chosen": -268.82574462890625, |
|
"logps/rejected": -262.0675964355469, |
|
"loss": 0.6502, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.010042434558272362, |
|
"rewards/margins": 0.14874781668186188, |
|
"rewards/rejected": -0.15879027545452118, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.14644351464435146, |
|
"grad_norm": 9.743629526270734, |
|
"learning_rate": 4.967775735898179e-07, |
|
"logits/chosen": -2.6071839332580566, |
|
"logits/rejected": -2.5465664863586426, |
|
"logps/chosen": -304.88092041015625, |
|
"logps/rejected": -310.2099609375, |
|
"loss": 0.6315, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.198007270693779, |
|
"rewards/margins": 0.2037646323442459, |
|
"rewards/rejected": -0.4017719328403473, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.16736401673640167, |
|
"grad_norm": 15.248138814448389, |
|
"learning_rate": 4.931986719649298e-07, |
|
"logits/chosen": -1.8189195394515991, |
|
"logits/rejected": -1.8181097507476807, |
|
"logps/chosen": -303.1712951660156, |
|
"logps/rejected": -337.1569519042969, |
|
"loss": 0.6003, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.4402276873588562, |
|
"rewards/margins": 0.30723443627357483, |
|
"rewards/rejected": -0.7474621534347534, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.18828451882845187, |
|
"grad_norm": 14.823998474977419, |
|
"learning_rate": 4.883222001996351e-07, |
|
"logits/chosen": -0.9793803095817566, |
|
"logits/rejected": -0.8350278735160828, |
|
"logps/chosen": -324.31463623046875, |
|
"logps/rejected": -369.8716735839844, |
|
"loss": 0.5849, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.5321702361106873, |
|
"rewards/margins": 0.5584505796432495, |
|
"rewards/rejected": -1.090620756149292, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.20920502092050208, |
|
"grad_norm": 22.816637306761073, |
|
"learning_rate": 4.821741763807186e-07, |
|
"logits/chosen": -0.4131926894187927, |
|
"logits/rejected": 0.20026779174804688, |
|
"logps/chosen": -381.84002685546875, |
|
"logps/rejected": -387.60943603515625, |
|
"loss": 0.5614, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.8785870671272278, |
|
"rewards/margins": 0.7347825765609741, |
|
"rewards/rejected": -1.6133695840835571, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.20920502092050208, |
|
"eval_logits/chosen": -0.5290488004684448, |
|
"eval_logits/rejected": -0.22265684604644775, |
|
"eval_logps/chosen": -343.2120361328125, |
|
"eval_logps/rejected": -421.86474609375, |
|
"eval_loss": 0.5806925892829895, |
|
"eval_rewards/accuracies": 0.7109375, |
|
"eval_rewards/chosen": -0.8058211803436279, |
|
"eval_rewards/margins": 0.7862052321434021, |
|
"eval_rewards/rejected": -1.5920264720916748, |
|
"eval_runtime": 104.2044, |
|
"eval_samples_per_second": 19.193, |
|
"eval_steps_per_second": 0.307, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.2301255230125523, |
|
"grad_norm": 19.02164952454875, |
|
"learning_rate": 4.747874028753375e-07, |
|
"logits/chosen": -0.43088096380233765, |
|
"logits/rejected": -0.01379423774778843, |
|
"logps/chosen": -391.91552734375, |
|
"logps/rejected": -446.66961669921875, |
|
"loss": 0.5589, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.9717921018600464, |
|
"rewards/margins": 0.7386372089385986, |
|
"rewards/rejected": -1.7104294300079346, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.2510460251046025, |
|
"grad_norm": 23.262987016748436, |
|
"learning_rate": 4.662012913161997e-07, |
|
"logits/chosen": 0.030251333490014076, |
|
"logits/rejected": 0.5855879187583923, |
|
"logps/chosen": -378.17999267578125, |
|
"logps/rejected": -441.85406494140625, |
|
"loss": 0.5556, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.0160324573516846, |
|
"rewards/margins": 1.018049955368042, |
|
"rewards/rejected": -2.0340826511383057, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.2719665271966527, |
|
"grad_norm": 21.032912122052533, |
|
"learning_rate": 4.5646165232345103e-07, |
|
"logits/chosen": -0.32387033104896545, |
|
"logits/rejected": 0.246691033244133, |
|
"logps/chosen": -401.41961669921875, |
|
"logps/rejected": -456.65216064453125, |
|
"loss": 0.542, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.039526343345642, |
|
"rewards/margins": 0.8820648193359375, |
|
"rewards/rejected": -1.9215911626815796, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.2928870292887029, |
|
"grad_norm": 18.49329811567727, |
|
"learning_rate": 4.456204510851956e-07, |
|
"logits/chosen": -0.7103713154792786, |
|
"logits/rejected": -0.07352075725793839, |
|
"logps/chosen": -398.73785400390625, |
|
"logps/rejected": -456.5108337402344, |
|
"loss": 0.5449, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.9843541979789734, |
|
"rewards/margins": 0.8879961967468262, |
|
"rewards/rejected": -1.8723503351211548, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.3138075313807531, |
|
"grad_norm": 22.991798597203633, |
|
"learning_rate": 4.337355301007335e-07, |
|
"logits/chosen": -0.8071734309196472, |
|
"logits/rejected": 0.08783279359340668, |
|
"logps/chosen": -380.7442321777344, |
|
"logps/rejected": -434.64959716796875, |
|
"loss": 0.5419, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.8969854116439819, |
|
"rewards/margins": 0.7560001611709595, |
|
"rewards/rejected": -1.6529855728149414, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3138075313807531, |
|
"eval_logits/chosen": -0.0013768002390861511, |
|
"eval_logits/rejected": 0.6415377259254456, |
|
"eval_logps/chosen": -367.39569091796875, |
|
"eval_logps/rejected": -469.2164611816406, |
|
"eval_loss": 0.5584754347801208, |
|
"eval_rewards/accuracies": 0.74609375, |
|
"eval_rewards/chosen": -1.0476573705673218, |
|
"eval_rewards/margins": 1.0178861618041992, |
|
"eval_rewards/rejected": -2.0655436515808105, |
|
"eval_runtime": 102.1901, |
|
"eval_samples_per_second": 19.571, |
|
"eval_steps_per_second": 0.313, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.33472803347280333, |
|
"grad_norm": 17.28842262569855, |
|
"learning_rate": 4.2087030056579986e-07, |
|
"logits/chosen": 0.42611104249954224, |
|
"logits/rejected": 1.240443468093872, |
|
"logps/chosen": -371.35699462890625, |
|
"logps/rejected": -441.8954162597656, |
|
"loss": 0.5371, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.0083401203155518, |
|
"rewards/margins": 1.0140050649642944, |
|
"rewards/rejected": -2.0223450660705566, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.35564853556485354, |
|
"grad_norm": 23.711859022455013, |
|
"learning_rate": 4.070934040463998e-07, |
|
"logits/chosen": 0.9712627530097961, |
|
"logits/rejected": 1.8879001140594482, |
|
"logps/chosen": -393.61029052734375, |
|
"logps/rejected": -474.1710510253906, |
|
"loss": 0.5201, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.2388116121292114, |
|
"rewards/margins": 0.9170275926589966, |
|
"rewards/rejected": -2.155838966369629, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.37656903765690375, |
|
"grad_norm": 20.4986556938231, |
|
"learning_rate": 3.9247834624635404e-07, |
|
"logits/chosen": 1.3909448385238647, |
|
"logits/rejected": 2.293593406677246, |
|
"logps/chosen": -461.04046630859375, |
|
"logps/rejected": -531.3743286132812, |
|
"loss": 0.534, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.5673408508300781, |
|
"rewards/margins": 1.0537548065185547, |
|
"rewards/rejected": -2.621096134185791, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.39748953974895396, |
|
"grad_norm": 29.28443173248426, |
|
"learning_rate": 3.7710310482256523e-07, |
|
"logits/chosen": 0.8631278276443481, |
|
"logits/rejected": 1.6216942071914673, |
|
"logps/chosen": -393.5219421386719, |
|
"logps/rejected": -465.5166015625, |
|
"loss": 0.5181, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.397423505783081, |
|
"rewards/margins": 0.8221076726913452, |
|
"rewards/rejected": -2.2195310592651367, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.41841004184100417, |
|
"grad_norm": 24.61839368474693, |
|
"learning_rate": 3.610497133404795e-07, |
|
"logits/chosen": 1.1675374507904053, |
|
"logits/rejected": 2.012094020843506, |
|
"logps/chosen": -408.1467590332031, |
|
"logps/rejected": -462.46875, |
|
"loss": 0.526, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.4781768321990967, |
|
"rewards/margins": 0.8430646657943726, |
|
"rewards/rejected": -2.3212413787841797, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.41841004184100417, |
|
"eval_logits/chosen": 0.9801958799362183, |
|
"eval_logits/rejected": 1.7427237033843994, |
|
"eval_logps/chosen": -402.51995849609375, |
|
"eval_logps/rejected": -517.015625, |
|
"eval_loss": 0.5562114715576172, |
|
"eval_rewards/accuracies": 0.76171875, |
|
"eval_rewards/chosen": -1.3989005088806152, |
|
"eval_rewards/margins": 1.1446349620819092, |
|
"eval_rewards/rejected": -2.5435354709625244, |
|
"eval_runtime": 106.3821, |
|
"eval_samples_per_second": 18.8, |
|
"eval_steps_per_second": 0.301, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.4393305439330544, |
|
"grad_norm": 24.016781344152104, |
|
"learning_rate": 3.4440382358952115e-07, |
|
"logits/chosen": 1.11398446559906, |
|
"logits/rejected": 2.043394088745117, |
|
"logps/chosen": -427.09759521484375, |
|
"logps/rejected": -499.5668029785156, |
|
"loss": 0.5271, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.5520648956298828, |
|
"rewards/margins": 0.8630266189575195, |
|
"rewards/rejected": -2.4150915145874023, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.4602510460251046, |
|
"grad_norm": 19.623224068729442, |
|
"learning_rate": 3.272542485937368e-07, |
|
"logits/chosen": 0.09422092139720917, |
|
"logits/rejected": 0.9946798086166382, |
|
"logps/chosen": -387.312255859375, |
|
"logps/rejected": -478.73699951171875, |
|
"loss": 0.5347, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.0305876731872559, |
|
"rewards/margins": 1.044886827468872, |
|
"rewards/rejected": -2.075474262237549, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.4811715481171548, |
|
"grad_norm": 20.279694061078374, |
|
"learning_rate": 3.096924887558854e-07, |
|
"logits/chosen": 0.5403207540512085, |
|
"logits/rejected": 1.6011781692504883, |
|
"logps/chosen": -414.5859375, |
|
"logps/rejected": -522.0372314453125, |
|
"loss": 0.4963, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.325188398361206, |
|
"rewards/margins": 1.3052270412445068, |
|
"rewards/rejected": -2.630415439605713, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.502092050209205, |
|
"grad_norm": 17.600900176176182, |
|
"learning_rate": 2.9181224366319943e-07, |
|
"logits/chosen": 1.2316529750823975, |
|
"logits/rejected": 2.2336225509643555, |
|
"logps/chosen": -405.16815185546875, |
|
"logps/rejected": -461.0508728027344, |
|
"loss": 0.5208, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.505174994468689, |
|
"rewards/margins": 0.9252998232841492, |
|
"rewards/rejected": -2.4304747581481934, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.5230125523012552, |
|
"grad_norm": 21.388657517716165, |
|
"learning_rate": 2.7370891215954565e-07, |
|
"logits/chosen": 0.33056747913360596, |
|
"logits/rejected": 1.398674726486206, |
|
"logps/chosen": -425.12738037109375, |
|
"logps/rejected": -477.9986267089844, |
|
"loss": 0.5202, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.3082549571990967, |
|
"rewards/margins": 1.007277011871338, |
|
"rewards/rejected": -2.3155319690704346, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5230125523012552, |
|
"eval_logits/chosen": 0.6489181518554688, |
|
"eval_logits/rejected": 1.4379549026489258, |
|
"eval_logps/chosen": -376.8782653808594, |
|
"eval_logps/rejected": -495.4537353515625, |
|
"eval_loss": 0.5419028401374817, |
|
"eval_rewards/accuracies": 0.7890625, |
|
"eval_rewards/chosen": -1.142483115196228, |
|
"eval_rewards/margins": 1.185433030128479, |
|
"eval_rewards/rejected": -2.327916383743286, |
|
"eval_runtime": 104.2974, |
|
"eval_samples_per_second": 19.176, |
|
"eval_steps_per_second": 0.307, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5439330543933054, |
|
"grad_norm": 23.92722986686011, |
|
"learning_rate": 2.55479083351317e-07, |
|
"logits/chosen": 1.0843138694763184, |
|
"logits/rejected": 2.0778467655181885, |
|
"logps/chosen": -430.15911865234375, |
|
"logps/rejected": -504.9864807128906, |
|
"loss": 0.5125, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.1564133167266846, |
|
"rewards/margins": 1.2437217235565186, |
|
"rewards/rejected": -2.4001352787017822, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.5648535564853556, |
|
"grad_norm": 21.06560694930431, |
|
"learning_rate": 2.3722002126275822e-07, |
|
"logits/chosen": 1.3407487869262695, |
|
"logits/rejected": 2.4112818241119385, |
|
"logps/chosen": -432.30023193359375, |
|
"logps/rejected": -504.80670166015625, |
|
"loss": 0.5096, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.3888903856277466, |
|
"rewards/margins": 1.141367793083191, |
|
"rewards/rejected": -2.5302579402923584, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.5857740585774058, |
|
"grad_norm": 21.8560737042057, |
|
"learning_rate": 2.19029145890313e-07, |
|
"logits/chosen": 1.2237210273742676, |
|
"logits/rejected": 2.0341880321502686, |
|
"logps/chosen": -389.9634704589844, |
|
"logps/rejected": -492.298828125, |
|
"loss": 0.5114, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.3260904550552368, |
|
"rewards/margins": 1.0742130279541016, |
|
"rewards/rejected": -2.400303363800049, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.606694560669456, |
|
"grad_norm": 23.993372955897243, |
|
"learning_rate": 2.0100351342479216e-07, |
|
"logits/chosen": 1.5786670446395874, |
|
"logits/rejected": 2.3115456104278564, |
|
"logps/chosen": -447.48382568359375, |
|
"logps/rejected": -546.939697265625, |
|
"loss": 0.513, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.5614277124404907, |
|
"rewards/margins": 1.1185188293457031, |
|
"rewards/rejected": -2.6799466609954834, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6276150627615062, |
|
"grad_norm": 22.907667503889392, |
|
"learning_rate": 1.8323929841460178e-07, |
|
"logits/chosen": 2.0128085613250732, |
|
"logits/rejected": 2.6608686447143555, |
|
"logps/chosen": -434.07830810546875, |
|
"logps/rejected": -551.2879028320312, |
|
"loss": 0.5054, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.6475236415863037, |
|
"rewards/margins": 1.0819259881973267, |
|
"rewards/rejected": -2.72944974899292, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6276150627615062, |
|
"eval_logits/chosen": 1.477131962776184, |
|
"eval_logits/rejected": 2.255997657775879, |
|
"eval_logps/chosen": -402.4423522949219, |
|
"eval_logps/rejected": -531.4893798828125, |
|
"eval_loss": 0.5449927449226379, |
|
"eval_rewards/accuracies": 0.77734375, |
|
"eval_rewards/chosen": -1.3981244564056396, |
|
"eval_rewards/margins": 1.290148377418518, |
|
"eval_rewards/rejected": -2.688272714614868, |
|
"eval_runtime": 103.6153, |
|
"eval_samples_per_second": 19.302, |
|
"eval_steps_per_second": 0.309, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6485355648535565, |
|
"grad_norm": 20.97030138169365, |
|
"learning_rate": 1.6583128063291573e-07, |
|
"logits/chosen": 1.2844688892364502, |
|
"logits/rejected": 2.06947660446167, |
|
"logps/chosen": -437.86358642578125, |
|
"logps/rejected": -538.4571533203125, |
|
"loss": 0.5145, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.4390218257904053, |
|
"rewards/margins": 1.1747257709503174, |
|
"rewards/rejected": -2.6137475967407227, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.6694560669456067, |
|
"grad_norm": 23.361486108617427, |
|
"learning_rate": 1.488723393865766e-07, |
|
"logits/chosen": 1.0614537000656128, |
|
"logits/rejected": 2.0729637145996094, |
|
"logps/chosen": -453.379150390625, |
|
"logps/rejected": -521.8544311523438, |
|
"loss": 0.4923, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.6114938259124756, |
|
"rewards/margins": 1.0873934030532837, |
|
"rewards/rejected": -2.6988871097564697, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.6903765690376569, |
|
"grad_norm": 24.156654265698716, |
|
"learning_rate": 1.3245295796480788e-07, |
|
"logits/chosen": 1.4917339086532593, |
|
"logits/rejected": 2.5346505641937256, |
|
"logps/chosen": -463.26202392578125, |
|
"logps/rejected": -520.6095581054688, |
|
"loss": 0.5059, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.7963454723358154, |
|
"rewards/margins": 0.9912670254707336, |
|
"rewards/rejected": -2.7876124382019043, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7112970711297071, |
|
"grad_norm": 19.422370738594758, |
|
"learning_rate": 1.1666074087171627e-07, |
|
"logits/chosen": 1.7316112518310547, |
|
"logits/rejected": 2.561483144760132, |
|
"logps/chosen": -413.0133361816406, |
|
"logps/rejected": -536.4874877929688, |
|
"loss": 0.5184, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.6657421588897705, |
|
"rewards/margins": 1.103548288345337, |
|
"rewards/rejected": -2.7692904472351074, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.7322175732217573, |
|
"grad_norm": 19.236548541895125, |
|
"learning_rate": 1.0157994641835734e-07, |
|
"logits/chosen": 1.52297842502594, |
|
"logits/rejected": 2.436295986175537, |
|
"logps/chosen": -450.27239990234375, |
|
"logps/rejected": -541.5868530273438, |
|
"loss": 0.497, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.680456519126892, |
|
"rewards/margins": 1.0492502450942993, |
|
"rewards/rejected": -2.7297065258026123, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.7322175732217573, |
|
"eval_logits/chosen": 1.3703731298446655, |
|
"eval_logits/rejected": 2.225933074951172, |
|
"eval_logps/chosen": -422.6754455566406, |
|
"eval_logps/rejected": -549.4119873046875, |
|
"eval_loss": 0.5301805138587952, |
|
"eval_rewards/accuracies": 0.7734375, |
|
"eval_rewards/chosen": -1.6004550457000732, |
|
"eval_rewards/margins": 1.2670434713363647, |
|
"eval_rewards/rejected": -2.8674986362457275, |
|
"eval_runtime": 102.8183, |
|
"eval_samples_per_second": 19.452, |
|
"eval_steps_per_second": 0.311, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.7531380753138075, |
|
"grad_norm": 19.689504951232507, |
|
"learning_rate": 8.729103716819111e-08, |
|
"logits/chosen": 1.695031762123108, |
|
"logits/rejected": 2.4951744079589844, |
|
"logps/chosen": -429.60675048828125, |
|
"logps/rejected": -540.1343383789062, |
|
"loss": 0.5127, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.8274829387664795, |
|
"rewards/margins": 1.1173667907714844, |
|
"rewards/rejected": -2.9448494911193848, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.7740585774058577, |
|
"grad_norm": 23.998324017815545, |
|
"learning_rate": 7.387025063449081e-08, |
|
"logits/chosen": 1.3235762119293213, |
|
"logits/rejected": 2.6002328395843506, |
|
"logps/chosen": -454.5791015625, |
|
"logps/rejected": -572.55078125, |
|
"loss": 0.495, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.667346715927124, |
|
"rewards/margins": 1.3339135646820068, |
|
"rewards/rejected": -3.001260280609131, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.7949790794979079, |
|
"grad_norm": 20.911379608221466, |
|
"learning_rate": 6.138919252022435e-08, |
|
"logits/chosen": 1.435396432876587, |
|
"logits/rejected": 2.2644729614257812, |
|
"logps/chosen": -481.06378173828125, |
|
"logps/rejected": -542.4463500976562, |
|
"loss": 0.5119, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.6459989547729492, |
|
"rewards/margins": 0.9545661211013794, |
|
"rewards/rejected": -2.600564956665039, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.8158995815899581, |
|
"grad_norm": 33.66392923940902, |
|
"learning_rate": 4.991445467064689e-08, |
|
"logits/chosen": 1.5889087915420532, |
|
"logits/rejected": 2.5967953205108643, |
|
"logps/chosen": -469.99884033203125, |
|
"logps/rejected": -584.1378784179688, |
|
"loss": 0.5084, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.6343872547149658, |
|
"rewards/margins": 1.305241346359253, |
|
"rewards/rejected": -2.9396286010742188, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.8368200836820083, |
|
"grad_norm": 21.820601306046125, |
|
"learning_rate": 3.9507259776993954e-08, |
|
"logits/chosen": 1.4544366598129272, |
|
"logits/rejected": 2.2073497772216797, |
|
"logps/chosen": -470.62188720703125, |
|
"logps/rejected": -558.2474365234375, |
|
"loss": 0.5076, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.766871452331543, |
|
"rewards/margins": 1.029807209968567, |
|
"rewards/rejected": -2.796678304672241, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8368200836820083, |
|
"eval_logits/chosen": 1.3332302570343018, |
|
"eval_logits/rejected": 2.278512477874756, |
|
"eval_logps/chosen": -423.95953369140625, |
|
"eval_logps/rejected": -558.9130859375, |
|
"eval_loss": 0.5348207950592041, |
|
"eval_rewards/accuracies": 0.7890625, |
|
"eval_rewards/chosen": -1.6132957935333252, |
|
"eval_rewards/margins": 1.3492140769958496, |
|
"eval_rewards/rejected": -2.962510108947754, |
|
"eval_runtime": 105.1303, |
|
"eval_samples_per_second": 19.024, |
|
"eval_steps_per_second": 0.304, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8577405857740585, |
|
"grad_norm": 23.0331791932855, |
|
"learning_rate": 3.022313472693447e-08, |
|
"logits/chosen": 1.0064998865127563, |
|
"logits/rejected": 1.9991016387939453, |
|
"logps/chosen": -479.11846923828125, |
|
"logps/rejected": -583.687255859375, |
|
"loss": 0.4977, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -1.6211599111557007, |
|
"rewards/margins": 1.2330563068389893, |
|
"rewards/rejected": -2.8542163372039795, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.8786610878661087, |
|
"grad_norm": 25.09829796115416, |
|
"learning_rate": 2.2111614344599684e-08, |
|
"logits/chosen": 1.3670974969863892, |
|
"logits/rejected": 2.6799685955047607, |
|
"logps/chosen": -465.97674560546875, |
|
"logps/rejected": -562.8436279296875, |
|
"loss": 0.4821, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.6978868246078491, |
|
"rewards/margins": 1.2206120491027832, |
|
"rewards/rejected": -2.9184985160827637, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.899581589958159, |
|
"grad_norm": 21.301017285621448, |
|
"learning_rate": 1.521597710086439e-08, |
|
"logits/chosen": 1.232881784439087, |
|
"logits/rejected": 2.3901188373565674, |
|
"logps/chosen": -466.0107421875, |
|
"logps/rejected": -540.5730590820312, |
|
"loss": 0.5114, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.70029616355896, |
|
"rewards/margins": 1.1120824813842773, |
|
"rewards/rejected": -2.8123791217803955, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.9205020920502092, |
|
"grad_norm": 17.114528155184686, |
|
"learning_rate": 9.57301420397924e-09, |
|
"logits/chosen": 1.5404767990112305, |
|
"logits/rejected": 2.9349493980407715, |
|
"logps/chosen": -467.8946228027344, |
|
"logps/rejected": -551.1821899414062, |
|
"loss": 0.5065, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.856208086013794, |
|
"rewards/margins": 1.1952415704727173, |
|
"rewards/rejected": -3.051449775695801, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.9414225941422594, |
|
"grad_norm": 19.50832169815014, |
|
"learning_rate": 5.212833302556258e-09, |
|
"logits/chosen": 1.3590748310089111, |
|
"logits/rejected": 2.3424363136291504, |
|
"logps/chosen": -451.4662170410156, |
|
"logps/rejected": -575.4518432617188, |
|
"loss": 0.5092, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.667201042175293, |
|
"rewards/margins": 1.312021017074585, |
|
"rewards/rejected": -2.979222059249878, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.9414225941422594, |
|
"eval_logits/chosen": 1.3857550621032715, |
|
"eval_logits/rejected": 2.3444478511810303, |
|
"eval_logps/chosen": -429.63800048828125, |
|
"eval_logps/rejected": -565.6296997070312, |
|
"eval_loss": 0.5340853333473206, |
|
"eval_rewards/accuracies": 0.78515625, |
|
"eval_rewards/chosen": -1.6700804233551025, |
|
"eval_rewards/margins": 1.3595958948135376, |
|
"eval_rewards/rejected": -3.0296761989593506, |
|
"eval_runtime": 102.8547, |
|
"eval_samples_per_second": 19.445, |
|
"eval_steps_per_second": 0.311, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.9623430962343096, |
|
"grad_norm": 18.50816041515539, |
|
"learning_rate": 2.158697848236607e-09, |
|
"logits/chosen": 1.729901909828186, |
|
"logits/rejected": 2.7589685916900635, |
|
"logps/chosen": -457.56207275390625, |
|
"logps/rejected": -565.1722412109375, |
|
"loss": 0.4978, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.942845106124878, |
|
"rewards/margins": 1.1519941091537476, |
|
"rewards/rejected": -3.094839334487915, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.9832635983263598, |
|
"grad_norm": 18.70014449283038, |
|
"learning_rate": 4.269029751107489e-10, |
|
"logits/chosen": 1.6632955074310303, |
|
"logits/rejected": 2.187119960784912, |
|
"logps/chosen": -438.1412048339844, |
|
"logps/rejected": -556.7476806640625, |
|
"loss": 0.5241, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.7501564025878906, |
|
"rewards/margins": 1.1538572311401367, |
|
"rewards/rejected": -2.9040138721466064, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 478, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5431942655451627, |
|
"train_runtime": 12655.0807, |
|
"train_samples_per_second": 4.831, |
|
"train_steps_per_second": 0.038 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 478, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|