|
{ |
|
"epoch": 1.0, |
|
"eval_logits/chosen": -3.411515712738037, |
|
"eval_logits/rejected": -3.456860065460205, |
|
"eval_logps/chosen": -564.323974609375, |
|
"eval_logps/rejected": -567.8529052734375, |
|
"eval_loss": 0.7979298830032349, |
|
"eval_rewards/accuracies": 0.46875, |
|
"eval_rewards/chosen": 4.5177483558654785, |
|
"eval_rewards/diff": -0.33996284008026123, |
|
"eval_rewards/diff_abs": 1.2063032388687134, |
|
"eval_rewards/rejected": 4.610641002655029, |
|
"eval_rewards/student_margin": -0.09289252758026123, |
|
"eval_rewards/teacher_margin": 0.2470703125, |
|
"eval_runtime": 26.855, |
|
"eval_samples": 1543, |
|
"eval_samples_per_second": 57.457, |
|
"eval_steps_per_second": 0.149, |
|
"train_loss": 0.54411713648699, |
|
"train_runtime": 5965.6032, |
|
"train_samples": 160261, |
|
"train_samples_per_second": 26.864, |
|
"train_steps_per_second": 0.14 |
|
} |