jikaixuan committed on
Commit
05bee42
1 Parent(s): 5a0cfe4

Model save

Browse files
README.md CHANGED
@@ -15,17 +15,17 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-full](https://huggingface.co/alignment-handbook/zephyr-7b-sft-full) on the None dataset.
17
  It achieves the following results on the evaluation set:
18
- - Loss: 0.2409
19
- - Rewards/chosen: -2.7432
20
- - Rewards/rejected: -6.3660
21
- - Rewards/accuracies: 0.7340
22
- - Rewards/margins: 3.6228
23
- - Logps/rejected: -322.9595
24
- - Logps/chosen: -311.6890
25
- - Logits/rejected: -2.6650
26
- - Logits/chosen: -2.6975
27
- - Use Label: 6842.4238
28
- - Pred Label: 9189.5762
29
 
30
  ## Model description
31
 
@@ -62,7 +62,7 @@ The following hyperparameters were used during training:
62
 
63
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen | Use Label | Pred Label |
64
  |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|:---------:|:----------:|
65
- | 0.2135 | 1.0 | 955 | 0.2409 | -2.7432 | -6.3660 | 0.7340 | 3.6228 | -322.9595 | -311.6890 | -2.6650 | -2.6975 | 6698.4238 | 8833.5762 |
66
 
67
 
68
  ### Framework versions
 
15
 
16
  This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-full](https://huggingface.co/alignment-handbook/zephyr-7b-sft-full) on the None dataset.
17
  It achieves the following results on the evaluation set:
18
+ - Loss: 0.3439
19
+ - Rewards/chosen: -1.1633
20
+ - Rewards/rejected: -3.5290
21
+ - Rewards/accuracies: 0.7420
22
+ - Rewards/margins: 2.3657
23
+ - Logps/rejected: -294.5901
24
+ - Logps/chosen: -295.8908
25
+ - Logits/rejected: -2.7390
26
+ - Logits/chosen: -2.7421
27
+ - Use Label: 9180.7998
28
+ - Pred Label: 6851.2002
29
 
30
  ## Model description
31
 
 
62
 
63
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen | Use Label | Pred Label |
64
  |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|:---------:|:----------:|
65
+ | 0.333 | 1.0 | 955 | 0.3439 | -1.1633 | -3.5290 | 0.7420 | 2.3657 | -294.5901 | -295.8908 | -2.7390 | -2.7421 | 8950.7998 | 6581.2002 |
66
 
67
 
68
  ### Framework versions
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:07cfe6d5c754ecd489ce1a7120c06b95508a9d918a16f1ea233a9da39e0c3d38
3
  size 218138576
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:940dab0d56f0e97f38a28e754a805f85470394ea5666743d488b36e659bc665a
3
  size 218138576
all_results.json CHANGED
@@ -1,23 +1,23 @@
1
  {
2
  "epoch": 1.0,
3
- "eval_logits/chosen": -2.6974706649780273,
4
- "eval_logits/rejected": -2.665019989013672,
5
- "eval_logps/chosen": -311.6889953613281,
6
- "eval_logps/rejected": -322.95947265625,
7
- "eval_loss": 0.2408759742975235,
8
- "eval_pred_label": 9189.576171875,
9
- "eval_rewards/accuracies": 0.734000027179718,
10
- "eval_rewards/chosen": -2.7431609630584717,
11
- "eval_rewards/margins": 3.6228184700012207,
12
- "eval_rewards/rejected": -6.36598014831543,
13
- "eval_runtime": 452.5604,
14
  "eval_samples": 2000,
15
- "eval_samples_per_second": 4.419,
16
- "eval_steps_per_second": 0.276,
17
- "eval_use_label": 6842.423828125,
18
- "train_loss": 0.31699458866219243,
19
- "train_runtime": 25218.7851,
20
  "train_samples": 61135,
21
- "train_samples_per_second": 2.424,
22
  "train_steps_per_second": 0.038
23
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "eval_logits/chosen": -2.742107391357422,
4
+ "eval_logits/rejected": -2.739009141921997,
5
+ "eval_logps/chosen": -295.89080810546875,
6
+ "eval_logps/rejected": -294.5900573730469,
7
+ "eval_loss": 0.3439472019672394,
8
+ "eval_pred_label": 6851.2001953125,
9
+ "eval_rewards/accuracies": 0.7419999837875366,
10
+ "eval_rewards/chosen": -1.163341999053955,
11
+ "eval_rewards/margins": 2.3656928539276123,
12
+ "eval_rewards/rejected": -3.5290346145629883,
13
+ "eval_runtime": 460.2253,
14
  "eval_samples": 2000,
15
+ "eval_samples_per_second": 4.346,
16
+ "eval_steps_per_second": 0.272,
17
+ "eval_use_label": 9180.7998046875,
18
+ "train_loss": 0.39464539333163756,
19
+ "train_runtime": 25354.6696,
20
  "train_samples": 61135,
21
+ "train_samples_per_second": 2.411,
22
  "train_steps_per_second": 0.038
23
  }
eval_results.json CHANGED
@@ -1,18 +1,18 @@
1
  {
2
  "epoch": 1.0,
3
- "eval_logits/chosen": -2.6974706649780273,
4
- "eval_logits/rejected": -2.665019989013672,
5
- "eval_logps/chosen": -311.6889953613281,
6
- "eval_logps/rejected": -322.95947265625,
7
- "eval_loss": 0.2408759742975235,
8
- "eval_pred_label": 9189.576171875,
9
- "eval_rewards/accuracies": 0.734000027179718,
10
- "eval_rewards/chosen": -2.7431609630584717,
11
- "eval_rewards/margins": 3.6228184700012207,
12
- "eval_rewards/rejected": -6.36598014831543,
13
- "eval_runtime": 452.5604,
14
  "eval_samples": 2000,
15
- "eval_samples_per_second": 4.419,
16
- "eval_steps_per_second": 0.276,
17
- "eval_use_label": 6842.423828125
18
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "eval_logits/chosen": -2.742107391357422,
4
+ "eval_logits/rejected": -2.739009141921997,
5
+ "eval_logps/chosen": -295.89080810546875,
6
+ "eval_logps/rejected": -294.5900573730469,
7
+ "eval_loss": 0.3439472019672394,
8
+ "eval_pred_label": 6851.2001953125,
9
+ "eval_rewards/accuracies": 0.7419999837875366,
10
+ "eval_rewards/chosen": -1.163341999053955,
11
+ "eval_rewards/margins": 2.3656928539276123,
12
+ "eval_rewards/rejected": -3.5290346145629883,
13
+ "eval_runtime": 460.2253,
14
  "eval_samples": 2000,
15
+ "eval_samples_per_second": 4.346,
16
+ "eval_steps_per_second": 0.272,
17
+ "eval_use_label": 9180.7998046875
18
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 0.31699458866219243,
4
- "train_runtime": 25218.7851,
5
  "train_samples": 61135,
6
- "train_samples_per_second": 2.424,
7
  "train_steps_per_second": 0.038
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 0.39464539333163756,
4
+ "train_runtime": 25354.6696,
5
  "train_samples": 61135,
6
+ "train_samples_per_second": 2.411,
7
  "train_steps_per_second": 0.038
8
  }
trainer_state.json CHANGED
@@ -80,13 +80,13 @@
80
  "logps/chosen": -281.32928466796875,
81
  "logps/rejected": -277.8607482910156,
82
  "loss": 0.6339,
83
- "pred_label": 0.10000000149011612,
84
  "rewards/accuracies": 0.6875,
85
  "rewards/chosen": 0.02641097828745842,
86
  "rewards/margins": 0.2079576551914215,
87
  "rewards/rejected": -0.1815466731786728,
88
  "step": 40,
89
- "use_label": 561.9000244140625
90
  },
91
  {
92
  "epoch": 0.05,
@@ -96,1479 +96,1479 @@
96
  "logps/chosen": -266.80517578125,
97
  "logps/rejected": -261.9176025390625,
98
  "loss": 0.5914,
99
- "pred_label": 2.950000047683716,
100
  "rewards/accuracies": 0.643750011920929,
101
  "rewards/chosen": -0.06968289613723755,
102
  "rewards/margins": 0.33043327927589417,
103
  "rewards/rejected": -0.4001162648200989,
104
  "step": 50,
105
- "use_label": 719.0499877929688
106
  },
107
  {
108
  "epoch": 0.06,
109
  "learning_rate": 3.125e-05,
110
- "logits/chosen": -2.817157506942749,
111
- "logits/rejected": -2.8072521686553955,
112
- "logps/chosen": -300.76080322265625,
113
- "logps/rejected": -290.3218994140625,
114
- "loss": 0.5988,
115
- "pred_label": 11.574999809265137,
116
  "rewards/accuracies": 0.699999988079071,
117
- "rewards/chosen": -0.03455673158168793,
118
- "rewards/margins": 0.3870925307273865,
119
- "rewards/rejected": -0.4216492772102356,
120
  "step": 60,
121
- "use_label": 870.4249877929688
122
  },
123
  {
124
  "epoch": 0.07,
125
  "learning_rate": 3.6458333333333336e-05,
126
- "logits/chosen": -2.820021152496338,
127
- "logits/rejected": -2.813854217529297,
128
- "logps/chosen": -294.513671875,
129
- "logps/rejected": -278.9858093261719,
130
- "loss": 0.5227,
131
- "pred_label": 25.399999618530273,
132
- "rewards/accuracies": 0.7437499761581421,
133
- "rewards/chosen": -0.020013216882944107,
134
- "rewards/margins": 0.5820196866989136,
135
- "rewards/rejected": -0.6020328998565674,
136
  "step": 70,
137
- "use_label": 1016.5999755859375
138
  },
139
  {
140
  "epoch": 0.08,
141
  "learning_rate": 4.166666666666667e-05,
142
- "logits/chosen": -2.7645280361175537,
143
- "logits/rejected": -2.7516016960144043,
144
- "logps/chosen": -306.9416809082031,
145
- "logps/rejected": -288.9856262207031,
146
- "loss": 0.4536,
147
- "pred_label": 58.599998474121094,
148
- "rewards/accuracies": 0.75,
149
- "rewards/chosen": -0.16363248229026794,
150
- "rewards/margins": 0.8336677551269531,
151
- "rewards/rejected": -0.9973002672195435,
152
  "step": 80,
153
- "use_label": 1143.4000244140625
154
  },
155
  {
156
  "epoch": 0.09,
157
  "learning_rate": 4.6875e-05,
158
- "logits/chosen": -2.6945321559906006,
159
- "logits/rejected": -2.747893810272217,
160
- "logps/chosen": -295.63934326171875,
161
- "logps/rejected": -275.24127197265625,
162
- "loss": 0.4735,
163
- "pred_label": 106.5,
164
- "rewards/accuracies": 0.731249988079071,
165
- "rewards/chosen": 0.014383295550942421,
166
- "rewards/margins": 0.911063551902771,
167
- "rewards/rejected": -0.8966802358627319,
168
  "step": 90,
169
- "use_label": 1255.5
170
  },
171
  {
172
  "epoch": 0.1,
173
  "learning_rate": 4.976717112922003e-05,
174
- "logits/chosen": -2.719611406326294,
175
- "logits/rejected": -2.718784809112549,
176
- "logps/chosen": -276.9577941894531,
177
- "logps/rejected": -306.4429626464844,
178
- "loss": 0.484,
179
- "pred_label": 153.3249969482422,
180
- "rewards/accuracies": 0.706250011920929,
181
- "rewards/chosen": -0.28260549902915955,
182
- "rewards/margins": 1.008049726486206,
183
- "rewards/rejected": -1.290655255317688,
184
  "step": 100,
185
- "use_label": 1368.675048828125
186
  },
187
  {
188
  "epoch": 0.12,
189
  "learning_rate": 4.918509895227008e-05,
190
- "logits/chosen": -2.7281861305236816,
191
- "logits/rejected": -2.687361478805542,
192
- "logps/chosen": -269.6018981933594,
193
- "logps/rejected": -263.89166259765625,
194
- "loss": 0.4361,
195
- "pred_label": 213.60000610351562,
196
- "rewards/accuracies": 0.6312500238418579,
197
- "rewards/chosen": -0.4927287697792053,
198
- "rewards/margins": 0.9790979623794556,
199
- "rewards/rejected": -1.4718266725540161,
200
  "step": 110,
201
- "use_label": 1468.4000244140625
202
  },
203
  {
204
  "epoch": 0.13,
205
  "learning_rate": 4.860302677532014e-05,
206
- "logits/chosen": -2.7501158714294434,
207
- "logits/rejected": -2.742772340774536,
208
- "logps/chosen": -281.5338439941406,
209
- "logps/rejected": -259.219970703125,
210
- "loss": 0.4674,
211
- "pred_label": 270.0249938964844,
212
- "rewards/accuracies": 0.6625000238418579,
213
- "rewards/chosen": -0.20209825038909912,
214
- "rewards/margins": 0.9515292048454285,
215
- "rewards/rejected": -1.1536273956298828,
216
  "step": 120,
217
- "use_label": 1571.9749755859375
218
  },
219
  {
220
  "epoch": 0.14,
221
  "learning_rate": 4.80209545983702e-05,
222
- "logits/chosen": -2.8077096939086914,
223
- "logits/rejected": -2.7647416591644287,
224
- "logps/chosen": -308.7791748046875,
225
- "logps/rejected": -282.69976806640625,
226
- "loss": 0.4803,
227
- "pred_label": 317.4750061035156,
228
- "rewards/accuracies": 0.6812499761581421,
229
- "rewards/chosen": -0.15117435157299042,
230
- "rewards/margins": 0.8076679110527039,
231
- "rewards/rejected": -0.958842396736145,
232
  "step": 130,
233
- "use_label": 1684.5250244140625
234
  },
235
  {
236
  "epoch": 0.15,
237
  "learning_rate": 4.743888242142026e-05,
238
- "logits/chosen": -2.814915657043457,
239
- "logits/rejected": -2.8093409538269043,
240
- "logps/chosen": -261.60418701171875,
241
- "logps/rejected": -249.7925567626953,
242
- "loss": 0.3944,
243
- "pred_label": 370.67498779296875,
244
- "rewards/accuracies": 0.706250011920929,
245
- "rewards/chosen": -0.34973591566085815,
246
- "rewards/margins": 1.1276283264160156,
247
- "rewards/rejected": -1.4773643016815186,
248
  "step": 140,
249
- "use_label": 1791.324951171875
250
  },
251
  {
252
  "epoch": 0.16,
253
  "learning_rate": 4.685681024447032e-05,
254
- "logits/chosen": -2.71696138381958,
255
- "logits/rejected": -2.740691661834717,
256
- "logps/chosen": -302.32843017578125,
257
- "logps/rejected": -298.6430358886719,
258
- "loss": 0.3948,
259
- "pred_label": 430.0,
260
  "rewards/accuracies": 0.612500011920929,
261
- "rewards/chosen": -0.7442375421524048,
262
- "rewards/margins": 0.9453207850456238,
263
- "rewards/rejected": -1.6895582675933838,
264
  "step": 150,
265
- "use_label": 1892.0
266
  },
267
  {
268
  "epoch": 0.17,
269
  "learning_rate": 4.6274738067520374e-05,
270
- "logits/chosen": -2.7621922492980957,
271
- "logits/rejected": -2.707430601119995,
272
- "logps/chosen": -264.4729919433594,
273
- "logps/rejected": -283.64837646484375,
274
- "loss": 0.4037,
275
- "pred_label": 494.9750061035156,
276
- "rewards/accuracies": 0.71875,
277
- "rewards/chosen": -0.20909884572029114,
278
- "rewards/margins": 1.2514889240264893,
279
- "rewards/rejected": -1.4605878591537476,
280
  "step": 160,
281
- "use_label": 1987.0250244140625
282
  },
283
  {
284
  "epoch": 0.18,
285
  "learning_rate": 4.5692665890570435e-05,
286
- "logits/chosen": -2.800288438796997,
287
- "logits/rejected": -2.8005611896514893,
288
- "logps/chosen": -298.9527282714844,
289
- "logps/rejected": -264.09014892578125,
290
- "loss": 0.3923,
291
- "pred_label": 555.2249755859375,
292
- "rewards/accuracies": 0.6625000238418579,
293
- "rewards/chosen": -0.24765756726264954,
294
- "rewards/margins": 1.1968435049057007,
295
- "rewards/rejected": -1.4445011615753174,
296
  "step": 170,
297
- "use_label": 2086.77490234375
298
  },
299
  {
300
  "epoch": 0.19,
301
  "learning_rate": 4.511059371362049e-05,
302
- "logits/chosen": -2.7035715579986572,
303
- "logits/rejected": -2.6763572692871094,
304
- "logps/chosen": -261.8144836425781,
305
- "logps/rejected": -294.1943054199219,
306
- "loss": 0.3547,
307
- "pred_label": 639.875,
308
  "rewards/accuracies": 0.731249988079071,
309
- "rewards/chosen": -0.588528573513031,
310
- "rewards/margins": 1.7626692056655884,
311
- "rewards/rejected": -2.3511977195739746,
312
  "step": 180,
313
- "use_label": 2162.125
314
  },
315
  {
316
  "epoch": 0.2,
317
  "learning_rate": 4.452852153667055e-05,
318
- "logits/chosen": -2.788900375366211,
319
- "logits/rejected": -2.745344638824463,
320
- "logps/chosen": -311.311767578125,
321
- "logps/rejected": -287.326416015625,
322
- "loss": 0.3357,
323
- "pred_label": 725.2999877929688,
324
- "rewards/accuracies": 0.793749988079071,
325
- "rewards/chosen": -0.4055810868740082,
326
- "rewards/margins": 2.22284197807312,
327
- "rewards/rejected": -2.6284232139587402,
328
  "step": 190,
329
- "use_label": 2236.699951171875
330
  },
331
  {
332
  "epoch": 0.21,
333
  "learning_rate": 4.394644935972061e-05,
334
- "logits/chosen": -2.789668560028076,
335
- "logits/rejected": -2.7655420303344727,
336
- "logps/chosen": -274.904052734375,
337
- "logps/rejected": -299.71356201171875,
338
- "loss": 0.3242,
339
- "pred_label": 815.0,
340
- "rewards/accuracies": 0.71875,
341
- "rewards/chosen": -1.3575278520584106,
342
- "rewards/margins": 1.7897872924804688,
343
- "rewards/rejected": -3.147315263748169,
344
  "step": 200,
345
- "use_label": 2307.0
346
  },
347
  {
348
  "epoch": 0.22,
349
  "learning_rate": 4.336437718277067e-05,
350
- "logits/chosen": -2.7797892093658447,
351
- "logits/rejected": -2.796938419342041,
352
- "logps/chosen": -310.7392883300781,
353
- "logps/rejected": -278.1353454589844,
354
- "loss": 0.3789,
355
- "pred_label": 897.2750244140625,
356
- "rewards/accuracies": 0.75,
357
- "rewards/chosen": -0.9491372108459473,
358
- "rewards/margins": 1.4454370737075806,
359
- "rewards/rejected": -2.394573926925659,
360
  "step": 210,
361
- "use_label": 2384.72509765625
362
  },
363
  {
364
  "epoch": 0.23,
365
  "learning_rate": 4.278230500582072e-05,
366
- "logits/chosen": -2.729732036590576,
367
- "logits/rejected": -2.7413415908813477,
368
- "logps/chosen": -283.9934387207031,
369
- "logps/rejected": -297.1932678222656,
370
- "loss": 0.3046,
371
- "pred_label": 983.3499755859375,
372
  "rewards/accuracies": 0.75,
373
- "rewards/chosen": -0.9001734852790833,
374
- "rewards/margins": 1.7534692287445068,
375
- "rewards/rejected": -2.6536426544189453,
376
  "step": 220,
377
- "use_label": 2458.64990234375
378
  },
379
  {
380
  "epoch": 0.24,
381
  "learning_rate": 4.220023282887078e-05,
382
- "logits/chosen": -2.7093918323516846,
383
- "logits/rejected": -2.724216938018799,
384
- "logps/chosen": -320.80218505859375,
385
- "logps/rejected": -295.5621643066406,
386
- "loss": 0.2963,
387
- "pred_label": 1077.550048828125,
388
- "rewards/accuracies": 0.7437499761581421,
389
- "rewards/chosen": -1.2629055976867676,
390
- "rewards/margins": 1.8823902606964111,
391
- "rewards/rejected": -3.1452958583831787,
392
  "step": 230,
393
- "use_label": 2524.449951171875
394
  },
395
  {
396
  "epoch": 0.25,
397
  "learning_rate": 4.161816065192084e-05,
398
- "logits/chosen": -2.707616090774536,
399
- "logits/rejected": -2.6788878440856934,
400
- "logps/chosen": -300.3539123535156,
401
- "logps/rejected": -273.60333251953125,
402
- "loss": 0.2668,
403
- "pred_label": 1176.5250244140625,
404
- "rewards/accuracies": 0.6875,
405
- "rewards/chosen": -2.5776500701904297,
406
- "rewards/margins": 2.4807069301605225,
407
- "rewards/rejected": -5.058356761932373,
408
  "step": 240,
409
- "use_label": 2585.47509765625
410
  },
411
  {
412
  "epoch": 0.26,
413
  "learning_rate": 4.10360884749709e-05,
414
- "logits/chosen": -2.7368381023406982,
415
- "logits/rejected": -2.722391366958618,
416
- "logps/chosen": -316.1106262207031,
417
- "logps/rejected": -321.246337890625,
418
- "loss": 0.3166,
419
- "pred_label": 1270.574951171875,
420
- "rewards/accuracies": 0.706250011920929,
421
- "rewards/chosen": -2.5470499992370605,
422
- "rewards/margins": 2.635700225830078,
423
- "rewards/rejected": -5.182750225067139,
424
  "step": 250,
425
- "use_label": 2651.425048828125
426
  },
427
  {
428
  "epoch": 0.27,
429
  "learning_rate": 4.045401629802096e-05,
430
- "logits/chosen": -2.7675890922546387,
431
- "logits/rejected": -2.7713561058044434,
432
- "logps/chosen": -266.34136962890625,
433
- "logps/rejected": -286.3191833496094,
434
- "loss": 0.3408,
435
- "pred_label": 1354.6500244140625,
436
- "rewards/accuracies": 0.7250000238418579,
437
- "rewards/chosen": -1.7731956243515015,
438
- "rewards/margins": 1.8298133611679077,
439
- "rewards/rejected": -3.603008985519409,
440
  "step": 260,
441
- "use_label": 2727.35009765625
442
  },
443
  {
444
  "epoch": 0.28,
445
  "learning_rate": 3.9871944121071014e-05,
446
- "logits/chosen": -2.764836311340332,
447
- "logits/rejected": -2.7679736614227295,
448
- "logps/chosen": -306.4355773925781,
449
- "logps/rejected": -316.5330505371094,
450
- "loss": 0.3,
451
- "pred_label": 1444.324951171875,
452
- "rewards/accuracies": 0.793749988079071,
453
- "rewards/chosen": -1.4564238786697388,
454
- "rewards/margins": 2.7216858863830566,
455
- "rewards/rejected": -4.178110122680664,
456
  "step": 270,
457
- "use_label": 2797.675048828125
458
  },
459
  {
460
  "epoch": 0.29,
461
  "learning_rate": 3.928987194412107e-05,
462
- "logits/chosen": -2.81805682182312,
463
- "logits/rejected": -2.8056905269622803,
464
- "logps/chosen": -309.92047119140625,
465
- "logps/rejected": -300.94268798828125,
466
- "loss": 0.3011,
467
- "pred_label": 1543.4749755859375,
468
- "rewards/accuracies": 0.75,
469
- "rewards/chosen": -1.595649003982544,
470
- "rewards/margins": 2.275697708129883,
471
- "rewards/rejected": -3.8713467121124268,
472
  "step": 280,
473
- "use_label": 2858.52490234375
474
  },
475
  {
476
  "epoch": 0.3,
477
  "learning_rate": 3.870779976717113e-05,
478
- "logits/chosen": -2.786515235900879,
479
- "logits/rejected": -2.7539620399475098,
480
- "logps/chosen": -312.62060546875,
481
- "logps/rejected": -268.4746398925781,
482
- "loss": 0.3045,
483
- "pred_label": 1641.2249755859375,
484
- "rewards/accuracies": 0.7124999761581421,
485
- "rewards/chosen": -1.9698221683502197,
486
- "rewards/margins": 1.8559925556182861,
487
- "rewards/rejected": -3.825814723968506,
488
  "step": 290,
489
- "use_label": 2920.77490234375
490
  },
491
  {
492
  "epoch": 0.31,
493
  "learning_rate": 3.812572759022119e-05,
494
- "logits/chosen": -2.79560923576355,
495
- "logits/rejected": -2.744600296020508,
496
- "logps/chosen": -279.0300598144531,
497
- "logps/rejected": -275.5624084472656,
498
- "loss": 0.3083,
499
- "pred_label": 1731.300048828125,
500
- "rewards/accuracies": 0.706250011920929,
501
- "rewards/chosen": -1.5351210832595825,
502
- "rewards/margins": 2.036700963973999,
503
- "rewards/rejected": -3.571821928024292,
504
  "step": 300,
505
- "use_label": 2990.699951171875
506
  },
507
  {
508
  "epoch": 0.32,
509
  "learning_rate": 3.7543655413271246e-05,
510
- "logits/chosen": -2.7574381828308105,
511
- "logits/rejected": -2.6999497413635254,
512
- "logps/chosen": -285.3556213378906,
513
- "logps/rejected": -344.51849365234375,
514
- "loss": 0.2794,
515
- "pred_label": 1830.0,
516
- "rewards/accuracies": 0.78125,
517
- "rewards/chosen": -1.8423988819122314,
518
- "rewards/margins": 3.7551066875457764,
519
- "rewards/rejected": -5.597506046295166,
520
  "step": 310,
521
- "use_label": 3052.0
522
  },
523
  {
524
  "epoch": 0.33,
525
  "learning_rate": 3.696158323632131e-05,
526
- "logits/chosen": -2.7264797687530518,
527
- "logits/rejected": -2.687101125717163,
528
- "logps/chosen": -318.2635192871094,
529
- "logps/rejected": -331.156005859375,
530
- "loss": 0.2657,
531
- "pred_label": 1930.050048828125,
532
- "rewards/accuracies": 0.699999988079071,
533
- "rewards/chosen": -2.228361129760742,
534
- "rewards/margins": 2.806436777114868,
535
- "rewards/rejected": -5.034797668457031,
536
  "step": 320,
537
- "use_label": 3111.949951171875
538
  },
539
  {
540
  "epoch": 0.35,
541
  "learning_rate": 3.637951105937136e-05,
542
- "logits/chosen": -2.7709312438964844,
543
- "logits/rejected": -2.7176883220672607,
544
- "logps/chosen": -289.0472717285156,
545
- "logps/rejected": -277.4143371582031,
546
- "loss": 0.2868,
547
- "pred_label": 2034.1500244140625,
548
- "rewards/accuracies": 0.6875,
549
- "rewards/chosen": -1.8098747730255127,
550
- "rewards/margins": 2.50335955619812,
551
- "rewards/rejected": -4.313233852386475,
552
  "step": 330,
553
- "use_label": 3167.85009765625
554
  },
555
  {
556
  "epoch": 0.36,
557
  "learning_rate": 3.579743888242142e-05,
558
- "logits/chosen": -2.69682240486145,
559
- "logits/rejected": -2.7000985145568848,
560
- "logps/chosen": -308.01141357421875,
561
- "logps/rejected": -279.4844665527344,
562
- "loss": 0.3048,
563
- "pred_label": 2128.949951171875,
564
- "rewards/accuracies": 0.675000011920929,
565
- "rewards/chosen": -1.883141279220581,
566
- "rewards/margins": 1.98026442527771,
567
- "rewards/rejected": -3.863405704498291,
568
  "step": 340,
569
- "use_label": 3233.050048828125
570
  },
571
  {
572
  "epoch": 0.37,
573
  "learning_rate": 3.5215366705471484e-05,
574
- "logits/chosen": -2.663508176803589,
575
- "logits/rejected": -2.6349689960479736,
576
- "logps/chosen": -314.67962646484375,
577
- "logps/rejected": -331.1473693847656,
578
- "loss": 0.2869,
579
- "pred_label": 2234.77490234375,
580
- "rewards/accuracies": 0.8187500238418579,
581
- "rewards/chosen": -1.7484877109527588,
582
- "rewards/margins": 3.6000218391418457,
583
- "rewards/rejected": -5.348509788513184,
584
  "step": 350,
585
- "use_label": 3287.22509765625
586
  },
587
  {
588
  "epoch": 0.38,
589
  "learning_rate": 3.463329452852154e-05,
590
- "logits/chosen": -2.7377424240112305,
591
- "logits/rejected": -2.729678153991699,
592
- "logps/chosen": -297.26873779296875,
593
- "logps/rejected": -313.16888427734375,
594
- "loss": 0.2666,
595
- "pred_label": 2342.22509765625,
596
- "rewards/accuracies": 0.762499988079071,
597
- "rewards/chosen": -2.4474658966064453,
598
- "rewards/margins": 2.349332094192505,
599
- "rewards/rejected": -4.796797752380371,
600
  "step": 360,
601
- "use_label": 3339.77490234375
602
  },
603
  {
604
  "epoch": 0.39,
605
  "learning_rate": 3.40512223515716e-05,
606
- "logits/chosen": -2.6696877479553223,
607
- "logits/rejected": -2.5973799228668213,
608
- "logps/chosen": -310.88897705078125,
609
- "logps/rejected": -354.97222900390625,
610
- "loss": 0.2556,
611
- "pred_label": 2450.97509765625,
612
- "rewards/accuracies": 0.7875000238418579,
613
- "rewards/chosen": -3.468627452850342,
614
- "rewards/margins": 4.456360816955566,
615
- "rewards/rejected": -7.924988746643066,
616
  "step": 370,
617
- "use_label": 3391.02490234375
618
  },
619
  {
620
  "epoch": 0.4,
621
  "learning_rate": 3.3469150174621654e-05,
622
- "logits/chosen": -2.7021727561950684,
623
- "logits/rejected": -2.679241418838501,
624
- "logps/chosen": -320.2778015136719,
625
- "logps/rejected": -327.36431884765625,
626
- "loss": 0.2394,
627
- "pred_label": 2560.375,
628
- "rewards/accuracies": 0.71875,
629
- "rewards/chosen": -3.5047569274902344,
630
- "rewards/margins": 3.173809289932251,
631
- "rewards/rejected": -6.678565979003906,
632
  "step": 380,
633
- "use_label": 3441.625
634
  },
635
  {
636
  "epoch": 0.41,
637
  "learning_rate": 3.288707799767171e-05,
638
- "logits/chosen": -2.6417362689971924,
639
- "logits/rejected": -2.6089835166931152,
640
- "logps/chosen": -295.4016418457031,
641
- "logps/rejected": -288.3475341796875,
642
- "loss": 0.2799,
643
- "pred_label": 2666.675048828125,
644
- "rewards/accuracies": 0.737500011920929,
645
- "rewards/chosen": -1.2609713077545166,
646
- "rewards/margins": 3.1455700397491455,
647
- "rewards/rejected": -4.406540870666504,
648
  "step": 390,
649
- "use_label": 3495.324951171875
650
  },
651
  {
652
  "epoch": 0.42,
653
  "learning_rate": 3.2305005820721776e-05,
654
- "logits/chosen": -2.774970054626465,
655
- "logits/rejected": -2.757179021835327,
656
- "logps/chosen": -317.8477478027344,
657
- "logps/rejected": -290.2752685546875,
658
- "loss": 0.2669,
659
- "pred_label": 2758.64990234375,
660
  "rewards/accuracies": 0.71875,
661
- "rewards/chosen": -1.0796750783920288,
662
- "rewards/margins": 2.0139389038085938,
663
- "rewards/rejected": -3.093614101409912,
664
  "step": 400,
665
- "use_label": 3563.35009765625
666
  },
667
  {
668
  "epoch": 0.43,
669
  "learning_rate": 3.172293364377183e-05,
670
- "logits/chosen": -2.777430295944214,
671
- "logits/rejected": -2.7574374675750732,
672
- "logps/chosen": -312.574951171875,
673
- "logps/rejected": -291.488037109375,
674
- "loss": 0.296,
675
- "pred_label": 2843.10009765625,
676
- "rewards/accuracies": 0.668749988079071,
677
- "rewards/chosen": -1.4477269649505615,
678
- "rewards/margins": 1.8462398052215576,
679
- "rewards/rejected": -3.2939670085906982,
680
  "step": 410,
681
- "use_label": 3638.89990234375
682
  },
683
  {
684
  "epoch": 0.44,
685
  "learning_rate": 3.1140861466821885e-05,
686
- "logits/chosen": -2.718291759490967,
687
- "logits/rejected": -2.688828706741333,
688
- "logps/chosen": -254.98593139648438,
689
- "logps/rejected": -283.18560791015625,
690
- "loss": 0.3046,
691
- "pred_label": 2935.14990234375,
692
  "rewards/accuracies": 0.6937500238418579,
693
- "rewards/chosen": -1.440553069114685,
694
- "rewards/margins": 1.9665199518203735,
695
- "rewards/rejected": -3.4070727825164795,
696
  "step": 420,
697
- "use_label": 3706.85009765625
698
  },
699
  {
700
  "epoch": 0.45,
701
  "learning_rate": 3.055878928987195e-05,
702
- "logits/chosen": -2.7530155181884766,
703
- "logits/rejected": -2.7446746826171875,
704
- "logps/chosen": -288.42120361328125,
705
- "logps/rejected": -287.4293518066406,
706
- "loss": 0.3446,
707
- "pred_label": 3031.39990234375,
708
- "rewards/accuracies": 0.6875,
709
- "rewards/chosen": -1.0520861148834229,
710
- "rewards/margins": 1.7391481399536133,
711
- "rewards/rejected": -2.791234254837036,
712
  "step": 430,
713
- "use_label": 3770.60009765625
714
  },
715
  {
716
  "epoch": 0.46,
717
  "learning_rate": 2.9976717112922005e-05,
718
- "logits/chosen": -2.8176846504211426,
719
- "logits/rejected": -2.8231449127197266,
720
- "logps/chosen": -296.26837158203125,
721
- "logps/rejected": -284.00335693359375,
722
- "loss": 0.2945,
723
- "pred_label": 3121.39990234375,
724
- "rewards/accuracies": 0.71875,
725
- "rewards/chosen": -1.2998032569885254,
726
- "rewards/margins": 1.8008592128753662,
727
- "rewards/rejected": -3.1006627082824707,
728
  "step": 440,
729
- "use_label": 3840.60009765625
730
  },
731
  {
732
  "epoch": 0.47,
733
  "learning_rate": 2.939464493597206e-05,
734
- "logits/chosen": -2.6171724796295166,
735
- "logits/rejected": -2.6079916954040527,
736
- "logps/chosen": -248.51025390625,
737
- "logps/rejected": -273.4137268066406,
738
- "loss": 0.287,
739
- "pred_label": 3211.925048828125,
740
- "rewards/accuracies": 0.7250000238418579,
741
- "rewards/chosen": -1.8791544437408447,
742
- "rewards/margins": 2.497044086456299,
743
- "rewards/rejected": -4.3761982917785645,
744
  "step": 450,
745
- "use_label": 3910.074951171875
746
  },
747
  {
748
  "epoch": 0.48,
749
  "learning_rate": 2.881257275902212e-05,
750
- "logits/chosen": -2.815080165863037,
751
- "logits/rejected": -2.8055264949798584,
752
- "logps/chosen": -318.76666259765625,
753
- "logps/rejected": -305.4552917480469,
754
- "loss": 0.3106,
755
- "pred_label": 3311.449951171875,
756
- "rewards/accuracies": 0.6937500238418579,
757
- "rewards/chosen": -1.6964454650878906,
758
- "rewards/margins": 1.949159026145935,
759
- "rewards/rejected": -3.645604372024536,
760
  "step": 460,
761
- "use_label": 3970.550048828125
762
  },
763
  {
764
  "epoch": 0.49,
765
  "learning_rate": 2.8230500582072178e-05,
766
- "logits/chosen": -2.736795425415039,
767
- "logits/rejected": -2.70324444770813,
768
- "logps/chosen": -289.38824462890625,
769
- "logps/rejected": -302.501708984375,
770
- "loss": 0.2652,
771
- "pred_label": 3412.550048828125,
772
- "rewards/accuracies": 0.6875,
773
- "rewards/chosen": -2.2287776470184326,
774
- "rewards/margins": 2.337141513824463,
775
- "rewards/rejected": -4.565918922424316,
776
  "step": 470,
777
- "use_label": 4029.449951171875
778
  },
779
  {
780
  "epoch": 0.5,
781
  "learning_rate": 2.7648428405122233e-05,
782
- "logits/chosen": -2.6519317626953125,
783
- "logits/rejected": -2.616342067718506,
784
- "logps/chosen": -274.76422119140625,
785
- "logps/rejected": -303.0647277832031,
786
- "loss": 0.3052,
787
- "pred_label": 3516.85009765625,
788
- "rewards/accuracies": 0.6812499761581421,
789
- "rewards/chosen": -2.918997287750244,
790
- "rewards/margins": 2.5355329513549805,
791
- "rewards/rejected": -5.454530239105225,
792
  "step": 480,
793
- "use_label": 4085.14990234375
794
  },
795
  {
796
  "epoch": 0.51,
797
  "learning_rate": 2.7066356228172297e-05,
798
- "logits/chosen": -2.6925861835479736,
799
- "logits/rejected": -2.67038631439209,
800
- "logps/chosen": -289.875,
801
- "logps/rejected": -282.19903564453125,
802
- "loss": 0.2758,
803
- "pred_label": 3613.125,
804
- "rewards/accuracies": 0.7250000238418579,
805
- "rewards/chosen": -2.284597873687744,
806
- "rewards/margins": 2.195889711380005,
807
- "rewards/rejected": -4.48048734664917,
808
  "step": 490,
809
- "use_label": 4148.875
810
  },
811
  {
812
  "epoch": 0.52,
813
  "learning_rate": 2.6484284051222352e-05,
814
- "logits/chosen": -2.7040975093841553,
815
- "logits/rejected": -2.6474618911743164,
816
- "logps/chosen": -308.2983703613281,
817
- "logps/rejected": -305.19525146484375,
818
- "loss": 0.2732,
819
- "pred_label": 3721.64990234375,
820
- "rewards/accuracies": 0.7437499761581421,
821
- "rewards/chosen": -1.8841865062713623,
822
- "rewards/margins": 2.809114456176758,
823
- "rewards/rejected": -4.693300724029541,
824
  "step": 500,
825
- "use_label": 4200.35009765625
826
  },
827
  {
828
  "epoch": 0.53,
829
  "learning_rate": 2.590221187427241e-05,
830
- "logits/chosen": -2.7423815727233887,
831
- "logits/rejected": -2.699392795562744,
832
- "logps/chosen": -296.5220642089844,
833
- "logps/rejected": -285.8718566894531,
834
- "loss": 0.2433,
835
- "pred_label": 3824.625,
836
  "rewards/accuracies": 0.7749999761581421,
837
- "rewards/chosen": -2.1184868812561035,
838
- "rewards/margins": 3.1191751956939697,
839
- "rewards/rejected": -5.237661361694336,
840
  "step": 510,
841
- "use_label": 4257.375
842
  },
843
  {
844
  "epoch": 0.54,
845
  "learning_rate": 2.532013969732247e-05,
846
- "logits/chosen": -2.7525784969329834,
847
- "logits/rejected": -2.727067708969116,
848
- "logps/chosen": -304.08343505859375,
849
- "logps/rejected": -308.287841796875,
850
- "loss": 0.2989,
851
- "pred_label": 3925.175048828125,
852
- "rewards/accuracies": 0.706250011920929,
853
- "rewards/chosen": -1.2129416465759277,
854
- "rewards/margins": 2.644366502761841,
855
- "rewards/rejected": -3.8573079109191895,
856
  "step": 520,
857
- "use_label": 4316.8251953125
858
  },
859
  {
860
  "epoch": 0.55,
861
  "learning_rate": 2.4738067520372525e-05,
862
- "logits/chosen": -2.759995937347412,
863
- "logits/rejected": -2.6951136589050293,
864
- "logps/chosen": -314.9224548339844,
865
- "logps/rejected": -335.85101318359375,
866
- "loss": 0.2862,
867
- "pred_label": 4020.449951171875,
868
- "rewards/accuracies": 0.706250011920929,
869
- "rewards/chosen": -1.4606602191925049,
870
- "rewards/margins": 2.726986885070801,
871
- "rewards/rejected": -4.187647342681885,
872
  "step": 530,
873
- "use_label": 4381.5498046875
874
  },
875
  {
876
  "epoch": 0.57,
877
  "learning_rate": 2.4155995343422587e-05,
878
- "logits/chosen": -2.7623682022094727,
879
- "logits/rejected": -2.743201732635498,
880
- "logps/chosen": -307.76995849609375,
881
- "logps/rejected": -319.3795471191406,
882
- "loss": 0.2676,
883
- "pred_label": 4129.97509765625,
884
- "rewards/accuracies": 0.78125,
885
- "rewards/chosen": -1.9233381748199463,
886
- "rewards/margins": 3.4177041053771973,
887
- "rewards/rejected": -5.3410420417785645,
888
  "step": 540,
889
- "use_label": 4432.02490234375
890
  },
891
  {
892
  "epoch": 0.58,
893
  "learning_rate": 2.3573923166472644e-05,
894
- "logits/chosen": -2.766664981842041,
895
- "logits/rejected": -2.727250576019287,
896
- "logps/chosen": -307.920166015625,
897
- "logps/rejected": -313.8031311035156,
898
- "loss": 0.2619,
899
- "pred_label": 4236.1748046875,
900
- "rewards/accuracies": 0.71875,
901
- "rewards/chosen": -1.871700644493103,
902
- "rewards/margins": 3.314380645751953,
903
- "rewards/rejected": -5.186081886291504,
904
  "step": 550,
905
- "use_label": 4485.8251953125
906
  },
907
  {
908
  "epoch": 0.59,
909
  "learning_rate": 2.2991850989522702e-05,
910
- "logits/chosen": -2.747933864593506,
911
- "logits/rejected": -2.743129253387451,
912
- "logps/chosen": -296.66705322265625,
913
- "logps/rejected": -261.8858947753906,
914
- "loss": 0.281,
915
- "pred_label": 4334.72509765625,
916
- "rewards/accuracies": 0.75,
917
- "rewards/chosen": -2.2262706756591797,
918
- "rewards/margins": 2.213707685470581,
919
- "rewards/rejected": -4.43997859954834,
920
  "step": 560,
921
- "use_label": 4547.27490234375
922
  },
923
  {
924
  "epoch": 0.6,
925
  "learning_rate": 2.240977881257276e-05,
926
- "logits/chosen": -2.7517714500427246,
927
- "logits/rejected": -2.7313544750213623,
928
- "logps/chosen": -288.4844970703125,
929
- "logps/rejected": -302.5806579589844,
930
- "loss": 0.2905,
931
- "pred_label": 4442.875,
932
- "rewards/accuracies": 0.706250011920929,
933
- "rewards/chosen": -1.8995840549468994,
934
- "rewards/margins": 2.1551976203918457,
935
- "rewards/rejected": -4.054781913757324,
936
  "step": 570,
937
- "use_label": 4599.125
938
  },
939
  {
940
  "epoch": 0.61,
941
  "learning_rate": 2.1827706635622818e-05,
942
- "logits/chosen": -2.7617430686950684,
943
- "logits/rejected": -2.73646879196167,
944
- "logps/chosen": -295.9949645996094,
945
- "logps/rejected": -310.08331298828125,
946
- "loss": 0.2821,
947
- "pred_label": 4553.9501953125,
948
- "rewards/accuracies": 0.768750011920929,
949
- "rewards/chosen": -1.7886556386947632,
950
- "rewards/margins": 2.772958993911743,
951
- "rewards/rejected": -4.561614513397217,
952
  "step": 580,
953
- "use_label": 4648.0498046875
954
  },
955
  {
956
  "epoch": 0.62,
957
  "learning_rate": 2.124563445867288e-05,
958
- "logits/chosen": -2.6858975887298584,
959
- "logits/rejected": -2.6842644214630127,
960
- "logps/chosen": -292.4759216308594,
961
- "logps/rejected": -323.04754638671875,
962
- "loss": 0.2683,
963
- "pred_label": 4648.1748046875,
964
  "rewards/accuracies": 0.6875,
965
- "rewards/chosen": -2.0878047943115234,
966
- "rewards/margins": 2.138942241668701,
967
- "rewards/rejected": -4.226747035980225,
968
  "step": 590,
969
- "use_label": 4713.8251953125
970
  },
971
  {
972
  "epoch": 0.63,
973
  "learning_rate": 2.0663562281722934e-05,
974
- "logits/chosen": -2.755965232849121,
975
- "logits/rejected": -2.7383854389190674,
976
- "logps/chosen": -327.86962890625,
977
- "logps/rejected": -351.33074951171875,
978
- "loss": 0.2585,
979
- "pred_label": 4749.9501953125,
980
- "rewards/accuracies": 0.7562500238418579,
981
- "rewards/chosen": -1.4506783485412598,
982
- "rewards/margins": 3.4063594341278076,
983
- "rewards/rejected": -4.8570380210876465,
984
  "step": 600,
985
- "use_label": 4772.0498046875
986
  },
987
  {
988
  "epoch": 0.64,
989
  "learning_rate": 2.0081490104772992e-05,
990
- "logits/chosen": -2.7139110565185547,
991
- "logits/rejected": -2.7096176147460938,
992
- "logps/chosen": -322.1976013183594,
993
- "logps/rejected": -281.54266357421875,
994
- "loss": 0.267,
995
- "pred_label": 4857.875,
996
- "rewards/accuracies": 0.706250011920929,
997
- "rewards/chosen": -1.6288321018218994,
998
- "rewards/margins": 2.5627994537353516,
999
- "rewards/rejected": -4.19163179397583,
1000
  "step": 610,
1001
- "use_label": 4824.125
1002
  },
1003
  {
1004
  "epoch": 0.65,
1005
  "learning_rate": 1.9499417927823053e-05,
1006
- "logits/chosen": -2.733909845352173,
1007
- "logits/rejected": -2.6813321113586426,
1008
- "logps/chosen": -296.90032958984375,
1009
- "logps/rejected": -298.9272766113281,
1010
- "loss": 0.2764,
1011
- "pred_label": 4962.77490234375,
1012
- "rewards/accuracies": 0.768750011920929,
1013
- "rewards/chosen": -1.691881775856018,
1014
- "rewards/margins": 2.9819297790527344,
1015
- "rewards/rejected": -4.673811912536621,
1016
  "step": 620,
1017
- "use_label": 4879.22509765625
1018
  },
1019
  {
1020
  "epoch": 0.66,
1021
  "learning_rate": 1.8917345750873107e-05,
1022
- "logits/chosen": -2.7139275074005127,
1023
- "logits/rejected": -2.7132036685943604,
1024
- "logps/chosen": -297.9289855957031,
1025
- "logps/rejected": -306.44940185546875,
1026
- "loss": 0.2715,
1027
- "pred_label": 5066.4501953125,
1028
  "rewards/accuracies": 0.7749999761581421,
1029
- "rewards/chosen": -1.2809231281280518,
1030
- "rewards/margins": 3.2467029094696045,
1031
- "rewards/rejected": -4.527626991271973,
1032
  "step": 630,
1033
- "use_label": 4935.5498046875
1034
  },
1035
  {
1036
  "epoch": 0.67,
1037
  "learning_rate": 1.833527357392317e-05,
1038
- "logits/chosen": -2.7025070190429688,
1039
- "logits/rejected": -2.669739246368408,
1040
- "logps/chosen": -266.36798095703125,
1041
- "logps/rejected": -293.29669189453125,
1042
- "loss": 0.2907,
1043
- "pred_label": 5163.5,
1044
- "rewards/accuracies": 0.768750011920929,
1045
- "rewards/chosen": -1.3176727294921875,
1046
- "rewards/margins": 3.1546790599823,
1047
- "rewards/rejected": -4.472352027893066,
1048
  "step": 640,
1049
- "use_label": 4998.5
1050
  },
1051
  {
1052
  "epoch": 0.68,
1053
  "learning_rate": 1.7753201396973227e-05,
1054
- "logits/chosen": -2.721750497817993,
1055
- "logits/rejected": -2.6910438537597656,
1056
- "logps/chosen": -316.4061279296875,
1057
- "logps/rejected": -322.0241394042969,
1058
- "loss": 0.2773,
1059
- "pred_label": 5267.2001953125,
1060
- "rewards/accuracies": 0.78125,
1061
- "rewards/chosen": -1.4457476139068604,
1062
- "rewards/margins": 2.958343982696533,
1063
- "rewards/rejected": -4.404091835021973,
1064
  "step": 650,
1065
- "use_label": 5054.7998046875
1066
  },
1067
  {
1068
  "epoch": 0.69,
1069
  "learning_rate": 1.717112922002328e-05,
1070
- "logits/chosen": -2.7757656574249268,
1071
- "logits/rejected": -2.7493858337402344,
1072
- "logps/chosen": -311.73321533203125,
1073
- "logps/rejected": -285.9647216796875,
1074
- "loss": 0.2968,
1075
- "pred_label": 5371.77490234375,
1076
- "rewards/accuracies": 0.6625000238418579,
1077
- "rewards/chosen": -1.636950135231018,
1078
- "rewards/margins": 1.971967339515686,
1079
- "rewards/rejected": -3.6089179515838623,
1080
  "step": 660,
1081
- "use_label": 5110.22509765625
1082
  },
1083
  {
1084
  "epoch": 0.7,
1085
  "learning_rate": 1.6589057043073342e-05,
1086
- "logits/chosen": -2.7224373817443848,
1087
- "logits/rejected": -2.686645030975342,
1088
- "logps/chosen": -299.3620300292969,
1089
- "logps/rejected": -292.6378173828125,
1090
- "loss": 0.2702,
1091
- "pred_label": 5477.2001953125,
1092
- "rewards/accuracies": 0.7875000238418579,
1093
- "rewards/chosen": -1.828905701637268,
1094
- "rewards/margins": 3.295133113861084,
1095
- "rewards/rejected": -5.124039173126221,
1096
  "step": 670,
1097
- "use_label": 5164.7998046875
1098
  },
1099
  {
1100
  "epoch": 0.71,
1101
  "learning_rate": 1.60069848661234e-05,
1102
- "logits/chosen": -2.7378199100494385,
1103
- "logits/rejected": -2.7310595512390137,
1104
- "logps/chosen": -304.2184753417969,
1105
- "logps/rejected": -292.1669921875,
1106
- "loss": 0.3204,
1107
- "pred_label": 5576.47509765625,
1108
- "rewards/accuracies": 0.668749988079071,
1109
- "rewards/chosen": -1.7283070087432861,
1110
- "rewards/margins": 1.6653932332992554,
1111
- "rewards/rejected": -3.393700361251831,
1112
  "step": 680,
1113
- "use_label": 5225.52490234375
1114
  },
1115
  {
1116
  "epoch": 0.72,
1117
  "learning_rate": 1.5424912689173458e-05,
1118
- "logits/chosen": -2.7175381183624268,
1119
- "logits/rejected": -2.7000772953033447,
1120
- "logps/chosen": -266.01263427734375,
1121
- "logps/rejected": -258.3471374511719,
1122
- "loss": 0.258,
1123
- "pred_label": 5677.625,
1124
  "rewards/accuracies": 0.7749999761581421,
1125
- "rewards/chosen": -1.56284499168396,
1126
- "rewards/margins": 3.3434224128723145,
1127
- "rewards/rejected": -4.9062676429748535,
1128
  "step": 690,
1129
- "use_label": 5284.375
1130
  },
1131
  {
1132
  "epoch": 0.73,
1133
  "learning_rate": 1.4842840512223516e-05,
1134
- "logits/chosen": -2.735502243041992,
1135
- "logits/rejected": -2.690195322036743,
1136
- "logps/chosen": -293.5663146972656,
1137
- "logps/rejected": -299.68121337890625,
1138
- "loss": 0.2598,
1139
- "pred_label": 5784.4248046875,
1140
- "rewards/accuracies": 0.7250000238418579,
1141
- "rewards/chosen": -1.7876224517822266,
1142
- "rewards/margins": 3.3526370525360107,
1143
- "rewards/rejected": -5.140259742736816,
1144
  "step": 700,
1145
- "use_label": 5337.5751953125
1146
  },
1147
  {
1148
  "epoch": 0.74,
1149
  "learning_rate": 1.4260768335273575e-05,
1150
- "logits/chosen": -2.7820751667022705,
1151
- "logits/rejected": -2.74092173576355,
1152
- "logps/chosen": -305.2626953125,
1153
- "logps/rejected": -284.5972900390625,
1154
- "loss": 0.2792,
1155
- "pred_label": 5887.0498046875,
1156
- "rewards/accuracies": 0.7250000238418579,
1157
- "rewards/chosen": -1.5688129663467407,
1158
- "rewards/margins": 2.764045000076294,
1159
- "rewards/rejected": -4.332858085632324,
1160
  "step": 710,
1161
- "use_label": 5394.9501953125
1162
  },
1163
  {
1164
  "epoch": 0.75,
1165
  "learning_rate": 1.3678696158323633e-05,
1166
- "logits/chosen": -2.741055727005005,
1167
- "logits/rejected": -2.739257335662842,
1168
- "logps/chosen": -288.95611572265625,
1169
- "logps/rejected": -306.3753967285156,
1170
- "loss": 0.2461,
1171
- "pred_label": 5995.5498046875,
1172
  "rewards/accuracies": 0.78125,
1173
- "rewards/chosen": -1.7503833770751953,
1174
- "rewards/margins": 3.5468757152557373,
1175
- "rewards/rejected": -5.297258377075195,
1176
  "step": 720,
1177
- "use_label": 5446.4501953125
1178
  },
1179
  {
1180
  "epoch": 0.76,
1181
  "learning_rate": 1.309662398137369e-05,
1182
- "logits/chosen": -2.7259597778320312,
1183
- "logits/rejected": -2.719268321990967,
1184
- "logps/chosen": -315.80181884765625,
1185
- "logps/rejected": -282.5977783203125,
1186
- "loss": 0.2265,
1187
- "pred_label": 6100.625,
1188
- "rewards/accuracies": 0.7124999761581421,
1189
- "rewards/chosen": -1.8351987600326538,
1190
- "rewards/margins": 2.257732629776001,
1191
- "rewards/rejected": -4.092931270599365,
1192
  "step": 730,
1193
- "use_label": 5501.375
1194
  },
1195
  {
1196
  "epoch": 0.77,
1197
  "learning_rate": 1.2514551804423749e-05,
1198
- "logits/chosen": -2.704035758972168,
1199
- "logits/rejected": -2.694396495819092,
1200
- "logps/chosen": -332.0672912597656,
1201
- "logps/rejected": -348.5420837402344,
1202
- "loss": 0.254,
1203
- "pred_label": 6200.5751953125,
1204
- "rewards/accuracies": 0.7749999761581421,
1205
- "rewards/chosen": -2.469543933868408,
1206
- "rewards/margins": 3.7357654571533203,
1207
- "rewards/rejected": -6.2053093910217285,
1208
  "step": 740,
1209
- "use_label": 5561.4248046875
1210
  },
1211
  {
1212
  "epoch": 0.79,
1213
  "learning_rate": 1.1932479627473807e-05,
1214
- "logits/chosen": -2.717568874359131,
1215
- "logits/rejected": -2.6717875003814697,
1216
- "logps/chosen": -285.0014343261719,
1217
- "logps/rejected": -322.31671142578125,
1218
- "loss": 0.2139,
1219
- "pred_label": 6312.14990234375,
1220
- "rewards/accuracies": 0.762499988079071,
1221
- "rewards/chosen": -2.451322078704834,
1222
- "rewards/margins": 3.727611541748047,
1223
- "rewards/rejected": -6.178933620452881,
1224
  "step": 750,
1225
- "use_label": 5609.85009765625
1226
  },
1227
  {
1228
  "epoch": 0.8,
1229
  "learning_rate": 1.1350407450523866e-05,
1230
- "logits/chosen": -2.719574451446533,
1231
- "logits/rejected": -2.7501304149627686,
1232
- "logps/chosen": -310.88677978515625,
1233
- "logps/rejected": -308.3591003417969,
1234
- "loss": 0.2668,
1235
- "pred_label": 6422.97509765625,
1236
- "rewards/accuracies": 0.768750011920929,
1237
- "rewards/chosen": -1.5423939228057861,
1238
- "rewards/margins": 3.0822057723999023,
1239
- "rewards/rejected": -4.624599456787109,
1240
  "step": 760,
1241
- "use_label": 5659.02490234375
1242
  },
1243
  {
1244
  "epoch": 0.81,
1245
  "learning_rate": 1.0768335273573923e-05,
1246
- "logits/chosen": -2.746647357940674,
1247
- "logits/rejected": -2.733283519744873,
1248
- "logps/chosen": -320.37213134765625,
1249
- "logps/rejected": -333.1225280761719,
1250
- "loss": 0.248,
1251
- "pred_label": 6535.22509765625,
1252
- "rewards/accuracies": 0.737500011920929,
1253
- "rewards/chosen": -1.807631492614746,
1254
- "rewards/margins": 3.0322647094726562,
1255
- "rewards/rejected": -4.839896202087402,
1256
  "step": 770,
1257
- "use_label": 5706.77490234375
1258
  },
1259
  {
1260
  "epoch": 0.82,
1261
  "learning_rate": 1.0186263096623982e-05,
1262
- "logits/chosen": -2.7445006370544434,
1263
- "logits/rejected": -2.7238264083862305,
1264
- "logps/chosen": -303.70404052734375,
1265
- "logps/rejected": -301.3880615234375,
1266
- "loss": 0.2467,
1267
- "pred_label": 6641.10009765625,
1268
- "rewards/accuracies": 0.737500011920929,
1269
- "rewards/chosen": -2.2510251998901367,
1270
- "rewards/margins": 3.1742377281188965,
1271
- "rewards/rejected": -5.425262928009033,
1272
  "step": 780,
1273
- "use_label": 5760.89990234375
1274
  },
1275
  {
1276
  "epoch": 0.83,
1277
  "learning_rate": 9.60419091967404e-06,
1278
- "logits/chosen": -2.7392661571502686,
1279
- "logits/rejected": -2.7158615589141846,
1280
- "logps/chosen": -294.07037353515625,
1281
- "logps/rejected": -322.58758544921875,
1282
- "loss": 0.2523,
1283
- "pred_label": 6745.9501953125,
1284
- "rewards/accuracies": 0.7124999761581421,
1285
- "rewards/chosen": -2.626100778579712,
1286
- "rewards/margins": 3.5596566200256348,
1287
- "rewards/rejected": -6.185757637023926,
1288
  "step": 790,
1289
- "use_label": 5816.0498046875
1290
  },
1291
  {
1292
  "epoch": 0.84,
1293
  "learning_rate": 9.022118742724098e-06,
1294
- "logits/chosen": -2.6276087760925293,
1295
- "logits/rejected": -2.5911715030670166,
1296
- "logps/chosen": -343.8062438964844,
1297
- "logps/rejected": -343.41070556640625,
1298
- "loss": 0.2253,
1299
- "pred_label": 6867.2998046875,
1300
- "rewards/accuracies": 0.737500011920929,
1301
- "rewards/chosen": -2.9334328174591064,
1302
- "rewards/margins": 3.8014297485351562,
1303
- "rewards/rejected": -6.73486328125,
1304
  "step": 800,
1305
- "use_label": 5854.7001953125
1306
  },
1307
  {
1308
  "epoch": 0.85,
1309
  "learning_rate": 8.440046565774158e-06,
1310
- "logits/chosen": -2.6645331382751465,
1311
- "logits/rejected": -2.640127658843994,
1312
- "logps/chosen": -313.86480712890625,
1313
- "logps/rejected": -287.8795471191406,
1314
- "loss": 0.2183,
1315
- "pred_label": 6983.14990234375,
1316
- "rewards/accuracies": 0.6937500238418579,
1317
- "rewards/chosen": -3.2840819358825684,
1318
- "rewards/margins": 2.622573137283325,
1319
- "rewards/rejected": -5.9066548347473145,
1320
  "step": 810,
1321
- "use_label": 5898.85009765625
1322
  },
1323
  {
1324
  "epoch": 0.86,
1325
  "learning_rate": 7.857974388824214e-06,
1326
- "logits/chosen": -2.684762477874756,
1327
- "logits/rejected": -2.6731903553009033,
1328
- "logps/chosen": -316.68157958984375,
1329
- "logps/rejected": -307.9013671875,
1330
- "loss": 0.2752,
1331
- "pred_label": 7098.10009765625,
1332
- "rewards/accuracies": 0.768750011920929,
1333
- "rewards/chosen": -2.3363418579101562,
1334
- "rewards/margins": 4.297635078430176,
1335
- "rewards/rejected": -6.63397741317749,
1336
  "step": 820,
1337
- "use_label": 5943.89990234375
1338
  },
1339
  {
1340
  "epoch": 0.87,
1341
  "learning_rate": 7.275902211874273e-06,
1342
- "logits/chosen": -2.7039098739624023,
1343
- "logits/rejected": -2.6936678886413574,
1344
- "logps/chosen": -289.11761474609375,
1345
- "logps/rejected": -341.92388916015625,
1346
- "loss": 0.2433,
1347
- "pred_label": 7206.47509765625,
1348
- "rewards/accuracies": 0.71875,
1349
- "rewards/chosen": -2.6304259300231934,
1350
- "rewards/margins": 3.5417304039001465,
1351
- "rewards/rejected": -6.172156810760498,
1352
  "step": 830,
1353
- "use_label": 5995.52490234375
1354
  },
1355
  {
1356
  "epoch": 0.88,
1357
  "learning_rate": 6.693830034924331e-06,
1358
- "logits/chosen": -2.6950812339782715,
1359
- "logits/rejected": -2.695385456085205,
1360
- "logps/chosen": -295.2992858886719,
1361
- "logps/rejected": -315.1665954589844,
1362
- "loss": 0.2626,
1363
- "pred_label": 7316.375,
1364
- "rewards/accuracies": 0.7124999761581421,
1365
- "rewards/chosen": -2.7282135486602783,
1366
- "rewards/margins": 2.3400490283966064,
1367
- "rewards/rejected": -5.068262577056885,
1368
  "step": 840,
1369
- "use_label": 6045.625
1370
  },
1371
  {
1372
  "epoch": 0.89,
1373
  "learning_rate": 6.111757857974389e-06,
1374
- "logits/chosen": -2.691633701324463,
1375
- "logits/rejected": -2.7306087017059326,
1376
- "logps/chosen": -319.0354919433594,
1377
- "logps/rejected": -299.2270812988281,
1378
- "loss": 0.2547,
1379
- "pred_label": 7422.2001953125,
1380
  "rewards/accuracies": 0.7749999761581421,
1381
- "rewards/chosen": -2.400865316390991,
1382
- "rewards/margins": 3.1857247352600098,
1383
- "rewards/rejected": -5.586589813232422,
1384
  "step": 850,
1385
- "use_label": 6099.7998046875
1386
  },
1387
  {
1388
  "epoch": 0.9,
1389
  "learning_rate": 5.529685681024447e-06,
1390
- "logits/chosen": -2.6894938945770264,
1391
- "logits/rejected": -2.6826186180114746,
1392
- "logps/chosen": -314.03753662109375,
1393
- "logps/rejected": -334.26690673828125,
1394
- "loss": 0.2358,
1395
- "pred_label": 7535.10009765625,
1396
- "rewards/accuracies": 0.75,
1397
- "rewards/chosen": -2.9154160022735596,
1398
- "rewards/margins": 3.2621803283691406,
1399
- "rewards/rejected": -6.177596092224121,
1400
  "step": 860,
1401
- "use_label": 6146.89990234375
1402
  },
1403
  {
1404
  "epoch": 0.91,
1405
  "learning_rate": 4.947613504074506e-06,
1406
- "logits/chosen": -2.6914477348327637,
1407
- "logits/rejected": -2.6861438751220703,
1408
- "logps/chosen": -267.033203125,
1409
- "logps/rejected": -298.70428466796875,
1410
- "loss": 0.2361,
1411
- "pred_label": 7641.60009765625,
1412
- "rewards/accuracies": 0.737500011920929,
1413
- "rewards/chosen": -2.404568910598755,
1414
- "rewards/margins": 3.4409728050231934,
1415
- "rewards/rejected": -5.845541477203369,
1416
  "step": 870,
1417
- "use_label": 6200.39990234375
1418
  },
1419
  {
1420
  "epoch": 0.92,
1421
  "learning_rate": 4.3655413271245635e-06,
1422
- "logits/chosen": -2.7052195072174072,
1423
- "logits/rejected": -2.6577229499816895,
1424
- "logps/chosen": -311.93719482421875,
1425
- "logps/rejected": -314.8600769042969,
1426
- "loss": 0.2224,
1427
- "pred_label": 7745.8251953125,
1428
- "rewards/accuracies": 0.7749999761581421,
1429
- "rewards/chosen": -2.5347952842712402,
1430
- "rewards/margins": 2.786163330078125,
1431
- "rewards/rejected": -5.320958614349365,
1432
  "step": 880,
1433
- "use_label": 6256.1748046875
1434
  },
1435
  {
1436
  "epoch": 0.93,
1437
  "learning_rate": 3.7834691501746217e-06,
1438
- "logits/chosen": -2.718468189239502,
1439
- "logits/rejected": -2.710252523422241,
1440
- "logps/chosen": -304.1473693847656,
1441
- "logps/rejected": -332.4493103027344,
1442
- "loss": 0.2246,
1443
- "pred_label": 7864.375,
1444
- "rewards/accuracies": 0.793749988079071,
1445
- "rewards/chosen": -2.2862813472747803,
1446
- "rewards/margins": 3.7558963298797607,
1447
- "rewards/rejected": -6.042177677154541,
1448
  "step": 890,
1449
- "use_label": 6297.625
1450
  },
1451
  {
1452
  "epoch": 0.94,
1453
  "learning_rate": 3.2013969732246805e-06,
1454
- "logits/chosen": -2.6227777004241943,
1455
- "logits/rejected": -2.571324110031128,
1456
- "logps/chosen": -307.7371826171875,
1457
- "logps/rejected": -324.693359375,
1458
- "loss": 0.2493,
1459
- "pred_label": 7978.625,
1460
- "rewards/accuracies": 0.7562500238418579,
1461
- "rewards/chosen": -2.859910488128662,
1462
- "rewards/margins": 3.757611036300659,
1463
- "rewards/rejected": -6.617520809173584,
1464
  "step": 900,
1465
- "use_label": 6343.375
1466
  },
1467
  {
1468
  "epoch": 0.95,
1469
  "learning_rate": 2.6193247962747383e-06,
1470
- "logits/chosen": -2.701064109802246,
1471
- "logits/rejected": -2.665743350982666,
1472
- "logps/chosen": -323.77337646484375,
1473
- "logps/rejected": -322.24432373046875,
1474
- "loss": 0.238,
1475
- "pred_label": 8087.5,
1476
- "rewards/accuracies": 0.75,
1477
- "rewards/chosen": -2.7357375621795654,
1478
- "rewards/margins": 3.1752612590789795,
1479
- "rewards/rejected": -5.910999298095703,
1480
  "step": 910,
1481
- "use_label": 6394.5
1482
  },
1483
  {
1484
  "epoch": 0.96,
1485
  "learning_rate": 2.037252619324796e-06,
1486
- "logits/chosen": -2.639965772628784,
1487
- "logits/rejected": -2.6299610137939453,
1488
- "logps/chosen": -327.05975341796875,
1489
- "logps/rejected": -325.63909912109375,
1490
- "loss": 0.2258,
1491
- "pred_label": 8202.6748046875,
1492
- "rewards/accuracies": 0.737500011920929,
1493
- "rewards/chosen": -3.017204761505127,
1494
- "rewards/margins": 3.5882301330566406,
1495
- "rewards/rejected": -6.605435371398926,
1496
  "step": 920,
1497
- "use_label": 6439.3251953125
1498
  },
1499
  {
1500
  "epoch": 0.97,
1501
  "learning_rate": 1.4551804423748545e-06,
1502
- "logits/chosen": -2.69065260887146,
1503
- "logits/rejected": -2.6345713138580322,
1504
- "logps/chosen": -286.16278076171875,
1505
- "logps/rejected": -308.4196472167969,
1506
- "loss": 0.2207,
1507
- "pred_label": 8314.349609375,
1508
- "rewards/accuracies": 0.8125,
1509
- "rewards/chosen": -1.7949655055999756,
1510
- "rewards/margins": 4.867483139038086,
1511
- "rewards/rejected": -6.662448883056641,
1512
  "step": 930,
1513
- "use_label": 6487.64990234375
1514
  },
1515
  {
1516
  "epoch": 0.98,
1517
  "learning_rate": 8.731082654249127e-07,
1518
- "logits/chosen": -2.71934175491333,
1519
- "logits/rejected": -2.67936372756958,
1520
- "logps/chosen": -318.2463073730469,
1521
- "logps/rejected": -326.56634521484375,
1522
- "loss": 0.234,
1523
- "pred_label": 8430.5751953125,
1524
- "rewards/accuracies": 0.7250000238418579,
1525
- "rewards/chosen": -3.0853824615478516,
1526
- "rewards/margins": 2.8827648162841797,
1527
- "rewards/rejected": -5.968147277832031,
1528
  "step": 940,
1529
- "use_label": 6531.4248046875
1530
  },
1531
  {
1532
  "epoch": 0.99,
1533
  "learning_rate": 2.910360884749709e-07,
1534
- "logits/chosen": -2.6776463985443115,
1535
- "logits/rejected": -2.6152024269104004,
1536
- "logps/chosen": -300.53662109375,
1537
- "logps/rejected": -322.4374694824219,
1538
- "loss": 0.2135,
1539
- "pred_label": 8549.625,
1540
- "rewards/accuracies": 0.7562500238418579,
1541
- "rewards/chosen": -3.2081990242004395,
1542
- "rewards/margins": 3.740739107131958,
1543
- "rewards/rejected": -6.94893741607666,
1544
  "step": 950,
1545
- "use_label": 6572.375
1546
  },
1547
  {
1548
  "epoch": 1.0,
1549
- "eval_logits/chosen": -2.6974706649780273,
1550
- "eval_logits/rejected": -2.665019989013672,
1551
- "eval_logps/chosen": -311.6889953613281,
1552
- "eval_logps/rejected": -322.95947265625,
1553
- "eval_loss": 0.2408759742975235,
1554
- "eval_pred_label": 8833.576171875,
1555
- "eval_rewards/accuracies": 0.734000027179718,
1556
- "eval_rewards/chosen": -2.7431609630584717,
1557
- "eval_rewards/margins": 3.6228184700012207,
1558
- "eval_rewards/rejected": -6.36598014831543,
1559
- "eval_runtime": 452.5439,
1560
- "eval_samples_per_second": 4.419,
1561
- "eval_steps_per_second": 0.276,
1562
- "eval_use_label": 6698.423828125,
1563
  "step": 955
1564
  },
1565
  {
1566
  "epoch": 1.0,
1567
  "step": 955,
1568
  "total_flos": 0.0,
1569
- "train_loss": 0.31699458866219243,
1570
- "train_runtime": 25218.7851,
1571
- "train_samples_per_second": 2.424,
1572
  "train_steps_per_second": 0.038
1573
  }
1574
  ],
 
80
  "logps/chosen": -281.32928466796875,
81
  "logps/rejected": -277.8607482910156,
82
  "loss": 0.6339,
83
+ "pred_label": 0.0,
84
  "rewards/accuracies": 0.6875,
85
  "rewards/chosen": 0.02641097828745842,
86
  "rewards/margins": 0.2079576551914215,
87
  "rewards/rejected": -0.1815466731786728,
88
  "step": 40,
89
+ "use_label": 562.0
90
  },
91
  {
92
  "epoch": 0.05,
 
96
  "logps/chosen": -266.80517578125,
97
  "logps/rejected": -261.9176025390625,
98
  "loss": 0.5914,
99
+ "pred_label": 1.2999999523162842,
100
  "rewards/accuracies": 0.643750011920929,
101
  "rewards/chosen": -0.06968289613723755,
102
  "rewards/margins": 0.33043327927589417,
103
  "rewards/rejected": -0.4001162648200989,
104
  "step": 50,
105
+ "use_label": 720.7000122070312
106
  },
107
  {
108
  "epoch": 0.06,
109
  "learning_rate": 3.125e-05,
110
+ "logits/chosen": -2.818674087524414,
111
+ "logits/rejected": -2.8088903427124023,
112
+ "logps/chosen": -300.7886962890625,
113
+ "logps/rejected": -290.3478698730469,
114
+ "loss": 0.6053,
115
+ "pred_label": 5.875,
116
  "rewards/accuracies": 0.699999988079071,
117
+ "rewards/chosen": -0.03734510391950607,
118
+ "rewards/margins": 0.38690200448036194,
119
+ "rewards/rejected": -0.4242470860481262,
120
  "step": 60,
121
+ "use_label": 876.125
122
  },
123
  {
124
  "epoch": 0.07,
125
  "learning_rate": 3.6458333333333336e-05,
126
+ "logits/chosen": -2.8236336708068848,
127
+ "logits/rejected": -2.818040132522583,
128
+ "logps/chosen": -294.469970703125,
129
+ "logps/rejected": -278.6357421875,
130
+ "loss": 0.535,
131
+ "pred_label": 13.5,
132
+ "rewards/accuracies": 0.737500011920929,
133
+ "rewards/chosen": -0.015645451843738556,
134
+ "rewards/margins": 0.5513776540756226,
135
+ "rewards/rejected": -0.5670231580734253,
136
  "step": 70,
137
+ "use_label": 1028.5
138
  },
139
  {
140
  "epoch": 0.08,
141
  "learning_rate": 4.166666666666667e-05,
142
+ "logits/chosen": -2.770120143890381,
143
+ "logits/rejected": -2.756917953491211,
144
+ "logps/chosen": -306.6744384765625,
145
+ "logps/rejected": -288.44793701171875,
146
+ "loss": 0.4709,
147
+ "pred_label": 36.79999923706055,
148
+ "rewards/accuracies": 0.762499988079071,
149
+ "rewards/chosen": -0.13690751791000366,
150
+ "rewards/margins": 0.8066266179084778,
151
+ "rewards/rejected": -0.9435340762138367,
152
  "step": 80,
153
+ "use_label": 1165.199951171875
154
  },
155
  {
156
  "epoch": 0.09,
157
  "learning_rate": 4.6875e-05,
158
+ "logits/chosen": -2.698793411254883,
159
+ "logits/rejected": -2.75233793258667,
160
+ "logps/chosen": -295.5422668457031,
161
+ "logps/rejected": -274.40032958984375,
162
+ "loss": 0.5055,
163
+ "pred_label": 72.92500305175781,
164
+ "rewards/accuracies": 0.71875,
165
+ "rewards/chosen": 0.02409188821911812,
166
+ "rewards/margins": 0.8366730809211731,
167
+ "rewards/rejected": -0.8125811815261841,
168
  "step": 90,
169
+ "use_label": 1289.074951171875
170
  },
171
  {
172
  "epoch": 0.1,
173
  "learning_rate": 4.976717112922003e-05,
174
+ "logits/chosen": -2.719881057739258,
175
+ "logits/rejected": -2.7187676429748535,
176
+ "logps/chosen": -276.2088317871094,
177
+ "logps/rejected": -304.5025329589844,
178
+ "loss": 0.5147,
179
+ "pred_label": 106.19999694824219,
180
+ "rewards/accuracies": 0.7250000238418579,
181
+ "rewards/chosen": -0.20771054923534393,
182
+ "rewards/margins": 0.8888995051383972,
183
+ "rewards/rejected": -1.096610188484192,
184
  "step": 100,
185
+ "use_label": 1415.800048828125
186
  },
187
  {
188
  "epoch": 0.12,
189
  "learning_rate": 4.918509895227008e-05,
190
+ "logits/chosen": -2.7200231552124023,
191
+ "logits/rejected": -2.6798348426818848,
192
+ "logps/chosen": -266.4808044433594,
193
+ "logps/rejected": -259.0126037597656,
194
+ "loss": 0.5078,
195
+ "pred_label": 140.47500610351562,
196
+ "rewards/accuracies": 0.6625000238418579,
197
+ "rewards/chosen": -0.18062053620815277,
198
+ "rewards/margins": 0.8032993078231812,
199
+ "rewards/rejected": -0.9839197993278503,
200
  "step": 110,
201
+ "use_label": 1541.5250244140625
202
  },
203
  {
204
  "epoch": 0.13,
205
  "learning_rate": 4.860302677532014e-05,
206
+ "logits/chosen": -2.7260735034942627,
207
+ "logits/rejected": -2.729832172393799,
208
+ "logps/chosen": -280.414306640625,
209
+ "logps/rejected": -256.8193664550781,
210
+ "loss": 0.5022,
211
+ "pred_label": 172.5,
212
+ "rewards/accuracies": 0.6937500238418579,
213
+ "rewards/chosen": -0.09014640003442764,
214
+ "rewards/margins": 0.8234178423881531,
215
+ "rewards/rejected": -0.9135642051696777,
216
  "step": 120,
217
+ "use_label": 1669.5
218
  },
219
  {
220
  "epoch": 0.14,
221
  "learning_rate": 4.80209545983702e-05,
222
+ "logits/chosen": -2.7872612476348877,
223
+ "logits/rejected": -2.748222589492798,
224
+ "logps/chosen": -308.61865234375,
225
+ "logps/rejected": -281.79461669921875,
226
+ "loss": 0.5032,
227
+ "pred_label": 206.75,
228
+ "rewards/accuracies": 0.7124999761581421,
229
+ "rewards/chosen": -0.1351226270198822,
230
+ "rewards/margins": 0.7332055568695068,
231
+ "rewards/rejected": -0.8683282136917114,
232
  "step": 130,
233
+ "use_label": 1795.25
234
  },
235
  {
236
  "epoch": 0.15,
237
  "learning_rate": 4.743888242142026e-05,
238
+ "logits/chosen": -2.794041872024536,
239
+ "logits/rejected": -2.7909293174743652,
240
+ "logps/chosen": -260.35333251953125,
241
+ "logps/rejected": -247.11325073242188,
242
+ "loss": 0.4325,
243
+ "pred_label": 246.6999969482422,
244
+ "rewards/accuracies": 0.7250000238418579,
245
+ "rewards/chosen": -0.22465598583221436,
246
+ "rewards/margins": 0.984777569770813,
247
+ "rewards/rejected": -1.209433674812317,
248
  "step": 140,
249
+ "use_label": 1915.300048828125
250
  },
251
  {
252
  "epoch": 0.16,
253
  "learning_rate": 4.685681024447032e-05,
254
+ "logits/chosen": -2.705472946166992,
255
+ "logits/rejected": -2.7284083366394043,
256
+ "logps/chosen": -300.3094787597656,
257
+ "logps/rejected": -294.97125244140625,
258
+ "loss": 0.4364,
259
+ "pred_label": 291.7250061035156,
260
  "rewards/accuracies": 0.612500011920929,
261
+ "rewards/chosen": -0.5423473119735718,
262
+ "rewards/margins": 0.7800337076187134,
263
+ "rewards/rejected": -1.3223809003829956,
264
  "step": 150,
265
+ "use_label": 2030.2750244140625
266
  },
267
  {
268
  "epoch": 0.17,
269
  "learning_rate": 4.6274738067520374e-05,
270
+ "logits/chosen": -2.743900775909424,
271
+ "logits/rejected": -2.688366174697876,
272
+ "logps/chosen": -263.2331237792969,
273
+ "logps/rejected": -281.53582763671875,
274
+ "loss": 0.4222,
275
+ "pred_label": 345.32501220703125,
276
+ "rewards/accuracies": 0.699999988079071,
277
+ "rewards/chosen": -0.0851084440946579,
278
+ "rewards/margins": 1.1642277240753174,
279
+ "rewards/rejected": -1.2493362426757812,
280
  "step": 160,
281
+ "use_label": 2136.675048828125
282
  },
283
  {
284
  "epoch": 0.18,
285
  "learning_rate": 4.5692665890570435e-05,
286
+ "logits/chosen": -2.7768936157226562,
287
+ "logits/rejected": -2.779693126678467,
288
+ "logps/chosen": -297.79376220703125,
289
+ "logps/rejected": -261.3406982421875,
290
+ "loss": 0.4307,
291
+ "pred_label": 394.0,
292
+ "rewards/accuracies": 0.6937500238418579,
293
+ "rewards/chosen": -0.13176263868808746,
294
+ "rewards/margins": 1.0377919673919678,
295
+ "rewards/rejected": -1.169554591178894,
296
  "step": 170,
297
+ "use_label": 2248.0
298
  },
299
  {
300
  "epoch": 0.19,
301
  "learning_rate": 4.511059371362049e-05,
302
+ "logits/chosen": -2.6910805702209473,
303
+ "logits/rejected": -2.6632163524627686,
304
+ "logps/chosen": -259.5965270996094,
305
+ "logps/rejected": -289.4101257324219,
306
+ "loss": 0.4077,
307
+ "pred_label": 464.125,
308
  "rewards/accuracies": 0.731249988079071,
309
+ "rewards/chosen": -0.3667333722114563,
310
+ "rewards/margins": 1.5060479640960693,
311
+ "rewards/rejected": -1.8727811574935913,
312
  "step": 180,
313
+ "use_label": 2337.875
314
  },
315
  {
316
  "epoch": 0.2,
317
  "learning_rate": 4.452852153667055e-05,
318
+ "logits/chosen": -2.7862701416015625,
319
+ "logits/rejected": -2.747239589691162,
320
+ "logps/chosen": -308.29376220703125,
321
+ "logps/rejected": -280.5757751464844,
322
+ "loss": 0.3987,
323
+ "pred_label": 532.6500244140625,
324
+ "rewards/accuracies": 0.8187500238418579,
325
+ "rewards/chosen": -0.10377927869558334,
326
+ "rewards/margins": 1.8495817184448242,
327
+ "rewards/rejected": -1.953360915184021,
328
  "step": 190,
329
+ "use_label": 2429.35009765625
330
  },
331
  {
332
  "epoch": 0.21,
333
  "learning_rate": 4.394644935972061e-05,
334
+ "logits/chosen": -2.787761688232422,
335
+ "logits/rejected": -2.7670867443084717,
336
+ "logps/chosen": -267.0201110839844,
337
+ "logps/rejected": -287.81341552734375,
338
+ "loss": 0.4013,
339
+ "pred_label": 604.4000244140625,
340
+ "rewards/accuracies": 0.737500011920929,
341
+ "rewards/chosen": -0.5691341757774353,
342
+ "rewards/margins": 1.3881704807281494,
343
+ "rewards/rejected": -1.95730459690094,
344
  "step": 200,
345
+ "use_label": 2517.60009765625
346
  },
347
  {
348
  "epoch": 0.22,
349
  "learning_rate": 4.336437718277067e-05,
350
+ "logits/chosen": -2.7742085456848145,
351
+ "logits/rejected": -2.794970989227295,
352
+ "logps/chosen": -306.9640808105469,
353
+ "logps/rejected": -271.60906982421875,
354
+ "loss": 0.4304,
355
+ "pred_label": 661.4000244140625,
356
+ "rewards/accuracies": 0.762499988079071,
357
+ "rewards/chosen": -0.5716164708137512,
358
+ "rewards/margins": 1.1703290939331055,
359
+ "rewards/rejected": -1.7419456243515015,
360
  "step": 210,
361
+ "use_label": 2620.60009765625
362
  },
363
  {
364
  "epoch": 0.23,
365
  "learning_rate": 4.278230500582072e-05,
366
+ "logits/chosen": -2.7252678871154785,
367
+ "logits/rejected": -2.740291118621826,
368
+ "logps/chosen": -280.5372009277344,
369
+ "logps/rejected": -290.718994140625,
370
+ "loss": 0.3818,
371
+ "pred_label": 722.1749877929688,
372
  "rewards/accuracies": 0.75,
373
+ "rewards/chosen": -0.5545529723167419,
374
+ "rewards/margins": 1.4516589641571045,
375
+ "rewards/rejected": -2.006211996078491,
376
  "step": 220,
377
+ "use_label": 2719.824951171875
378
  },
379
  {
380
  "epoch": 0.24,
381
  "learning_rate": 4.220023282887078e-05,
382
+ "logits/chosen": -2.7117373943328857,
383
+ "logits/rejected": -2.7296900749206543,
384
+ "logps/chosen": -316.2425231933594,
385
+ "logps/rejected": -287.38421630859375,
386
+ "loss": 0.3674,
387
+ "pred_label": 794.0499877929688,
388
+ "rewards/accuracies": 0.75,
389
+ "rewards/chosen": -0.8069397211074829,
390
+ "rewards/margins": 1.5205641984939575,
391
+ "rewards/rejected": -2.3275039196014404,
392
  "step": 230,
393
+ "use_label": 2807.949951171875
394
  },
395
  {
396
  "epoch": 0.25,
397
  "learning_rate": 4.161816065192084e-05,
398
+ "logits/chosen": -2.7252871990203857,
399
+ "logits/rejected": -2.7065608501434326,
400
+ "logps/chosen": -281.2114562988281,
401
+ "logps/rejected": -245.62576293945312,
402
+ "loss": 0.4159,
403
+ "pred_label": 862.6500244140625,
404
+ "rewards/accuracies": 0.706250011920929,
405
+ "rewards/chosen": -0.6634035706520081,
406
+ "rewards/margins": 1.5971952676773071,
407
+ "rewards/rejected": -2.260598659515381,
408
  "step": 240,
409
+ "use_label": 2899.35009765625
410
  },
411
  {
412
  "epoch": 0.26,
413
  "learning_rate": 4.10360884749709e-05,
414
+ "logits/chosen": -2.7199912071228027,
415
+ "logits/rejected": -2.709681987762451,
416
+ "logps/chosen": -296.9573974609375,
417
+ "logps/rejected": -291.0780334472656,
418
+ "loss": 0.4335,
419
+ "pred_label": 924.7000122070312,
420
+ "rewards/accuracies": 0.737500011920929,
421
+ "rewards/chosen": -0.63172847032547,
422
+ "rewards/margins": 1.5341920852661133,
423
+ "rewards/rejected": -2.1659207344055176,
424
  "step": 250,
425
+ "use_label": 2997.300048828125
426
  },
427
  {
428
  "epoch": 0.27,
429
  "learning_rate": 4.045401629802096e-05,
430
+ "logits/chosen": -2.7235524654388428,
431
+ "logits/rejected": -2.7239880561828613,
432
+ "logps/chosen": -256.81951904296875,
433
+ "logps/rejected": -270.71209716796875,
434
+ "loss": 0.4011,
435
+ "pred_label": 982.9749755859375,
436
+ "rewards/accuracies": 0.6812499761581421,
437
+ "rewards/chosen": -0.8210113644599915,
438
+ "rewards/margins": 1.2212928533554077,
439
+ "rewards/rejected": -2.042304277420044,
440
  "step": 260,
441
+ "use_label": 3099.02490234375
442
  },
443
  {
444
  "epoch": 0.28,
445
  "learning_rate": 3.9871944121071014e-05,
446
+ "logits/chosen": -2.7312769889831543,
447
+ "logits/rejected": -2.739067554473877,
448
+ "logps/chosen": -298.7057800292969,
449
+ "logps/rejected": -300.0126953125,
450
+ "loss": 0.4119,
451
+ "pred_label": 1049.5,
452
+ "rewards/accuracies": 0.7749999761581421,
453
+ "rewards/chosen": -0.6834469437599182,
454
+ "rewards/margins": 1.8426272869110107,
455
+ "rewards/rejected": -2.5260744094848633,
456
  "step": 270,
457
+ "use_label": 3192.5
458
  },
459
  {
460
  "epoch": 0.29,
461
  "learning_rate": 3.928987194412107e-05,
462
+ "logits/chosen": -2.807619333267212,
463
+ "logits/rejected": -2.8070969581604004,
464
+ "logps/chosen": -298.39654541015625,
465
+ "logps/rejected": -282.5412902832031,
466
+ "loss": 0.401,
467
+ "pred_label": 1114.199951171875,
468
+ "rewards/accuracies": 0.78125,
469
+ "rewards/chosen": -0.44325321912765503,
470
+ "rewards/margins": 1.5879560708999634,
471
+ "rewards/rejected": -2.0312094688415527,
472
  "step": 280,
473
+ "use_label": 3287.800048828125
474
  },
475
  {
476
  "epoch": 0.3,
477
  "learning_rate": 3.870779976717113e-05,
478
+ "logits/chosen": -2.7824301719665527,
479
+ "logits/rejected": -2.762150764465332,
480
+ "logps/chosen": -300.687255859375,
481
+ "logps/rejected": -250.01663208007812,
482
+ "loss": 0.3923,
483
+ "pred_label": 1174.4749755859375,
484
+ "rewards/accuracies": 0.706250011920929,
485
+ "rewards/chosen": -0.7764869928359985,
486
+ "rewards/margins": 1.2035253047943115,
487
+ "rewards/rejected": -1.9800125360488892,
488
  "step": 290,
489
+ "use_label": 3387.52490234375
490
  },
491
  {
492
  "epoch": 0.31,
493
  "learning_rate": 3.812572759022119e-05,
494
+ "logits/chosen": -2.769491195678711,
495
+ "logits/rejected": -2.732734203338623,
496
+ "logps/chosen": -271.75091552734375,
497
+ "logps/rejected": -263.69464111328125,
498
+ "loss": 0.4009,
499
+ "pred_label": 1240.0250244140625,
500
+ "rewards/accuracies": 0.699999988079071,
501
+ "rewards/chosen": -0.8072065114974976,
502
+ "rewards/margins": 1.5778390169143677,
503
+ "rewards/rejected": -2.3850455284118652,
504
  "step": 300,
505
+ "use_label": 3481.97509765625
506
  },
507
  {
508
  "epoch": 0.32,
509
  "learning_rate": 3.7543655413271246e-05,
510
+ "logits/chosen": -2.7556052207946777,
511
+ "logits/rejected": -2.6895577907562256,
512
+ "logps/chosen": -273.4451904296875,
513
+ "logps/rejected": -317.7626953125,
514
+ "loss": 0.3834,
515
+ "pred_label": 1312.050048828125,
516
+ "rewards/accuracies": 0.800000011920929,
517
+ "rewards/chosen": -0.6513568162918091,
518
+ "rewards/margins": 2.270569324493408,
519
+ "rewards/rejected": -2.921926259994507,
520
  "step": 310,
521
+ "use_label": 3569.949951171875
522
  },
523
  {
524
  "epoch": 0.33,
525
  "learning_rate": 3.696158323632131e-05,
526
+ "logits/chosen": -2.73972749710083,
527
+ "logits/rejected": -2.7072513103485107,
528
+ "logps/chosen": -303.25665283203125,
529
+ "logps/rejected": -305.5859069824219,
530
+ "loss": 0.3917,
531
+ "pred_label": 1381.9000244140625,
532
+ "rewards/accuracies": 0.7562500238418579,
533
+ "rewards/chosen": -0.7276700735092163,
534
+ "rewards/margins": 1.7501161098480225,
535
+ "rewards/rejected": -2.4777863025665283,
536
  "step": 320,
537
+ "use_label": 3660.10009765625
538
  },
539
  {
540
  "epoch": 0.35,
541
  "learning_rate": 3.637951105937136e-05,
542
+ "logits/chosen": -2.793794631958008,
543
+ "logits/rejected": -2.7443809509277344,
544
+ "logps/chosen": -277.2012634277344,
545
+ "logps/rejected": -254.2499542236328,
546
+ "loss": 0.4169,
547
+ "pred_label": 1455.949951171875,
548
+ "rewards/accuracies": 0.6937500238418579,
549
+ "rewards/chosen": -0.6252767443656921,
550
+ "rewards/margins": 1.3715183734893799,
551
+ "rewards/rejected": -1.9967950582504272,
552
  "step": 330,
553
+ "use_label": 3746.050048828125
554
  },
555
  {
556
  "epoch": 0.36,
557
  "learning_rate": 3.579743888242142e-05,
558
+ "logits/chosen": -2.747743606567383,
559
+ "logits/rejected": -2.763869524002075,
560
+ "logps/chosen": -294.94415283203125,
561
+ "logps/rejected": -256.500244140625,
562
+ "loss": 0.4603,
563
+ "pred_label": 1514.300048828125,
564
+ "rewards/accuracies": 0.6625000238418579,
565
+ "rewards/chosen": -0.5764142870903015,
566
+ "rewards/margins": 0.9885671734809875,
567
+ "rewards/rejected": -1.564981460571289,
568
  "step": 340,
569
+ "use_label": 3847.699951171875
570
  },
571
  {
572
  "epoch": 0.37,
573
  "learning_rate": 3.5215366705471484e-05,
574
+ "logits/chosen": -2.730821132659912,
575
+ "logits/rejected": -2.7214157581329346,
576
+ "logps/chosen": -302.2289733886719,
577
+ "logps/rejected": -301.8661804199219,
578
+ "loss": 0.3947,
579
+ "pred_label": 1581.550048828125,
580
+ "rewards/accuracies": 0.793749988079071,
581
+ "rewards/chosen": -0.503420352935791,
582
+ "rewards/margins": 1.9169738292694092,
583
+ "rewards/rejected": -2.4203941822052,
584
  "step": 350,
585
+ "use_label": 3940.449951171875
586
  },
587
  {
588
  "epoch": 0.38,
589
  "learning_rate": 3.463329452852154e-05,
590
+ "logits/chosen": -2.804471969604492,
591
+ "logits/rejected": -2.8228957653045654,
592
+ "logps/chosen": -281.3868103027344,
593
+ "logps/rejected": -285.85137939453125,
594
+ "loss": 0.4073,
595
+ "pred_label": 1646.2750244140625,
596
+ "rewards/accuracies": 0.7562500238418579,
597
+ "rewards/chosen": -0.8592750430107117,
598
+ "rewards/margins": 1.2057772874832153,
599
+ "rewards/rejected": -2.0650525093078613,
600
  "step": 360,
601
+ "use_label": 4035.72509765625
602
  },
603
  {
604
  "epoch": 0.39,
605
  "learning_rate": 3.40512223515716e-05,
606
+ "logits/chosen": -2.7621943950653076,
607
+ "logits/rejected": -2.719569683074951,
608
+ "logps/chosen": -283.62945556640625,
609
+ "logps/rejected": -303.4671936035156,
610
+ "loss": 0.4147,
611
+ "pred_label": 1718.800048828125,
612
+ "rewards/accuracies": 0.8062499761581421,
613
+ "rewards/chosen": -0.7426743507385254,
614
+ "rewards/margins": 2.031805992126465,
615
+ "rewards/rejected": -2.7744803428649902,
616
  "step": 370,
617
+ "use_label": 4123.2001953125
618
  },
619
  {
620
  "epoch": 0.4,
621
  "learning_rate": 3.3469150174621654e-05,
622
+ "logits/chosen": -2.7821342945098877,
623
+ "logits/rejected": -2.7678303718566895,
624
+ "logps/chosen": -292.40106201171875,
625
+ "logps/rejected": -284.78363037109375,
626
+ "loss": 0.3521,
627
+ "pred_label": 1789.875,
628
+ "rewards/accuracies": 0.7437499761581421,
629
+ "rewards/chosen": -0.7170799374580383,
630
+ "rewards/margins": 1.70342218875885,
631
+ "rewards/rejected": -2.420502185821533,
632
  "step": 380,
633
+ "use_label": 4212.125
634
  },
635
  {
636
  "epoch": 0.41,
637
  "learning_rate": 3.288707799767171e-05,
638
+ "logits/chosen": -2.673171281814575,
639
+ "logits/rejected": -2.665820837020874,
640
+ "logps/chosen": -289.74224853515625,
641
+ "logps/rejected": -273.6763916015625,
642
+ "loss": 0.3695,
643
+ "pred_label": 1873.8499755859375,
644
+ "rewards/accuracies": 0.7437499761581421,
645
+ "rewards/chosen": -0.6950327157974243,
646
+ "rewards/margins": 2.244393825531006,
647
+ "rewards/rejected": -2.939426898956299,
648
  "step": 390,
649
+ "use_label": 4288.14990234375
650
  },
651
  {
652
  "epoch": 0.42,
653
  "learning_rate": 3.2305005820721776e-05,
654
+ "logits/chosen": -2.809072971343994,
655
+ "logits/rejected": -2.8203823566436768,
656
+ "logps/chosen": -312.44891357421875,
657
+ "logps/rejected": -279.95147705078125,
658
+ "loss": 0.3548,
659
+ "pred_label": 1947.875,
660
  "rewards/accuracies": 0.71875,
661
+ "rewards/chosen": -0.5397918224334717,
662
+ "rewards/margins": 1.5214416980743408,
663
+ "rewards/rejected": -2.0612335205078125,
664
  "step": 400,
665
+ "use_label": 4374.125
666
  },
667
  {
668
  "epoch": 0.43,
669
  "learning_rate": 3.172293364377183e-05,
670
+ "logits/chosen": -2.7990567684173584,
671
+ "logits/rejected": -2.7979464530944824,
672
+ "logps/chosen": -306.15771484375,
673
+ "logps/rejected": -280.4829406738281,
674
+ "loss": 0.3551,
675
+ "pred_label": 2012.9749755859375,
676
+ "rewards/accuracies": 0.675000011920929,
677
+ "rewards/chosen": -0.806006133556366,
678
+ "rewards/margins": 1.3874499797821045,
679
+ "rewards/rejected": -2.1934561729431152,
680
  "step": 410,
681
+ "use_label": 4469.02490234375
682
  },
683
  {
684
  "epoch": 0.44,
685
  "learning_rate": 3.1140861466821885e-05,
686
+ "logits/chosen": -2.7010891437530518,
687
+ "logits/rejected": -2.6862704753875732,
688
+ "logps/chosen": -249.2625732421875,
689
+ "logps/rejected": -274.73675537109375,
690
+ "loss": 0.3416,
691
+ "pred_label": 2088.074951171875,
692
  "rewards/accuracies": 0.6937500238418579,
693
+ "rewards/chosen": -0.8682168126106262,
694
+ "rewards/margins": 1.693968415260315,
695
+ "rewards/rejected": -2.562185049057007,
696
  "step": 420,
697
+ "use_label": 4553.9248046875
698
  },
699
  {
700
  "epoch": 0.45,
701
  "learning_rate": 3.055878928987195e-05,
702
+ "logits/chosen": -2.734596014022827,
703
+ "logits/rejected": -2.7307052612304688,
704
+ "logps/chosen": -285.8783264160156,
705
+ "logps/rejected": -281.76055908203125,
706
+ "loss": 0.384,
707
+ "pred_label": 2167.85009765625,
708
+ "rewards/accuracies": 0.7250000238418579,
709
+ "rewards/chosen": -0.7977985739707947,
710
+ "rewards/margins": 1.426555871963501,
711
+ "rewards/rejected": -2.2243542671203613,
712
  "step": 430,
713
+ "use_label": 4634.14990234375
714
  },
715
  {
716
  "epoch": 0.46,
717
  "learning_rate": 2.9976717112922005e-05,
718
+ "logits/chosen": -2.7989730834960938,
719
+ "logits/rejected": -2.8120064735412598,
720
+ "logps/chosen": -291.6087646484375,
721
+ "logps/rejected": -275.2794494628906,
722
+ "loss": 0.3699,
723
+ "pred_label": 2237.574951171875,
724
+ "rewards/accuracies": 0.6875,
725
+ "rewards/chosen": -0.8338401913642883,
726
+ "rewards/margins": 1.3944308757781982,
727
+ "rewards/rejected": -2.228271007537842,
728
  "step": 440,
729
+ "use_label": 4724.4248046875
730
  },
731
  {
732
  "epoch": 0.47,
733
  "learning_rate": 2.939464493597206e-05,
734
+ "logits/chosen": -2.5998082160949707,
735
+ "logits/rejected": -2.586780071258545,
736
+ "logps/chosen": -238.42855834960938,
737
+ "logps/rejected": -255.28445434570312,
738
+ "loss": 0.392,
739
+ "pred_label": 2305.5,
740
+ "rewards/accuracies": 0.731249988079071,
741
+ "rewards/chosen": -0.8709820508956909,
742
+ "rewards/margins": 1.692291021347046,
743
+ "rewards/rejected": -2.5632731914520264,
744
  "step": 450,
745
+ "use_label": 4816.5
746
  },
747
  {
748
  "epoch": 0.48,
749
  "learning_rate": 2.881257275902212e-05,
750
+ "logits/chosen": -2.800619125366211,
751
+ "logits/rejected": -2.7941062450408936,
752
+ "logps/chosen": -308.8480529785156,
753
+ "logps/rejected": -291.00689697265625,
754
+ "loss": 0.3698,
755
+ "pred_label": 2380.89990234375,
756
+ "rewards/accuracies": 0.706250011920929,
757
+ "rewards/chosen": -0.704586386680603,
758
+ "rewards/margins": 1.4961796998977661,
759
+ "rewards/rejected": -2.200766086578369,
760
  "step": 460,
761
+ "use_label": 4901.10009765625
762
  },
763
  {
764
  "epoch": 0.49,
765
  "learning_rate": 2.8230500582072178e-05,
766
+ "logits/chosen": -2.731538772583008,
767
+ "logits/rejected": -2.70729660987854,
768
+ "logps/chosen": -277.3286437988281,
769
+ "logps/rejected": -282.3288269042969,
770
+ "loss": 0.364,
771
+ "pred_label": 2453.449951171875,
772
+ "rewards/accuracies": 0.6937500238418579,
773
+ "rewards/chosen": -1.0228168964385986,
774
+ "rewards/margins": 1.525810956954956,
775
+ "rewards/rejected": -2.5486273765563965,
776
  "step": 470,
777
+ "use_label": 4988.5498046875
778
  },
779
  {
780
  "epoch": 0.5,
781
  "learning_rate": 2.7648428405122233e-05,
782
+ "logits/chosen": -2.6976053714752197,
783
+ "logits/rejected": -2.6677191257476807,
784
+ "logps/chosen": -255.345703125,
785
+ "logps/rejected": -273.9075012207031,
786
+ "loss": 0.3902,
787
+ "pred_label": 2528.52490234375,
788
+ "rewards/accuracies": 0.6499999761581421,
789
+ "rewards/chosen": -0.9771437644958496,
790
+ "rewards/margins": 1.5616614818572998,
791
+ "rewards/rejected": -2.5388054847717285,
792
  "step": 480,
793
+ "use_label": 5073.47509765625
794
  },
795
  {
796
  "epoch": 0.51,
797
  "learning_rate": 2.7066356228172297e-05,
798
+ "logits/chosen": -2.7429401874542236,
799
+ "logits/rejected": -2.7300381660461426,
800
+ "logps/chosen": -275.7494201660156,
801
+ "logps/rejected": -261.87689208984375,
802
+ "loss": 0.3725,
803
+ "pred_label": 2596.800048828125,
804
+ "rewards/accuracies": 0.71875,
805
+ "rewards/chosen": -0.8720412254333496,
806
+ "rewards/margins": 1.576229453086853,
807
+ "rewards/rejected": -2.448270559310913,
808
  "step": 490,
809
+ "use_label": 5165.2001953125
810
  },
811
  {
812
  "epoch": 0.52,
813
  "learning_rate": 2.6484284051222352e-05,
814
+ "logits/chosen": -2.7378077507019043,
815
+ "logits/rejected": -2.7030460834503174,
816
+ "logps/chosen": -297.50439453125,
817
+ "logps/rejected": -287.47821044921875,
818
+ "loss": 0.3548,
819
+ "pred_label": 2678.425048828125,
820
+ "rewards/accuracies": 0.7749999761581421,
821
+ "rewards/chosen": -0.8047893643379211,
822
+ "rewards/margins": 2.116807222366333,
823
+ "rewards/rejected": -2.9215962886810303,
824
  "step": 500,
825
+ "use_label": 5243.5751953125
826
  },
827
  {
828
  "epoch": 0.53,
829
  "learning_rate": 2.590221187427241e-05,
830
+ "logits/chosen": -2.7614152431488037,
831
+ "logits/rejected": -2.730996608734131,
832
+ "logps/chosen": -285.0855407714844,
833
+ "logps/rejected": -265.8699951171875,
834
+ "loss": 0.3337,
835
+ "pred_label": 2751.699951171875,
836
  "rewards/accuracies": 0.7749999761581421,
837
+ "rewards/chosen": -0.9748345613479614,
838
+ "rewards/margins": 2.262641429901123,
839
+ "rewards/rejected": -3.237475872039795,
840
  "step": 510,
841
+ "use_label": 5330.2998046875
842
  },
843
  {
844
  "epoch": 0.54,
845
  "learning_rate": 2.532013969732247e-05,
846
+ "logits/chosen": -2.777317523956299,
847
+ "logits/rejected": -2.761885404586792,
848
+ "logps/chosen": -297.0881042480469,
849
+ "logps/rejected": -294.2591247558594,
850
+ "loss": 0.391,
851
+ "pred_label": 2820.324951171875,
852
+ "rewards/accuracies": 0.7250000238418579,
853
+ "rewards/chosen": -0.5134023427963257,
854
+ "rewards/margins": 1.941035270690918,
855
+ "rewards/rejected": -2.454437732696533,
856
  "step": 520,
857
+ "use_label": 5421.6748046875
858
  },
859
  {
860
  "epoch": 0.55,
861
  "learning_rate": 2.4738067520372525e-05,
862
+ "logits/chosen": -2.779775381088257,
863
+ "logits/rejected": -2.7358291149139404,
864
+ "logps/chosen": -308.1401062011719,
865
+ "logps/rejected": -322.45196533203125,
866
+ "loss": 0.3465,
867
+ "pred_label": 2893.199951171875,
868
+ "rewards/accuracies": 0.7250000238418579,
869
+ "rewards/chosen": -0.7824206948280334,
870
+ "rewards/margins": 2.0653204917907715,
871
+ "rewards/rejected": -2.84774112701416,
872
  "step": 530,
873
+ "use_label": 5508.7998046875
874
  },
875
  {
876
  "epoch": 0.57,
877
  "learning_rate": 2.4155995343422587e-05,
878
+ "logits/chosen": -2.791782855987549,
879
+ "logits/rejected": -2.7862627506256104,
880
+ "logps/chosen": -296.45611572265625,
881
+ "logps/rejected": -298.04638671875,
882
+ "loss": 0.3581,
883
+ "pred_label": 2980.824951171875,
884
+ "rewards/accuracies": 0.7875000238418579,
885
+ "rewards/chosen": -0.7919517755508423,
886
+ "rewards/margins": 2.415773868560791,
887
+ "rewards/rejected": -3.2077255249023438,
888
  "step": 540,
889
+ "use_label": 5581.1748046875
890
  },
891
  {
892
  "epoch": 0.58,
893
  "learning_rate": 2.3573923166472644e-05,
894
+ "logits/chosen": -2.761887311935425,
895
+ "logits/rejected": -2.735725164413452,
896
+ "logps/chosen": -296.20660400390625,
897
+ "logps/rejected": -290.74456787109375,
898
+ "loss": 0.3915,
899
+ "pred_label": 3058.625,
900
+ "rewards/accuracies": 0.7250000238418579,
901
+ "rewards/chosen": -0.7003430128097534,
902
+ "rewards/margins": 2.179884433746338,
903
+ "rewards/rejected": -2.880227565765381,
904
  "step": 550,
905
+ "use_label": 5663.375
906
  },
907
  {
908
  "epoch": 0.59,
909
  "learning_rate": 2.2991850989522702e-05,
910
+ "logits/chosen": -2.7279725074768066,
911
+ "logits/rejected": -2.742814540863037,
912
+ "logps/chosen": -283.19342041015625,
913
+ "logps/rejected": -241.63967895507812,
914
+ "loss": 0.3821,
915
+ "pred_label": 3127.85009765625,
916
+ "rewards/accuracies": 0.737500011920929,
917
+ "rewards/chosen": -0.8789056539535522,
918
+ "rewards/margins": 1.5364501476287842,
919
+ "rewards/rejected": -2.415355920791626,
920
  "step": 560,
921
+ "use_label": 5754.14990234375
922
  },
923
  {
924
  "epoch": 0.6,
925
  "learning_rate": 2.240977881257276e-05,
926
+ "logits/chosen": -2.757524251937866,
927
+ "logits/rejected": -2.7482261657714844,
928
+ "logps/chosen": -276.85345458984375,
929
+ "logps/rejected": -283.73046875,
930
+ "loss": 0.4022,
931
+ "pred_label": 3200.25,
932
+ "rewards/accuracies": 0.7250000238418579,
933
+ "rewards/chosen": -0.736481785774231,
934
+ "rewards/margins": 1.4332786798477173,
935
+ "rewards/rejected": -2.1697604656219482,
936
  "step": 570,
937
+ "use_label": 5841.75
938
  },
939
  {
940
  "epoch": 0.61,
941
  "learning_rate": 2.1827706635622818e-05,
942
+ "logits/chosen": -2.7960715293884277,
943
+ "logits/rejected": -2.7829785346984863,
944
+ "logps/chosen": -282.49871826171875,
945
+ "logps/rejected": -286.5024719238281,
946
+ "loss": 0.404,
947
+ "pred_label": 3271.77490234375,
948
+ "rewards/accuracies": 0.7875000238418579,
949
+ "rewards/chosen": -0.4390278458595276,
950
+ "rewards/margins": 1.7644996643066406,
951
+ "rewards/rejected": -2.2035276889801025,
952
  "step": 580,
953
+ "use_label": 5930.22509765625
954
  },
955
  {
956
  "epoch": 0.62,
957
  "learning_rate": 2.124563445867288e-05,
958
+ "logits/chosen": -2.7188072204589844,
959
+ "logits/rejected": -2.70890474319458,
960
+ "logps/chosen": -278.3678894042969,
961
+ "logps/rejected": -304.47906494140625,
962
+ "loss": 0.3951,
963
+ "pred_label": 3332.175048828125,
964
  "rewards/accuracies": 0.6875,
965
+ "rewards/chosen": -0.6770004034042358,
966
+ "rewards/margins": 1.6928961277008057,
967
+ "rewards/rejected": -2.369896411895752,
968
  "step": 590,
969
+ "use_label": 6029.8251953125
970
  },
971
  {
972
  "epoch": 0.63,
973
  "learning_rate": 2.0663562281722934e-05,
974
+ "logits/chosen": -2.788299322128296,
975
+ "logits/rejected": -2.7893383502960205,
976
+ "logps/chosen": -314.5467529296875,
977
+ "logps/rejected": -325.0448913574219,
978
+ "loss": 0.3942,
979
+ "pred_label": 3400.875,
980
+ "rewards/accuracies": 0.762499988079071,
981
+ "rewards/chosen": -0.11838535964488983,
982
+ "rewards/margins": 2.110067129135132,
983
+ "rewards/rejected": -2.228452205657959,
984
  "step": 600,
985
+ "use_label": 6121.125
986
  },
987
  {
988
  "epoch": 0.64,
989
  "learning_rate": 2.0081490104772992e-05,
990
+ "logits/chosen": -2.7528319358825684,
991
+ "logits/rejected": -2.7486462593078613,
992
+ "logps/chosen": -311.63922119140625,
993
+ "logps/rejected": -263.623046875,
994
+ "loss": 0.3407,
995
+ "pred_label": 3483.35009765625,
996
+ "rewards/accuracies": 0.7250000238418579,
997
+ "rewards/chosen": -0.5729966163635254,
998
+ "rewards/margins": 1.8266725540161133,
999
+ "rewards/rejected": -2.3996691703796387,
1000
  "step": 610,
1001
+ "use_label": 6198.64990234375
1002
  },
1003
  {
1004
  "epoch": 0.65,
1005
  "learning_rate": 1.9499417927823053e-05,
1006
+ "logits/chosen": -2.756082057952881,
1007
+ "logits/rejected": -2.706552028656006,
1008
+ "logps/chosen": -288.4559631347656,
1009
+ "logps/rejected": -281.10784912109375,
1010
+ "loss": 0.3528,
1011
+ "pred_label": 3560.199951171875,
1012
+ "rewards/accuracies": 0.7875000238418579,
1013
+ "rewards/chosen": -0.847445011138916,
1014
+ "rewards/margins": 2.0444235801696777,
1015
+ "rewards/rejected": -2.891869068145752,
1016
  "step": 620,
1017
+ "use_label": 6281.7998046875
1018
  },
1019
  {
1020
  "epoch": 0.66,
1021
  "learning_rate": 1.8917345750873107e-05,
1022
+ "logits/chosen": -2.754945993423462,
1023
+ "logits/rejected": -2.764570951461792,
1024
+ "logps/chosen": -290.9334411621094,
1025
+ "logps/rejected": -289.19891357421875,
1026
+ "loss": 0.3691,
1027
+ "pred_label": 3639.375,
1028
  "rewards/accuracies": 0.7749999761581421,
1029
+ "rewards/chosen": -0.5813703536987305,
1030
+ "rewards/margins": 2.221205949783325,
1031
+ "rewards/rejected": -2.8025763034820557,
1032
  "step": 630,
1033
+ "use_label": 6362.625
1034
  },
1035
  {
1036
  "epoch": 0.67,
1037
  "learning_rate": 1.833527357392317e-05,
1038
+ "logits/chosen": -2.7366719245910645,
1039
+ "logits/rejected": -2.7163639068603516,
1040
+ "logps/chosen": -257.1309509277344,
1041
+ "logps/rejected": -274.4014892578125,
1042
+ "loss": 0.3542,
1043
+ "pred_label": 3707.27490234375,
1044
+ "rewards/accuracies": 0.7749999761581421,
1045
+ "rewards/chosen": -0.3939729332923889,
1046
+ "rewards/margins": 2.188854217529297,
1047
+ "rewards/rejected": -2.582827091217041,
1048
  "step": 640,
1049
+ "use_label": 6454.72509765625
1050
  },
1051
  {
1052
  "epoch": 0.68,
1053
  "learning_rate": 1.7753201396973227e-05,
1054
+ "logits/chosen": -2.7500858306884766,
1055
+ "logits/rejected": -2.72772216796875,
1056
+ "logps/chosen": -308.90020751953125,
1057
+ "logps/rejected": -307.0010070800781,
1058
+ "loss": 0.3495,
1059
+ "pred_label": 3787.10009765625,
1060
+ "rewards/accuracies": 0.793749988079071,
1061
+ "rewards/chosen": -0.6951545476913452,
1062
+ "rewards/margins": 2.2066218852996826,
1063
+ "rewards/rejected": -2.9017765522003174,
1064
  "step": 650,
1065
+ "use_label": 6534.89990234375
1066
  },
1067
  {
1068
  "epoch": 0.69,
1069
  "learning_rate": 1.717112922002328e-05,
1070
+ "logits/chosen": -2.798872232437134,
1071
+ "logits/rejected": -2.7752318382263184,
1072
+ "logps/chosen": -304.62677001953125,
1073
+ "logps/rejected": -275.2629089355469,
1074
+ "loss": 0.3558,
1075
+ "pred_label": 3870.0,
1076
+ "rewards/accuracies": 0.6812499761581421,
1077
+ "rewards/chosen": -0.9263059496879578,
1078
+ "rewards/margins": 1.612428069114685,
1079
+ "rewards/rejected": -2.538734197616577,
1080
  "step": 660,
1081
+ "use_label": 6612.0
1082
  },
1083
  {
1084
  "epoch": 0.7,
1085
  "learning_rate": 1.6589057043073342e-05,
1086
+ "logits/chosen": -2.7448298931121826,
1087
+ "logits/rejected": -2.7179744243621826,
1088
+ "logps/chosen": -289.99267578125,
1089
+ "logps/rejected": -273.59368896484375,
1090
+ "loss": 0.3439,
1091
+ "pred_label": 3954.050048828125,
1092
+ "rewards/accuracies": 0.7749999761581421,
1093
+ "rewards/chosen": -0.8919633030891418,
1094
+ "rewards/margins": 2.3276610374450684,
1095
+ "rewards/rejected": -3.2196242809295654,
1096
  "step": 670,
1097
+ "use_label": 6687.9501953125
1098
  },
1099
  {
1100
  "epoch": 0.71,
1101
  "learning_rate": 1.60069848661234e-05,
1102
+ "logits/chosen": -2.7539925575256348,
1103
+ "logits/rejected": -2.757519006729126,
1104
+ "logps/chosen": -295.6678771972656,
1105
+ "logps/rejected": -280.3077392578125,
1106
+ "loss": 0.3793,
1107
+ "pred_label": 4029.449951171875,
1108
+ "rewards/accuracies": 0.6625000238418579,
1109
+ "rewards/chosen": -0.8732484579086304,
1110
+ "rewards/margins": 1.334531545639038,
1111
+ "rewards/rejected": -2.207780122756958,
1112
  "step": 680,
1113
+ "use_label": 6772.5498046875
1114
  },
1115
  {
1116
  "epoch": 0.72,
1117
  "learning_rate": 1.5424912689173458e-05,
1118
+ "logits/chosen": -2.736889123916626,
1119
+ "logits/rejected": -2.7280757427215576,
1120
+ "logps/chosen": -257.803466796875,
1121
+ "logps/rejected": -238.80056762695312,
1122
+ "loss": 0.334,
1123
+ "pred_label": 4103.75,
1124
  "rewards/accuracies": 0.7749999761581421,
1125
+ "rewards/chosen": -0.7419306039810181,
1126
+ "rewards/margins": 2.2096810340881348,
1127
+ "rewards/rejected": -2.9516117572784424,
1128
  "step": 690,
1129
+ "use_label": 6858.25
1130
  },
1131
  {
1132
  "epoch": 0.73,
1133
  "learning_rate": 1.4842840512223516e-05,
1134
+ "logits/chosen": -2.7456727027893066,
1135
+ "logits/rejected": -2.7135043144226074,
1136
+ "logps/chosen": -286.5834655761719,
1137
+ "logps/rejected": -281.8011169433594,
1138
+ "loss": 0.323,
1139
+ "pred_label": 4186.8251953125,
1140
+ "rewards/accuracies": 0.737500011920929,
1141
+ "rewards/chosen": -1.0893428325653076,
1142
+ "rewards/margins": 2.2629120349884033,
1143
+ "rewards/rejected": -3.352254867553711,
1144
  "step": 700,
1145
+ "use_label": 6935.1748046875
1146
  },
1147
  {
1148
  "epoch": 0.74,
1149
  "learning_rate": 1.4260768335273575e-05,
1150
+ "logits/chosen": -2.7708964347839355,
1151
+ "logits/rejected": -2.745072603225708,
1152
+ "logps/chosen": -298.2213134765625,
1153
+ "logps/rejected": -270.7209167480469,
1154
+ "loss": 0.3476,
1155
+ "pred_label": 4265.2998046875,
1156
+ "rewards/accuracies": 0.737500011920929,
1157
+ "rewards/chosen": -0.8646717071533203,
1158
+ "rewards/margins": 2.080548048019409,
1159
+ "rewards/rejected": -2.9452195167541504,
1160
  "step": 710,
1161
+ "use_label": 7016.7001953125
1162
  },
1163
  {
1164
  "epoch": 0.75,
1165
  "learning_rate": 1.3678696158323633e-05,
1166
+ "logits/chosen": -2.7377257347106934,
1167
+ "logits/rejected": -2.7467942237854004,
1168
+ "logps/chosen": -283.3219909667969,
1169
+ "logps/rejected": -291.44952392578125,
1170
+ "loss": 0.2997,
1171
+ "pred_label": 4351.2998046875,
1172
  "rewards/accuracies": 0.78125,
1173
+ "rewards/chosen": -1.1869792938232422,
1174
+ "rewards/margins": 2.617690086364746,
1175
+ "rewards/rejected": -3.8046698570251465,
1176
  "step": 720,
1177
+ "use_label": 7090.7001953125
1178
  },
1179
  {
1180
  "epoch": 0.76,
1181
  "learning_rate": 1.309662398137369e-05,
1182
+ "logits/chosen": -2.7232143878936768,
1183
+ "logits/rejected": -2.7313239574432373,
1184
+ "logps/chosen": -309.1606750488281,
1185
+ "logps/rejected": -270.8785095214844,
1186
+ "loss": 0.3276,
1187
+ "pred_label": 4437.2998046875,
1188
+ "rewards/accuracies": 0.699999988079071,
1189
+ "rewards/chosen": -1.171083927154541,
1190
+ "rewards/margins": 1.749921202659607,
1191
+ "rewards/rejected": -2.9210047721862793,
1192
  "step": 730,
1193
+ "use_label": 7164.7001953125
1194
  },
1195
  {
1196
  "epoch": 0.77,
1197
  "learning_rate": 1.2514551804423749e-05,
1198
+ "logits/chosen": -2.7109317779541016,
1199
+ "logits/rejected": -2.722676992416382,
1200
+ "logps/chosen": -320.5135192871094,
1201
+ "logps/rejected": -325.51531982421875,
1202
+ "loss": 0.3404,
1203
+ "pred_label": 4520.64990234375,
1204
+ "rewards/accuracies": 0.7875000238418579,
1205
+ "rewards/chosen": -1.3141722679138184,
1206
+ "rewards/margins": 2.5884618759155273,
1207
+ "rewards/rejected": -3.902634382247925,
1208
  "step": 740,
1209
+ "use_label": 7241.35009765625
1210
  },
1211
  {
1212
  "epoch": 0.79,
1213
  "learning_rate": 1.1932479627473807e-05,
1214
+ "logits/chosen": -2.71235990524292,
1215
+ "logits/rejected": -2.6860625743865967,
1216
+ "logps/chosen": -276.7895202636719,
1217
+ "logps/rejected": -302.22442626953125,
1218
+ "loss": 0.3014,
1219
+ "pred_label": 4610.0751953125,
1220
+ "rewards/accuracies": 0.7749999761581421,
1221
+ "rewards/chosen": -1.6301307678222656,
1222
+ "rewards/margins": 2.539576292037964,
1223
+ "rewards/rejected": -4.169707298278809,
1224
  "step": 750,
1225
+ "use_label": 7311.9248046875
1226
  },
1227
  {
1228
  "epoch": 0.8,
1229
  "learning_rate": 1.1350407450523866e-05,
1230
+ "logits/chosen": -2.7227749824523926,
1231
+ "logits/rejected": -2.76944899559021,
1232
+ "logps/chosen": -306.2732238769531,
1233
+ "logps/rejected": -296.8711242675781,
1234
+ "loss": 0.323,
1235
+ "pred_label": 4696.0751953125,
1236
+ "rewards/accuracies": 0.7749999761581421,
1237
+ "rewards/chosen": -1.0810390710830688,
1238
+ "rewards/margins": 2.3947594165802,
1239
+ "rewards/rejected": -3.4757981300354004,
1240
  "step": 760,
1241
+ "use_label": 7385.9248046875
1242
  },
1243
  {
1244
  "epoch": 0.81,
1245
  "learning_rate": 1.0768335273573923e-05,
1246
+ "logits/chosen": -2.7785048484802246,
1247
+ "logits/rejected": -2.7769179344177246,
1248
+ "logps/chosen": -313.1297607421875,
1249
+ "logps/rejected": -317.2922058105469,
1250
+ "loss": 0.3118,
1251
+ "pred_label": 4793.8251953125,
1252
+ "rewards/accuracies": 0.731249988079071,
1253
+ "rewards/chosen": -1.0833934545516968,
1254
+ "rewards/margins": 2.1734704971313477,
1255
+ "rewards/rejected": -3.256863832473755,
1256
  "step": 770,
1257
+ "use_label": 7448.1748046875
1258
  },
1259
  {
1260
  "epoch": 0.82,
1261
  "learning_rate": 1.0186263096623982e-05,
1262
+ "logits/chosen": -2.7547028064727783,
1263
+ "logits/rejected": -2.7470812797546387,
1264
+ "logps/chosen": -294.8021545410156,
1265
+ "logps/rejected": -284.2214050292969,
1266
+ "loss": 0.3235,
1267
+ "pred_label": 4885.39990234375,
1268
+ "rewards/accuracies": 0.71875,
1269
+ "rewards/chosen": -1.3608357906341553,
1270
+ "rewards/margins": 2.3477611541748047,
1271
+ "rewards/rejected": -3.708597183227539,
1272
  "step": 780,
1273
+ "use_label": 7516.60009765625
1274
  },
1275
  {
1276
  "epoch": 0.83,
1277
  "learning_rate": 9.60419091967404e-06,
1278
+ "logits/chosen": -2.7538862228393555,
1279
+ "logits/rejected": -2.7440714836120605,
1280
+ "logps/chosen": -283.4197998046875,
1281
+ "logps/rejected": -299.2707214355469,
1282
+ "loss": 0.335,
1283
+ "pred_label": 4973.375,
1284
+ "rewards/accuracies": 0.71875,
1285
+ "rewards/chosen": -1.5610417127609253,
1286
+ "rewards/margins": 2.2930331230163574,
1287
+ "rewards/rejected": -3.8540749549865723,
1288
  "step": 790,
1289
+ "use_label": 7588.625
1290
  },
1291
  {
1292
  "epoch": 0.84,
1293
  "learning_rate": 9.022118742724098e-06,
1294
+ "logits/chosen": -2.6376984119415283,
1295
+ "logits/rejected": -2.625143051147461,
1296
+ "logps/chosen": -329.54010009765625,
1297
+ "logps/rejected": -318.21673583984375,
1298
+ "loss": 0.3064,
1299
+ "pred_label": 5072.7998046875,
1300
+ "rewards/accuracies": 0.7437499761581421,
1301
+ "rewards/chosen": -1.5068175792694092,
1302
+ "rewards/margins": 2.708643913269043,
1303
+ "rewards/rejected": -4.215461254119873,
1304
  "step": 800,
1305
+ "use_label": 7649.2001953125
1306
  },
1307
  {
1308
  "epoch": 0.85,
1309
  "learning_rate": 8.440046565774158e-06,
1310
+ "logits/chosen": -2.6940813064575195,
1311
+ "logits/rejected": -2.6780383586883545,
1312
+ "logps/chosen": -297.8315124511719,
1313
+ "logps/rejected": -264.34185791015625,
1314
+ "loss": 0.3022,
1315
+ "pred_label": 5162.1748046875,
1316
+ "rewards/accuracies": 0.706250011920929,
1317
+ "rewards/chosen": -1.6807565689086914,
1318
+ "rewards/margins": 1.872127890586853,
1319
+ "rewards/rejected": -3.552884578704834,
1320
  "step": 810,
1321
+ "use_label": 7719.8251953125
1322
  },
1323
  {
1324
  "epoch": 0.86,
1325
  "learning_rate": 7.857974388824214e-06,
1326
+ "logits/chosen": -2.7031378746032715,
1327
+ "logits/rejected": -2.714179277420044,
1328
+ "logps/chosen": -305.0399169921875,
1329
+ "logps/rejected": -282.8117980957031,
1330
+ "loss": 0.3364,
1331
+ "pred_label": 5257.125,
1332
+ "rewards/accuracies": 0.78125,
1333
+ "rewards/chosen": -1.1721713542938232,
1334
+ "rewards/margins": 2.952846050262451,
1335
+ "rewards/rejected": -4.125017166137695,
1336
  "step": 820,
1337
+ "use_label": 7784.875
1338
  },
1339
  {
1340
  "epoch": 0.87,
1341
  "learning_rate": 7.275902211874273e-06,
1342
+ "logits/chosen": -2.7300140857696533,
1343
+ "logits/rejected": -2.728454113006592,
1344
+ "logps/chosen": -278.2652282714844,
1345
+ "logps/rejected": -319.8196716308594,
1346
+ "loss": 0.3174,
1347
+ "pred_label": 5347.625,
1348
+ "rewards/accuracies": 0.737500011920929,
1349
+ "rewards/chosen": -1.5451847314834595,
1350
+ "rewards/margins": 2.416550636291504,
1351
+ "rewards/rejected": -3.961735486984253,
1352
  "step": 830,
1353
+ "use_label": 7854.375
1354
  },
1355
  {
1356
  "epoch": 0.88,
1357
  "learning_rate": 6.693830034924331e-06,
1358
+ "logits/chosen": -2.716839551925659,
1359
+ "logits/rejected": -2.7346701622009277,
1360
+ "logps/chosen": -283.65179443359375,
1361
+ "logps/rejected": -295.53912353515625,
1362
+ "loss": 0.3542,
1363
+ "pred_label": 5434.0751953125,
1364
+ "rewards/accuracies": 0.699999988079071,
1365
+ "rewards/chosen": -1.5634657144546509,
1366
+ "rewards/margins": 1.5420477390289307,
1367
+ "rewards/rejected": -3.105513334274292,
1368
  "step": 840,
1369
+ "use_label": 7927.9248046875
1370
  },
1371
  {
1372
  "epoch": 0.89,
1373
  "learning_rate": 6.111757857974389e-06,
1374
+ "logits/chosen": -2.722932815551758,
1375
+ "logits/rejected": -2.772580623626709,
1376
+ "logps/chosen": -308.41778564453125,
1377
+ "logps/rejected": -278.7804260253906,
1378
+ "loss": 0.3299,
1379
+ "pred_label": 5507.02490234375,
1380
  "rewards/accuracies": 0.7749999761581421,
1381
+ "rewards/chosen": -1.3390918970108032,
1382
+ "rewards/margins": 2.2028300762176514,
1383
+ "rewards/rejected": -3.541922092437744,
1384
  "step": 850,
1385
+ "use_label": 8014.97509765625
1386
  },
1387
  {
1388
  "epoch": 0.9,
1389
  "learning_rate": 5.529685681024447e-06,
1390
+ "logits/chosen": -2.7165842056274414,
1391
+ "logits/rejected": -2.7188987731933594,
1392
+ "logps/chosen": -300.7062683105469,
1393
+ "logps/rejected": -310.7410888671875,
1394
+ "loss": 0.3504,
1395
+ "pred_label": 5592.375,
1396
+ "rewards/accuracies": 0.7437499761581421,
1397
+ "rewards/chosen": -1.582289457321167,
1398
+ "rewards/margins": 2.2427265644073486,
1399
+ "rewards/rejected": -3.8250160217285156,
1400
  "step": 860,
1401
+ "use_label": 8089.625
1402
  },
1403
  {
1404
  "epoch": 0.91,
1405
  "learning_rate": 4.947613504074506e-06,
1406
+ "logits/chosen": -2.7352089881896973,
1407
+ "logits/rejected": -2.74865984916687,
1408
+ "logps/chosen": -255.16845703125,
1409
+ "logps/rejected": -277.4416198730469,
1410
+ "loss": 0.3527,
1411
+ "pred_label": 5676.5,
1412
+ "rewards/accuracies": 0.731249988079071,
1413
+ "rewards/chosen": -1.218096137046814,
1414
+ "rewards/margins": 2.5011773109436035,
1415
+ "rewards/rejected": -3.719273328781128,
1416
  "step": 870,
1417
+ "use_label": 8165.5
1418
  },
1419
  {
1420
  "epoch": 0.92,
1421
  "learning_rate": 4.3655413271245635e-06,
1422
+ "logits/chosen": -2.754638195037842,
1423
+ "logits/rejected": -2.7097582817077637,
1424
+ "logps/chosen": -298.7574157714844,
1425
+ "logps/rejected": -294.0702209472656,
1426
+ "loss": 0.3122,
1427
+ "pred_label": 5764.7001953125,
1428
+ "rewards/accuracies": 0.7437499761581421,
1429
+ "rewards/chosen": -1.216820240020752,
1430
+ "rewards/margins": 2.025149345397949,
1431
+ "rewards/rejected": -3.241969585418701,
1432
  "step": 880,
1433
+ "use_label": 8237.2998046875
1434
  },
1435
  {
1436
  "epoch": 0.93,
1437
  "learning_rate": 3.7834691501746217e-06,
1438
+ "logits/chosen": -2.761935234069824,
1439
+ "logits/rejected": -2.767670154571533,
1440
+ "logps/chosen": -291.3049011230469,
1441
+ "logps/rejected": -307.02984619140625,
1442
+ "loss": 0.3179,
1443
+ "pred_label": 5856.4248046875,
1444
+ "rewards/accuracies": 0.8062499761581421,
1445
+ "rewards/chosen": -1.0020384788513184,
1446
+ "rewards/margins": 2.4981894493103027,
1447
+ "rewards/rejected": -3.5002281665802,
1448
  "step": 890,
1449
+ "use_label": 8305.5751953125
1450
  },
1451
  {
1452
  "epoch": 0.94,
1453
  "learning_rate": 3.2013969732246805e-06,
1454
+ "logits/chosen": -2.6637260913848877,
1455
+ "logits/rejected": -2.6392276287078857,
1456
+ "logps/chosen": -291.3471374511719,
1457
+ "logps/rejected": -295.1965637207031,
1458
+ "loss": 0.3437,
1459
+ "pred_label": 5942.14990234375,
1460
+ "rewards/accuracies": 0.78125,
1461
+ "rewards/chosen": -1.2209066152572632,
1462
+ "rewards/margins": 2.44693660736084,
1463
+ "rewards/rejected": -3.6678433418273926,
1464
  "step": 900,
1465
+ "use_label": 8379.849609375
1466
  },
1467
  {
1468
  "epoch": 0.95,
1469
  "learning_rate": 2.6193247962747383e-06,
1470
+ "logits/chosen": -2.7372214794158936,
1471
+ "logits/rejected": -2.737588405609131,
1472
+ "logps/chosen": -308.132568359375,
1473
+ "logps/rejected": -295.11187744140625,
1474
+ "loss": 0.3504,
1475
+ "pred_label": 6017.4501953125,
1476
+ "rewards/accuracies": 0.762499988079071,
1477
+ "rewards/chosen": -1.17165207862854,
1478
+ "rewards/margins": 2.026106834411621,
1479
+ "rewards/rejected": -3.1977591514587402,
1480
  "step": 910,
1481
+ "use_label": 8464.5498046875
1482
  },
1483
  {
1484
  "epoch": 0.96,
1485
  "learning_rate": 2.037252619324796e-06,
1486
+ "logits/chosen": -2.6864240169525146,
1487
+ "logits/rejected": -2.693215847015381,
1488
+ "logps/chosen": -308.95989990234375,
1489
+ "logps/rejected": -296.0351867675781,
1490
+ "loss": 0.3214,
1491
+ "pred_label": 6108.375,
1492
+ "rewards/accuracies": 0.731249988079071,
1493
+ "rewards/chosen": -1.2072199583053589,
1494
+ "rewards/margins": 2.437824249267578,
1495
+ "rewards/rejected": -3.6450438499450684,
1496
  "step": 920,
1497
+ "use_label": 8533.625
1498
  },
1499
  {
1500
  "epoch": 0.97,
1501
  "learning_rate": 1.4551804423748545e-06,
1502
+ "logits/chosen": -2.7318947315216064,
1503
+ "logits/rejected": -2.6931326389312744,
1504
+ "logps/chosen": -274.75,
1505
+ "logps/rejected": -280.5450134277344,
1506
+ "loss": 0.3077,
1507
+ "pred_label": 6194.10009765625,
1508
+ "rewards/accuracies": 0.824999988079071,
1509
+ "rewards/chosen": -0.6536897420883179,
1510
+ "rewards/margins": 3.221295118331909,
1511
+ "rewards/rejected": -3.8749847412109375,
1512
  "step": 930,
1513
+ "use_label": 8607.900390625
1514
  },
1515
  {
1516
  "epoch": 0.98,
1517
  "learning_rate": 8.731082654249127e-07,
1518
+ "logits/chosen": -2.764519214630127,
1519
+ "logits/rejected": -2.7424118518829346,
1520
+ "logps/chosen": -302.68206787109375,
1521
+ "logps/rejected": -300.8379821777344,
1522
+ "loss": 0.3294,
1523
+ "pred_label": 6278.64990234375,
1524
+ "rewards/accuracies": 0.737500011920929,
1525
+ "rewards/chosen": -1.528961181640625,
1526
+ "rewards/margins": 1.8663454055786133,
1527
+ "rewards/rejected": -3.3953068256378174,
1528
  "step": 940,
1529
+ "use_label": 8683.349609375
1530
  },
1531
  {
1532
  "epoch": 0.99,
1533
  "learning_rate": 2.910360884749709e-07,
1534
+ "logits/chosen": -2.7359890937805176,
1535
+ "logits/rejected": -2.679305076599121,
1536
+ "logps/chosen": -283.5569152832031,
1537
+ "logps/rejected": -291.2298889160156,
1538
+ "loss": 0.333,
1539
+ "pred_label": 6368.97509765625,
1540
+ "rewards/accuracies": 0.737500011920929,
1541
+ "rewards/chosen": -1.5102250576019287,
1542
+ "rewards/margins": 2.3179523944854736,
1543
+ "rewards/rejected": -3.8281772136688232,
1544
  "step": 950,
1545
+ "use_label": 8753.025390625
1546
  },
1547
  {
1548
  "epoch": 1.0,
1549
+ "eval_logits/chosen": -2.742107391357422,
1550
+ "eval_logits/rejected": -2.739009141921997,
1551
+ "eval_logps/chosen": -295.89080810546875,
1552
+ "eval_logps/rejected": -294.5900573730469,
1553
+ "eval_loss": 0.3439472019672394,
1554
+ "eval_pred_label": 6581.2001953125,
1555
+ "eval_rewards/accuracies": 0.7419999837875366,
1556
+ "eval_rewards/chosen": -1.163341999053955,
1557
+ "eval_rewards/margins": 2.3656928539276123,
1558
+ "eval_rewards/rejected": -3.5290346145629883,
1559
+ "eval_runtime": 460.9775,
1560
+ "eval_samples_per_second": 4.339,
1561
+ "eval_steps_per_second": 0.271,
1562
+ "eval_use_label": 8950.7998046875,
1563
  "step": 955
1564
  },
1565
  {
1566
  "epoch": 1.0,
1567
  "step": 955,
1568
  "total_flos": 0.0,
1569
+ "train_loss": 0.39464539333163756,
1570
+ "train_runtime": 25354.6696,
1571
+ "train_samples_per_second": 2.411,
1572
  "train_steps_per_second": 0.038
1573
  }
1574
  ],