jikaixuan commited on
Commit
fcf093e
1 Parent(s): 8d98a46

Model save

Browse files
README.md CHANGED
@@ -15,15 +15,17 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on the None dataset.
17
  It achieves the following results on the evaluation set:
18
- - Loss: 0.6409
19
- - Rewards/chosen: 0.0197
20
- - Rewards/rejected: -0.0229
21
- - Rewards/accuracies: 0.6130
22
- - Rewards/margins: 0.0426
23
- - Logps/rejected: -253.1684
24
- - Logps/chosen: -269.3594
25
- - Logits/rejected: -2.4973
26
- - Logits/chosen: -2.4954
 
 
27
 
28
  ## Model description
29
 
@@ -47,10 +49,10 @@ The following hyperparameters were used during training:
47
  - eval_batch_size: 4
48
  - seed: 42
49
  - distributed_type: multi-GPU
50
- - num_devices: 2
51
- - gradient_accumulation_steps: 8
52
  - total_train_batch_size: 64
53
- - total_eval_batch_size: 8
54
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
55
  - lr_scheduler_type: linear
56
  - lr_scheduler_warmup_ratio: 0.1
@@ -58,9 +60,9 @@ The following hyperparameters were used during training:
58
 
59
  ### Training results
60
 
61
- | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
62
- |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
63
- | 0.6468 | 1.0 | 955 | 0.6409 | 0.0197 | -0.0229 | 0.6130 | 0.0426 | -253.1684 | -269.3594 | -2.4973 | -2.4954 |
64
 
65
 
66
  ### Framework versions
 
15
 
16
  This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on the None dataset.
17
  It achieves the following results on the evaluation set:
18
+ - Loss: 0.6351
19
+ - Rewards/chosen: 0.0300
20
+ - Rewards/rejected: -0.0335
21
+ - Rewards/accuracies: 0.6200
22
+ - Rewards/margins: 0.0635
23
+ - Logps/rejected: -250.2864
24
+ - Logps/chosen: -272.9344
25
+ - Logits/rejected: -2.5200
26
+ - Logits/chosen: -2.5063
27
+ - Use Label: 7566.9282
28
+ - Pred Label: 8465.0723
29
 
30
  ## Model description
31
 
 
49
  - eval_batch_size: 4
50
  - seed: 42
51
  - distributed_type: multi-GPU
52
+ - num_devices: 4
53
+ - gradient_accumulation_steps: 4
54
  - total_train_batch_size: 64
55
+ - total_eval_batch_size: 16
56
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
57
  - lr_scheduler_type: linear
58
  - lr_scheduler_warmup_ratio: 0.1
 
60
 
61
  ### Training results
62
 
63
+ | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen | Use Label | Pred Label |
64
+ |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|:---------:|:----------:|
65
+ | 0.6403 | 1.0 | 955 | 0.6351 | 0.0300 | -0.0335 | 0.6200 | 0.0635 | -250.2864 | -272.9344 | -2.5200 | -2.5063 | 7400.9282 | 8131.0718 |
66
 
67
 
68
  ### Framework versions
all_results.json CHANGED
@@ -1,21 +1,23 @@
1
  {
2
  "epoch": 1.0,
3
- "eval_logits/chosen": -2.4944536685943604,
4
- "eval_logits/rejected": -2.4963433742523193,
5
- "eval_logps/chosen": -269.25555419921875,
6
- "eval_logps/rejected": -253.21238708496094,
7
- "eval_loss": 0.6399702429771423,
8
- "eval_rewards/accuracies": 0.6370000243186951,
9
- "eval_rewards/chosen": 0.030110126361250877,
10
- "eval_rewards/margins": 0.05743245780467987,
11
- "eval_rewards/rejected": -0.027322327718138695,
12
- "eval_runtime": 803.6977,
 
13
  "eval_samples": 2000,
14
- "eval_samples_per_second": 2.488,
15
- "eval_steps_per_second": 0.311,
16
- "train_loss": 0.6598132096035942,
17
- "train_runtime": 45126.4521,
 
18
  "train_samples": 61135,
19
- "train_samples_per_second": 1.355,
20
- "train_steps_per_second": 0.021
21
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "eval_logits/chosen": -2.5062618255615234,
4
+ "eval_logits/rejected": -2.5199859142303467,
5
+ "eval_logps/chosen": -272.93438720703125,
6
+ "eval_logps/rejected": -250.28643798828125,
7
+ "eval_loss": 0.6350826025009155,
8
+ "eval_pred_label": 8465.072265625,
9
+ "eval_rewards/accuracies": 0.6200000047683716,
10
+ "eval_rewards/chosen": 0.029984984546899796,
11
+ "eval_rewards/margins": 0.06345725804567337,
12
+ "eval_rewards/rejected": -0.033472273498773575,
13
+ "eval_runtime": 446.4868,
14
  "eval_samples": 2000,
15
+ "eval_samples_per_second": 4.479,
16
+ "eval_steps_per_second": 0.28,
17
+ "eval_use_label": 7566.92822265625,
18
+ "train_loss": 0.6552608817035616,
19
+ "train_runtime": 24261.6882,
20
  "train_samples": 61135,
21
+ "train_samples_per_second": 2.52,
22
+ "train_steps_per_second": 0.039
23
  }
eval_results.json CHANGED
@@ -1,16 +1,18 @@
1
  {
2
  "epoch": 1.0,
3
- "eval_logits/chosen": -2.4944536685943604,
4
- "eval_logits/rejected": -2.4963433742523193,
5
- "eval_logps/chosen": -269.25555419921875,
6
- "eval_logps/rejected": -253.21238708496094,
7
- "eval_loss": 0.6399702429771423,
8
- "eval_rewards/accuracies": 0.6370000243186951,
9
- "eval_rewards/chosen": 0.030110126361250877,
10
- "eval_rewards/margins": 0.05743245780467987,
11
- "eval_rewards/rejected": -0.027322327718138695,
12
- "eval_runtime": 803.6977,
 
13
  "eval_samples": 2000,
14
- "eval_samples_per_second": 2.488,
15
- "eval_steps_per_second": 0.311
 
16
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "eval_logits/chosen": -2.5062618255615234,
4
+ "eval_logits/rejected": -2.5199859142303467,
5
+ "eval_logps/chosen": -272.93438720703125,
6
+ "eval_logps/rejected": -250.28643798828125,
7
+ "eval_loss": 0.6350826025009155,
8
+ "eval_pred_label": 8465.072265625,
9
+ "eval_rewards/accuracies": 0.6200000047683716,
10
+ "eval_rewards/chosen": 0.029984984546899796,
11
+ "eval_rewards/margins": 0.06345725804567337,
12
+ "eval_rewards/rejected": -0.033472273498773575,
13
+ "eval_runtime": 446.4868,
14
  "eval_samples": 2000,
15
+ "eval_samples_per_second": 4.479,
16
+ "eval_steps_per_second": 0.28,
17
+ "eval_use_label": 7566.92822265625
18
  }
runs/Jan15_16-31-35_uclaml03.cs.ucla.edu/events.out.tfevents.1705390086.uclaml03.cs.ucla.edu.1514934.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ae2d670c8fd9c570877d76918f04f157685c085f6f2daed5b6d0a504d569ac4
3
+ size 935
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 0.6598132096035942,
4
- "train_runtime": 45126.4521,
5
  "train_samples": 61135,
6
- "train_samples_per_second": 1.355,
7
- "train_steps_per_second": 0.021
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 0.6552608817035616,
4
+ "train_runtime": 24261.6882,
5
  "train_samples": 61135,
6
+ "train_samples_per_second": 2.52,
7
+ "train_steps_per_second": 0.039
8
  }
trainer_state.json CHANGED
@@ -11,1371 +11,1565 @@
11
  {
12
  "epoch": 0.0,
13
  "learning_rate": 5.208333333333333e-09,
14
- "logits/chosen": -2.570180892944336,
15
- "logits/rejected": -2.5666794776916504,
16
- "logps/chosen": -302.8643798828125,
17
- "logps/rejected": -232.7855682373047,
18
  "loss": 0.6931,
 
19
  "rewards/accuracies": 0.0,
20
  "rewards/chosen": 0.0,
21
  "rewards/margins": 0.0,
22
  "rewards/rejected": 0.0,
23
- "step": 1
 
24
  },
25
  {
26
  "epoch": 0.01,
27
  "learning_rate": 5.208333333333333e-08,
28
- "logits/chosen": -2.547344207763672,
29
- "logits/rejected": -2.532263994216919,
30
- "logps/chosen": -294.40087890625,
31
- "logps/rejected": -276.11126708984375,
32
- "loss": 0.6879,
33
- "rewards/accuracies": 0.4340277910232544,
34
- "rewards/chosen": 0.0004416291194502264,
35
- "rewards/margins": -0.0003637511981651187,
36
- "rewards/rejected": 0.000805380754172802,
37
- "step": 10
 
 
38
  },
39
  {
40
  "epoch": 0.02,
41
  "learning_rate": 1.0416666666666667e-07,
42
- "logits/chosen": -2.3985350131988525,
43
- "logits/rejected": -2.426626682281494,
44
- "logps/chosen": -244.53689575195312,
45
- "logps/rejected": -248.93624877929688,
46
- "loss": 0.6882,
47
- "rewards/accuracies": 0.49687498807907104,
48
- "rewards/chosen": -0.002964673563838005,
49
- "rewards/margins": 0.00020738500461447984,
50
- "rewards/rejected": -0.0031720586121082306,
51
- "step": 20
 
 
52
  },
53
  {
54
  "epoch": 0.03,
55
  "learning_rate": 1.5624999999999999e-07,
56
- "logits/chosen": -2.5088179111480713,
57
- "logits/rejected": -2.4950203895568848,
58
- "logps/chosen": -265.425537109375,
59
- "logps/rejected": -244.62191772460938,
60
- "loss": 0.6868,
61
- "rewards/accuracies": 0.44999998807907104,
62
- "rewards/chosen": -0.003862012643367052,
63
- "rewards/margins": -0.0030291248112916946,
64
- "rewards/rejected": -0.0008328882977366447,
65
- "step": 30
 
 
66
  },
67
  {
68
  "epoch": 0.04,
69
  "learning_rate": 2.0833333333333333e-07,
70
- "logits/chosen": -2.472740888595581,
71
- "logits/rejected": -2.498608112335205,
72
- "logps/chosen": -264.92767333984375,
73
- "logps/rejected": -260.81536865234375,
74
- "loss": 0.6883,
75
- "rewards/accuracies": 0.4937500059604645,
76
- "rewards/chosen": 0.002251622499898076,
77
- "rewards/margins": 0.002210150007158518,
78
- "rewards/rejected": 4.1472725570201874e-05,
79
- "step": 40
 
 
80
  },
81
  {
82
  "epoch": 0.05,
83
  "learning_rate": 2.604166666666667e-07,
84
- "logits/chosen": -2.511932611465454,
85
- "logits/rejected": -2.4823100566864014,
86
- "logps/chosen": -255.1762237548828,
87
- "logps/rejected": -249.55859375,
88
- "loss": 0.6856,
89
- "rewards/accuracies": 0.5406249761581421,
90
- "rewards/chosen": 0.0019304720917716622,
91
- "rewards/margins": 0.0034704413264989853,
92
- "rewards/rejected": -0.0015399687690660357,
93
- "step": 50
 
 
94
  },
95
  {
96
  "epoch": 0.06,
97
  "learning_rate": 3.1249999999999997e-07,
98
- "logits/chosen": -2.4945309162139893,
99
- "logits/rejected": -2.5101318359375,
100
- "logps/chosen": -288.4325256347656,
101
- "logps/rejected": -261.18310546875,
102
- "loss": 0.6867,
103
- "rewards/accuracies": 0.503125011920929,
104
- "rewards/chosen": -0.002425801707431674,
105
- "rewards/margins": 0.002398231066763401,
106
- "rewards/rejected": -0.004824032541364431,
107
- "step": 60
 
 
108
  },
109
  {
110
  "epoch": 0.07,
111
  "learning_rate": 3.645833333333333e-07,
112
- "logits/chosen": -2.521496534347534,
113
- "logits/rejected": -2.506330966949463,
114
- "logps/chosen": -275.6085510253906,
115
- "logps/rejected": -259.58392333984375,
116
- "loss": 0.6867,
117
- "rewards/accuracies": 0.5375000238418579,
118
- "rewards/chosen": 0.0025966810062527657,
119
- "rewards/margins": 0.007796707563102245,
120
- "rewards/rejected": -0.005200026091188192,
121
- "step": 70
 
 
122
  },
123
  {
124
  "epoch": 0.08,
125
  "learning_rate": 4.1666666666666667e-07,
126
- "logits/chosen": -2.456026315689087,
127
- "logits/rejected": -2.4563703536987305,
128
- "logps/chosen": -295.45941162109375,
129
- "logps/rejected": -263.0380859375,
130
- "loss": 0.6859,
131
- "rewards/accuracies": 0.4593749940395355,
132
- "rewards/chosen": -2.993037924170494e-05,
133
- "rewards/margins": -0.000812772021163255,
134
- "rewards/rejected": 0.0007828413508832455,
135
- "step": 80
 
 
136
  },
137
  {
138
  "epoch": 0.09,
139
  "learning_rate": 4.6874999999999996e-07,
140
- "logits/chosen": -2.4610087871551514,
141
- "logits/rejected": -2.4633374214172363,
142
- "logps/chosen": -282.82684326171875,
143
- "logps/rejected": -245.01785278320312,
144
- "loss": 0.6867,
145
- "rewards/accuracies": 0.4906249940395355,
146
- "rewards/chosen": -0.003978222608566284,
147
- "rewards/margins": -0.003358404617756605,
148
- "rewards/rejected": -0.0006198181654326618,
149
- "step": 90
 
 
150
  },
151
  {
152
  "epoch": 0.1,
153
  "learning_rate": 4.976717112922002e-07,
154
- "logits/chosen": -2.468822479248047,
155
- "logits/rejected": -2.4451956748962402,
156
- "logps/chosen": -264.9376525878906,
157
- "logps/rejected": -259.81182861328125,
158
- "loss": 0.6865,
159
- "rewards/accuracies": 0.528124988079071,
160
- "rewards/chosen": 0.004453591071069241,
161
- "rewards/margins": 0.004475563298910856,
162
- "rewards/rejected": -2.197279900428839e-05,
163
- "step": 100
 
 
164
  },
165
  {
166
  "epoch": 0.12,
167
  "learning_rate": 4.918509895227007e-07,
168
- "logits/chosen": -2.487288236618042,
169
- "logits/rejected": -2.4807934761047363,
170
- "logps/chosen": -263.60906982421875,
171
- "logps/rejected": -244.46047973632812,
172
- "loss": 0.6852,
173
- "rewards/accuracies": 0.534375011920929,
174
- "rewards/chosen": 0.00421065092086792,
175
- "rewards/margins": 0.007397785782814026,
176
- "rewards/rejected": -0.003187134861946106,
177
- "step": 110
 
 
178
  },
179
  {
180
  "epoch": 0.13,
181
  "learning_rate": 4.860302677532014e-07,
182
- "logits/chosen": -2.4685282707214355,
183
- "logits/rejected": -2.497313976287842,
184
- "logps/chosen": -277.75543212890625,
185
- "logps/rejected": -245.1427764892578,
186
- "loss": 0.6852,
187
- "rewards/accuracies": 0.5531250238418579,
188
- "rewards/chosen": 0.0019862186163663864,
189
- "rewards/margins": 0.0036893251817673445,
190
- "rewards/rejected": -0.0017031064489856362,
191
- "step": 120
 
 
192
  },
193
  {
194
  "epoch": 0.14,
195
  "learning_rate": 4.802095459837019e-07,
196
- "logits/chosen": -2.512012481689453,
197
- "logits/rejected": -2.498793840408325,
198
- "logps/chosen": -283.26959228515625,
199
- "logps/rejected": -265.85369873046875,
200
- "loss": 0.6838,
201
- "rewards/accuracies": 0.534375011920929,
202
- "rewards/chosen": 0.006660536862909794,
203
- "rewards/margins": 0.007559786085039377,
204
- "rewards/rejected": -0.0008992499788291752,
205
- "step": 130
 
 
206
  },
207
  {
208
  "epoch": 0.15,
209
  "learning_rate": 4.743888242142026e-07,
210
- "logits/chosen": -2.471496105194092,
211
- "logits/rejected": -2.4844748973846436,
212
- "logps/chosen": -255.3807373046875,
213
- "logps/rejected": -242.02658081054688,
214
- "loss": 0.6857,
215
- "rewards/accuracies": 0.5062500238418579,
216
- "rewards/chosen": 0.0008136004325933754,
217
- "rewards/margins": 4.014703517896123e-05,
218
- "rewards/rejected": 0.0007734533282928169,
219
- "step": 140
 
 
220
  },
221
  {
222
  "epoch": 0.16,
223
  "learning_rate": 4.685681024447031e-07,
224
- "logits/chosen": -2.471660614013672,
225
- "logits/rejected": -2.463984966278076,
226
- "logps/chosen": -287.92523193359375,
227
- "logps/rejected": -260.02313232421875,
228
- "loss": 0.6837,
229
- "rewards/accuracies": 0.518750011920929,
230
- "rewards/chosen": 0.00581919914111495,
231
- "rewards/margins": 0.008211213164031506,
232
- "rewards/rejected": -0.0023920147214084864,
233
- "step": 150
 
 
234
  },
235
  {
236
  "epoch": 0.17,
237
  "learning_rate": 4.627473806752037e-07,
238
- "logits/chosen": -2.520613193511963,
239
- "logits/rejected": -2.4651389122009277,
240
- "logps/chosen": -279.1468505859375,
241
- "logps/rejected": -264.69915771484375,
242
- "loss": 0.682,
243
- "rewards/accuracies": 0.5531250238418579,
244
- "rewards/chosen": 0.008319775573909283,
245
- "rewards/margins": 0.011747308075428009,
246
- "rewards/rejected": -0.003427532035857439,
247
- "step": 160
 
 
248
  },
249
  {
250
  "epoch": 0.18,
251
  "learning_rate": 4.5692665890570433e-07,
252
- "logits/chosen": -2.5029516220092773,
253
- "logits/rejected": -2.496436595916748,
254
- "logps/chosen": -279.58843994140625,
255
- "logps/rejected": -243.44253540039062,
256
- "loss": 0.6817,
257
- "rewards/accuracies": 0.5874999761581421,
258
- "rewards/chosen": 0.008326916955411434,
259
- "rewards/margins": 0.014212583191692829,
260
- "rewards/rejected": -0.00588566530495882,
261
- "step": 170
 
 
262
  },
263
  {
264
  "epoch": 0.19,
265
  "learning_rate": 4.5110593713620486e-07,
266
- "logits/chosen": -2.4697937965393066,
267
- "logits/rejected": -2.4516141414642334,
268
- "logps/chosen": -236.89675903320312,
269
- "logps/rejected": -253.85598754882812,
270
- "loss": 0.6832,
271
- "rewards/accuracies": 0.5531250238418579,
272
- "rewards/chosen": 0.00583054032176733,
273
- "rewards/margins": 0.012070106342434883,
274
- "rewards/rejected": -0.00623956648632884,
275
- "step": 180
 
 
276
  },
277
  {
278
  "epoch": 0.2,
279
  "learning_rate": 4.4528521536670544e-07,
280
- "logits/chosen": -2.5360610485076904,
281
- "logits/rejected": -2.505577802658081,
282
- "logps/chosen": -291.94586181640625,
283
- "logps/rejected": -260.87054443359375,
284
- "loss": 0.6801,
285
- "rewards/accuracies": 0.637499988079071,
286
- "rewards/chosen": 0.011800072155892849,
287
- "rewards/margins": 0.02227787859737873,
288
- "rewards/rejected": -0.010477803647518158,
289
- "step": 190
 
 
290
  },
291
  {
292
  "epoch": 0.21,
293
  "learning_rate": 4.3946449359720607e-07,
294
- "logits/chosen": -2.4782238006591797,
295
- "logits/rejected": -2.508026599884033,
296
- "logps/chosen": -258.73895263671875,
297
- "logps/rejected": -252.0211639404297,
298
- "loss": 0.6809,
299
- "rewards/accuracies": 0.6031249761581421,
300
- "rewards/chosen": 0.009132781066000462,
301
- "rewards/margins": 0.0161266028881073,
302
- "rewards/rejected": -0.006993822753429413,
303
- "step": 200
 
 
304
  },
305
  {
306
  "epoch": 0.22,
307
  "learning_rate": 4.336437718277066e-07,
308
- "logits/chosen": -2.4700803756713867,
309
- "logits/rejected": -2.479269504547119,
310
- "logps/chosen": -272.45904541015625,
311
- "logps/rejected": -256.2876281738281,
312
- "loss": 0.6783,
313
- "rewards/accuracies": 0.574999988079071,
314
- "rewards/chosen": 0.016153430566191673,
315
- "rewards/margins": 0.023166943341493607,
316
- "rewards/rejected": -0.007013511843979359,
317
- "step": 210
 
 
318
  },
319
  {
320
  "epoch": 0.23,
321
  "learning_rate": 4.278230500582072e-07,
322
- "logits/chosen": -2.4889426231384277,
323
- "logits/rejected": -2.5127673149108887,
324
- "logps/chosen": -267.21966552734375,
325
- "logps/rejected": -258.29205322265625,
326
- "loss": 0.6793,
327
- "rewards/accuracies": 0.6031249761581421,
328
- "rewards/chosen": 0.005786339286714792,
329
- "rewards/margins": 0.01781514100730419,
330
- "rewards/rejected": -0.012028800323605537,
331
- "step": 220
 
 
332
  },
333
  {
334
  "epoch": 0.24,
335
  "learning_rate": 4.220023282887078e-07,
336
- "logits/chosen": -2.4950485229492188,
337
- "logits/rejected": -2.4980530738830566,
338
- "logps/chosen": -295.85028076171875,
339
- "logps/rejected": -259.36529541015625,
340
- "loss": 0.6777,
341
- "rewards/accuracies": 0.596875011920929,
342
- "rewards/chosen": 0.007827522233128548,
343
- "rewards/margins": 0.014727133326232433,
344
- "rewards/rejected": -0.006899611093103886,
345
- "step": 230
 
 
346
  },
347
  {
348
  "epoch": 0.25,
349
  "learning_rate": 4.1618160651920834e-07,
350
- "logits/chosen": -2.466526508331299,
351
- "logits/rejected": -2.4315407276153564,
352
- "logps/chosen": -282.66192626953125,
353
- "logps/rejected": -248.1639862060547,
354
- "loss": 0.6778,
355
- "rewards/accuracies": 0.5687500238418579,
356
- "rewards/chosen": 0.01533576101064682,
357
- "rewards/margins": 0.017889009788632393,
358
- "rewards/rejected": -0.0025532490108162165,
359
- "step": 240
 
 
360
  },
361
  {
362
  "epoch": 0.26,
363
  "learning_rate": 4.103608847497089e-07,
364
- "logits/chosen": -2.4848718643188477,
365
- "logits/rejected": -2.443408250808716,
366
- "logps/chosen": -288.2550964355469,
367
- "logps/rejected": -271.9901123046875,
368
- "loss": 0.6761,
369
- "rewards/accuracies": 0.621874988079071,
370
- "rewards/chosen": 0.013339035212993622,
371
- "rewards/margins": 0.023220960050821304,
372
- "rewards/rejected": -0.009881924837827682,
373
- "step": 250
 
 
374
  },
375
  {
376
  "epoch": 0.27,
377
  "learning_rate": 4.0454016298020956e-07,
378
- "logits/chosen": -2.4762511253356934,
379
- "logits/rejected": -2.446619749069214,
380
- "logps/chosen": -268.5465393066406,
381
- "logps/rejected": -251.9985809326172,
382
- "loss": 0.6763,
383
- "rewards/accuracies": 0.5625,
384
- "rewards/chosen": 0.006427633110433817,
385
- "rewards/margins": 0.015916740521788597,
386
- "rewards/rejected": -0.009489107877016068,
387
- "step": 260
 
 
388
  },
389
  {
390
  "epoch": 0.28,
391
  "learning_rate": 3.987194412107101e-07,
392
- "logits/chosen": -2.4372754096984863,
393
- "logits/rejected": -2.450491189956665,
394
- "logps/chosen": -263.63262939453125,
395
- "logps/rejected": -252.0638427734375,
396
  "loss": 0.6735,
397
- "rewards/accuracies": 0.6312500238418579,
398
- "rewards/chosen": 0.013371949084103107,
399
- "rewards/margins": 0.023976340889930725,
400
- "rewards/rejected": -0.010604391805827618,
401
- "step": 270
 
 
402
  },
403
  {
404
  "epoch": 0.29,
405
  "learning_rate": 3.9289871944121066e-07,
406
- "logits/chosen": -2.5451080799102783,
407
- "logits/rejected": -2.516885995864868,
408
- "logps/chosen": -282.615234375,
409
- "logps/rejected": -242.2822265625,
410
- "loss": 0.6722,
411
- "rewards/accuracies": 0.640625,
412
- "rewards/chosen": 0.0192705187946558,
413
- "rewards/margins": 0.029350727796554565,
414
- "rewards/rejected": -0.010080209001898766,
415
- "step": 280
 
 
416
  },
417
  {
418
  "epoch": 0.3,
419
  "learning_rate": 3.870779976717113e-07,
420
- "logits/chosen": -2.4943687915802,
421
- "logits/rejected": -2.5217864513397217,
422
- "logps/chosen": -277.5215759277344,
423
- "logps/rejected": -229.96298217773438,
424
- "loss": 0.6722,
425
- "rewards/accuracies": 0.6187499761581421,
426
- "rewards/chosen": 0.015035443007946014,
427
- "rewards/margins": 0.024382654577493668,
428
- "rewards/rejected": -0.009347210638225079,
429
- "step": 290
 
 
430
  },
431
  {
432
  "epoch": 0.31,
433
  "learning_rate": 3.812572759022118e-07,
434
- "logits/chosen": -2.49491810798645,
435
- "logits/rejected": -2.501307249069214,
436
- "logps/chosen": -281.02484130859375,
437
- "logps/rejected": -237.4232940673828,
438
- "loss": 0.6726,
439
- "rewards/accuracies": 0.628125011920929,
440
- "rewards/chosen": 0.018051721155643463,
441
- "rewards/margins": 0.032371118664741516,
442
- "rewards/rejected": -0.014319395646452904,
443
- "step": 300
 
 
444
  },
445
  {
446
  "epoch": 0.32,
447
  "learning_rate": 3.754365541327124e-07,
448
- "logits/chosen": -2.4540202617645264,
449
- "logits/rejected": -2.435542583465576,
450
- "logps/chosen": -258.79193115234375,
451
- "logps/rejected": -253.71694946289062,
452
- "loss": 0.673,
453
- "rewards/accuracies": 0.6468750238418579,
454
- "rewards/chosen": 0.015593824908137321,
455
- "rewards/margins": 0.0299003217369318,
456
- "rewards/rejected": -0.014306495897471905,
457
- "step": 310
 
 
458
  },
459
  {
460
  "epoch": 0.33,
461
  "learning_rate": 3.6961583236321304e-07,
462
- "logits/chosen": -2.488478422164917,
463
- "logits/rejected": -2.471566677093506,
464
- "logps/chosen": -287.9583435058594,
465
- "logps/rejected": -256.38427734375,
466
- "loss": 0.6681,
467
- "rewards/accuracies": 0.653124988079071,
468
- "rewards/chosen": 0.01864878088235855,
469
- "rewards/margins": 0.03612912446260452,
470
- "rewards/rejected": -0.01748034544289112,
471
- "step": 320
 
 
472
  },
473
  {
474
  "epoch": 0.35,
475
  "learning_rate": 3.637951105937136e-07,
476
- "logits/chosen": -2.469587802886963,
477
- "logits/rejected": -2.4525370597839355,
478
- "logps/chosen": -249.15280151367188,
479
- "logps/rejected": -235.3613739013672,
480
- "loss": 0.6664,
481
- "rewards/accuracies": 0.6187499761581421,
482
- "rewards/chosen": 0.013197916559875011,
483
- "rewards/margins": 0.030393391847610474,
484
- "rewards/rejected": -0.01719547249376774,
485
- "step": 330
 
 
486
  },
487
  {
488
  "epoch": 0.36,
489
  "learning_rate": 3.579743888242142e-07,
490
- "logits/chosen": -2.486199378967285,
491
- "logits/rejected": -2.491894006729126,
492
- "logps/chosen": -279.8724060058594,
493
- "logps/rejected": -249.26123046875,
494
- "loss": 0.6691,
495
- "rewards/accuracies": 0.59375,
496
- "rewards/chosen": 0.016388490796089172,
497
- "rewards/margins": 0.02631198987364769,
498
- "rewards/rejected": -0.009923500940203667,
499
- "step": 340
 
 
500
  },
501
  {
502
  "epoch": 0.37,
503
  "learning_rate": 3.521536670547148e-07,
504
- "logits/chosen": -2.501917839050293,
505
- "logits/rejected": -2.500124216079712,
506
- "logps/chosen": -277.4295349121094,
507
- "logps/rejected": -261.6769104003906,
508
- "loss": 0.665,
509
- "rewards/accuracies": 0.637499988079071,
510
- "rewards/chosen": 0.019429894164204597,
511
- "rewards/margins": 0.030850976705551147,
512
- "rewards/rejected": -0.011421086266636848,
513
- "step": 350
 
 
514
  },
515
  {
516
  "epoch": 0.38,
517
  "learning_rate": 3.4633294528521536e-07,
518
- "logits/chosen": -2.5457377433776855,
519
- "logits/rejected": -2.5352020263671875,
520
- "logps/chosen": -280.8336486816406,
521
- "logps/rejected": -259.36895751953125,
522
- "loss": 0.6614,
523
- "rewards/accuracies": 0.6156250238418579,
524
- "rewards/chosen": 0.01814255118370056,
525
- "rewards/margins": 0.03848281502723694,
526
- "rewards/rejected": -0.02034026011824608,
527
- "step": 360
 
 
528
  },
529
  {
530
  "epoch": 0.39,
531
  "learning_rate": 3.4051222351571594e-07,
532
- "logits/chosen": -2.5180299282073975,
533
- "logits/rejected": -2.4479191303253174,
534
- "logps/chosen": -274.1128845214844,
535
- "logps/rejected": -259.3582763671875,
536
- "loss": 0.6664,
537
- "rewards/accuracies": 0.643750011920929,
538
- "rewards/chosen": 0.023864779621362686,
539
- "rewards/margins": 0.041657593101263046,
540
- "rewards/rejected": -0.01779281720519066,
541
- "step": 370
 
 
542
  },
543
  {
544
  "epoch": 0.4,
545
  "learning_rate": 3.346915017462165e-07,
546
- "logits/chosen": -2.504103899002075,
547
- "logits/rejected": -2.4852070808410645,
548
- "logps/chosen": -269.26409912109375,
549
- "logps/rejected": -253.5343780517578,
550
- "loss": 0.663,
551
- "rewards/accuracies": 0.640625,
552
- "rewards/chosen": 0.021114524453878403,
553
- "rewards/margins": 0.042601972818374634,
554
- "rewards/rejected": -0.021487446501851082,
555
- "step": 380
 
 
556
  },
557
  {
558
  "epoch": 0.41,
559
  "learning_rate": 3.288707799767171e-07,
560
- "logits/chosen": -2.4515504837036133,
561
- "logits/rejected": -2.447246551513672,
562
- "logps/chosen": -259.05401611328125,
563
- "logps/rejected": -240.0911407470703,
564
- "loss": 0.663,
565
- "rewards/accuracies": 0.628125011920929,
566
- "rewards/chosen": 0.014576256275177002,
567
- "rewards/margins": 0.04004546254873276,
568
- "rewards/rejected": -0.025469202548265457,
569
- "step": 390
 
 
570
  },
571
  {
572
  "epoch": 0.42,
573
  "learning_rate": 3.230500582072177e-07,
574
- "logits/chosen": -2.509997844696045,
575
- "logits/rejected": -2.522238254547119,
576
- "logps/chosen": -282.1490478515625,
577
- "logps/rejected": -243.8563690185547,
578
- "loss": 0.6624,
579
- "rewards/accuracies": 0.637499988079071,
580
- "rewards/chosen": 0.025715211406350136,
581
- "rewards/margins": 0.043169617652893066,
582
- "rewards/rejected": -0.01745440624654293,
583
- "step": 400
 
 
584
  },
585
  {
586
  "epoch": 0.43,
587
  "learning_rate": 3.1722933643771827e-07,
588
- "logits/chosen": -2.498192548751831,
589
- "logits/rejected": -2.4921789169311523,
590
- "logps/chosen": -271.2725830078125,
591
- "logps/rejected": -244.8346710205078,
592
- "loss": 0.6594,
593
- "rewards/accuracies": 0.6468750238418579,
594
- "rewards/chosen": 0.02634851261973381,
595
- "rewards/margins": 0.0405191034078598,
596
- "rewards/rejected": -0.014170585200190544,
597
- "step": 410
 
 
598
  },
599
  {
600
  "epoch": 0.44,
601
  "learning_rate": 3.1140861466821885e-07,
602
- "logits/chosen": -2.4811172485351562,
603
- "logits/rejected": -2.4510648250579834,
604
- "logps/chosen": -253.666015625,
605
- "logps/rejected": -243.52877807617188,
606
- "loss": 0.6617,
607
- "rewards/accuracies": 0.628125011920929,
608
- "rewards/chosen": 0.024721184745430946,
609
- "rewards/margins": 0.03779374435544014,
610
- "rewards/rejected": -0.013072559610009193,
611
- "step": 420
 
 
612
  },
613
  {
614
  "epoch": 0.45,
615
  "learning_rate": 3.0558789289871943e-07,
616
- "logits/chosen": -2.5075089931488037,
617
- "logits/rejected": -2.4863505363464355,
618
- "logps/chosen": -279.38818359375,
619
- "logps/rejected": -254.1600799560547,
620
- "loss": 0.6609,
621
- "rewards/accuracies": 0.596875011920929,
622
- "rewards/chosen": 0.02350917086005211,
623
- "rewards/margins": 0.03294364735484123,
624
- "rewards/rejected": -0.009434476494789124,
625
- "step": 430
 
 
626
  },
627
  {
628
  "epoch": 0.46,
629
  "learning_rate": 2.9976717112922e-07,
630
- "logits/chosen": -2.5201539993286133,
631
- "logits/rejected": -2.511667013168335,
632
- "logps/chosen": -272.9944152832031,
633
- "logps/rejected": -233.72854614257812,
634
- "loss": 0.6589,
635
- "rewards/accuracies": 0.581250011920929,
636
- "rewards/chosen": 0.020865267142653465,
637
- "rewards/margins": 0.04071135073900223,
638
- "rewards/rejected": -0.019846081733703613,
639
- "step": 440
 
 
640
  },
641
  {
642
  "epoch": 0.47,
643
  "learning_rate": 2.939464493597206e-07,
644
- "logits/chosen": -2.4186248779296875,
645
- "logits/rejected": -2.3882439136505127,
646
- "logps/chosen": -244.801025390625,
647
- "logps/rejected": -241.0476837158203,
648
- "loss": 0.6588,
649
- "rewards/accuracies": 0.637499988079071,
650
- "rewards/chosen": 0.019629117101430893,
651
- "rewards/margins": 0.037906430661678314,
652
- "rewards/rejected": -0.01827731356024742,
653
- "step": 450
 
 
654
  },
655
  {
656
  "epoch": 0.48,
657
  "learning_rate": 2.8812572759022117e-07,
658
- "logits/chosen": -2.5232837200164795,
659
- "logits/rejected": -2.5000243186950684,
660
- "logps/chosen": -294.4573669433594,
661
- "logps/rejected": -256.6082458496094,
662
- "loss": 0.6559,
663
- "rewards/accuracies": 0.6156250238418579,
664
- "rewards/chosen": 0.02450462244451046,
665
- "rewards/margins": 0.0395827516913414,
666
- "rewards/rejected": -0.015078130178153515,
667
- "step": 460
 
 
668
  },
669
  {
670
  "epoch": 0.49,
671
  "learning_rate": 2.8230500582072175e-07,
672
- "logits/chosen": -2.4724531173706055,
673
- "logits/rejected": -2.4754977226257324,
674
- "logps/chosen": -283.3002624511719,
675
- "logps/rejected": -251.4059295654297,
676
- "loss": 0.6557,
677
- "rewards/accuracies": 0.65625,
678
- "rewards/chosen": 0.024192675948143005,
679
- "rewards/margins": 0.05236636474728584,
680
- "rewards/rejected": -0.028173688799142838,
681
- "step": 470
 
 
682
  },
683
  {
684
  "epoch": 0.5,
685
  "learning_rate": 2.7648428405122233e-07,
686
- "logits/chosen": -2.410810947418213,
687
- "logits/rejected": -2.4008870124816895,
688
- "logps/chosen": -242.271484375,
689
- "logps/rejected": -250.8043212890625,
690
- "loss": 0.6593,
691
- "rewards/accuracies": 0.6031249761581421,
692
- "rewards/chosen": 0.012235969305038452,
693
- "rewards/margins": 0.02636186219751835,
694
- "rewards/rejected": -0.01412589568644762,
695
- "step": 480
 
 
696
  },
697
  {
698
  "epoch": 0.51,
699
  "learning_rate": 2.706635622817229e-07,
700
- "logits/chosen": -2.4645464420318604,
701
- "logits/rejected": -2.469316244125366,
702
- "logps/chosen": -272.09796142578125,
703
- "logps/rejected": -238.6561737060547,
704
- "loss": 0.6533,
705
- "rewards/accuracies": 0.671875,
706
- "rewards/chosen": 0.024095356464385986,
707
- "rewards/margins": 0.05057717487215996,
708
- "rewards/rejected": -0.02648181840777397,
709
- "step": 490
 
 
710
  },
711
  {
712
  "epoch": 0.52,
713
  "learning_rate": 2.648428405122235e-07,
714
- "logits/chosen": -2.508164167404175,
715
- "logits/rejected": -2.4578819274902344,
716
- "logps/chosen": -277.1492004394531,
717
- "logps/rejected": -256.0687561035156,
718
- "loss": 0.6531,
719
- "rewards/accuracies": 0.640625,
720
- "rewards/chosen": 0.03192506358027458,
721
- "rewards/margins": 0.05114533379673958,
722
- "rewards/rejected": -0.019220268353819847,
723
- "step": 500
 
 
724
  },
725
  {
726
  "epoch": 0.53,
727
  "learning_rate": 2.590221187427241e-07,
728
- "logits/chosen": -2.5105607509613037,
729
- "logits/rejected": -2.4753470420837402,
730
- "logps/chosen": -263.40966796875,
731
- "logps/rejected": -244.8257598876953,
732
- "loss": 0.6522,
733
- "rewards/accuracies": 0.625,
734
- "rewards/chosen": 0.027492288500070572,
735
- "rewards/margins": 0.04991314187645912,
736
- "rewards/rejected": -0.022420858964323997,
737
- "step": 510
 
 
738
  },
739
  {
740
  "epoch": 0.54,
741
  "learning_rate": 2.5320139697322466e-07,
742
- "logits/chosen": -2.5335867404937744,
743
- "logits/rejected": -2.468331813812256,
744
- "logps/chosen": -272.64385986328125,
745
- "logps/rejected": -244.65286254882812,
746
- "loss": 0.651,
747
- "rewards/accuracies": 0.65625,
748
- "rewards/chosen": 0.03846021741628647,
749
- "rewards/margins": 0.06949218362569809,
750
- "rewards/rejected": -0.03103196993470192,
751
- "step": 520
 
 
752
  },
753
  {
754
  "epoch": 0.55,
755
  "learning_rate": 2.4738067520372524e-07,
756
- "logits/chosen": -2.505664110183716,
757
- "logits/rejected": -2.512172222137451,
758
- "logps/chosen": -280.70819091796875,
759
- "logps/rejected": -255.12228393554688,
760
- "loss": 0.6487,
761
- "rewards/accuracies": 0.668749988079071,
762
- "rewards/chosen": 0.035160940140485764,
763
- "rewards/margins": 0.06470004469156265,
764
- "rewards/rejected": -0.029539108276367188,
765
- "step": 530
 
 
766
  },
767
  {
768
  "epoch": 0.57,
769
  "learning_rate": 2.415599534342258e-07,
770
- "logits/chosen": -2.5341105461120605,
771
- "logits/rejected": -2.4813685417175293,
772
- "logps/chosen": -280.51751708984375,
773
- "logps/rejected": -252.9778289794922,
774
- "loss": 0.6471,
775
- "rewards/accuracies": 0.653124988079071,
776
- "rewards/chosen": 0.03973756358027458,
777
- "rewards/margins": 0.06431148201227188,
778
- "rewards/rejected": -0.02457391656935215,
779
- "step": 540
 
 
780
  },
781
  {
782
  "epoch": 0.58,
783
  "learning_rate": 2.3573923166472642e-07,
784
- "logits/chosen": -2.506739377975464,
785
- "logits/rejected": -2.487623691558838,
786
- "logps/chosen": -272.10675048828125,
787
- "logps/rejected": -258.69647216796875,
788
- "loss": 0.6479,
789
- "rewards/accuracies": 0.6343749761581421,
790
- "rewards/chosen": 0.028482938185334206,
791
- "rewards/margins": 0.057554639875888824,
792
- "rewards/rejected": -0.02907169796526432,
793
- "step": 550
 
 
794
  },
795
  {
796
  "epoch": 0.59,
797
  "learning_rate": 2.2991850989522698e-07,
798
- "logits/chosen": -2.442371129989624,
799
- "logits/rejected": -2.4590439796447754,
800
- "logps/chosen": -280.783935546875,
801
- "logps/rejected": -227.5161895751953,
802
- "loss": 0.6513,
803
- "rewards/accuracies": 0.628125011920929,
804
- "rewards/chosen": 0.02295442670583725,
805
- "rewards/margins": 0.05334927886724472,
806
- "rewards/rejected": -0.03039485774934292,
807
- "step": 560
 
 
808
  },
809
  {
810
  "epoch": 0.6,
811
  "learning_rate": 2.2409778812572759e-07,
812
- "logits/chosen": -2.470194101333618,
813
- "logits/rejected": -2.441986560821533,
814
- "logps/chosen": -255.4265594482422,
815
- "logps/rejected": -241.7792205810547,
816
- "loss": 0.6487,
817
- "rewards/accuracies": 0.6343749761581421,
818
- "rewards/chosen": 0.024136796593666077,
819
- "rewards/margins": 0.04882895201444626,
820
- "rewards/rejected": -0.02469216100871563,
821
- "step": 570
 
 
822
  },
823
  {
824
  "epoch": 0.61,
825
  "learning_rate": 2.1827706635622817e-07,
826
- "logits/chosen": -2.5127058029174805,
827
- "logits/rejected": -2.496788263320923,
828
- "logps/chosen": -262.003662109375,
829
- "logps/rejected": -251.4923858642578,
830
- "loss": 0.6508,
831
- "rewards/accuracies": 0.6937500238418579,
832
- "rewards/chosen": 0.025721842423081398,
833
- "rewards/margins": 0.056790102273225784,
834
- "rewards/rejected": -0.031068259850144386,
835
- "step": 580
 
 
836
  },
837
  {
838
  "epoch": 0.62,
839
  "learning_rate": 2.1245634458672875e-07,
840
- "logits/chosen": -2.4597954750061035,
841
- "logits/rejected": -2.4486401081085205,
842
- "logps/chosen": -272.46258544921875,
843
- "logps/rejected": -250.1220703125,
844
- "loss": 0.6465,
845
- "rewards/accuracies": 0.671875,
846
- "rewards/chosen": 0.03377040475606918,
847
- "rewards/margins": 0.05980812385678291,
848
- "rewards/rejected": -0.02603771723806858,
849
- "step": 590
 
 
850
  },
851
  {
852
  "epoch": 0.63,
853
  "learning_rate": 2.0663562281722933e-07,
854
- "logits/chosen": -2.468313217163086,
855
- "logits/rejected": -2.4899404048919678,
856
- "logps/chosen": -280.9910888671875,
857
- "logps/rejected": -279.2497863769531,
858
- "loss": 0.6493,
859
- "rewards/accuracies": 0.6468750238418579,
860
- "rewards/chosen": 0.03328691050410271,
861
- "rewards/margins": 0.05028299614787102,
862
- "rewards/rejected": -0.016996093094348907,
863
- "step": 600
 
 
864
  },
865
  {
866
  "epoch": 0.64,
867
  "learning_rate": 2.008149010477299e-07,
868
- "logits/chosen": -2.515079975128174,
869
- "logits/rejected": -2.471588611602783,
870
- "logps/chosen": -284.3990478515625,
871
- "logps/rejected": -246.21835327148438,
872
- "loss": 0.6456,
873
- "rewards/accuracies": 0.637499988079071,
874
- "rewards/chosen": 0.03540956228971481,
875
- "rewards/margins": 0.062371380627155304,
876
- "rewards/rejected": -0.026961814612150192,
877
- "step": 610
 
 
878
  },
879
  {
880
  "epoch": 0.65,
881
  "learning_rate": 1.949941792782305e-07,
882
- "logits/chosen": -2.5138742923736572,
883
- "logits/rejected": -2.451676845550537,
884
- "logps/chosen": -272.3465881347656,
885
- "logps/rejected": -240.0901641845703,
886
- "loss": 0.6492,
887
- "rewards/accuracies": 0.6343749761581421,
888
- "rewards/chosen": 0.03495832532644272,
889
- "rewards/margins": 0.05197330191731453,
890
- "rewards/rejected": -0.017014967277646065,
891
- "step": 620
 
 
892
  },
893
  {
894
  "epoch": 0.66,
895
  "learning_rate": 1.8917345750873107e-07,
896
- "logits/chosen": -2.4583332538604736,
897
- "logits/rejected": -2.468278169631958,
898
- "logps/chosen": -264.42913818359375,
899
- "logps/rejected": -256.8759765625,
900
- "loss": 0.6451,
901
- "rewards/accuracies": 0.637499988079071,
902
- "rewards/chosen": 0.02500128373503685,
903
- "rewards/margins": 0.04210829734802246,
904
- "rewards/rejected": -0.01710701361298561,
905
- "step": 630
 
 
906
  },
907
  {
908
  "epoch": 0.67,
909
  "learning_rate": 1.8335273573923165e-07,
910
- "logits/chosen": -2.5137476921081543,
911
- "logits/rejected": -2.473841667175293,
912
- "logps/chosen": -252.57705688476562,
913
- "logps/rejected": -230.48471069335938,
914
- "loss": 0.6434,
915
- "rewards/accuracies": 0.640625,
916
- "rewards/chosen": 0.03589923307299614,
917
- "rewards/margins": 0.06454572081565857,
918
- "rewards/rejected": -0.028646480292081833,
919
- "step": 640
 
 
920
  },
921
  {
922
  "epoch": 0.68,
923
  "learning_rate": 1.7753201396973226e-07,
924
- "logits/chosen": -2.45261549949646,
925
- "logits/rejected": -2.4653515815734863,
926
- "logps/chosen": -274.7919006347656,
927
- "logps/rejected": -261.8819580078125,
928
- "loss": 0.6407,
929
- "rewards/accuracies": 0.6468750238418579,
930
- "rewards/chosen": 0.03197002038359642,
931
- "rewards/margins": 0.07201725244522095,
932
- "rewards/rejected": -0.040047239512205124,
933
- "step": 650
 
 
934
  },
935
  {
936
  "epoch": 0.69,
937
  "learning_rate": 1.7171129220023281e-07,
938
- "logits/chosen": -2.4911041259765625,
939
- "logits/rejected": -2.4684813022613525,
940
- "logps/chosen": -267.3164978027344,
941
- "logps/rejected": -232.7432403564453,
942
- "loss": 0.6489,
943
- "rewards/accuracies": 0.6499999761581421,
944
- "rewards/chosen": 0.022824671119451523,
945
- "rewards/margins": 0.04568014293909073,
946
- "rewards/rejected": -0.022855471819639206,
947
- "step": 660
 
 
948
  },
949
  {
950
  "epoch": 0.7,
951
  "learning_rate": 1.658905704307334e-07,
952
- "logits/chosen": -2.4661662578582764,
953
- "logits/rejected": -2.429795026779175,
954
- "logps/chosen": -267.6933898925781,
955
- "logps/rejected": -238.0891876220703,
956
- "loss": 0.6461,
957
- "rewards/accuracies": 0.65625,
958
- "rewards/chosen": 0.029722299426794052,
959
- "rewards/margins": 0.061273299157619476,
960
- "rewards/rejected": -0.031550996005535126,
961
- "step": 670
 
 
962
  },
963
  {
964
  "epoch": 0.71,
965
  "learning_rate": 1.60069848661234e-07,
966
- "logits/chosen": -2.494511127471924,
967
- "logits/rejected": -2.505267381668091,
968
- "logps/chosen": -287.6944274902344,
969
- "logps/rejected": -259.6459045410156,
970
- "loss": 0.6466,
971
- "rewards/accuracies": 0.640625,
972
- "rewards/chosen": 0.041329581290483475,
973
- "rewards/margins": 0.05584716796875,
974
- "rewards/rejected": -0.014517592266201973,
975
- "step": 680
 
 
976
  },
977
  {
978
  "epoch": 0.72,
979
  "learning_rate": 1.5424912689173456e-07,
980
- "logits/chosen": -2.4804582595825195,
981
- "logits/rejected": -2.462634325027466,
982
- "logps/chosen": -270.22698974609375,
983
- "logps/rejected": -227.7113800048828,
984
- "loss": 0.6407,
985
- "rewards/accuracies": 0.671875,
986
- "rewards/chosen": 0.037567246705293655,
987
- "rewards/margins": 0.0671004056930542,
988
- "rewards/rejected": -0.02953316643834114,
989
- "step": 690
 
 
990
  },
991
  {
992
  "epoch": 0.73,
993
  "learning_rate": 1.4842840512223514e-07,
994
- "logits/chosen": -2.468341112136841,
995
- "logits/rejected": -2.4682674407958984,
996
- "logps/chosen": -257.55096435546875,
997
- "logps/rejected": -232.13906860351562,
998
- "loss": 0.647,
999
- "rewards/accuracies": 0.671875,
1000
- "rewards/chosen": 0.03749538213014603,
1001
- "rewards/margins": 0.0758819431066513,
1002
- "rewards/rejected": -0.03838656097650528,
1003
- "step": 700
 
 
1004
  },
1005
  {
1006
  "epoch": 0.74,
1007
  "learning_rate": 1.4260768335273574e-07,
1008
- "logits/chosen": -2.5157179832458496,
1009
- "logits/rejected": -2.497849702835083,
1010
- "logps/chosen": -264.9339904785156,
1011
- "logps/rejected": -229.9503936767578,
1012
- "loss": 0.6375,
1013
- "rewards/accuracies": 0.6312500238418579,
1014
- "rewards/chosen": 0.03981786221265793,
1015
- "rewards/margins": 0.06821642816066742,
1016
- "rewards/rejected": -0.02839856967329979,
1017
- "step": 710
 
 
1018
  },
1019
  {
1020
  "epoch": 0.75,
1021
  "learning_rate": 1.3678696158323632e-07,
1022
- "logits/chosen": -2.4586503505706787,
1023
- "logits/rejected": -2.4603028297424316,
1024
- "logps/chosen": -262.70880126953125,
1025
- "logps/rejected": -237.71212768554688,
1026
- "loss": 0.645,
1027
- "rewards/accuracies": 0.6312500238418579,
1028
- "rewards/chosen": 0.025259777903556824,
1029
- "rewards/margins": 0.0490126870572567,
1030
- "rewards/rejected": -0.023752911016345024,
1031
- "step": 720
 
 
1032
  },
1033
  {
1034
  "epoch": 0.76,
1035
  "learning_rate": 1.3096623981373688e-07,
1036
- "logits/chosen": -2.502094268798828,
1037
- "logits/rejected": -2.4800729751586914,
1038
- "logps/chosen": -282.3802185058594,
1039
- "logps/rejected": -236.8638153076172,
1040
- "loss": 0.6438,
1041
- "rewards/accuracies": 0.6656249761581421,
1042
- "rewards/chosen": 0.04312821850180626,
1043
- "rewards/margins": 0.07856440544128418,
1044
- "rewards/rejected": -0.03543618693947792,
1045
- "step": 730
 
 
1046
  },
1047
  {
1048
  "epoch": 0.77,
1049
  "learning_rate": 1.2514551804423749e-07,
1050
- "logits/chosen": -2.4671387672424316,
1051
- "logits/rejected": -2.514324903488159,
1052
- "logps/chosen": -285.3311462402344,
1053
- "logps/rejected": -263.767822265625,
1054
- "loss": 0.642,
1055
- "rewards/accuracies": 0.684374988079071,
1056
- "rewards/chosen": 0.04080774262547493,
1057
- "rewards/margins": 0.07315204292535782,
1058
- "rewards/rejected": -0.03234430402517319,
1059
- "step": 740
 
 
1060
  },
1061
  {
1062
  "epoch": 0.79,
1063
  "learning_rate": 1.1932479627473807e-07,
1064
- "logits/chosen": -2.490058183670044,
1065
- "logits/rejected": -2.4621422290802,
1066
- "logps/chosen": -251.80148315429688,
1067
- "logps/rejected": -249.9593048095703,
1068
- "loss": 0.6395,
1069
- "rewards/accuracies": 0.6625000238418579,
1070
- "rewards/chosen": 0.028191978111863136,
1071
- "rewards/margins": 0.06258732825517654,
1072
- "rewards/rejected": -0.03439534455537796,
1073
- "step": 750
 
 
1074
  },
1075
  {
1076
  "epoch": 0.8,
1077
  "learning_rate": 1.1350407450523865e-07,
1078
- "logits/chosen": -2.470336437225342,
1079
- "logits/rejected": -2.5136280059814453,
1080
- "logps/chosen": -281.42767333984375,
1081
- "logps/rejected": -254.0972137451172,
1082
- "loss": 0.6419,
1083
- "rewards/accuracies": 0.637499988079071,
1084
- "rewards/chosen": 0.036293573677539825,
1085
- "rewards/margins": 0.06266774237155914,
1086
- "rewards/rejected": -0.02637416496872902,
1087
- "step": 760
 
 
1088
  },
1089
  {
1090
  "epoch": 0.81,
1091
  "learning_rate": 1.0768335273573923e-07,
1092
- "logits/chosen": -2.5531864166259766,
1093
- "logits/rejected": -2.545531749725342,
1094
- "logps/chosen": -305.70111083984375,
1095
- "logps/rejected": -276.01019287109375,
1096
- "loss": 0.64,
1097
- "rewards/accuracies": 0.621874988079071,
1098
- "rewards/chosen": 0.046415045857429504,
1099
- "rewards/margins": 0.07657970488071442,
1100
- "rewards/rejected": -0.030164653435349464,
1101
- "step": 770
 
 
1102
  },
1103
  {
1104
  "epoch": 0.82,
1105
  "learning_rate": 1.0186263096623981e-07,
1106
- "logits/chosen": -2.484541416168213,
1107
- "logits/rejected": -2.4959442615509033,
1108
- "logps/chosen": -272.62445068359375,
1109
- "logps/rejected": -249.4247589111328,
1110
- "loss": 0.6416,
1111
- "rewards/accuracies": 0.6187499761581421,
1112
- "rewards/chosen": 0.03473570942878723,
1113
- "rewards/margins": 0.057318903505802155,
1114
- "rewards/rejected": -0.022583190351724625,
1115
- "step": 780
 
 
1116
  },
1117
  {
1118
  "epoch": 0.83,
1119
  "learning_rate": 9.604190919674039e-08,
1120
- "logits/chosen": -2.480217218399048,
1121
- "logits/rejected": -2.4997220039367676,
1122
- "logps/chosen": -253.22061157226562,
1123
- "logps/rejected": -248.5176239013672,
1124
- "loss": 0.6447,
1125
- "rewards/accuracies": 0.612500011920929,
1126
- "rewards/chosen": 0.02552936039865017,
1127
- "rewards/margins": 0.05032141134142876,
1128
- "rewards/rejected": -0.024792049080133438,
1129
- "step": 790
 
 
1130
  },
1131
  {
1132
  "epoch": 0.84,
1133
  "learning_rate": 9.022118742724097e-08,
1134
- "logits/chosen": -2.4872233867645264,
1135
- "logits/rejected": -2.461895704269409,
1136
- "logps/chosen": -271.40191650390625,
1137
- "logps/rejected": -261.29241943359375,
1138
- "loss": 0.6352,
1139
- "rewards/accuracies": 0.640625,
1140
- "rewards/chosen": 0.035524625331163406,
1141
- "rewards/margins": 0.0665384978055954,
1142
- "rewards/rejected": -0.031013870611786842,
1143
- "step": 800
 
 
1144
  },
1145
  {
1146
  "epoch": 0.85,
1147
  "learning_rate": 8.440046565774157e-08,
1148
- "logits/chosen": -2.462954521179199,
1149
- "logits/rejected": -2.472172498703003,
1150
- "logps/chosen": -272.635986328125,
1151
- "logps/rejected": -229.35366821289062,
1152
- "loss": 0.6432,
1153
- "rewards/accuracies": 0.65625,
1154
- "rewards/chosen": 0.03845527023077011,
1155
- "rewards/margins": 0.05895563215017319,
1156
- "rewards/rejected": -0.020500360056757927,
1157
- "step": 810
 
 
1158
  },
1159
  {
1160
  "epoch": 0.86,
1161
  "learning_rate": 7.857974388824213e-08,
1162
- "logits/chosen": -2.481274127960205,
1163
- "logits/rejected": -2.4623420238494873,
1164
- "logps/chosen": -282.552734375,
1165
- "logps/rejected": -246.048095703125,
1166
- "loss": 0.6457,
1167
- "rewards/accuracies": 0.6312500238418579,
1168
- "rewards/chosen": 0.04651721939444542,
1169
- "rewards/margins": 0.07211866974830627,
1170
- "rewards/rejected": -0.025601446628570557,
1171
- "step": 820
 
 
1172
  },
1173
  {
1174
  "epoch": 0.87,
1175
  "learning_rate": 7.275902211874273e-08,
1176
- "logits/chosen": -2.495702028274536,
1177
- "logits/rejected": -2.4765870571136475,
1178
- "logps/chosen": -249.3843536376953,
1179
- "logps/rejected": -278.4346923828125,
1180
- "loss": 0.644,
1181
- "rewards/accuracies": 0.684374988079071,
1182
- "rewards/chosen": 0.021697301417589188,
1183
- "rewards/margins": 0.059917084872722626,
1184
- "rewards/rejected": -0.038219790905714035,
1185
- "step": 830
 
 
1186
  },
1187
  {
1188
  "epoch": 0.88,
1189
  "learning_rate": 6.693830034924331e-08,
1190
- "logits/chosen": -2.4697625637054443,
1191
- "logits/rejected": -2.4722721576690674,
1192
- "logps/chosen": -265.5938415527344,
1193
- "logps/rejected": -257.2969665527344,
1194
- "loss": 0.6376,
1195
- "rewards/accuracies": 0.653124988079071,
1196
- "rewards/chosen": 0.028097212314605713,
1197
- "rewards/margins": 0.046820152550935745,
1198
- "rewards/rejected": -0.018722938373684883,
1199
- "step": 840
 
 
1200
  },
1201
  {
1202
  "epoch": 0.89,
1203
  "learning_rate": 6.111757857974389e-08,
1204
- "logits/chosen": -2.4835047721862793,
1205
- "logits/rejected": -2.5235934257507324,
1206
- "logps/chosen": -263.8602294921875,
1207
- "logps/rejected": -241.2860870361328,
1208
- "loss": 0.6443,
1209
- "rewards/accuracies": 0.640625,
1210
- "rewards/chosen": 0.03554076701402664,
1211
- "rewards/margins": 0.065843865275383,
1212
- "rewards/rejected": -0.030303100124001503,
1213
- "step": 850
 
 
1214
  },
1215
  {
1216
  "epoch": 0.9,
1217
  "learning_rate": 5.529685681024446e-08,
1218
- "logits/chosen": -2.446396589279175,
1219
- "logits/rejected": -2.447554111480713,
1220
- "logps/chosen": -278.2039489746094,
1221
- "logps/rejected": -267.393798828125,
1222
- "loss": 0.6433,
1223
- "rewards/accuracies": 0.6312500238418579,
1224
- "rewards/chosen": 0.02485939860343933,
1225
- "rewards/margins": 0.04789874702692032,
1226
- "rewards/rejected": -0.023039352148771286,
1227
- "step": 860
 
 
1228
  },
1229
  {
1230
  "epoch": 0.91,
1231
  "learning_rate": 4.947613504074505e-08,
1232
- "logits/chosen": -2.451906442642212,
1233
- "logits/rejected": -2.4401917457580566,
1234
- "logps/chosen": -254.4597625732422,
1235
- "logps/rejected": -240.5189666748047,
1236
- "loss": 0.6406,
1237
- "rewards/accuracies": 0.596875011920929,
1238
- "rewards/chosen": 0.03511255979537964,
1239
- "rewards/margins": 0.05534617230296135,
1240
- "rewards/rejected": -0.020233619958162308,
1241
- "step": 870
 
 
1242
  },
1243
  {
1244
  "epoch": 0.92,
1245
  "learning_rate": 4.365541327124563e-08,
1246
- "logits/chosen": -2.507020950317383,
1247
- "logits/rejected": -2.4901695251464844,
1248
- "logps/chosen": -260.8884582519531,
1249
- "logps/rejected": -251.20938110351562,
1250
- "loss": 0.6417,
1251
- "rewards/accuracies": 0.6312500238418579,
1252
- "rewards/chosen": 0.04014205187559128,
1253
- "rewards/margins": 0.060112785547971725,
1254
- "rewards/rejected": -0.01997072994709015,
1255
- "step": 880
 
 
1256
  },
1257
  {
1258
  "epoch": 0.93,
1259
  "learning_rate": 3.783469150174622e-08,
1260
- "logits/chosen": -2.5302183628082275,
1261
- "logits/rejected": -2.5431013107299805,
1262
- "logps/chosen": -254.4590606689453,
1263
- "logps/rejected": -259.6826477050781,
1264
- "loss": 0.6409,
1265
- "rewards/accuracies": 0.643750011920929,
1266
- "rewards/chosen": 0.02364221215248108,
1267
- "rewards/margins": 0.04719501733779907,
1268
- "rewards/rejected": -0.023552805185317993,
1269
- "step": 890
 
 
1270
  },
1271
  {
1272
  "epoch": 0.94,
1273
  "learning_rate": 3.20139697322468e-08,
1274
- "logits/chosen": -2.4807305335998535,
1275
- "logits/rejected": -2.5058140754699707,
1276
- "logps/chosen": -264.16021728515625,
1277
- "logps/rejected": -249.19570922851562,
1278
- "loss": 0.6452,
1279
- "rewards/accuracies": 0.628125011920929,
1280
- "rewards/chosen": 0.029653768986463547,
1281
- "rewards/margins": 0.05728424713015556,
1282
- "rewards/rejected": -0.027630474418401718,
1283
- "step": 900
 
 
1284
  },
1285
  {
1286
  "epoch": 0.95,
1287
  "learning_rate": 2.619324796274738e-08,
1288
- "logits/chosen": -2.48866605758667,
1289
- "logits/rejected": -2.5024585723876953,
1290
- "logps/chosen": -289.67401123046875,
1291
- "logps/rejected": -261.8035583496094,
1292
- "loss": 0.6424,
1293
- "rewards/accuracies": 0.643750011920929,
1294
- "rewards/chosen": 0.041562773287296295,
1295
- "rewards/margins": 0.06672655045986176,
1296
- "rewards/rejected": -0.025163773447275162,
1297
- "step": 910
 
 
1298
  },
1299
  {
1300
  "epoch": 0.96,
1301
  "learning_rate": 2.037252619324796e-08,
1302
- "logits/chosen": -2.4807181358337402,
1303
- "logits/rejected": -2.4651541709899902,
1304
- "logps/chosen": -270.3717346191406,
1305
- "logps/rejected": -248.5556182861328,
1306
- "loss": 0.6415,
1307
- "rewards/accuracies": 0.653124988079071,
1308
- "rewards/chosen": 0.022484585642814636,
1309
- "rewards/margins": 0.05588115006685257,
1310
- "rewards/rejected": -0.033396560698747635,
1311
- "step": 920
 
 
1312
  },
1313
  {
1314
  "epoch": 0.97,
1315
  "learning_rate": 1.4551804423748545e-08,
1316
- "logits/chosen": -2.4763240814208984,
1317
- "logits/rejected": -2.4676055908203125,
1318
- "logps/chosen": -272.7355041503906,
1319
- "logps/rejected": -254.0940704345703,
1320
- "loss": 0.6432,
1321
- "rewards/accuracies": 0.65625,
1322
- "rewards/chosen": 0.046184636652469635,
1323
- "rewards/margins": 0.0702408105134964,
1324
- "rewards/rejected": -0.02405618131160736,
1325
- "step": 930
 
 
1326
  },
1327
  {
1328
  "epoch": 0.98,
1329
  "learning_rate": 8.731082654249125e-09,
1330
- "logits/chosen": -2.4857518672943115,
1331
- "logits/rejected": -2.5030598640441895,
1332
- "logps/chosen": -275.62445068359375,
1333
- "logps/rejected": -264.8072509765625,
1334
- "loss": 0.6407,
1335
- "rewards/accuracies": 0.671875,
1336
- "rewards/chosen": 0.036477264016866684,
1337
- "rewards/margins": 0.06461935490369797,
1338
- "rewards/rejected": -0.02814210020005703,
1339
- "step": 940
 
 
1340
  },
1341
  {
1342
  "epoch": 0.99,
1343
  "learning_rate": 2.910360884749709e-09,
1344
- "logits/chosen": -2.4887900352478027,
1345
- "logits/rejected": -2.494776725769043,
1346
- "logps/chosen": -259.7311706542969,
1347
- "logps/rejected": -253.02346801757812,
1348
- "loss": 0.6442,
1349
- "rewards/accuracies": 0.65625,
1350
- "rewards/chosen": 0.02119762822985649,
1351
- "rewards/margins": 0.04852701723575592,
1352
- "rewards/rejected": -0.02732938900589943,
1353
- "step": 950
 
 
1354
  },
1355
  {
1356
  "epoch": 1.0,
1357
- "eval_logits/chosen": -2.4944536685943604,
1358
- "eval_logits/rejected": -2.4963433742523193,
1359
- "eval_logps/chosen": -269.25555419921875,
1360
- "eval_logps/rejected": -253.21238708496094,
1361
- "eval_loss": 0.6399702429771423,
1362
- "eval_rewards/accuracies": 0.6370000243186951,
1363
- "eval_rewards/chosen": 0.030110126361250877,
1364
- "eval_rewards/margins": 0.05743245780467987,
1365
- "eval_rewards/rejected": -0.027322327718138695,
1366
- "eval_runtime": 806.7503,
1367
- "eval_samples_per_second": 2.479,
1368
- "eval_steps_per_second": 0.31,
 
 
1369
  "step": 955
1370
  },
1371
  {
1372
  "epoch": 1.0,
1373
  "step": 955,
1374
  "total_flos": 0.0,
1375
- "train_loss": 0.6598132096035942,
1376
- "train_runtime": 45126.4521,
1377
- "train_samples_per_second": 1.355,
1378
- "train_steps_per_second": 0.021
1379
  }
1380
  ],
1381
  "logging_steps": 10,
 
11
  {
12
  "epoch": 0.0,
13
  "learning_rate": 5.208333333333333e-09,
14
+ "logits/chosen": -2.676934003829956,
15
+ "logits/rejected": -2.509021043777466,
16
+ "logps/chosen": -304.709228515625,
17
+ "logps/rejected": -229.49505615234375,
18
  "loss": 0.6931,
19
+ "pred_label": 0.0,
20
  "rewards/accuracies": 0.0,
21
  "rewards/chosen": 0.0,
22
  "rewards/margins": 0.0,
23
  "rewards/rejected": 0.0,
24
+ "step": 1,
25
+ "use_label": 10.0
26
  },
27
  {
28
  "epoch": 0.01,
29
  "learning_rate": 5.208333333333333e-08,
30
+ "logits/chosen": -2.5309348106384277,
31
+ "logits/rejected": -2.5985612869262695,
32
+ "logps/chosen": -313.4403381347656,
33
+ "logps/rejected": -277.2276611328125,
34
+ "loss": 0.6812,
35
+ "pred_label": 18.55555534362793,
36
+ "rewards/accuracies": 0.4861111044883728,
37
+ "rewards/chosen": 0.0022232607007026672,
38
+ "rewards/margins": -7.403641939163208e-05,
39
+ "rewards/rejected": 0.0022972968872636557,
40
+ "step": 10,
41
+ "use_label": 71.44444274902344
42
  },
43
  {
44
  "epoch": 0.02,
45
  "learning_rate": 1.0416666666666667e-07,
46
+ "logits/chosen": -2.390097141265869,
47
+ "logits/rejected": -2.4532761573791504,
48
+ "logps/chosen": -229.01559448242188,
49
+ "logps/rejected": -232.5874786376953,
50
+ "loss": 0.6791,
51
+ "pred_label": 61.54999923706055,
52
+ "rewards/accuracies": 0.53125,
53
+ "rewards/chosen": -0.001973286736756563,
54
+ "rewards/margins": -0.0003373834188096225,
55
+ "rewards/rejected": -0.0016359034925699234,
56
+ "step": 20,
57
+ "use_label": 180.4499969482422
58
  },
59
  {
60
  "epoch": 0.03,
61
  "learning_rate": 1.5624999999999999e-07,
62
+ "logits/chosen": -2.529320001602173,
63
+ "logits/rejected": -2.4789929389953613,
64
+ "logps/chosen": -270.6199951171875,
65
+ "logps/rejected": -244.80044555664062,
66
+ "loss": 0.6782,
67
+ "pred_label": 106.80000305175781,
68
+ "rewards/accuracies": 0.48124998807907104,
69
+ "rewards/chosen": -0.006186917424201965,
70
+ "rewards/margins": -0.005202265456318855,
71
+ "rewards/rejected": -0.0009846522007137537,
72
+ "step": 30,
73
+ "use_label": 295.20001220703125
74
  },
75
  {
76
  "epoch": 0.04,
77
  "learning_rate": 2.0833333333333333e-07,
78
+ "logits/chosen": -2.4986624717712402,
79
+ "logits/rejected": -2.5215377807617188,
80
+ "logps/chosen": -270.62274169921875,
81
+ "logps/rejected": -263.65087890625,
82
+ "loss": 0.6789,
83
+ "pred_label": 156.22500610351562,
84
+ "rewards/accuracies": 0.5687500238418579,
85
+ "rewards/chosen": -0.0010104719549417496,
86
+ "rewards/margins": 0.0030316715128719807,
87
+ "rewards/rejected": -0.004042143002152443,
88
+ "step": 40,
89
+ "use_label": 405.7749938964844
90
  },
91
  {
92
  "epoch": 0.05,
93
  "learning_rate": 2.604166666666667e-07,
94
+ "logits/chosen": -2.531646490097046,
95
+ "logits/rejected": -2.5199639797210693,
96
+ "logps/chosen": -255.382080078125,
97
+ "logps/rejected": -249.44775390625,
98
+ "loss": 0.6778,
99
+ "pred_label": 213.4250030517578,
100
+ "rewards/accuracies": 0.4937500059604645,
101
+ "rewards/chosen": -0.0030100971926003695,
102
+ "rewards/margins": -0.00096442288486287,
103
+ "rewards/rejected": -0.0020456742495298386,
104
+ "step": 50,
105
+ "use_label": 508.57501220703125
106
  },
107
  {
108
  "epoch": 0.06,
109
  "learning_rate": 3.1249999999999997e-07,
110
+ "logits/chosen": -2.4867427349090576,
111
+ "logits/rejected": -2.473141670227051,
112
+ "logps/chosen": -278.27655029296875,
113
+ "logps/rejected": -257.5716247558594,
114
+ "loss": 0.6784,
115
+ "pred_label": 266.17498779296875,
116
+ "rewards/accuracies": 0.4749999940395355,
117
+ "rewards/chosen": -0.0048277489840984344,
118
+ "rewards/margins": -0.004098966252058744,
119
+ "rewards/rejected": -0.0007287820335477591,
120
+ "step": 60,
121
+ "use_label": 615.8250122070312
122
  },
123
  {
124
  "epoch": 0.07,
125
  "learning_rate": 3.645833333333333e-07,
126
+ "logits/chosen": -2.4988815784454346,
127
+ "logits/rejected": -2.492649793624878,
128
+ "logps/chosen": -285.4491271972656,
129
+ "logps/rejected": -263.70635986328125,
130
+ "loss": 0.6793,
131
+ "pred_label": 313.125,
132
+ "rewards/accuracies": 0.44999998807907104,
133
+ "rewards/chosen": -0.004624291323125362,
134
+ "rewards/margins": -0.003086260985583067,
135
+ "rewards/rejected": -0.0015380297554656863,
136
+ "step": 70,
137
+ "use_label": 728.875
138
  },
139
  {
140
  "epoch": 0.08,
141
  "learning_rate": 4.1666666666666667e-07,
142
+ "logits/chosen": -2.447936534881592,
143
+ "logits/rejected": -2.440683126449585,
144
+ "logps/chosen": -292.83795166015625,
145
+ "logps/rejected": -262.8486328125,
146
+ "loss": 0.6787,
147
+ "pred_label": 365.5,
148
+ "rewards/accuracies": 0.53125,
149
+ "rewards/chosen": -0.00346105033531785,
150
+ "rewards/margins": -0.00289200060069561,
151
+ "rewards/rejected": -0.0005690503749065101,
152
+ "step": 80,
153
+ "use_label": 836.5
154
  },
155
  {
156
  "epoch": 0.09,
157
  "learning_rate": 4.6874999999999996e-07,
158
+ "logits/chosen": -2.410609006881714,
159
+ "logits/rejected": -2.4672443866729736,
160
+ "logps/chosen": -281.00421142578125,
161
+ "logps/rejected": -256.7078552246094,
162
+ "loss": 0.6794,
163
+ "pred_label": 423.875,
164
+ "rewards/accuracies": 0.512499988079071,
165
+ "rewards/chosen": 0.0006400573183782399,
166
+ "rewards/margins": -0.0020913761109113693,
167
+ "rewards/rejected": 0.002731433603912592,
168
+ "step": 90,
169
+ "use_label": 938.125
170
  },
171
  {
172
  "epoch": 0.1,
173
  "learning_rate": 4.976717112922002e-07,
174
+ "logits/chosen": -2.4385340213775635,
175
+ "logits/rejected": -2.401470899581909,
176
+ "logps/chosen": -263.50323486328125,
177
+ "logps/rejected": -282.2510070800781,
178
+ "loss": 0.6781,
179
+ "pred_label": 479.6000061035156,
180
+ "rewards/accuracies": 0.550000011920929,
181
+ "rewards/chosen": 0.007567421533167362,
182
+ "rewards/margins": 0.010446270927786827,
183
+ "rewards/rejected": -0.002878849394619465,
184
+ "step": 100,
185
+ "use_label": 1042.4000244140625
186
  },
187
  {
188
  "epoch": 0.12,
189
  "learning_rate": 4.918509895227007e-07,
190
+ "logits/chosen": -2.4747567176818848,
191
+ "logits/rejected": -2.4313316345214844,
192
+ "logps/chosen": -255.5030975341797,
193
+ "logps/rejected": -241.1230926513672,
194
+ "loss": 0.6776,
195
+ "pred_label": 530.625,
196
+ "rewards/accuracies": 0.5,
197
+ "rewards/chosen": 0.0008927445742301643,
198
+ "rewards/margins": 0.001805878826417029,
199
+ "rewards/rejected": -0.0009131338447332382,
200
+ "step": 110,
201
+ "use_label": 1151.375
202
  },
203
  {
204
  "epoch": 0.13,
205
  "learning_rate": 4.860302677532014e-07,
206
+ "logits/chosen": -2.432814836502075,
207
+ "logits/rejected": -2.4882383346557617,
208
+ "logps/chosen": -266.65386962890625,
209
+ "logps/rejected": -237.3206787109375,
210
+ "loss": 0.6794,
211
+ "pred_label": 576.875,
212
+ "rewards/accuracies": 0.5562499761581421,
213
+ "rewards/chosen": 0.008614806458353996,
214
+ "rewards/margins": 0.01159285381436348,
215
+ "rewards/rejected": -0.0029780478216707706,
216
+ "step": 120,
217
+ "use_label": 1265.125
218
  },
219
  {
220
  "epoch": 0.14,
221
  "learning_rate": 4.802095459837019e-07,
222
+ "logits/chosen": -2.498955249786377,
223
+ "logits/rejected": -2.49072003364563,
224
+ "logps/chosen": -295.6458435058594,
225
+ "logps/rejected": -263.4668273925781,
226
+ "loss": 0.6774,
227
+ "pred_label": 627.4749755859375,
228
+ "rewards/accuracies": 0.5062500238418579,
229
+ "rewards/chosen": -0.00027961478917859495,
230
+ "rewards/margins": 0.0010693027870729566,
231
+ "rewards/rejected": -0.001348917605355382,
232
+ "step": 130,
233
+ "use_label": 1374.5250244140625
234
  },
235
  {
236
  "epoch": 0.15,
237
  "learning_rate": 4.743888242142026e-07,
238
+ "logits/chosen": -2.50268292427063,
239
+ "logits/rejected": -2.506229877471924,
240
+ "logps/chosen": -249.8160400390625,
241
+ "logps/rejected": -228.35537719726562,
242
+ "loss": 0.6792,
243
+ "pred_label": 681.7999877929688,
244
+ "rewards/accuracies": 0.48750001192092896,
245
+ "rewards/chosen": 0.0006030676886439323,
246
+ "rewards/margins": -0.00023779459297657013,
247
+ "rewards/rejected": 0.0008408633293583989,
248
+ "step": 140,
249
+ "use_label": 1480.199951171875
250
  },
251
  {
252
  "epoch": 0.16,
253
  "learning_rate": 4.685681024447031e-07,
254
+ "logits/chosen": -2.430985689163208,
255
+ "logits/rejected": -2.438312292098999,
256
+ "logps/chosen": -282.99285888671875,
257
+ "logps/rejected": -260.1983947753906,
258
+ "loss": 0.6764,
259
+ "pred_label": 730.6749877929688,
260
+ "rewards/accuracies": 0.5249999761581421,
261
+ "rewards/chosen": 0.007840326987206936,
262
+ "rewards/margins": 0.008648511953651905,
263
+ "rewards/rejected": -0.0008081849664449692,
264
+ "step": 150,
265
+ "use_label": 1591.324951171875
266
  },
267
  {
268
  "epoch": 0.17,
269
  "learning_rate": 4.627473806752037e-07,
270
+ "logits/chosen": -2.513338565826416,
271
+ "logits/rejected": -2.4252638816833496,
272
+ "logps/chosen": -251.98593139648438,
273
+ "logps/rejected": -258.3878479003906,
274
+ "loss": 0.6779,
275
+ "pred_label": 785.4500122070312,
276
+ "rewards/accuracies": 0.5562499761581421,
277
+ "rewards/chosen": 0.009579241275787354,
278
+ "rewards/margins": 0.013932722620666027,
279
+ "rewards/rejected": -0.004353481810539961,
280
+ "step": 160,
281
+ "use_label": 1696.550048828125
282
  },
283
  {
284
  "epoch": 0.18,
285
  "learning_rate": 4.5692665890570433e-07,
286
+ "logits/chosen": -2.51324725151062,
287
+ "logits/rejected": -2.5183651447296143,
288
+ "logps/chosen": -286.78240966796875,
289
+ "logps/rejected": -242.28451538085938,
290
+ "loss": 0.6781,
291
+ "pred_label": 837.375,
292
+ "rewards/accuracies": 0.625,
293
+ "rewards/chosen": 0.010844933800399303,
294
+ "rewards/margins": 0.014444952830672264,
295
+ "rewards/rejected": -0.003600016701966524,
296
+ "step": 170,
297
+ "use_label": 1804.625
298
  },
299
  {
300
  "epoch": 0.19,
301
  "learning_rate": 4.5110593713620486e-07,
302
+ "logits/chosen": -2.4405276775360107,
303
+ "logits/rejected": -2.418701410293579,
304
+ "logps/chosen": -245.29843139648438,
305
+ "logps/rejected": -261.94500732421875,
306
+ "loss": 0.6779,
307
+ "pred_label": 900.5250244140625,
308
+ "rewards/accuracies": 0.512499988079071,
309
+ "rewards/chosen": -9.365100413560867e-05,
310
+ "rewards/margins": 0.0037569026462733746,
311
+ "rewards/rejected": -0.003850553184747696,
312
+ "step": 180,
313
+ "use_label": 1901.4749755859375
314
  },
315
  {
316
  "epoch": 0.2,
317
  "learning_rate": 4.4528521536670544e-07,
318
+ "logits/chosen": -2.549407482147217,
319
+ "logits/rejected": -2.5117835998535156,
320
+ "logps/chosen": -291.4512939453125,
321
+ "logps/rejected": -250.8589324951172,
322
+ "loss": 0.6749,
323
+ "pred_label": 960.4500122070312,
324
+ "rewards/accuracies": 0.5562499761581421,
325
+ "rewards/chosen": 0.0050302534364163876,
326
+ "rewards/margins": 0.01830260455608368,
327
+ "rewards/rejected": -0.013272350654006004,
328
+ "step": 190,
329
+ "use_label": 2001.550048828125
330
  },
331
  {
332
  "epoch": 0.21,
333
  "learning_rate": 4.3946449359720607e-07,
334
+ "logits/chosen": -2.521691083908081,
335
+ "logits/rejected": -2.522428035736084,
336
+ "logps/chosen": -251.04867553710938,
337
+ "logps/rejected": -259.01055908203125,
338
+ "loss": 0.6754,
339
+ "pred_label": 1024.925048828125,
340
+ "rewards/accuracies": 0.512499988079071,
341
+ "rewards/chosen": 0.004073199350386858,
342
+ "rewards/margins": 0.008589169010519981,
343
+ "rewards/rejected": -0.004515970591455698,
344
+ "step": 200,
345
+ "use_label": 2097.074951171875
346
  },
347
  {
348
  "epoch": 0.22,
349
  "learning_rate": 4.336437718277066e-07,
350
+ "logits/chosen": -2.5071074962615967,
351
+ "logits/rejected": -2.5461485385894775,
352
+ "logps/chosen": -288.81842041015625,
353
+ "logps/rejected": -246.823486328125,
354
+ "loss": 0.6752,
355
+ "pred_label": 1087.875,
356
+ "rewards/accuracies": 0.53125,
357
+ "rewards/chosen": 0.011537188664078712,
358
+ "rewards/margins": 0.013686036691069603,
359
+ "rewards/rejected": -0.0021488501224666834,
360
+ "step": 210,
361
+ "use_label": 2194.125
362
  },
363
  {
364
  "epoch": 0.23,
365
  "learning_rate": 4.278230500582072e-07,
366
+ "logits/chosen": -2.4844157695770264,
367
+ "logits/rejected": -2.4883625507354736,
368
+ "logps/chosen": -263.35015869140625,
369
+ "logps/rejected": -261.3338623046875,
370
+ "loss": 0.6738,
371
+ "pred_label": 1153.574951171875,
372
+ "rewards/accuracies": 0.612500011920929,
373
+ "rewards/chosen": 0.0034189000725746155,
374
+ "rewards/margins": 0.01800468936562538,
375
+ "rewards/rejected": -0.014585788361728191,
376
+ "step": 220,
377
+ "use_label": 2288.425048828125
378
  },
379
  {
380
  "epoch": 0.24,
381
  "learning_rate": 4.220023282887078e-07,
382
+ "logits/chosen": -2.4569809436798096,
383
+ "logits/rejected": -2.4612298011779785,
384
+ "logps/chosen": -294.7366943359375,
385
+ "logps/rejected": -254.261962890625,
386
+ "loss": 0.672,
387
+ "pred_label": 1221.175048828125,
388
+ "rewards/accuracies": 0.581250011920929,
389
+ "rewards/chosen": 0.01110165473073721,
390
+ "rewards/margins": 0.014376315288245678,
391
+ "rewards/rejected": -0.0032746598590165377,
392
+ "step": 230,
393
+ "use_label": 2380.824951171875
394
  },
395
  {
396
  "epoch": 0.25,
397
  "learning_rate": 4.1618160651920834e-07,
398
+ "logits/chosen": -2.440027952194214,
399
+ "logits/rejected": -2.415015935897827,
400
+ "logps/chosen": -257.82342529296875,
401
+ "logps/rejected": -215.18408203125,
402
+ "loss": 0.6712,
403
+ "pred_label": 1294.2750244140625,
404
+ "rewards/accuracies": 0.5625,
405
+ "rewards/chosen": 0.011741789057850838,
406
+ "rewards/margins": 0.014540466479957104,
407
+ "rewards/rejected": -0.0027986769564449787,
408
+ "step": 240,
409
+ "use_label": 2467.72509765625
410
  },
411
  {
412
  "epoch": 0.26,
413
  "learning_rate": 4.103608847497089e-07,
414
+ "logits/chosen": -2.4385263919830322,
415
+ "logits/rejected": -2.4420063495635986,
416
+ "logps/chosen": -277.59747314453125,
417
+ "logps/rejected": -258.9275817871094,
418
+ "loss": 0.672,
419
+ "pred_label": 1364.699951171875,
420
+ "rewards/accuracies": 0.6000000238418579,
421
+ "rewards/chosen": 0.01612050086259842,
422
+ "rewards/margins": 0.02269991859793663,
423
+ "rewards/rejected": -0.0065794168040156364,
424
+ "step": 250,
425
+ "use_label": 2557.300048828125
426
  },
427
  {
428
  "epoch": 0.27,
429
  "learning_rate": 4.0454016298020956e-07,
430
+ "logits/chosen": -2.42997407913208,
431
+ "logits/rejected": -2.424861431121826,
432
+ "logps/chosen": -239.51095581054688,
433
+ "logps/rejected": -240.63623046875,
434
+ "loss": 0.6707,
435
+ "pred_label": 1427.824951171875,
436
+ "rewards/accuracies": 0.48750001192092896,
437
+ "rewards/chosen": 0.000991971348412335,
438
+ "rewards/margins": 0.004964248277246952,
439
+ "rewards/rejected": -0.003972277976572514,
440
+ "step": 260,
441
+ "use_label": 2654.175048828125
442
  },
443
  {
444
  "epoch": 0.28,
445
  "learning_rate": 3.987194412107101e-07,
446
+ "logits/chosen": -2.4381518363952637,
447
+ "logits/rejected": -2.46921706199646,
448
+ "logps/chosen": -280.505859375,
449
+ "logps/rejected": -266.14080810546875,
450
  "loss": 0.6735,
451
+ "pred_label": 1494.574951171875,
452
+ "rewards/accuracies": 0.6000000238418579,
453
+ "rewards/chosen": 0.01380440779030323,
454
+ "rewards/margins": 0.023902520537376404,
455
+ "rewards/rejected": -0.010098112747073174,
456
+ "step": 270,
457
+ "use_label": 2747.425048828125
458
  },
459
  {
460
  "epoch": 0.29,
461
  "learning_rate": 3.9289871944121066e-07,
462
+ "logits/chosen": -2.527064800262451,
463
+ "logits/rejected": -2.552640438079834,
464
+ "logps/chosen": -284.7080078125,
465
+ "logps/rejected": -255.3748016357422,
466
+ "loss": 0.6706,
467
+ "pred_label": 1554.8499755859375,
468
+ "rewards/accuracies": 0.6187499761581421,
469
+ "rewards/chosen": 0.0216833408921957,
470
+ "rewards/margins": 0.026800300925970078,
471
+ "rewards/rejected": -0.005116959102451801,
472
+ "step": 280,
473
+ "use_label": 2847.14990234375
474
  },
475
  {
476
  "epoch": 0.3,
477
  "learning_rate": 3.870779976717113e-07,
478
+ "logits/chosen": -2.5222768783569336,
479
+ "logits/rejected": -2.508826732635498,
480
+ "logps/chosen": -278.7129821777344,
481
+ "logps/rejected": -220.00210571289062,
482
+ "loss": 0.669,
483
+ "pred_label": 1625.199951171875,
484
+ "rewards/accuracies": 0.5874999761581421,
485
+ "rewards/chosen": 0.004253073129802942,
486
+ "rewards/margins": 0.014592866413295269,
487
+ "rewards/rejected": -0.010339794680476189,
488
+ "step": 290,
489
+ "use_label": 2936.800048828125
490
  },
491
  {
492
  "epoch": 0.31,
493
  "learning_rate": 3.812572759022118e-07,
494
+ "logits/chosen": -2.4862565994262695,
495
+ "logits/rejected": -2.4585397243499756,
496
+ "logps/chosen": -254.63589477539062,
497
+ "logps/rejected": -230.22238159179688,
498
+ "loss": 0.669,
499
+ "pred_label": 1695.0250244140625,
500
+ "rewards/accuracies": 0.6312500238418579,
501
+ "rewards/chosen": 0.013738395646214485,
502
+ "rewards/margins": 0.030270254239439964,
503
+ "rewards/rejected": -0.01653185673058033,
504
+ "step": 300,
505
+ "use_label": 3026.97509765625
506
  },
507
  {
508
  "epoch": 0.32,
509
  "learning_rate": 3.754365541327124e-07,
510
+ "logits/chosen": -2.490318775177002,
511
+ "logits/rejected": -2.411966323852539,
512
+ "logps/chosen": -255.95016479492188,
513
+ "logps/rejected": -278.394775390625,
514
+ "loss": 0.6677,
515
+ "pred_label": 1769.550048828125,
516
+ "rewards/accuracies": 0.612500011920929,
517
+ "rewards/chosen": 0.011918948963284492,
518
+ "rewards/margins": 0.024249419569969177,
519
+ "rewards/rejected": -0.012330473400652409,
520
+ "step": 310,
521
+ "use_label": 3112.449951171875
522
  },
523
  {
524
  "epoch": 0.33,
525
  "learning_rate": 3.6961583236321304e-07,
526
+ "logits/chosen": -2.4655749797821045,
527
+ "logits/rejected": -2.429624080657959,
528
+ "logps/chosen": -279.8106384277344,
529
+ "logps/rejected": -272.11993408203125,
530
+ "loss": 0.6648,
531
+ "pred_label": 1836.0,
532
+ "rewards/accuracies": 0.606249988079071,
533
+ "rewards/chosen": 0.012938129715621471,
534
+ "rewards/margins": 0.027768433094024658,
535
+ "rewards/rejected": -0.014830301515758038,
536
+ "step": 320,
537
+ "use_label": 3206.0
538
  },
539
  {
540
  "epoch": 0.35,
541
  "learning_rate": 3.637951105937136e-07,
542
+ "logits/chosen": -2.494778633117676,
543
+ "logits/rejected": -2.445328712463379,
544
+ "logps/chosen": -258.9544982910156,
545
+ "logps/rejected": -226.5720977783203,
546
+ "loss": 0.6674,
547
+ "pred_label": 1910.1500244140625,
548
+ "rewards/accuracies": 0.5375000238418579,
549
+ "rewards/chosen": 0.00947931781411171,
550
+ "rewards/margins": 0.01889108493924141,
551
+ "rewards/rejected": -0.009411768987774849,
552
+ "step": 330,
553
+ "use_label": 3291.85009765625
554
  },
555
  {
556
  "epoch": 0.36,
557
  "learning_rate": 3.579743888242142e-07,
558
+ "logits/chosen": -2.44804048538208,
559
+ "logits/rejected": -2.4573702812194824,
560
+ "logps/chosen": -267.03192138671875,
561
+ "logps/rejected": -228.086181640625,
562
+ "loss": 0.6657,
563
+ "pred_label": 1974.324951171875,
564
+ "rewards/accuracies": 0.581250011920929,
565
+ "rewards/chosen": 0.018211424350738525,
566
+ "rewards/margins": 0.021834325045347214,
567
+ "rewards/rejected": -0.0036229020915925503,
568
+ "step": 340,
569
+ "use_label": 3387.675048828125
570
  },
571
  {
572
  "epoch": 0.37,
573
  "learning_rate": 3.521536670547148e-07,
574
+ "logits/chosen": -2.4245553016662598,
575
+ "logits/rejected": -2.4514193534851074,
576
+ "logps/chosen": -284.3450927734375,
577
+ "logps/rejected": -268.92486572265625,
578
+ "loss": 0.6631,
579
+ "pred_label": 2047.8499755859375,
580
+ "rewards/accuracies": 0.71875,
581
+ "rewards/chosen": 0.02513638138771057,
582
+ "rewards/margins": 0.04776451736688614,
583
+ "rewards/rejected": -0.02262813411653042,
584
+ "step": 350,
585
+ "use_label": 3474.14990234375
586
  },
587
  {
588
  "epoch": 0.38,
589
  "learning_rate": 3.4633294528521536e-07,
590
+ "logits/chosen": -2.5096325874328613,
591
+ "logits/rejected": -2.5362212657928467,
592
+ "logps/chosen": -262.33953857421875,
593
+ "logps/rejected": -256.80279541015625,
594
+ "loss": 0.6611,
595
+ "pred_label": 2130.89990234375,
596
+ "rewards/accuracies": 0.612500011920929,
597
+ "rewards/chosen": 0.008055051788687706,
598
+ "rewards/margins": 0.03220166265964508,
599
+ "rewards/rejected": -0.024146610870957375,
600
+ "step": 360,
601
+ "use_label": 3551.10009765625
602
  },
603
  {
604
  "epoch": 0.39,
605
  "learning_rate": 3.4051222351571594e-07,
606
+ "logits/chosen": -2.4874494075775146,
607
+ "logits/rejected": -2.4494900703430176,
608
+ "logps/chosen": -263.681884765625,
609
+ "logps/rejected": -261.24871826171875,
610
+ "loss": 0.6645,
611
+ "pred_label": 2212.10009765625,
612
+ "rewards/accuracies": 0.637499988079071,
613
+ "rewards/chosen": 0.01168682612478733,
614
+ "rewards/margins": 0.035164039582014084,
615
+ "rewards/rejected": -0.023477211594581604,
616
+ "step": 370,
617
+ "use_label": 3629.89990234375
618
  },
619
  {
620
  "epoch": 0.4,
621
  "learning_rate": 3.346915017462165e-07,
622
+ "logits/chosen": -2.5527141094207764,
623
+ "logits/rejected": -2.500330686569214,
624
+ "logps/chosen": -270.70123291015625,
625
+ "logps/rejected": -251.94638061523438,
626
+ "loss": 0.6568,
627
+ "pred_label": 2298.27490234375,
628
+ "rewards/accuracies": 0.65625,
629
+ "rewards/chosen": 0.015008668415248394,
630
+ "rewards/margins": 0.03496234118938446,
631
+ "rewards/rejected": -0.019953671842813492,
632
+ "step": 380,
633
+ "use_label": 3703.72509765625
634
  },
635
  {
636
  "epoch": 0.41,
637
  "learning_rate": 3.288707799767171e-07,
638
+ "logits/chosen": -2.4154019355773926,
639
+ "logits/rejected": -2.410980224609375,
640
+ "logps/chosen": -267.69012451171875,
641
+ "logps/rejected": -234.7194061279297,
642
+ "loss": 0.6612,
643
+ "pred_label": 2388.699951171875,
644
+ "rewards/accuracies": 0.6000000238418579,
645
+ "rewards/chosen": 0.016574550420045853,
646
+ "rewards/margins": 0.03597176447510719,
647
+ "rewards/rejected": -0.01939721405506134,
648
+ "step": 390,
649
+ "use_label": 3773.300048828125
650
  },
651
  {
652
  "epoch": 0.42,
653
  "learning_rate": 3.230500582072177e-07,
654
+ "logits/chosen": -2.573418378829956,
655
+ "logits/rejected": -2.5670266151428223,
656
+ "logps/chosen": -296.6780090332031,
657
+ "logps/rejected": -251.03109741210938,
658
+ "loss": 0.6603,
659
+ "pred_label": 2472.550048828125,
660
+ "rewards/accuracies": 0.625,
661
+ "rewards/chosen": 0.019582100212574005,
662
+ "rewards/margins": 0.03700990229845047,
663
+ "rewards/rejected": -0.017427802085876465,
664
+ "step": 400,
665
+ "use_label": 3849.449951171875
666
  },
667
  {
668
  "epoch": 0.43,
669
  "learning_rate": 3.1722933643771827e-07,
670
+ "logits/chosen": -2.5456345081329346,
671
+ "logits/rejected": -2.5357089042663574,
672
+ "logps/chosen": -287.56024169921875,
673
+ "logps/rejected": -250.7117462158203,
674
+ "loss": 0.6565,
675
+ "pred_label": 2563.89990234375,
676
+ "rewards/accuracies": 0.6000000238418579,
677
+ "rewards/chosen": 0.02417120710015297,
678
+ "rewards/margins": 0.03269309923052788,
679
+ "rewards/rejected": -0.008521895855665207,
680
+ "step": 410,
681
+ "use_label": 3918.10009765625
682
  },
683
  {
684
  "epoch": 0.44,
685
  "learning_rate": 3.1140861466821885e-07,
686
+ "logits/chosen": -2.425220251083374,
687
+ "logits/rejected": -2.403921127319336,
688
+ "logps/chosen": -231.62960815429688,
689
+ "logps/rejected": -243.0712432861328,
690
+ "loss": 0.6565,
691
+ "pred_label": 2649.25,
692
+ "rewards/accuracies": 0.6312500238418579,
693
+ "rewards/chosen": 0.014951197430491447,
694
+ "rewards/margins": 0.026637399569153786,
695
+ "rewards/rejected": -0.011686199344694614,
696
+ "step": 420,
697
+ "use_label": 3992.75
698
  },
699
  {
700
  "epoch": 0.45,
701
  "learning_rate": 3.0558789289871943e-07,
702
+ "logits/chosen": -2.4625051021575928,
703
+ "logits/rejected": -2.4735419750213623,
704
+ "logps/chosen": -266.99554443359375,
705
+ "logps/rejected": -251.20022583007812,
706
+ "loss": 0.6572,
707
+ "pred_label": 2728.300048828125,
708
+ "rewards/accuracies": 0.606249988079071,
709
+ "rewards/chosen": 0.02481374330818653,
710
+ "rewards/margins": 0.031031513586640358,
711
+ "rewards/rejected": -0.006217771675437689,
712
+ "step": 430,
713
+ "use_label": 4073.699951171875
714
  },
715
  {
716
  "epoch": 0.46,
717
  "learning_rate": 2.9976717112922e-07,
718
+ "logits/chosen": -2.51550555229187,
719
+ "logits/rejected": -2.5493228435516357,
720
+ "logps/chosen": -274.2099914550781,
721
+ "logps/rejected": -245.31491088867188,
722
+ "loss": 0.6567,
723
+ "pred_label": 2806.375,
724
+ "rewards/accuracies": 0.5625,
725
+ "rewards/chosen": 0.01486388873308897,
726
+ "rewards/margins": 0.032243579626083374,
727
+ "rewards/rejected": -0.017379695549607277,
728
+ "step": 440,
729
+ "use_label": 4155.625
730
  },
731
  {
732
  "epoch": 0.47,
733
  "learning_rate": 2.939464493597206e-07,
734
+ "logits/chosen": -2.3210535049438477,
735
+ "logits/rejected": -2.3065693378448486,
736
+ "logps/chosen": -217.0265655517578,
737
+ "logps/rejected": -216.3203582763672,
738
+ "loss": 0.6561,
739
+ "pred_label": 2881.75,
740
+ "rewards/accuracies": 0.5375000238418579,
741
+ "rewards/chosen": 0.009125987999141216,
742
+ "rewards/margins": 0.017552336677908897,
743
+ "rewards/rejected": -0.008426347747445107,
744
+ "step": 450,
745
+ "use_label": 4240.25
746
  },
747
  {
748
  "epoch": 0.48,
749
  "learning_rate": 2.8812572759022117e-07,
750
+ "logits/chosen": -2.528326988220215,
751
+ "logits/rejected": -2.513597011566162,
752
+ "logps/chosen": -290.8050231933594,
753
+ "logps/rejected": -260.4567565917969,
754
+ "loss": 0.6526,
755
+ "pred_label": 2965.02490234375,
756
+ "rewards/accuracies": 0.6000000238418579,
757
+ "rewards/chosen": 0.024029741063714027,
758
+ "rewards/margins": 0.03901149705052376,
759
+ "rewards/rejected": -0.014981756918132305,
760
+ "step": 460,
761
+ "use_label": 4316.97509765625
762
  },
763
  {
764
  "epoch": 0.49,
765
  "learning_rate": 2.8230500582072175e-07,
766
+ "logits/chosen": -2.424431085586548,
767
+ "logits/rejected": -2.41347336769104,
768
+ "logps/chosen": -254.66506958007812,
769
+ "logps/rejected": -234.6154022216797,
770
+ "loss": 0.6498,
771
+ "pred_label": 3055.699951171875,
772
+ "rewards/accuracies": 0.606249988079071,
773
+ "rewards/chosen": 0.001125166891142726,
774
+ "rewards/margins": 0.027449512854218483,
775
+ "rewards/rejected": -0.02632434293627739,
776
+ "step": 470,
777
+ "use_label": 4386.2998046875
778
  },
779
  {
780
  "epoch": 0.5,
781
  "learning_rate": 2.7648428405122233e-07,
782
+ "logits/chosen": -2.400242567062378,
783
+ "logits/rejected": -2.3543686866760254,
784
+ "logps/chosen": -235.65719604492188,
785
+ "logps/rejected": -239.40701293945312,
786
+ "loss": 0.6533,
787
+ "pred_label": 3150.449951171875,
788
+ "rewards/accuracies": 0.59375,
789
+ "rewards/chosen": 0.011093830689787865,
790
+ "rewards/margins": 0.0209193117916584,
791
+ "rewards/rejected": -0.009825478307902813,
792
+ "step": 480,
793
+ "use_label": 4451.5498046875
794
  },
795
  {
796
  "epoch": 0.51,
797
  "learning_rate": 2.706635622817229e-07,
798
+ "logits/chosen": -2.4891812801361084,
799
+ "logits/rejected": -2.4687983989715576,
800
+ "logps/chosen": -257.9729919433594,
801
+ "logps/rejected": -231.1624298095703,
802
+ "loss": 0.6504,
803
+ "pred_label": 3233.625,
804
+ "rewards/accuracies": 0.6625000238418579,
805
+ "rewards/chosen": 0.018697496503591537,
806
+ "rewards/margins": 0.042968858033418655,
807
+ "rewards/rejected": -0.02427135966718197,
808
+ "step": 490,
809
+ "use_label": 4528.375
810
  },
811
  {
812
  "epoch": 0.52,
813
  "learning_rate": 2.648428405122235e-07,
814
+ "logits/chosen": -2.4956674575805664,
815
+ "logits/rejected": -2.444516658782959,
816
+ "logps/chosen": -274.7785949707031,
817
+ "logps/rejected": -247.5068359375,
818
+ "loss": 0.6497,
819
+ "pred_label": 3323.5,
820
+ "rewards/accuracies": 0.706250011920929,
821
+ "rewards/chosen": 0.020702462643384933,
822
+ "rewards/margins": 0.048901624977588654,
823
+ "rewards/rejected": -0.02819916605949402,
824
+ "step": 500,
825
+ "use_label": 4598.5
826
  },
827
  {
828
  "epoch": 0.53,
829
  "learning_rate": 2.590221187427241e-07,
830
+ "logits/chosen": -2.490265369415283,
831
+ "logits/rejected": -2.4258570671081543,
832
+ "logps/chosen": -262.3305969238281,
833
+ "logps/rejected": -225.7815704345703,
834
+ "loss": 0.6488,
835
+ "pred_label": 3414.699951171875,
836
+ "rewards/accuracies": 0.6499999761581421,
837
+ "rewards/chosen": 0.022988121956586838,
838
+ "rewards/margins": 0.04825048893690109,
839
+ "rewards/rejected": -0.025262365117669106,
840
+ "step": 510,
841
+ "use_label": 4667.2998046875
842
  },
843
  {
844
  "epoch": 0.54,
845
  "learning_rate": 2.5320139697322466e-07,
846
+ "logits/chosen": -2.533203125,
847
+ "logits/rejected": -2.481980800628662,
848
+ "logps/chosen": -277.6612243652344,
849
+ "logps/rejected": -259.1783752441406,
850
+ "loss": 0.6496,
851
+ "pred_label": 3500.675048828125,
852
+ "rewards/accuracies": 0.6499999761581421,
853
+ "rewards/chosen": 0.03759980946779251,
854
+ "rewards/margins": 0.07107619941234589,
855
+ "rewards/rejected": -0.033476393669843674,
856
+ "step": 520,
857
+ "use_label": 4741.3251953125
858
  },
859
  {
860
  "epoch": 0.55,
861
  "learning_rate": 2.4738067520372524e-07,
862
+ "logits/chosen": -2.478482484817505,
863
+ "logits/rejected": -2.4806153774261475,
864
+ "logps/chosen": -288.8346252441406,
865
+ "logps/rejected": -279.86871337890625,
866
+ "loss": 0.6453,
867
+ "pred_label": 3598.425048828125,
868
+ "rewards/accuracies": 0.6625000238418579,
869
+ "rewards/chosen": 0.029218871146440506,
870
+ "rewards/margins": 0.05338066816329956,
871
+ "rewards/rejected": -0.024161797016859055,
872
+ "step": 530,
873
+ "use_label": 4803.5751953125
874
  },
875
  {
876
  "epoch": 0.57,
877
  "learning_rate": 2.415599534342258e-07,
878
+ "logits/chosen": -2.528951644897461,
879
+ "logits/rejected": -2.4700913429260254,
880
+ "logps/chosen": -278.3326416015625,
881
+ "logps/rejected": -256.6160888671875,
882
+ "loss": 0.6441,
883
+ "pred_label": 3706.47509765625,
884
+ "rewards/accuracies": 0.6312500238418579,
885
+ "rewards/chosen": 0.03666474670171738,
886
+ "rewards/margins": 0.06262056529521942,
887
+ "rewards/rejected": -0.025955811142921448,
888
+ "step": 540,
889
+ "use_label": 4855.52490234375
890
  },
891
  {
892
  "epoch": 0.58,
893
  "learning_rate": 2.3573923166472642e-07,
894
+ "logits/chosen": -2.514728546142578,
895
+ "logits/rejected": -2.4884872436523438,
896
+ "logps/chosen": -278.8384094238281,
897
+ "logps/rejected": -251.11495971679688,
898
+ "loss": 0.6419,
899
+ "pred_label": 3806.125,
900
+ "rewards/accuracies": 0.6000000238418579,
901
+ "rewards/chosen": 0.028542999178171158,
902
+ "rewards/margins": 0.05743313580751419,
903
+ "rewards/rejected": -0.028890132904052734,
904
+ "step": 550,
905
+ "use_label": 4915.875
906
  },
907
  {
908
  "epoch": 0.59,
909
  "learning_rate": 2.2991850989522698e-07,
910
+ "logits/chosen": -2.4250597953796387,
911
+ "logits/rejected": -2.4664273262023926,
912
+ "logps/chosen": -263.1305847167969,
913
+ "logps/rejected": -209.12753295898438,
914
+ "loss": 0.6473,
915
+ "pred_label": 3901.925048828125,
916
+ "rewards/accuracies": 0.612500011920929,
917
+ "rewards/chosen": 0.01411795150488615,
918
+ "rewards/margins": 0.043625928461551666,
919
+ "rewards/rejected": -0.02950797602534294,
920
+ "step": 560,
921
+ "use_label": 4980.0751953125
922
  },
923
  {
924
  "epoch": 0.6,
925
  "learning_rate": 2.2409778812572759e-07,
926
+ "logits/chosen": -2.481841802597046,
927
+ "logits/rejected": -2.4556803703308105,
928
+ "logps/chosen": -256.8991394042969,
929
+ "logps/rejected": -253.22006225585938,
930
+ "loss": 0.6455,
931
+ "pred_label": 3991.375,
932
+ "rewards/accuracies": 0.6499999761581421,
933
+ "rewards/chosen": 0.021176273003220558,
934
+ "rewards/margins": 0.04561372101306915,
935
+ "rewards/rejected": -0.024437451735138893,
936
+ "step": 570,
937
+ "use_label": 5050.625
938
  },
939
  {
940
  "epoch": 0.61,
941
  "learning_rate": 2.1827706635622817e-07,
942
+ "logits/chosen": -2.513284683227539,
943
+ "logits/rejected": -2.4957690238952637,
944
+ "logps/chosen": -265.62261962890625,
945
+ "logps/rejected": -255.05160522460938,
946
+ "loss": 0.6459,
947
+ "pred_label": 4092.97509765625,
948
+ "rewards/accuracies": 0.6312500238418579,
949
+ "rewards/chosen": 0.023949166759848595,
950
+ "rewards/margins": 0.05676042288541794,
951
+ "rewards/rejected": -0.0328112468123436,
952
+ "step": 580,
953
+ "use_label": 5109.02490234375
954
  },
955
  {
956
  "epoch": 0.62,
957
  "learning_rate": 2.1245634458672875e-07,
958
+ "logits/chosen": -2.4279496669769287,
959
+ "logits/rejected": -2.3903157711029053,
960
+ "logps/chosen": -251.9611358642578,
961
+ "logps/rejected": -243.91909790039062,
962
+ "loss": 0.6421,
963
+ "pred_label": 4190.25,
964
+ "rewards/accuracies": 0.6499999761581421,
965
+ "rewards/chosen": 0.02141127921640873,
966
+ "rewards/margins": 0.04126313328742981,
967
+ "rewards/rejected": -0.01985185407102108,
968
+ "step": 590,
969
+ "use_label": 5171.75
970
  },
971
  {
972
  "epoch": 0.63,
973
  "learning_rate": 2.0663562281722933e-07,
974
+ "logits/chosen": -2.4996018409729004,
975
+ "logits/rejected": -2.505694627761841,
976
+ "logps/chosen": -294.9464111328125,
977
+ "logps/rejected": -294.47930908203125,
978
+ "loss": 0.6445,
979
+ "pred_label": 4285.97509765625,
980
+ "rewards/accuracies": 0.675000011920929,
981
+ "rewards/chosen": 0.03435974568128586,
982
+ "rewards/margins": 0.053214918822050095,
983
+ "rewards/rejected": -0.018855175003409386,
984
+ "step": 600,
985
+ "use_label": 5236.02490234375
986
  },
987
  {
988
  "epoch": 0.64,
989
  "learning_rate": 2.008149010477299e-07,
990
+ "logits/chosen": -2.500229597091675,
991
+ "logits/rejected": -2.44234037399292,
992
+ "logps/chosen": -288.0011291503906,
993
+ "logps/rejected": -231.5128936767578,
994
+ "loss": 0.6418,
995
+ "pred_label": 4384.60009765625,
996
+ "rewards/accuracies": 0.643750011920929,
997
+ "rewards/chosen": 0.03441072255373001,
998
+ "rewards/margins": 0.06906420737504959,
999
+ "rewards/rejected": -0.03465348482131958,
1000
+ "step": 610,
1001
+ "use_label": 5297.39990234375
1002
  },
1003
  {
1004
  "epoch": 0.65,
1005
  "learning_rate": 1.949941792782305e-07,
1006
+ "logits/chosen": -2.4949872493743896,
1007
+ "logits/rejected": -2.4376912117004395,
1008
+ "logps/chosen": -271.5177917480469,
1009
+ "logps/rejected": -241.4095458984375,
1010
+ "loss": 0.6436,
1011
+ "pred_label": 4473.35009765625,
1012
+ "rewards/accuracies": 0.643750011920929,
1013
+ "rewards/chosen": 0.02876521274447441,
1014
+ "rewards/margins": 0.04532798379659653,
1015
+ "rewards/rejected": -0.016562769189476967,
1016
+ "step": 620,
1017
+ "use_label": 5368.64990234375
1018
  },
1019
  {
1020
  "epoch": 0.66,
1021
  "learning_rate": 1.8917345750873107e-07,
1022
+ "logits/chosen": -2.4714274406433105,
1023
+ "logits/rejected": -2.4965994358062744,
1024
+ "logps/chosen": -272.4942321777344,
1025
+ "logps/rejected": -245.6349334716797,
1026
+ "loss": 0.6423,
1027
+ "pred_label": 4572.5,
1028
+ "rewards/accuracies": 0.6499999761581421,
1029
+ "rewards/chosen": 0.033678844571113586,
1030
+ "rewards/margins": 0.053819943219423294,
1031
+ "rewards/rejected": -0.02014109678566456,
1032
+ "step": 630,
1033
+ "use_label": 5429.5
1034
  },
1035
  {
1036
  "epoch": 0.67,
1037
  "learning_rate": 1.8335273573923165e-07,
1038
+ "logits/chosen": -2.4581923484802246,
1039
+ "logits/rejected": -2.474356174468994,
1040
+ "logps/chosen": -241.07748413085938,
1041
+ "logps/rejected": -239.5830535888672,
1042
+ "loss": 0.6398,
1043
+ "pred_label": 4672.5751953125,
1044
+ "rewards/accuracies": 0.6312500238418579,
1045
+ "rewards/chosen": 0.036277130246162415,
1046
+ "rewards/margins": 0.06419126689434052,
1047
+ "rewards/rejected": -0.027914145961403847,
1048
+ "step": 640,
1049
+ "use_label": 5489.4248046875
1050
  },
1051
  {
1052
  "epoch": 0.68,
1053
  "learning_rate": 1.7753201396973226e-07,
1054
+ "logits/chosen": -2.4635560512542725,
1055
+ "logits/rejected": -2.4887423515319824,
1056
+ "logps/chosen": -286.7969665527344,
1057
+ "logps/rejected": -265.29522705078125,
1058
+ "loss": 0.6357,
1059
+ "pred_label": 4776.0751953125,
1060
+ "rewards/accuracies": 0.6812499761581421,
1061
+ "rewards/chosen": 0.028953587636351585,
1062
+ "rewards/margins": 0.07534638047218323,
1063
+ "rewards/rejected": -0.04639279097318649,
1064
+ "step": 650,
1065
+ "use_label": 5545.9248046875
1066
  },
1067
  {
1068
  "epoch": 0.69,
1069
  "learning_rate": 1.7171129220023281e-07,
1070
+ "logits/chosen": -2.5433027744293213,
1071
+ "logits/rejected": -2.4936447143554688,
1072
+ "logps/chosen": -285.5106506347656,
1073
+ "logps/rejected": -242.0889434814453,
1074
+ "loss": 0.6451,
1075
+ "pred_label": 4884.875,
1076
+ "rewards/accuracies": 0.637499988079071,
1077
+ "rewards/chosen": 0.024147575721144676,
1078
+ "rewards/margins": 0.03708204999566078,
1079
+ "rewards/rejected": -0.012934470549225807,
1080
+ "step": 660,
1081
+ "use_label": 5597.125
1082
  },
1083
  {
1084
  "epoch": 0.7,
1085
  "learning_rate": 1.658905704307334e-07,
1086
+ "logits/chosen": -2.461125612258911,
1087
+ "logits/rejected": -2.419694185256958,
1088
+ "logps/chosen": -269.296142578125,
1089
+ "logps/rejected": -234.6914520263672,
1090
+ "loss": 0.6392,
1091
+ "pred_label": 4982.97509765625,
1092
+ "rewards/accuracies": 0.7250000238418579,
1093
+ "rewards/chosen": 0.032385144382715225,
1094
+ "rewards/margins": 0.07203620672225952,
1095
+ "rewards/rejected": -0.039651062339544296,
1096
+ "step": 670,
1097
+ "use_label": 5659.02490234375
1098
  },
1099
  {
1100
  "epoch": 0.71,
1101
  "learning_rate": 1.60069848661234e-07,
1102
+ "logits/chosen": -2.4804444313049316,
1103
+ "logits/rejected": -2.5111165046691895,
1104
+ "logps/chosen": -274.47027587890625,
1105
+ "logps/rejected": -250.04782104492188,
1106
+ "loss": 0.6424,
1107
+ "pred_label": 5080.27490234375,
1108
+ "rewards/accuracies": 0.6000000238418579,
1109
+ "rewards/chosen": 0.020998146384954453,
1110
+ "rewards/margins": 0.029987860471010208,
1111
+ "rewards/rejected": -0.008989715948700905,
1112
+ "step": 680,
1113
+ "use_label": 5721.72509765625
1114
  },
1115
  {
1116
  "epoch": 0.72,
1117
  "learning_rate": 1.5424912689173456e-07,
1118
+ "logits/chosen": -2.475935220718384,
1119
+ "logits/rejected": -2.4738192558288574,
1120
+ "logps/chosen": -239.99905395507812,
1121
+ "logps/rejected": -201.9076690673828,
1122
+ "loss": 0.6373,
1123
+ "pred_label": 5173.0751953125,
1124
+ "rewards/accuracies": 0.7124999761581421,
1125
+ "rewards/chosen": 0.02569044753909111,
1126
+ "rewards/margins": 0.05879662558436394,
1127
+ "rewards/rejected": -0.03310617804527283,
1128
+ "step": 690,
1129
+ "use_label": 5788.9248046875
1130
  },
1131
  {
1132
  "epoch": 0.73,
1133
  "learning_rate": 1.4842840512223514e-07,
1134
+ "logits/chosen": -2.5004830360412598,
1135
+ "logits/rejected": -2.4815783500671387,
1136
+ "logps/chosen": -262.7047424316406,
1137
+ "logps/rejected": -232.9453887939453,
1138
+ "loss": 0.6416,
1139
+ "pred_label": 5271.14990234375,
1140
+ "rewards/accuracies": 0.59375,
1141
+ "rewards/chosen": 0.021065320819616318,
1142
+ "rewards/margins": 0.05442025512456894,
1143
+ "rewards/rejected": -0.03335493057966232,
1144
+ "step": 700,
1145
+ "use_label": 5850.85009765625
1146
  },
1147
  {
1148
  "epoch": 0.74,
1149
  "learning_rate": 1.4260768335273574e-07,
1150
+ "logits/chosen": -2.5241992473602295,
1151
+ "logits/rejected": -2.4814987182617188,
1152
+ "logps/chosen": -279.4823303222656,
1153
+ "logps/rejected": -233.07373046875,
1154
+ "loss": 0.6346,
1155
+ "pred_label": 5374.75,
1156
+ "rewards/accuracies": 0.6187499761581421,
1157
+ "rewards/chosen": 0.02996956743299961,
1158
+ "rewards/margins": 0.0639830082654953,
1159
+ "rewards/rejected": -0.03401344642043114,
1160
+ "step": 710,
1161
+ "use_label": 5907.25
1162
  },
1163
  {
1164
  "epoch": 0.75,
1165
  "learning_rate": 1.3678696158323632e-07,
1166
+ "logits/chosen": -2.4806971549987793,
1167
+ "logits/rejected": -2.4919962882995605,
1168
+ "logps/chosen": -261.57806396484375,
1169
+ "logps/rejected": -246.28042602539062,
1170
+ "loss": 0.6397,
1171
+ "pred_label": 5479.1748046875,
1172
+ "rewards/accuracies": 0.706250011920929,
1173
+ "rewards/chosen": 0.034241896122694016,
1174
+ "rewards/margins": 0.06610169261693954,
1175
+ "rewards/rejected": -0.03185979649424553,
1176
+ "step": 720,
1177
+ "use_label": 5962.8251953125
1178
  },
1179
  {
1180
  "epoch": 0.76,
1181
  "learning_rate": 1.3096623981373688e-07,
1182
+ "logits/chosen": -2.4852230548858643,
1183
+ "logits/rejected": -2.473806142807007,
1184
+ "logps/chosen": -287.5245056152344,
1185
+ "logps/rejected": -234.40615844726562,
1186
+ "loss": 0.6392,
1187
+ "pred_label": 5584.5,
1188
+ "rewards/accuracies": 0.643750011920929,
1189
+ "rewards/chosen": 0.04039750620722771,
1190
+ "rewards/margins": 0.07070399820804596,
1191
+ "rewards/rejected": -0.0303064975887537,
1192
+ "step": 730,
1193
+ "use_label": 6017.5
1194
  },
1195
  {
1196
  "epoch": 0.77,
1197
  "learning_rate": 1.2514551804423749e-07,
1198
+ "logits/chosen": -2.466087818145752,
1199
+ "logits/rejected": -2.5063462257385254,
1200
+ "logps/chosen": -295.0579528808594,
1201
+ "logps/rejected": -279.02740478515625,
1202
+ "loss": 0.6358,
1203
+ "pred_label": 5692.77490234375,
1204
+ "rewards/accuracies": 0.6937500238418579,
1205
+ "rewards/chosen": 0.040842343121767044,
1206
+ "rewards/margins": 0.07114710658788681,
1207
+ "rewards/rejected": -0.030304765328764915,
1208
+ "step": 740,
1209
+ "use_label": 6069.22509765625
1210
  },
1211
  {
1212
  "epoch": 0.79,
1213
  "learning_rate": 1.1932479627473807e-07,
1214
+ "logits/chosen": -2.4788904190063477,
1215
+ "logits/rejected": -2.460747718811035,
1216
+ "logps/chosen": -250.11758422851562,
1217
+ "logps/rejected": -250.706787109375,
1218
+ "loss": 0.6351,
1219
+ "pred_label": 5804.22509765625,
1220
+ "rewards/accuracies": 0.6499999761581421,
1221
+ "rewards/chosen": 0.02599816396832466,
1222
+ "rewards/margins": 0.05924210697412491,
1223
+ "rewards/rejected": -0.03324393928050995,
1224
+ "step": 750,
1225
+ "use_label": 6117.77490234375
1226
  },
1227
  {
1228
  "epoch": 0.8,
1229
  "learning_rate": 1.1350407450523865e-07,
1230
+ "logits/chosen": -2.4908533096313477,
1231
+ "logits/rejected": -2.5631959438323975,
1232
+ "logps/chosen": -280.65643310546875,
1233
+ "logps/rejected": -252.4939422607422,
1234
+ "loss": 0.6382,
1235
+ "pred_label": 5910.4248046875,
1236
+ "rewards/accuracies": 0.59375,
1237
+ "rewards/chosen": 0.035374678671360016,
1238
+ "rewards/margins": 0.06185116618871689,
1239
+ "rewards/rejected": -0.026476481929421425,
1240
+ "step": 760,
1241
+ "use_label": 6171.5751953125
1242
  },
1243
  {
1244
  "epoch": 0.81,
1245
  "learning_rate": 1.0768335273573923e-07,
1246
+ "logits/chosen": -2.5305213928222656,
1247
+ "logits/rejected": -2.548992156982422,
1248
+ "logps/chosen": -291.19085693359375,
1249
+ "logps/rejected": -276.0303649902344,
1250
+ "loss": 0.6357,
1251
+ "pred_label": 6023.6748046875,
1252
+ "rewards/accuracies": 0.612500011920929,
1253
+ "rewards/chosen": 0.038612816482782364,
1254
+ "rewards/margins": 0.0703565925359726,
1255
+ "rewards/rejected": -0.03174378350377083,
1256
+ "step": 770,
1257
+ "use_label": 6218.3251953125
1258
  },
1259
  {
1260
  "epoch": 0.82,
1261
  "learning_rate": 1.0186263096623981e-07,
1262
+ "logits/chosen": -2.500197172164917,
1263
+ "logits/rejected": -2.5004265308380127,
1264
+ "logps/chosen": -269.6267395019531,
1265
+ "logps/rejected": -237.23226928710938,
1266
+ "loss": 0.6382,
1267
+ "pred_label": 6124.77490234375,
1268
+ "rewards/accuracies": 0.574999988079071,
1269
+ "rewards/chosen": 0.03419669717550278,
1270
+ "rewards/margins": 0.05804433301091194,
1271
+ "rewards/rejected": -0.023847635835409164,
1272
+ "step": 780,
1273
+ "use_label": 6277.22509765625
1274
  },
1275
  {
1276
  "epoch": 0.83,
1277
  "learning_rate": 9.604190919674039e-08,
1278
+ "logits/chosen": -2.5122509002685547,
1279
+ "logits/rejected": -2.527552843093872,
1280
+ "logps/chosen": -258.3880920410156,
1281
+ "logps/rejected": -250.8657684326172,
1282
+ "loss": 0.6408,
1283
+ "pred_label": 6225.125,
1284
+ "rewards/accuracies": 0.5874999761581421,
1285
+ "rewards/chosen": 0.024235274642705917,
1286
+ "rewards/margins": 0.03934397175908089,
1287
+ "rewards/rejected": -0.015108692459762096,
1288
+ "step": 790,
1289
+ "use_label": 6336.875
1290
  },
1291
  {
1292
  "epoch": 0.84,
1293
  "learning_rate": 9.022118742724097e-08,
1294
+ "logits/chosen": -2.409346103668213,
1295
+ "logits/rejected": -2.458906650543213,
1296
+ "logps/chosen": -291.98406982421875,
1297
+ "logps/rejected": -263.28204345703125,
1298
+ "loss": 0.632,
1299
+ "pred_label": 6330.6748046875,
1300
+ "rewards/accuracies": 0.6187499761581421,
1301
+ "rewards/chosen": 0.03406571224331856,
1302
+ "rewards/margins": 0.06302747130393982,
1303
+ "rewards/rejected": -0.028961753472685814,
1304
+ "step": 800,
1305
+ "use_label": 6391.3251953125
1306
  },
1307
  {
1308
  "epoch": 0.85,
1309
  "learning_rate": 8.440046565774157e-08,
1310
+ "logits/chosen": -2.4541163444519043,
1311
+ "logits/rejected": -2.426970958709717,
1312
+ "logps/chosen": -272.098388671875,
1313
+ "logps/rejected": -219.48318481445312,
1314
+ "loss": 0.6397,
1315
+ "pred_label": 6430.9248046875,
1316
+ "rewards/accuracies": 0.643750011920929,
1317
+ "rewards/chosen": 0.0267607681453228,
1318
+ "rewards/margins": 0.049745358526706696,
1319
+ "rewards/rejected": -0.022984590381383896,
1320
+ "step": 810,
1321
+ "use_label": 6451.0751953125
1322
  },
1323
  {
1324
  "epoch": 0.86,
1325
  "learning_rate": 7.857974388824213e-08,
1326
+ "logits/chosen": -2.443247079849243,
1327
+ "logits/rejected": -2.4559197425842285,
1328
+ "logps/chosen": -278.1483459472656,
1329
+ "logps/rejected": -233.55221557617188,
1330
+ "loss": 0.6401,
1331
+ "pred_label": 6533.77490234375,
1332
+ "rewards/accuracies": 0.65625,
1333
+ "rewards/chosen": 0.03652986139059067,
1334
+ "rewards/margins": 0.0706639438867569,
1335
+ "rewards/rejected": -0.03413407504558563,
1336
+ "step": 820,
1337
+ "use_label": 6508.22509765625
1338
  },
1339
  {
1340
  "epoch": 0.87,
1341
  "learning_rate": 7.275902211874273e-08,
1342
+ "logits/chosen": -2.492734432220459,
1343
+ "logits/rejected": -2.5021793842315674,
1344
+ "logps/chosen": -252.682373046875,
1345
+ "logps/rejected": -271.76885986328125,
1346
+ "loss": 0.6402,
1347
+ "pred_label": 6633.2001953125,
1348
+ "rewards/accuracies": 0.6499999761581421,
1349
+ "rewards/chosen": 0.02864421345293522,
1350
+ "rewards/margins": 0.06612807512283325,
1351
+ "rewards/rejected": -0.03748386353254318,
1352
+ "step": 830,
1353
+ "use_label": 6568.7998046875
1354
  },
1355
  {
1356
  "epoch": 0.88,
1357
  "learning_rate": 6.693830034924331e-08,
1358
+ "logits/chosen": -2.431647300720215,
1359
+ "logits/rejected": -2.4801371097564697,
1360
+ "logps/chosen": -254.59475708007812,
1361
+ "logps/rejected": -252.9589080810547,
1362
+ "loss": 0.6335,
1363
+ "pred_label": 6738.25,
1364
+ "rewards/accuracies": 0.675000011920929,
1365
+ "rewards/chosen": 0.02285713143646717,
1366
+ "rewards/margins": 0.05092828720808029,
1367
+ "rewards/rejected": -0.02807115949690342,
1368
+ "step": 840,
1369
+ "use_label": 6623.75
1370
  },
1371
  {
1372
  "epoch": 0.89,
1373
  "learning_rate": 6.111757857974389e-08,
1374
+ "logits/chosen": -2.4767704010009766,
1375
+ "logits/rejected": -2.5524322986602783,
1376
+ "logps/chosen": -284.7981872558594,
1377
+ "logps/rejected": -236.7716522216797,
1378
+ "loss": 0.6388,
1379
+ "pred_label": 6841.77490234375,
1380
+ "rewards/accuracies": 0.706250011920929,
1381
+ "rewards/chosen": 0.05144144967198372,
1382
+ "rewards/margins": 0.09114910662174225,
1383
+ "rewards/rejected": -0.03970765322446823,
1384
+ "step": 850,
1385
+ "use_label": 6680.22509765625
1386
  },
1387
  {
1388
  "epoch": 0.9,
1389
  "learning_rate": 5.529685681024446e-08,
1390
+ "logits/chosen": -2.440410614013672,
1391
+ "logits/rejected": -2.4388670921325684,
1392
+ "logps/chosen": -274.1076354980469,
1393
+ "logps/rejected": -265.62774658203125,
1394
+ "loss": 0.639,
1395
+ "pred_label": 6939.10009765625,
1396
+ "rewards/accuracies": 0.668749988079071,
1397
+ "rewards/chosen": 0.021466780453920364,
1398
+ "rewards/margins": 0.053748417645692825,
1399
+ "rewards/rejected": -0.032281629741191864,
1400
+ "step": 860,
1401
+ "use_label": 6742.89990234375
1402
  },
1403
  {
1404
  "epoch": 0.91,
1405
  "learning_rate": 4.947613504074505e-08,
1406
+ "logits/chosen": -2.4695324897766113,
1407
+ "logits/rejected": -2.5007336139678955,
1408
+ "logps/chosen": -234.1849822998047,
1409
+ "logps/rejected": -233.623046875,
1410
+ "loss": 0.6369,
1411
+ "pred_label": 7033.625,
1412
+ "rewards/accuracies": 0.6312500238418579,
1413
+ "rewards/chosen": 0.03171985596418381,
1414
+ "rewards/margins": 0.0530589334666729,
1415
+ "rewards/rejected": -0.02133907750248909,
1416
+ "step": 870,
1417
+ "use_label": 6808.375
1418
  },
1419
  {
1420
  "epoch": 0.92,
1421
  "learning_rate": 4.365541327124563e-08,
1422
+ "logits/chosen": -2.5348362922668457,
1423
+ "logits/rejected": -2.48889422416687,
1424
+ "logps/chosen": -275.351806640625,
1425
+ "logps/rejected": -252.8624267578125,
1426
+ "loss": 0.6378,
1427
+ "pred_label": 7132.27490234375,
1428
+ "rewards/accuracies": 0.706250011920929,
1429
+ "rewards/chosen": 0.04395347461104393,
1430
+ "rewards/margins": 0.07273541390895844,
1431
+ "rewards/rejected": -0.028781946748495102,
1432
+ "step": 880,
1433
+ "use_label": 6869.72509765625
1434
  },
1435
  {
1436
  "epoch": 0.93,
1437
  "learning_rate": 3.783469150174622e-08,
1438
+ "logits/chosen": -2.5349538326263428,
1439
+ "logits/rejected": -2.5645627975463867,
1440
+ "logps/chosen": -269.9933166503906,
1441
+ "logps/rejected": -262.0923156738281,
1442
+ "loss": 0.6362,
1443
+ "pred_label": 7239.27490234375,
1444
+ "rewards/accuracies": 0.6625000238418579,
1445
+ "rewards/chosen": 0.024751801043748856,
1446
+ "rewards/margins": 0.05075649172067642,
1447
+ "rewards/rejected": -0.026004692539572716,
1448
+ "step": 890,
1449
+ "use_label": 6922.72509765625
1450
  },
1451
  {
1452
  "epoch": 0.94,
1453
  "learning_rate": 3.20139697322468e-08,
1454
+ "logits/chosen": -2.4200382232666016,
1455
+ "logits/rejected": -2.4404149055480957,
1456
+ "logps/chosen": -264.64501953125,
1457
+ "logps/rejected": -250.6757354736328,
1458
+ "loss": 0.6397,
1459
+ "pred_label": 7349.375,
1460
+ "rewards/accuracies": 0.5874999761581421,
1461
+ "rewards/chosen": 0.02098809741437435,
1462
+ "rewards/margins": 0.051287733018398285,
1463
+ "rewards/rejected": -0.030299633741378784,
1464
+ "step": 900,
1465
+ "use_label": 6972.625
1466
  },
1467
  {
1468
  "epoch": 0.95,
1469
  "learning_rate": 2.619324796274738e-08,
1470
+ "logits/chosen": -2.485106945037842,
1471
+ "logits/rejected": -2.509174346923828,
1472
+ "logps/chosen": -282.1513671875,
1473
+ "logps/rejected": -253.21719360351562,
1474
+ "loss": 0.6356,
1475
+ "pred_label": 7449.4501953125,
1476
+ "rewards/accuracies": 0.6187499761581421,
1477
+ "rewards/chosen": 0.0359305813908577,
1478
+ "rewards/margins": 0.05107826739549637,
1479
+ "rewards/rejected": -0.015147687867283821,
1480
+ "step": 910,
1481
+ "use_label": 7032.5498046875
1482
  },
1483
  {
1484
  "epoch": 0.96,
1485
  "learning_rate": 2.037252619324796e-08,
1486
+ "logits/chosen": -2.4887633323669434,
1487
+ "logits/rejected": -2.4791512489318848,
1488
+ "logps/chosen": -279.5574951171875,
1489
+ "logps/rejected": -249.22793579101562,
1490
+ "loss": 0.6371,
1491
+ "pred_label": 7561.125,
1492
+ "rewards/accuracies": 0.675000011920929,
1493
+ "rewards/chosen": 0.034208498895168304,
1494
+ "rewards/margins": 0.06242678314447403,
1495
+ "rewards/rejected": -0.028218284249305725,
1496
+ "step": 920,
1497
+ "use_label": 7080.875
1498
  },
1499
  {
1500
  "epoch": 0.97,
1501
  "learning_rate": 1.4551804423748545e-08,
1502
+ "logits/chosen": -2.499810218811035,
1503
+ "logits/rejected": -2.465851306915283,
1504
+ "logps/chosen": -258.83099365234375,
1505
+ "logps/rejected": -234.7783203125,
1506
+ "loss": 0.6375,
1507
+ "pred_label": 7669.27490234375,
1508
+ "rewards/accuracies": 0.675000011920929,
1509
+ "rewards/chosen": 0.04888115078210831,
1510
+ "rewards/margins": 0.0783516988158226,
1511
+ "rewards/rejected": -0.02947053872048855,
1512
+ "step": 930,
1513
+ "use_label": 7132.72509765625
1514
  },
1515
  {
1516
  "epoch": 0.98,
1517
  "learning_rate": 8.731082654249125e-09,
1518
+ "logits/chosen": -2.530332565307617,
1519
+ "logits/rejected": -2.512815237045288,
1520
+ "logps/chosen": -277.4713439941406,
1521
+ "logps/rejected": -260.1994934082031,
1522
+ "loss": 0.6359,
1523
+ "pred_label": 7771.25,
1524
+ "rewards/accuracies": 0.6625000238418579,
1525
+ "rewards/chosen": 0.009665247052907944,
1526
+ "rewards/margins": 0.03832637146115303,
1527
+ "rewards/rejected": -0.028661120682954788,
1528
+ "step": 940,
1529
+ "use_label": 7190.75
1530
  },
1531
  {
1532
  "epoch": 0.99,
1533
  "learning_rate": 2.910360884749709e-09,
1534
+ "logits/chosen": -2.4885025024414062,
1535
+ "logits/rejected": -2.4381051063537598,
1536
+ "logps/chosen": -255.72409057617188,
1537
+ "logps/rejected": -244.0724639892578,
1538
+ "loss": 0.6403,
1539
+ "pred_label": 7870.75,
1540
+ "rewards/accuracies": 0.625,
1541
+ "rewards/chosen": 0.0019513871520757675,
1542
+ "rewards/margins": 0.03540351241827011,
1543
+ "rewards/rejected": -0.03345213085412979,
1544
+ "step": 950,
1545
+ "use_label": 7251.25
1546
  },
1547
  {
1548
  "epoch": 1.0,
1549
+ "eval_logits/chosen": -2.5062618255615234,
1550
+ "eval_logits/rejected": -2.5199859142303467,
1551
+ "eval_logps/chosen": -272.93438720703125,
1552
+ "eval_logps/rejected": -250.28643798828125,
1553
+ "eval_loss": 0.6350826025009155,
1554
+ "eval_pred_label": 8131.07177734375,
1555
+ "eval_rewards/accuracies": 0.6200000047683716,
1556
+ "eval_rewards/chosen": 0.029984984546899796,
1557
+ "eval_rewards/margins": 0.06345725804567337,
1558
+ "eval_rewards/rejected": -0.033472273498773575,
1559
+ "eval_runtime": 444.1068,
1560
+ "eval_samples_per_second": 4.503,
1561
+ "eval_steps_per_second": 0.281,
1562
+ "eval_use_label": 7400.92822265625,
1563
  "step": 955
1564
  },
1565
  {
1566
  "epoch": 1.0,
1567
  "step": 955,
1568
  "total_flos": 0.0,
1569
+ "train_loss": 0.6552608817035616,
1570
+ "train_runtime": 24261.6882,
1571
+ "train_samples_per_second": 2.52,
1572
+ "train_steps_per_second": 0.039
1573
  }
1574
  ],
1575
  "logging_steps": 10,