jikaixuan commited on
Commit
76a70e6
1 Parent(s): 03d054f

Model save

Browse files
README.md CHANGED
@@ -14,18 +14,6 @@ should probably proofread and complete it, then remove this comment. -->
14
  # zephyr-ds
15
 
16
  This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-full](https://huggingface.co/alignment-handbook/zephyr-7b-sft-full) on the None dataset.
17
- It achieves the following results on the evaluation set:
18
- - Loss: 0.1909
19
- - Rewards/chosen: -3.7579
20
- - Rewards/rejected: -6.7356
21
- - Rewards/accuracies: 0.6940
22
- - Rewards/margins: 2.9777
23
- - Logps/rejected: -329.6432
24
- - Logps/chosen: -319.5546
25
- - Logits/rejected: -2.7931
26
- - Logits/chosen: -2.8199
27
- - Use Label: 12554.0557
28
- - Pred Label: 19507.9434
29
 
30
  ## Model description
31
 
@@ -49,10 +37,10 @@ The following hyperparameters were used during training:
49
  - eval_batch_size: 4
50
  - seed: 42
51
  - distributed_type: multi-GPU
52
- - num_devices: 2
53
- - gradient_accumulation_steps: 8
54
  - total_train_batch_size: 64
55
- - total_eval_batch_size: 8
56
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
57
  - lr_scheduler_type: linear
58
  - lr_scheduler_warmup_ratio: 0.1
@@ -60,9 +48,6 @@ The following hyperparameters were used during training:
60
 
61
  ### Training results
62
 
63
- | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen | Use Label | Pred Label |
64
- |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|:----------:|:----------:|
65
- | 0.1523 | 1.0 | 955 | 0.1909 | -3.7579 | -6.7356 | 0.6940 | 2.9777 | -329.6432 | -319.5546 | -2.7931 | -2.8199 | 12338.0557 | 18723.9434 |
66
 
67
 
68
  ### Framework versions
 
14
  # zephyr-ds
15
 
16
  This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-full](https://huggingface.co/alignment-handbook/zephyr-7b-sft-full) on the None dataset.
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
  ## Model description
19
 
 
37
  - eval_batch_size: 4
38
  - seed: 42
39
  - distributed_type: multi-GPU
40
+ - num_devices: 4
41
+ - gradient_accumulation_steps: 4
42
  - total_train_batch_size: 64
43
+ - total_eval_batch_size: 16
44
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
45
  - lr_scheduler_type: linear
46
  - lr_scheduler_warmup_ratio: 0.1
 
48
 
49
  ### Training results
50
 
 
 
 
51
 
52
 
53
  ### Framework versions
adapter_config.json CHANGED
@@ -16,10 +16,10 @@
16
  "rank_pattern": {},
17
  "revision": null,
18
  "target_modules": [
19
- "o_proj",
20
  "k_proj",
21
- "v_proj",
22
- "q_proj"
 
23
  ],
24
  "task_type": "CAUSAL_LM"
25
  }
 
16
  "rank_pattern": {},
17
  "revision": null,
18
  "target_modules": [
 
19
  "k_proj",
20
+ "o_proj",
21
+ "q_proj",
22
+ "v_proj"
23
  ],
24
  "task_type": "CAUSAL_LM"
25
  }
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3531f89b4e1e9bc6009cebd20925a01848158b16d4bf4ab4f06d2ed6775e1dcc
3
  size 218138576
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c4851d0fda021876b9eb5ecbd6ec0d108d643bb4b81a517a68af72134d8b407
3
  size 218138576
all_results.json CHANGED
@@ -1,23 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "eval_logits/chosen": -2.819918155670166,
4
- "eval_logits/rejected": -2.7931315898895264,
5
- "eval_logps/chosen": -319.5545654296875,
6
- "eval_logps/rejected": -329.6432189941406,
7
- "eval_loss": 0.19092892110347748,
8
- "eval_pred_label": 19507.943359375,
9
- "eval_rewards/accuracies": 0.6940000057220459,
10
- "eval_rewards/chosen": -3.757920265197754,
11
- "eval_rewards/margins": 2.9776601791381836,
12
- "eval_rewards/rejected": -6.735579967498779,
13
- "eval_runtime": 855.9781,
14
- "eval_samples": 2000,
15
- "eval_samples_per_second": 2.337,
16
- "eval_steps_per_second": 0.292,
17
- "eval_use_label": 12554.0556640625,
18
- "train_loss": 0.3070014505486214,
19
- "train_runtime": 47653.0179,
20
- "train_samples": 61135,
21
- "train_samples_per_second": 1.283,
22
- "train_steps_per_second": 0.02
23
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 0.6931471824645996,
4
+ "train_runtime": 27.1859,
5
+ "train_samples": 61,
6
+ "train_samples_per_second": 2.244,
7
+ "train_steps_per_second": 0.037
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  }
runs/Feb08_14-59-32_uclaml04.cs.ucla.edu/events.out.tfevents.1707433228.uclaml04.cs.ucla.edu.2385848.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa54768cfc2f83049fb86b75de8c756bcb27aab3caaea706223643d1c02f9012
3
+ size 4347
runs/Feb08_15-01-43_uclaml04.cs.ucla.edu/events.out.tfevents.1707433358.uclaml04.cs.ucla.edu.2386554.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad7407776a324966662a5f2d4339fbd7528167f3ec857759d5d6c24ad8b64b80
3
+ size 5361
runs/Feb08_15-09-49_uclaml04.cs.ucla.edu/events.out.tfevents.1707433845.uclaml04.cs.ucla.edu.2387930.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d158d8e5701738b059dcc0c207f95d0252be70aa30bbb4173b970950eebbb59
3
+ size 5361
runs/Feb08_15-34-19_uclaml04.cs.ucla.edu/events.out.tfevents.1707435315.uclaml04.cs.ucla.edu.2392295.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a815b5325316e61e68940e9a8e8bfd421c1ed2a17527dcf13597710f5f5b8e10
3
+ size 5202
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 0.3070014505486214,
4
- "train_runtime": 47653.0179,
5
- "train_samples": 61135,
6
- "train_samples_per_second": 1.283,
7
- "train_steps_per_second": 0.02
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 0.6931471824645996,
4
+ "train_runtime": 27.1859,
5
+ "train_samples": 61,
6
+ "train_samples_per_second": 2.244,
7
+ "train_steps_per_second": 0.037
8
  }
trainer_state.json CHANGED
@@ -1,1581 +1,39 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9997382884061764,
5
- "eval_steps": 100,
6
- "global_step": 955,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.0,
13
- "learning_rate": 1.0416666666666667e-07,
14
- "logits/chosen": -2.899709463119507,
15
- "logits/rejected": -2.879509687423706,
16
- "logps/chosen": -314.8815612792969,
17
- "logps/rejected": -239.785888671875,
18
  "loss": 0.6931,
19
- "pred_label": 0.0,
20
  "rewards/accuracies": 0.0,
21
  "rewards/chosen": 0.0,
22
  "rewards/margins": 0.0,
23
  "rewards/rejected": 0.0,
24
- "step": 1,
25
- "use_label": 18.0
26
- },
27
- {
28
- "epoch": 0.01,
29
- "learning_rate": 1.0416666666666667e-06,
30
- "logits/chosen": -2.871338129043579,
31
- "logits/rejected": -2.8671977519989014,
32
- "logps/chosen": -304.6894226074219,
33
- "logps/rejected": -284.7349853515625,
34
- "loss": 0.6939,
35
- "pred_label": 0.0,
36
- "rewards/accuracies": 0.4131944477558136,
37
- "rewards/chosen": 0.0006341927801258862,
38
- "rewards/margins": -0.0011922286357730627,
39
- "rewards/rejected": 0.0018264217069372535,
40
- "step": 10,
41
- "use_label": 178.0
42
- },
43
- {
44
- "epoch": 0.02,
45
- "learning_rate": 2.0833333333333334e-06,
46
- "logits/chosen": -2.761018991470337,
47
- "logits/rejected": -2.7763421535491943,
48
- "logps/chosen": -255.852783203125,
49
- "logps/rejected": -260.04364013671875,
50
- "loss": 0.6902,
51
- "pred_label": 0.0,
52
- "rewards/accuracies": 0.5531250238418579,
53
- "rewards/chosen": 0.0032608681358397007,
54
- "rewards/margins": 0.006093679927289486,
55
- "rewards/rejected": -0.002832812489941716,
56
- "step": 20,
57
- "use_label": 482.0
58
- },
59
- {
60
- "epoch": 0.03,
61
- "learning_rate": 3.125e-06,
62
- "logits/chosen": -2.85542631149292,
63
- "logits/rejected": -2.8387763500213623,
64
- "logps/chosen": -274.9126892089844,
65
- "logps/rejected": -252.97470092773438,
66
- "loss": 0.6909,
67
- "pred_label": 0.0,
68
- "rewards/accuracies": 0.565625011920929,
69
- "rewards/chosen": 0.0062743439339101315,
70
- "rewards/margins": 0.007127248682081699,
71
- "rewards/rejected": -0.0008529046317562461,
72
- "step": 30,
73
- "use_label": 802.0
74
- },
75
- {
76
- "epoch": 0.04,
77
- "learning_rate": 4.166666666666667e-06,
78
- "logits/chosen": -2.829822301864624,
79
- "logits/rejected": -2.8364169597625732,
80
- "logps/chosen": -279.52288818359375,
81
- "logps/rejected": -273.93243408203125,
82
- "loss": 0.6846,
83
- "pred_label": 0.0,
84
- "rewards/accuracies": 0.5687500238418579,
85
- "rewards/chosen": 0.0057884035632014275,
86
- "rewards/margins": 0.016727477312088013,
87
- "rewards/rejected": -0.010939070954918861,
88
- "step": 40,
89
- "use_label": 1122.0
90
- },
91
- {
92
- "epoch": 0.05,
93
- "learning_rate": 5.208333333333334e-06,
94
- "logits/chosen": -2.8475687503814697,
95
- "logits/rejected": -2.8291072845458984,
96
- "logps/chosen": -266.10870361328125,
97
- "logps/rejected": -257.93243408203125,
98
- "loss": 0.6748,
99
- "pred_label": 0.0,
100
- "rewards/accuracies": 0.5874999761581421,
101
- "rewards/chosen": 0.009051208384335041,
102
- "rewards/margins": 0.03577073663473129,
103
- "rewards/rejected": -0.026719529181718826,
104
- "step": 50,
105
- "use_label": 1442.0
106
- },
107
- {
108
- "epoch": 0.06,
109
- "learning_rate": 6.25e-06,
110
- "logits/chosen": -2.8435542583465576,
111
- "logits/rejected": -2.857710838317871,
112
- "logps/chosen": -308.6976623535156,
113
- "logps/rejected": -283.2618103027344,
114
- "loss": 0.6705,
115
- "pred_label": 0.0,
116
- "rewards/accuracies": 0.609375,
117
- "rewards/chosen": 0.015249615535140038,
118
- "rewards/margins": 0.05724747106432915,
119
- "rewards/rejected": -0.04199784994125366,
120
- "step": 60,
121
- "use_label": 1762.0
122
- },
123
- {
124
- "epoch": 0.07,
125
- "learning_rate": 7.291666666666667e-06,
126
- "logits/chosen": -2.8543457984924316,
127
- "logits/rejected": -2.8572847843170166,
128
- "logps/chosen": -284.14410400390625,
129
- "logps/rejected": -268.72344970703125,
130
- "loss": 0.648,
131
- "pred_label": 0.0,
132
- "rewards/accuracies": 0.6875,
133
- "rewards/chosen": 0.023584356531500816,
134
- "rewards/margins": 0.11067845672369003,
135
- "rewards/rejected": -0.08709411323070526,
136
- "step": 70,
137
- "use_label": 2082.0
138
- },
139
- {
140
- "epoch": 0.08,
141
- "learning_rate": 8.333333333333334e-06,
142
- "logits/chosen": -2.8060286045074463,
143
- "logits/rejected": -2.802976131439209,
144
- "logps/chosen": -310.8707580566406,
145
- "logps/rejected": -279.11822509765625,
146
- "loss": 0.6203,
147
- "pred_label": 1.2999999523162842,
148
- "rewards/accuracies": 0.703125,
149
- "rewards/chosen": 0.010406842455267906,
150
- "rewards/margins": 0.17813482880592346,
151
- "rewards/rejected": -0.1677280068397522,
152
- "step": 80,
153
- "use_label": 2400.699951171875
154
- },
155
- {
156
- "epoch": 0.09,
157
- "learning_rate": 9.375000000000001e-06,
158
- "logits/chosen": -2.7980992794036865,
159
- "logits/rejected": -2.7999019622802734,
160
- "logps/chosen": -295.4351806640625,
161
- "logps/rejected": -256.0653381347656,
162
- "loss": 0.6197,
163
- "pred_label": 3.612499952316284,
164
- "rewards/accuracies": 0.706250011920929,
165
- "rewards/chosen": 0.03518597409129143,
166
- "rewards/margins": 0.2245054543018341,
167
- "rewards/rejected": -0.1893194615840912,
168
- "step": 90,
169
- "use_label": 2718.387451171875
170
- },
171
- {
172
- "epoch": 0.1,
173
- "learning_rate": 9.953434225844005e-06,
174
- "logits/chosen": -2.806431770324707,
175
- "logits/rejected": -2.791755437850952,
176
- "logps/chosen": -275.9968566894531,
177
- "logps/rejected": -271.8335876464844,
178
- "loss": 0.6156,
179
- "pred_label": 14.262499809265137,
180
- "rewards/accuracies": 0.6625000238418579,
181
- "rewards/chosen": 0.036512341350317,
182
- "rewards/margins": 0.2290469855070114,
183
- "rewards/rejected": -0.1925346404314041,
184
- "step": 100,
185
- "use_label": 3027.737548828125
186
- },
187
- {
188
- "epoch": 0.12,
189
- "learning_rate": 9.837019790454017e-06,
190
- "logits/chosen": -2.8026273250579834,
191
- "logits/rejected": -2.7944607734680176,
192
- "logps/chosen": -273.5028076171875,
193
- "logps/rejected": -257.3667907714844,
194
- "loss": 0.5959,
195
- "pred_label": 32.9375,
196
- "rewards/accuracies": 0.6937500238418579,
197
- "rewards/chosen": 0.0005084859440103173,
198
- "rewards/margins": 0.29547563195228577,
199
- "rewards/rejected": -0.2949671447277069,
200
- "step": 110,
201
- "use_label": 3329.0625
202
- },
203
- {
204
- "epoch": 0.13,
205
- "learning_rate": 9.72060535506403e-06,
206
- "logits/chosen": -2.8184993267059326,
207
- "logits/rejected": -2.8210482597351074,
208
- "logps/chosen": -295.52386474609375,
209
- "logps/rejected": -268.7758483886719,
210
- "loss": 0.5615,
211
- "pred_label": 68.7125015258789,
212
- "rewards/accuracies": 0.671875,
213
- "rewards/chosen": 0.014611599035561085,
214
- "rewards/margins": 0.31382012367248535,
215
- "rewards/rejected": -0.29920852184295654,
216
- "step": 120,
217
- "use_label": 3613.28759765625
218
- },
219
- {
220
- "epoch": 0.14,
221
- "learning_rate": 9.60419091967404e-06,
222
- "logits/chosen": -2.841566801071167,
223
- "logits/rejected": -2.8285374641418457,
224
- "logps/chosen": -295.79296875,
225
- "logps/rejected": -278.8650817871094,
226
- "loss": 0.5672,
227
- "pred_label": 103.5999984741211,
228
- "rewards/accuracies": 0.6781250238418579,
229
- "rewards/chosen": -0.02395879104733467,
230
- "rewards/margins": 0.3047763705253601,
231
- "rewards/rejected": -0.3287351727485657,
232
- "step": 130,
233
- "use_label": 3898.39990234375
234
- },
235
- {
236
- "epoch": 0.15,
237
- "learning_rate": 9.487776484284052e-06,
238
- "logits/chosen": -2.814074993133545,
239
- "logits/rejected": -2.8163723945617676,
240
- "logps/chosen": -268.4273376464844,
241
- "logps/rejected": -256.7865905761719,
242
- "loss": 0.5278,
243
- "pred_label": 150.2375030517578,
244
- "rewards/accuracies": 0.7093750238418579,
245
- "rewards/chosen": -0.06842182576656342,
246
- "rewards/margins": 0.4312126040458679,
247
- "rewards/rejected": -0.49963444471359253,
248
- "step": 140,
249
- "use_label": 4171.7626953125
250
- },
251
- {
252
- "epoch": 0.16,
253
- "learning_rate": 9.371362048894065e-06,
254
- "logits/chosen": -2.8159050941467285,
255
- "logits/rejected": -2.8136250972747803,
256
- "logps/chosen": -302.8164978027344,
257
- "logps/rejected": -282.1024475097656,
258
- "loss": 0.512,
259
- "pred_label": 220.6125030517578,
260
- "rewards/accuracies": 0.6499999761581421,
261
- "rewards/chosen": -0.09125231206417084,
262
- "rewards/margins": 0.46096962690353394,
263
- "rewards/rejected": -0.5522218942642212,
264
- "step": 150,
265
- "use_label": 4421.3876953125
266
- },
267
- {
268
- "epoch": 0.17,
269
- "learning_rate": 9.254947613504075e-06,
270
- "logits/chosen": -2.824867010116577,
271
- "logits/rejected": -2.7856192588806152,
272
- "logps/chosen": -289.77410888671875,
273
- "logps/rejected": -281.7305603027344,
274
- "loss": 0.4774,
275
- "pred_label": 313.2124938964844,
276
- "rewards/accuracies": 0.7281249761581421,
277
- "rewards/chosen": 0.016176635399460793,
278
- "rewards/margins": 0.6861985325813293,
279
- "rewards/rejected": -0.6700219511985779,
280
- "step": 160,
281
- "use_label": 4648.78759765625
282
- },
283
- {
284
- "epoch": 0.18,
285
- "learning_rate": 9.138533178114087e-06,
286
- "logits/chosen": -2.834872007369995,
287
- "logits/rejected": -2.8196887969970703,
288
- "logps/chosen": -291.0771179199219,
289
- "logps/rejected": -259.09747314453125,
290
- "loss": 0.4858,
291
- "pred_label": 410.9375,
292
- "rewards/accuracies": 0.684374988079071,
293
- "rewards/chosen": -0.12445585429668427,
294
- "rewards/margins": 0.6846336126327515,
295
- "rewards/rejected": -0.8090893626213074,
296
- "step": 170,
297
- "use_label": 4871.0625
298
- },
299
- {
300
- "epoch": 0.19,
301
- "learning_rate": 9.022118742724098e-06,
302
- "logits/chosen": -2.812523126602173,
303
- "logits/rejected": -2.798945188522339,
304
- "logps/chosen": -249.18258666992188,
305
- "logps/rejected": -272.2208557128906,
306
- "loss": 0.4293,
307
- "pred_label": 534.9000244140625,
308
- "rewards/accuracies": 0.699999988079071,
309
- "rewards/chosen": -0.22158575057983398,
310
- "rewards/margins": 0.7619065046310425,
311
- "rewards/rejected": -0.9834922552108765,
312
- "step": 180,
313
- "use_label": 5067.10009765625
314
- },
315
- {
316
- "epoch": 0.2,
317
- "learning_rate": 8.90570430733411e-06,
318
- "logits/chosen": -2.852125883102417,
319
- "logits/rejected": -2.8234543800354004,
320
- "logps/chosen": -306.91717529296875,
321
- "logps/rejected": -280.7167663574219,
322
- "loss": 0.4155,
323
- "pred_label": 671.125,
324
- "rewards/accuracies": 0.737500011920929,
325
- "rewards/chosen": -0.15123331546783447,
326
- "rewards/margins": 0.8876510858535767,
327
- "rewards/rejected": -1.0388844013214111,
328
- "step": 190,
329
- "use_label": 5250.875
330
- },
331
- {
332
- "epoch": 0.21,
333
- "learning_rate": 8.789289871944122e-06,
334
- "logits/chosen": -2.8436083793640137,
335
- "logits/rejected": -2.84915828704834,
336
- "logps/chosen": -272.04266357421875,
337
- "logps/rejected": -273.80450439453125,
338
- "loss": 0.4063,
339
- "pred_label": 820.5999755859375,
340
- "rewards/accuracies": 0.721875011920929,
341
- "rewards/chosen": -0.2906159460544586,
342
- "rewards/margins": 0.9353755712509155,
343
- "rewards/rejected": -1.2259914875030518,
344
- "step": 200,
345
- "use_label": 5421.39990234375
346
- },
347
- {
348
- "epoch": 0.22,
349
- "learning_rate": 8.672875436554133e-06,
350
- "logits/chosen": -2.7995848655700684,
351
- "logits/rejected": -2.805274724960327,
352
- "logps/chosen": -287.6289978027344,
353
- "logps/rejected": -278.2756652832031,
354
- "loss": 0.381,
355
- "pred_label": 977.75,
356
- "rewards/accuracies": 0.6937500238418579,
357
- "rewards/chosen": -0.4142700135707855,
358
- "rewards/margins": 0.9491223096847534,
359
- "rewards/rejected": -1.3633924722671509,
360
- "step": 210,
361
- "use_label": 5584.25
362
- },
363
- {
364
- "epoch": 0.23,
365
- "learning_rate": 8.556461001164145e-06,
366
- "logits/chosen": -2.8255257606506348,
367
- "logits/rejected": -2.829308271408081,
368
- "logps/chosen": -284.17547607421875,
369
- "logps/rejected": -283.9212951660156,
370
- "loss": 0.3555,
371
- "pred_label": 1152.0999755859375,
372
- "rewards/accuracies": 0.699999988079071,
373
- "rewards/chosen": -0.5653145909309387,
374
- "rewards/margins": 0.9969175457954407,
375
- "rewards/rejected": -1.5622321367263794,
376
- "step": 220,
377
- "use_label": 5729.89990234375
378
- },
379
- {
380
- "epoch": 0.24,
381
- "learning_rate": 8.440046565774158e-06,
382
- "logits/chosen": -2.8168904781341553,
383
- "logits/rejected": -2.8215222358703613,
384
- "logps/chosen": -316.2763977050781,
385
- "logps/rejected": -286.81451416015625,
386
- "loss": 0.3603,
387
- "pred_label": 1315.9000244140625,
388
- "rewards/accuracies": 0.6781250238418579,
389
- "rewards/chosen": -0.6668749451637268,
390
- "rewards/margins": 1.111903429031372,
391
- "rewards/rejected": -1.778778314590454,
392
- "step": 230,
393
- "use_label": 5886.10009765625
394
- },
395
- {
396
- "epoch": 0.25,
397
- "learning_rate": 8.323632130384168e-06,
398
- "logits/chosen": -2.781104803085327,
399
- "logits/rejected": -2.7607803344726562,
400
- "logps/chosen": -306.5365295410156,
401
- "logps/rejected": -278.3562927246094,
402
- "loss": 0.3714,
403
- "pred_label": 1495.112548828125,
404
- "rewards/accuracies": 0.703125,
405
- "rewards/chosen": -0.6257501244544983,
406
- "rewards/margins": 1.2071430683135986,
407
- "rewards/rejected": -1.8328930139541626,
408
- "step": 240,
409
- "use_label": 6026.8876953125
410
- },
411
- {
412
- "epoch": 0.26,
413
- "learning_rate": 8.20721769499418e-06,
414
- "logits/chosen": -2.78596830368042,
415
- "logits/rejected": -2.7599189281463623,
416
- "logps/chosen": -307.82861328125,
417
- "logps/rejected": -302.6281433105469,
418
- "loss": 0.3539,
419
- "pred_label": 1677.699951171875,
420
- "rewards/accuracies": 0.715624988079071,
421
- "rewards/chosen": -0.6732528805732727,
422
- "rewards/margins": 1.3034615516662598,
423
- "rewards/rejected": -1.9767143726348877,
424
- "step": 250,
425
- "use_label": 6164.2998046875
426
- },
427
- {
428
- "epoch": 0.27,
429
- "learning_rate": 8.090803259604193e-06,
430
- "logits/chosen": -2.815549373626709,
431
- "logits/rejected": -2.7940564155578613,
432
- "logps/chosen": -286.57562255859375,
433
- "logps/rejected": -281.46282958984375,
434
- "loss": 0.3442,
435
- "pred_label": 1858.2874755859375,
436
- "rewards/accuracies": 0.671875,
437
- "rewards/chosen": -0.8266381025314331,
438
- "rewards/margins": 1.193564772605896,
439
- "rewards/rejected": -2.020203113555908,
440
- "step": 260,
441
- "use_label": 6303.71240234375
442
- },
443
- {
444
- "epoch": 0.28,
445
- "learning_rate": 7.974388824214203e-06,
446
- "logits/chosen": -2.788181781768799,
447
- "logits/rejected": -2.777799129486084,
448
- "logps/chosen": -282.0382080078125,
449
- "logps/rejected": -283.30877685546875,
450
- "loss": 0.3539,
451
- "pred_label": 2042.800048828125,
452
- "rewards/accuracies": 0.7281249761581421,
453
- "rewards/chosen": -0.7480707764625549,
454
- "rewards/margins": 1.517256736755371,
455
- "rewards/rejected": -2.2653274536132812,
456
- "step": 270,
457
- "use_label": 6439.2001953125
458
- },
459
- {
460
- "epoch": 0.29,
461
- "learning_rate": 7.857974388824214e-06,
462
- "logits/chosen": -2.876176357269287,
463
- "logits/rejected": -2.8538033962249756,
464
- "logps/chosen": -301.6811828613281,
465
- "logps/rejected": -271.5317077636719,
466
- "loss": 0.3156,
467
- "pred_label": 2233.362548828125,
468
- "rewards/accuracies": 0.7281249761581421,
469
- "rewards/chosen": -0.7781136631965637,
470
- "rewards/margins": 1.399305820465088,
471
- "rewards/rejected": -2.177419424057007,
472
- "step": 280,
473
- "use_label": 6568.6376953125
474
- },
475
- {
476
- "epoch": 0.3,
477
- "learning_rate": 7.741559953434226e-06,
478
- "logits/chosen": -2.8401272296905518,
479
- "logits/rejected": -2.8481698036193848,
480
- "logps/chosen": -298.8213806152344,
481
- "logps/rejected": -259.22357177734375,
482
- "loss": 0.3398,
483
- "pred_label": 2423.125,
484
- "rewards/accuracies": 0.684374988079071,
485
- "rewards/chosen": -0.863988995552063,
486
- "rewards/margins": 1.1683231592178345,
487
- "rewards/rejected": -2.0323121547698975,
488
- "step": 290,
489
- "use_label": 6698.875
490
- },
491
- {
492
- "epoch": 0.31,
493
- "learning_rate": 7.625145518044238e-06,
494
- "logits/chosen": -2.8349387645721436,
495
- "logits/rejected": -2.8150177001953125,
496
- "logps/chosen": -300.6407775878906,
497
- "logps/rejected": -268.7109375,
498
- "loss": 0.3379,
499
- "pred_label": 2605.0625,
500
- "rewards/accuracies": 0.675000011920929,
501
- "rewards/chosen": -0.9172398447990417,
502
- "rewards/margins": 1.3402159214019775,
503
- "rewards/rejected": -2.257455348968506,
504
- "step": 300,
505
- "use_label": 6836.9375
506
- },
507
- {
508
- "epoch": 0.32,
509
- "learning_rate": 7.50873108265425e-06,
510
- "logits/chosen": -2.8097264766693115,
511
- "logits/rejected": -2.7792162895202637,
512
- "logps/chosen": -285.2196044921875,
513
- "logps/rejected": -293.2747497558594,
514
- "loss": 0.31,
515
- "pred_label": 2792.4375,
516
- "rewards/accuracies": 0.721875011920929,
517
- "rewards/chosen": -1.0178813934326172,
518
- "rewards/margins": 1.7013041973114014,
519
- "rewards/rejected": -2.7191855907440186,
520
- "step": 310,
521
- "use_label": 6969.5625
522
- },
523
- {
524
- "epoch": 0.33,
525
- "learning_rate": 7.392316647264262e-06,
526
- "logits/chosen": -2.8490498065948486,
527
- "logits/rejected": -2.822798728942871,
528
- "logps/chosen": -310.70599365234375,
529
- "logps/rejected": -291.4599609375,
530
- "loss": 0.2964,
531
- "pred_label": 2987.39990234375,
532
- "rewards/accuracies": 0.7281249761581421,
533
- "rewards/chosen": -0.986463189125061,
534
- "rewards/margins": 1.725873589515686,
535
- "rewards/rejected": -2.712336540222168,
536
- "step": 320,
537
- "use_label": 7094.60009765625
538
- },
539
- {
540
- "epoch": 0.35,
541
- "learning_rate": 7.275902211874272e-06,
542
- "logits/chosen": -2.826206684112549,
543
- "logits/rejected": -2.804277181625366,
544
- "logps/chosen": -272.4220275878906,
545
- "logps/rejected": -269.9629821777344,
546
- "loss": 0.3185,
547
- "pred_label": 3184.550048828125,
548
- "rewards/accuracies": 0.6875,
549
- "rewards/chosen": -1.161760687828064,
550
- "rewards/margins": 1.4867627620697021,
551
- "rewards/rejected": -2.6485238075256348,
552
- "step": 330,
553
- "use_label": 7217.4501953125
554
- },
555
- {
556
- "epoch": 0.36,
557
- "learning_rate": 7.1594877764842855e-06,
558
- "logits/chosen": -2.8520660400390625,
559
- "logits/rejected": -2.832625150680542,
560
- "logps/chosen": -307.9129943847656,
561
- "logps/rejected": -286.85198974609375,
562
- "loss": 0.315,
563
- "pred_label": 3378.5625,
564
- "rewards/accuracies": 0.6468750238418579,
565
- "rewards/chosen": -1.1871546506881714,
566
- "rewards/margins": 1.394044280052185,
567
- "rewards/rejected": -2.5811991691589355,
568
- "step": 340,
569
- "use_label": 7343.4375
570
- },
571
- {
572
- "epoch": 0.37,
573
- "learning_rate": 7.043073341094296e-06,
574
- "logits/chosen": -2.8574576377868652,
575
- "logits/rejected": -2.8412604331970215,
576
- "logps/chosen": -302.08367919921875,
577
- "logps/rejected": -297.78802490234375,
578
- "loss": 0.2612,
579
- "pred_label": 3585.8125,
580
- "rewards/accuracies": 0.6875,
581
- "rewards/chosen": -1.353593349456787,
582
- "rewards/margins": 1.50961172580719,
583
- "rewards/rejected": -2.8632051944732666,
584
- "step": 350,
585
- "use_label": 7456.1875
586
- },
587
- {
588
- "epoch": 0.38,
589
- "learning_rate": 6.9266589057043075e-06,
590
- "logits/chosen": -2.9001529216766357,
591
- "logits/rejected": -2.8869528770446777,
592
- "logps/chosen": -308.5780944824219,
593
- "logps/rejected": -304.3133544921875,
594
- "loss": 0.2686,
595
- "pred_label": 3807.35009765625,
596
- "rewards/accuracies": 0.699999988079071,
597
- "rewards/chosen": -1.5075533390045166,
598
- "rewards/margins": 1.8883936405181885,
599
- "rewards/rejected": -3.395947217941284,
600
- "step": 360,
601
- "use_label": 7554.64990234375
602
- },
603
- {
604
- "epoch": 0.39,
605
- "learning_rate": 6.81024447031432e-06,
606
- "logits/chosen": -2.8611207008361816,
607
- "logits/rejected": -2.8043880462646484,
608
- "logps/chosen": -299.6781921386719,
609
- "logps/rejected": -303.0309753417969,
610
- "loss": 0.2914,
611
- "pred_label": 4018.550048828125,
612
- "rewards/accuracies": 0.721875011920929,
613
- "rewards/chosen": -1.2945538759231567,
614
- "rewards/margins": 1.9656009674072266,
615
- "rewards/rejected": -3.2601547241210938,
616
- "step": 370,
617
- "use_label": 7663.4501953125
618
- },
619
- {
620
- "epoch": 0.4,
621
- "learning_rate": 6.693830034924331e-06,
622
- "logits/chosen": -2.864997148513794,
623
- "logits/rejected": -2.836440324783325,
624
- "logps/chosen": -297.97845458984375,
625
- "logps/rejected": -299.3329772949219,
626
- "loss": 0.2302,
627
- "pred_label": 4235.9501953125,
628
- "rewards/accuracies": 0.746874988079071,
629
- "rewards/chosen": -1.5762511491775513,
630
- "rewards/margins": 2.1133368015289307,
631
- "rewards/rejected": -3.6895878314971924,
632
- "step": 380,
633
- "use_label": 7766.0498046875
634
- },
635
- {
636
- "epoch": 0.41,
637
- "learning_rate": 6.5774155995343425e-06,
638
- "logits/chosen": -2.817526340484619,
639
- "logits/rejected": -2.8043646812438965,
640
- "logps/chosen": -289.15155029296875,
641
- "logps/rejected": -285.8031005859375,
642
- "loss": 0.2728,
643
- "pred_label": 4468.28759765625,
644
- "rewards/accuracies": 0.671875,
645
- "rewards/chosen": -1.684571623802185,
646
- "rewards/margins": 2.025607109069824,
647
- "rewards/rejected": -3.7101783752441406,
648
- "step": 390,
649
- "use_label": 7853.71240234375
650
- },
651
- {
652
- "epoch": 0.42,
653
- "learning_rate": 6.461001164144355e-06,
654
- "logits/chosen": -2.853013038635254,
655
- "logits/rejected": -2.8416590690612793,
656
- "logps/chosen": -306.99212646484375,
657
- "logps/rejected": -285.7340393066406,
658
- "loss": 0.2494,
659
- "pred_label": 4690.5751953125,
660
- "rewards/accuracies": 0.7124999761581421,
661
- "rewards/chosen": -1.374151587486267,
662
- "rewards/margins": 2.0254199504852295,
663
- "rewards/rejected": -3.399571657180786,
664
- "step": 400,
665
- "use_label": 7951.4248046875
666
- },
667
- {
668
- "epoch": 0.43,
669
- "learning_rate": 6.344586728754366e-06,
670
- "logits/chosen": -2.840266227722168,
671
- "logits/rejected": -2.8314151763916016,
672
- "logps/chosen": -298.0084533691406,
673
- "logps/rejected": -289.5139465332031,
674
- "loss": 0.2554,
675
- "pred_label": 4908.03759765625,
676
- "rewards/accuracies": 0.6812499761581421,
677
- "rewards/chosen": -1.3851497173309326,
678
- "rewards/margins": 1.9138154983520508,
679
- "rewards/rejected": -3.2989654541015625,
680
- "step": 410,
681
- "use_label": 8053.96240234375
682
- },
683
- {
684
- "epoch": 0.44,
685
- "learning_rate": 6.228172293364378e-06,
686
- "logits/chosen": -2.8333933353424072,
687
- "logits/rejected": -2.8114962577819824,
688
- "logps/chosen": -281.6308288574219,
689
- "logps/rejected": -283.6402282714844,
690
- "loss": 0.2814,
691
- "pred_label": 5120.16259765625,
692
- "rewards/accuracies": 0.6968749761581421,
693
- "rewards/chosen": -1.2441003322601318,
694
- "rewards/margins": 1.8000373840332031,
695
- "rewards/rejected": -3.044137954711914,
696
- "step": 420,
697
- "use_label": 8161.83740234375
698
- },
699
- {
700
- "epoch": 0.45,
701
- "learning_rate": 6.11175785797439e-06,
702
- "logits/chosen": -2.8557116985321045,
703
- "logits/rejected": -2.82271146774292,
704
- "logps/chosen": -305.80322265625,
705
- "logps/rejected": -291.4790954589844,
706
- "loss": 0.2721,
707
- "pred_label": 5342.97509765625,
708
- "rewards/accuracies": 0.6468750238418579,
709
- "rewards/chosen": -1.548147439956665,
710
- "rewards/margins": 1.3705997467041016,
711
- "rewards/rejected": -2.9187471866607666,
712
- "step": 430,
713
- "use_label": 8259.025390625
714
- },
715
- {
716
- "epoch": 0.46,
717
- "learning_rate": 5.995343422584401e-06,
718
- "logits/chosen": -2.8646130561828613,
719
- "logits/rejected": -2.8445792198181152,
720
- "logps/chosen": -299.37811279296875,
721
- "logps/rejected": -275.6679382324219,
722
- "loss": 0.2555,
723
- "pred_label": 5555.8876953125,
724
- "rewards/accuracies": 0.668749988079071,
725
- "rewards/chosen": -1.6375954151153564,
726
- "rewards/margins": 1.7932894229888916,
727
- "rewards/rejected": -3.430884838104248,
728
- "step": 440,
729
- "use_label": 8366.1123046875
730
- },
731
- {
732
- "epoch": 0.47,
733
- "learning_rate": 5.878928987194412e-06,
734
- "logits/chosen": -2.757448434829712,
735
- "logits/rejected": -2.7369332313537598,
736
- "logps/chosen": -273.22796630859375,
737
- "logps/rejected": -287.7809143066406,
738
- "loss": 0.2421,
739
- "pred_label": 5778.60009765625,
740
- "rewards/accuracies": 0.715624988079071,
741
- "rewards/chosen": -1.5854363441467285,
742
- "rewards/margins": 1.861823320388794,
743
- "rewards/rejected": -3.4472599029541016,
744
- "step": 450,
745
- "use_label": 8463.400390625
746
- },
747
- {
748
- "epoch": 0.48,
749
- "learning_rate": 5.762514551804425e-06,
750
- "logits/chosen": -2.8731637001037598,
751
- "logits/rejected": -2.8510243892669678,
752
- "logps/chosen": -322.58294677734375,
753
- "logps/rejected": -296.1365966796875,
754
- "loss": 0.2765,
755
- "pred_label": 5993.14990234375,
756
- "rewards/accuracies": 0.6875,
757
- "rewards/chosen": -1.682427167892456,
758
- "rewards/margins": 1.4240281581878662,
759
- "rewards/rejected": -3.1064553260803223,
760
- "step": 460,
761
- "use_label": 8568.849609375
762
- },
763
- {
764
- "epoch": 0.49,
765
- "learning_rate": 5.6461001164144355e-06,
766
- "logits/chosen": -2.85316801071167,
767
- "logits/rejected": -2.828878164291382,
768
- "logps/chosen": -312.8523254394531,
769
- "logps/rejected": -301.6842346191406,
770
- "loss": 0.2612,
771
- "pred_label": 6206.52490234375,
772
- "rewards/accuracies": 0.690625011920929,
773
- "rewards/chosen": -1.7460191249847412,
774
- "rewards/margins": 1.790623426437378,
775
- "rewards/rejected": -3.536642551422119,
776
- "step": 470,
777
- "use_label": 8675.474609375
778
- },
779
- {
780
- "epoch": 0.5,
781
- "learning_rate": 5.529685681024447e-06,
782
- "logits/chosen": -2.793785810470581,
783
- "logits/rejected": -2.7770721912384033,
784
- "logps/chosen": -273.039306640625,
785
- "logps/rejected": -294.2169189453125,
786
- "loss": 0.2638,
787
- "pred_label": 6428.9248046875,
788
- "rewards/accuracies": 0.612500011920929,
789
- "rewards/chosen": -2.0260262489318848,
790
- "rewards/margins": 1.4155540466308594,
791
- "rewards/rejected": -3.441580295562744,
792
- "step": 480,
793
- "use_label": 8773.0751953125
794
- },
795
- {
796
- "epoch": 0.51,
797
- "learning_rate": 5.413271245634459e-06,
798
- "logits/chosen": -2.8139758110046387,
799
- "logits/rejected": -2.8046863079071045,
800
- "logps/chosen": -302.76043701171875,
801
- "logps/rejected": -283.40069580078125,
802
- "loss": 0.2434,
803
- "pred_label": 6654.64990234375,
804
- "rewards/accuracies": 0.703125,
805
- "rewards/chosen": -1.7550468444824219,
806
- "rewards/margins": 1.9198501110076904,
807
- "rewards/rejected": -3.6748969554901123,
808
- "step": 490,
809
- "use_label": 8867.349609375
810
- },
811
- {
812
- "epoch": 0.52,
813
- "learning_rate": 5.2968568102444705e-06,
814
- "logits/chosen": -2.846433162689209,
815
- "logits/rejected": -2.8172895908355713,
816
- "logps/chosen": -307.9965515136719,
817
- "logps/rejected": -305.2622375488281,
818
- "loss": 0.2286,
819
- "pred_label": 6898.72509765625,
820
- "rewards/accuracies": 0.6812499761581421,
821
- "rewards/chosen": -1.8360557556152344,
822
- "rewards/margins": 2.077921152114868,
823
- "rewards/rejected": -3.9139761924743652,
824
- "step": 500,
825
- "use_label": 8943.275390625
826
- },
827
- {
828
- "epoch": 0.53,
829
- "learning_rate": 5.180442374854482e-06,
830
- "logits/chosen": -2.8676562309265137,
831
- "logits/rejected": -2.8552956581115723,
832
- "logps/chosen": -297.22064208984375,
833
- "logps/rejected": -299.2649841308594,
834
- "loss": 0.2153,
835
- "pred_label": 7138.83740234375,
836
- "rewards/accuracies": 0.703125,
837
- "rewards/chosen": -2.125028610229492,
838
- "rewards/margins": 2.392963409423828,
839
- "rewards/rejected": -4.51799201965332,
840
- "step": 510,
841
- "use_label": 9023.162109375
842
- },
843
- {
844
- "epoch": 0.54,
845
- "learning_rate": 5.064027939464494e-06,
846
- "logits/chosen": -2.861607074737549,
847
- "logits/rejected": -2.824428081512451,
848
- "logps/chosen": -300.9296569824219,
849
- "logps/rejected": -300.1321716308594,
850
- "loss": 0.2336,
851
- "pred_label": 7372.25,
852
- "rewards/accuracies": 0.715624988079071,
853
- "rewards/chosen": -1.6840749979019165,
854
- "rewards/margins": 2.552696466445923,
855
- "rewards/rejected": -4.236771583557129,
856
- "step": 520,
857
- "use_label": 9109.75
858
- },
859
- {
860
- "epoch": 0.55,
861
- "learning_rate": 4.947613504074506e-06,
862
- "logits/chosen": -2.8984599113464355,
863
- "logits/rejected": -2.8596012592315674,
864
- "logps/chosen": -308.25140380859375,
865
- "logps/rejected": -307.4395446777344,
866
- "loss": 0.2249,
867
- "pred_label": 7615.60009765625,
868
- "rewards/accuracies": 0.721875011920929,
869
- "rewards/chosen": -1.700122594833374,
870
- "rewards/margins": 2.4561877250671387,
871
- "rewards/rejected": -4.156310558319092,
872
- "step": 530,
873
- "use_label": 9186.400390625
874
- },
875
- {
876
- "epoch": 0.57,
877
- "learning_rate": 4.831199068684517e-06,
878
- "logits/chosen": -2.8835153579711914,
879
- "logits/rejected": -2.8658623695373535,
880
- "logps/chosen": -307.30853271484375,
881
- "logps/rejected": -305.19061279296875,
882
- "loss": 0.231,
883
- "pred_label": 7856.7001953125,
884
- "rewards/accuracies": 0.7406250238418579,
885
- "rewards/chosen": -1.5762898921966553,
886
- "rewards/margins": 2.7267935276031494,
887
- "rewards/rejected": -4.303083419799805,
888
- "step": 540,
889
- "use_label": 9265.2998046875
890
- },
891
- {
892
- "epoch": 0.58,
893
- "learning_rate": 4.714784633294529e-06,
894
- "logits/chosen": -2.853215456008911,
895
- "logits/rejected": -2.827068567276001,
896
- "logps/chosen": -299.76446533203125,
897
- "logps/rejected": -309.46734619140625,
898
- "loss": 0.2155,
899
- "pred_label": 8086.875,
900
- "rewards/accuracies": 0.684374988079071,
901
- "rewards/chosen": -1.7928301095962524,
902
- "rewards/margins": 2.323638677597046,
903
- "rewards/rejected": -4.116468906402588,
904
- "step": 550,
905
- "use_label": 9355.125
906
- },
907
- {
908
- "epoch": 0.59,
909
- "learning_rate": 4.598370197904541e-06,
910
- "logits/chosen": -2.834362506866455,
911
- "logits/rejected": -2.810864210128784,
912
- "logps/chosen": -316.2347717285156,
913
- "logps/rejected": -276.0157775878906,
914
- "loss": 0.258,
915
- "pred_label": 8320.0126953125,
916
- "rewards/accuracies": 0.659375011920929,
917
- "rewards/chosen": -1.946148157119751,
918
- "rewards/margins": 1.932668685913086,
919
- "rewards/rejected": -3.878816604614258,
920
- "step": 560,
921
- "use_label": 9441.9873046875
922
- },
923
- {
924
- "epoch": 0.6,
925
- "learning_rate": 4.481955762514552e-06,
926
- "logits/chosen": -2.818577766418457,
927
- "logits/rejected": -2.800201416015625,
928
- "logps/chosen": -287.69390869140625,
929
- "logps/rejected": -290.1914367675781,
930
- "loss": 0.211,
931
- "pred_label": 8550.4873046875,
932
- "rewards/accuracies": 0.7250000238418579,
933
- "rewards/chosen": -1.9045298099517822,
934
- "rewards/margins": 2.01288104057312,
935
- "rewards/rejected": -3.9174110889434814,
936
- "step": 570,
937
- "use_label": 9531.5126953125
938
- },
939
- {
940
- "epoch": 0.61,
941
- "learning_rate": 4.365541327124564e-06,
942
- "logits/chosen": -2.833016872406006,
943
- "logits/rejected": -2.8276636600494385,
944
- "logps/chosen": -294.7558288574219,
945
- "logps/rejected": -301.39459228515625,
946
- "loss": 0.2456,
947
- "pred_label": 8784.2001953125,
948
- "rewards/accuracies": 0.6937500238418579,
949
- "rewards/chosen": -2.0422422885894775,
950
- "rewards/margins": 2.122627019882202,
951
- "rewards/rejected": -4.164869785308838,
952
- "step": 580,
953
- "use_label": 9617.7998046875
954
- },
955
- {
956
- "epoch": 0.62,
957
- "learning_rate": 4.249126891734576e-06,
958
- "logits/chosen": -2.8091092109680176,
959
- "logits/rejected": -2.805959939956665,
960
- "logps/chosen": -306.3221740722656,
961
- "logps/rejected": -315.2014465332031,
962
- "loss": 0.245,
963
- "pred_label": 9015.087890625,
964
- "rewards/accuracies": 0.6812499761581421,
965
- "rewards/chosen": -1.8414281606674194,
966
- "rewards/margins": 2.2786455154418945,
967
- "rewards/rejected": -4.1200737953186035,
968
- "step": 590,
969
- "use_label": 9706.912109375
970
- },
971
- {
972
- "epoch": 0.63,
973
- "learning_rate": 4.132712456344587e-06,
974
- "logits/chosen": -2.8337035179138184,
975
- "logits/rejected": -2.829528570175171,
976
- "logps/chosen": -313.0551452636719,
977
- "logps/rejected": -332.26055908203125,
978
- "loss": 0.197,
979
- "pred_label": 9257.0126953125,
980
- "rewards/accuracies": 0.737500011920929,
981
- "rewards/chosen": -1.633183240890503,
982
- "rewards/margins": 2.7223196029663086,
983
- "rewards/rejected": -4.355503082275391,
984
- "step": 600,
985
- "use_label": 9784.9873046875
986
- },
987
- {
988
- "epoch": 0.64,
989
- "learning_rate": 4.0162980209545985e-06,
990
- "logits/chosen": -2.84791898727417,
991
- "logits/rejected": -2.8361434936523438,
992
- "logps/chosen": -317.21636962890625,
993
- "logps/rejected": -296.84759521484375,
994
- "loss": 0.2117,
995
- "pred_label": 9493.875,
996
- "rewards/accuracies": 0.6968749761581421,
997
- "rewards/chosen": -1.8508695363998413,
998
- "rewards/margins": 2.29063081741333,
999
- "rewards/rejected": -4.141500949859619,
1000
- "step": 610,
1001
- "use_label": 9868.125
1002
- },
1003
- {
1004
- "epoch": 0.65,
1005
- "learning_rate": 3.899883585564611e-06,
1006
- "logits/chosen": -2.854703426361084,
1007
- "logits/rejected": -2.7970035076141357,
1008
- "logps/chosen": -306.46490478515625,
1009
- "logps/rejected": -295.9690856933594,
1010
- "loss": 0.2274,
1011
- "pred_label": 9722.4501953125,
1012
- "rewards/accuracies": 0.7093750238418579,
1013
- "rewards/chosen": -2.2507681846618652,
1014
- "rewards/margins": 2.2558112144470215,
1015
- "rewards/rejected": -4.506579399108887,
1016
- "step": 620,
1017
- "use_label": 9959.5498046875
1018
- },
1019
- {
1020
- "epoch": 0.66,
1021
- "learning_rate": 3.7834691501746217e-06,
1022
- "logits/chosen": -2.8078906536102295,
1023
- "logits/rejected": -2.794034957885742,
1024
- "logps/chosen": -300.4102478027344,
1025
- "logps/rejected": -319.0257263183594,
1026
- "loss": 0.2013,
1027
- "pred_label": 9961.900390625,
1028
- "rewards/accuracies": 0.715624988079071,
1029
- "rewards/chosen": -2.219250440597534,
1030
- "rewards/margins": 2.7064452171325684,
1031
- "rewards/rejected": -4.925694942474365,
1032
- "step": 630,
1033
- "use_label": 10040.099609375
1034
- },
1035
- {
1036
- "epoch": 0.67,
1037
- "learning_rate": 3.6670547147846336e-06,
1038
- "logits/chosen": -2.818479061126709,
1039
- "logits/rejected": -2.7741947174072266,
1040
- "logps/chosen": -286.0899353027344,
1041
- "logps/rejected": -285.6925048828125,
1042
- "loss": 0.2183,
1043
- "pred_label": 10201.8125,
1044
- "rewards/accuracies": 0.6499999761581421,
1045
- "rewards/chosen": -2.2236666679382324,
1046
- "rewards/margins": 2.530942440032959,
1047
- "rewards/rejected": -4.75460958480835,
1048
- "step": 640,
1049
- "use_label": 10120.1875
1050
- },
1051
- {
1052
- "epoch": 0.68,
1053
- "learning_rate": 3.5506402793946454e-06,
1054
- "logits/chosen": -2.786527156829834,
1055
- "logits/rejected": -2.7697250843048096,
1056
- "logps/chosen": -311.5790100097656,
1057
- "logps/rejected": -327.41448974609375,
1058
- "loss": 0.1862,
1059
- "pred_label": 10438.400390625,
1060
- "rewards/accuracies": 0.7437499761581421,
1061
- "rewards/chosen": -2.290921688079834,
1062
- "rewards/margins": 3.1013052463531494,
1063
- "rewards/rejected": -5.392226696014404,
1064
- "step": 650,
1065
- "use_label": 10203.599609375
1066
- },
1067
- {
1068
- "epoch": 0.69,
1069
- "learning_rate": 3.434225844004657e-06,
1070
- "logits/chosen": -2.833200693130493,
1071
- "logits/rejected": -2.8118557929992676,
1072
- "logps/chosen": -307.580322265625,
1073
- "logps/rejected": -289.68023681640625,
1074
- "loss": 0.2361,
1075
- "pred_label": 10677.7626953125,
1076
- "rewards/accuracies": 0.65625,
1077
- "rewards/chosen": -2.9750688076019287,
1078
- "rewards/margins": 1.9074808359146118,
1079
- "rewards/rejected": -4.882550239562988,
1080
- "step": 660,
1081
- "use_label": 10284.2373046875
1082
- },
1083
- {
1084
- "epoch": 0.7,
1085
- "learning_rate": 3.3178114086146686e-06,
1086
- "logits/chosen": -2.823399066925049,
1087
- "logits/rejected": -2.794682025909424,
1088
- "logps/chosen": -306.1806640625,
1089
- "logps/rejected": -300.6312255859375,
1090
- "loss": 0.1964,
1091
- "pred_label": 10918.8251953125,
1092
- "rewards/accuracies": 0.7406250238418579,
1093
- "rewards/chosen": -2.6798641681671143,
1094
- "rewards/margins": 2.8597590923309326,
1095
- "rewards/rejected": -5.539623737335205,
1096
- "step": 670,
1097
- "use_label": 10363.1748046875
1098
- },
1099
- {
1100
- "epoch": 0.71,
1101
- "learning_rate": 3.20139697322468e-06,
1102
- "logits/chosen": -2.8301963806152344,
1103
- "logits/rejected": -2.8216447830200195,
1104
- "logps/chosen": -326.2012634277344,
1105
- "logps/rejected": -316.03192138671875,
1106
- "loss": 0.2123,
1107
- "pred_label": 11164.5625,
1108
- "rewards/accuracies": 0.6625000238418579,
1109
- "rewards/chosen": -2.6657440662384033,
1110
- "rewards/margins": 2.024155855178833,
1111
- "rewards/rejected": -4.689899444580078,
1112
- "step": 680,
1113
- "use_label": 10437.4375
1114
- },
1115
- {
1116
- "epoch": 0.72,
1117
- "learning_rate": 3.0849825378346914e-06,
1118
- "logits/chosen": -2.808143138885498,
1119
- "logits/rejected": -2.7844669818878174,
1120
- "logps/chosen": -309.50946044921875,
1121
- "logps/rejected": -288.86865234375,
1122
- "loss": 0.1798,
1123
- "pred_label": 11412.3876953125,
1124
- "rewards/accuracies": 0.737500011920929,
1125
- "rewards/chosen": -2.7414073944091797,
1126
- "rewards/margins": 2.5902717113494873,
1127
- "rewards/rejected": -5.331678867340088,
1128
- "step": 690,
1129
- "use_label": 10509.6123046875
1130
- },
1131
- {
1132
- "epoch": 0.73,
1133
- "learning_rate": 2.9685681024447033e-06,
1134
- "logits/chosen": -2.7990927696228027,
1135
- "logits/rejected": -2.770561695098877,
1136
- "logps/chosen": -296.21697998046875,
1137
- "logps/rejected": -300.41229248046875,
1138
- "loss": 0.1884,
1139
- "pred_label": 11669.099609375,
1140
- "rewards/accuracies": 0.703125,
1141
- "rewards/chosen": -2.7120413780212402,
1142
- "rewards/margins": 2.8404040336608887,
1143
- "rewards/rejected": -5.552445411682129,
1144
- "step": 700,
1145
- "use_label": 10572.900390625
1146
- },
1147
- {
1148
- "epoch": 0.74,
1149
- "learning_rate": 2.852153667054715e-06,
1150
- "logits/chosen": -2.8590335845947266,
1151
- "logits/rejected": -2.842290163040161,
1152
- "logps/chosen": -300.3395080566406,
1153
- "logps/rejected": -291.9437561035156,
1154
- "loss": 0.1848,
1155
- "pred_label": 11914.4501953125,
1156
- "rewards/accuracies": 0.7093750238418579,
1157
- "rewards/chosen": -2.490241527557373,
1158
- "rewards/margins": 2.938403606414795,
1159
- "rewards/rejected": -5.428645133972168,
1160
- "step": 710,
1161
- "use_label": 10647.5498046875
1162
- },
1163
- {
1164
- "epoch": 0.75,
1165
- "learning_rate": 2.735739231664727e-06,
1166
- "logits/chosen": -2.8020033836364746,
1167
- "logits/rejected": -2.8077361583709717,
1168
- "logps/chosen": -307.3730773925781,
1169
- "logps/rejected": -305.9153137207031,
1170
- "loss": 0.1691,
1171
- "pred_label": 12175.75,
1172
- "rewards/accuracies": 0.706250011920929,
1173
- "rewards/chosen": -3.191791534423828,
1174
- "rewards/margins": 2.743975877761841,
1175
- "rewards/rejected": -5.93576717376709,
1176
- "step": 720,
1177
- "use_label": 10706.25
1178
- },
1179
- {
1180
- "epoch": 0.76,
1181
- "learning_rate": 2.6193247962747383e-06,
1182
- "logits/chosen": -2.84368896484375,
1183
- "logits/rejected": -2.8325092792510986,
1184
- "logps/chosen": -319.6998596191406,
1185
- "logps/rejected": -299.0663146972656,
1186
- "loss": 0.1831,
1187
- "pred_label": 12431.7001953125,
1188
- "rewards/accuracies": 0.706250011920929,
1189
- "rewards/chosen": -2.6903202533721924,
1190
- "rewards/margins": 2.7115437984466553,
1191
- "rewards/rejected": -5.401864051818848,
1192
- "step": 730,
1193
- "use_label": 10770.2998046875
1194
- },
1195
- {
1196
- "epoch": 0.77,
1197
- "learning_rate": 2.5029103608847497e-06,
1198
- "logits/chosen": -2.8156962394714355,
1199
- "logits/rejected": -2.816779613494873,
1200
- "logps/chosen": -331.8976135253906,
1201
- "logps/rejected": -335.2292175292969,
1202
- "loss": 0.184,
1203
- "pred_label": 12686.3251953125,
1204
- "rewards/accuracies": 0.715624988079071,
1205
- "rewards/chosen": -3.126286745071411,
1206
- "rewards/margins": 3.2427031993865967,
1207
- "rewards/rejected": -6.36898946762085,
1208
- "step": 740,
1209
- "use_label": 10835.6748046875
1210
- },
1211
- {
1212
- "epoch": 0.79,
1213
- "learning_rate": 2.3864959254947616e-06,
1214
- "logits/chosen": -2.8092589378356934,
1215
- "logits/rejected": -2.7743942737579346,
1216
- "logps/chosen": -294.40643310546875,
1217
- "logps/rejected": -323.6761169433594,
1218
- "loss": 0.1726,
1219
- "pred_label": 12944.5498046875,
1220
- "rewards/accuracies": 0.737500011920929,
1221
- "rewards/chosen": -3.147745132446289,
1222
- "rewards/margins": 3.201345443725586,
1223
- "rewards/rejected": -6.349090576171875,
1224
- "step": 750,
1225
- "use_label": 10897.4501953125
1226
- },
1227
- {
1228
- "epoch": 0.8,
1229
- "learning_rate": 2.2700814901047734e-06,
1230
- "logits/chosen": -2.81386399269104,
1231
- "logits/rejected": -2.815582752227783,
1232
- "logps/chosen": -326.869873046875,
1233
- "logps/rejected": -325.6067199707031,
1234
- "loss": 0.1704,
1235
- "pred_label": 13210.6748046875,
1236
- "rewards/accuracies": 0.734375,
1237
- "rewards/chosen": -3.039788246154785,
1238
- "rewards/margins": 3.261427402496338,
1239
- "rewards/rejected": -6.301215171813965,
1240
- "step": 760,
1241
- "use_label": 10951.3251953125
1242
- },
1243
- {
1244
- "epoch": 0.81,
1245
- "learning_rate": 2.153667054714785e-06,
1246
- "logits/chosen": -2.886383533477783,
1247
- "logits/rejected": -2.861985445022583,
1248
- "logps/chosen": -347.4055480957031,
1249
- "logps/rejected": -348.0740661621094,
1250
- "loss": 0.171,
1251
- "pred_label": 13468.4375,
1252
- "rewards/accuracies": 0.715624988079071,
1253
- "rewards/chosen": -3.009340763092041,
1254
- "rewards/margins": 3.336160182952881,
1255
- "rewards/rejected": -6.3454999923706055,
1256
- "step": 770,
1257
- "use_label": 11013.5625
1258
- },
1259
- {
1260
- "epoch": 0.82,
1261
- "learning_rate": 2.0372526193247966e-06,
1262
- "logits/chosen": -2.831890344619751,
1263
- "logits/rejected": -2.814185619354248,
1264
- "logps/chosen": -318.90240478515625,
1265
- "logps/rejected": -319.7893981933594,
1266
- "loss": 0.1618,
1267
- "pred_label": 13735.0751953125,
1268
- "rewards/accuracies": 0.706250011920929,
1269
- "rewards/chosen": -3.30363392829895,
1270
- "rewards/margins": 2.769784688949585,
1271
- "rewards/rejected": -6.073418140411377,
1272
- "step": 780,
1273
- "use_label": 11066.9248046875
1274
- },
1275
- {
1276
- "epoch": 0.83,
1277
- "learning_rate": 1.920838183934808e-06,
1278
- "logits/chosen": -2.845829486846924,
1279
- "logits/rejected": -2.8150553703308105,
1280
- "logps/chosen": -299.32720947265625,
1281
- "logps/rejected": -322.7772521972656,
1282
- "loss": 0.1829,
1283
- "pred_label": 13994.1875,
1284
- "rewards/accuracies": 0.675000011920929,
1285
- "rewards/chosen": -3.5865261554718018,
1286
- "rewards/margins": 2.9081783294677734,
1287
- "rewards/rejected": -6.494704246520996,
1288
- "step": 790,
1289
- "use_label": 11127.8125
1290
- },
1291
- {
1292
- "epoch": 0.84,
1293
- "learning_rate": 1.8044237485448196e-06,
1294
- "logits/chosen": -2.7826781272888184,
1295
- "logits/rejected": -2.74163818359375,
1296
- "logps/chosen": -319.0447082519531,
1297
- "logps/rejected": -339.4092712402344,
1298
- "loss": 0.1622,
1299
- "pred_label": 14244.275390625,
1300
- "rewards/accuracies": 0.7093750238418579,
1301
- "rewards/chosen": -3.1698780059814453,
1302
- "rewards/margins": 3.544410228729248,
1303
- "rewards/rejected": -6.714288234710693,
1304
- "step": 800,
1305
- "use_label": 11197.724609375
1306
- },
1307
- {
1308
- "epoch": 0.85,
1309
- "learning_rate": 1.6880093131548315e-06,
1310
- "logits/chosen": -2.7925405502319336,
1311
- "logits/rejected": -2.7898240089416504,
1312
- "logps/chosen": -316.834716796875,
1313
- "logps/rejected": -300.3663330078125,
1314
- "loss": 0.1831,
1315
- "pred_label": 14493.287109375,
1316
- "rewards/accuracies": 0.6937500238418579,
1317
- "rewards/chosen": -3.4652843475341797,
1318
- "rewards/margins": 2.749237060546875,
1319
- "rewards/rejected": -6.214521408081055,
1320
- "step": 810,
1321
- "use_label": 11268.712890625
1322
- },
1323
- {
1324
- "epoch": 0.86,
1325
- "learning_rate": 1.5715948777648429e-06,
1326
- "logits/chosen": -2.7998623847961426,
1327
- "logits/rejected": -2.7955284118652344,
1328
- "logps/chosen": -329.0666198730469,
1329
- "logps/rejected": -320.14495849609375,
1330
- "loss": 0.203,
1331
- "pred_label": 14746.7998046875,
1332
- "rewards/accuracies": 0.6937500238418579,
1333
- "rewards/chosen": -3.2825725078582764,
1334
- "rewards/margins": 3.25768780708313,
1335
- "rewards/rejected": -6.540260314941406,
1336
- "step": 820,
1337
- "use_label": 11335.2001953125
1338
- },
1339
- {
1340
- "epoch": 0.87,
1341
- "learning_rate": 1.4551804423748547e-06,
1342
- "logits/chosen": -2.800252914428711,
1343
- "logits/rejected": -2.7940683364868164,
1344
- "logps/chosen": -293.72113037109375,
1345
- "logps/rejected": -365.59320068359375,
1346
- "loss": 0.1561,
1347
- "pred_label": 15014.0126953125,
1348
- "rewards/accuracies": 0.734375,
1349
- "rewards/chosen": -3.40069842338562,
1350
- "rewards/margins": 3.9982776641845703,
1351
- "rewards/rejected": -7.3989763259887695,
1352
- "step": 830,
1353
- "use_label": 11387.9873046875
1354
- },
1355
- {
1356
- "epoch": 0.88,
1357
- "learning_rate": 1.3387660069848663e-06,
1358
- "logits/chosen": -2.803891181945801,
1359
- "logits/rejected": -2.8099112510681152,
1360
- "logps/chosen": -318.6631164550781,
1361
- "logps/rejected": -330.1468811035156,
1362
- "loss": 0.1712,
1363
- "pred_label": 15274.9873046875,
1364
- "rewards/accuracies": 0.6781250238418579,
1365
- "rewards/chosen": -3.874751567840576,
1366
- "rewards/margins": 2.169405698776245,
1367
- "rewards/rejected": -6.044157981872559,
1368
- "step": 840,
1369
- "use_label": 11447.0126953125
1370
- },
1371
- {
1372
- "epoch": 0.89,
1373
- "learning_rate": 1.222351571594878e-06,
1374
- "logits/chosen": -2.803412914276123,
1375
- "logits/rejected": -2.825878620147705,
1376
- "logps/chosen": -309.2081604003906,
1377
- "logps/rejected": -315.7529602050781,
1378
- "loss": 0.1758,
1379
- "pred_label": 15528.2001953125,
1380
- "rewards/accuracies": 0.7124999761581421,
1381
- "rewards/chosen": -3.5470733642578125,
1382
- "rewards/margins": 3.1740689277648926,
1383
- "rewards/rejected": -6.721141815185547,
1384
- "step": 850,
1385
- "use_label": 11513.7998046875
1386
- },
1387
- {
1388
- "epoch": 0.9,
1389
- "learning_rate": 1.1059371362048893e-06,
1390
- "logits/chosen": -2.7905640602111816,
1391
- "logits/rejected": -2.7931439876556396,
1392
- "logps/chosen": -333.94171142578125,
1393
- "logps/rejected": -340.4829406738281,
1394
- "loss": 0.194,
1395
- "pred_label": 15781.775390625,
1396
- "rewards/accuracies": 0.640625,
1397
- "rewards/chosen": -4.047911643981934,
1398
- "rewards/margins": 2.3354756832122803,
1399
- "rewards/rejected": -6.383387565612793,
1400
- "step": 860,
1401
- "use_label": 11580.224609375
1402
- },
1403
- {
1404
- "epoch": 0.91,
1405
- "learning_rate": 9.895227008149012e-07,
1406
- "logits/chosen": -2.773538112640381,
1407
- "logits/rejected": -2.7751145362854004,
1408
- "logps/chosen": -297.7669982910156,
1409
- "logps/rejected": -327.5148010253906,
1410
- "loss": 0.1636,
1411
- "pred_label": 16038.3251953125,
1412
- "rewards/accuracies": 0.7093750238418579,
1413
- "rewards/chosen": -3.391941547393799,
1414
- "rewards/margins": 3.371931552886963,
1415
- "rewards/rejected": -6.7638726234436035,
1416
- "step": 870,
1417
- "use_label": 11643.6748046875
1418
- },
1419
- {
1420
- "epoch": 0.92,
1421
- "learning_rate": 8.731082654249128e-07,
1422
- "logits/chosen": -2.8104777336120605,
1423
- "logits/rejected": -2.7986841201782227,
1424
- "logps/chosen": -308.0206298828125,
1425
- "logps/rejected": -325.0122985839844,
1426
- "loss": 0.1666,
1427
- "pred_label": 16292.400390625,
1428
- "rewards/accuracies": 0.6875,
1429
- "rewards/chosen": -3.5868797302246094,
1430
- "rewards/margins": 2.8977627754211426,
1431
- "rewards/rejected": -6.484642028808594,
1432
- "step": 880,
1433
- "use_label": 11709.599609375
1434
- },
1435
- {
1436
- "epoch": 0.93,
1437
- "learning_rate": 7.566938300349244e-07,
1438
- "logits/chosen": -2.830348491668701,
1439
- "logits/rejected": -2.8405864238739014,
1440
- "logps/chosen": -300.59930419921875,
1441
- "logps/rejected": -333.3864440917969,
1442
- "loss": 0.1743,
1443
- "pred_label": 16542.474609375,
1444
- "rewards/accuracies": 0.703125,
1445
- "rewards/chosen": -3.5683798789978027,
1446
- "rewards/margins": 2.9162092208862305,
1447
- "rewards/rejected": -6.48459005355835,
1448
- "step": 890,
1449
- "use_label": 11779.525390625
1450
- },
1451
- {
1452
- "epoch": 0.94,
1453
- "learning_rate": 6.402793946449361e-07,
1454
- "logits/chosen": -2.7899200916290283,
1455
- "logits/rejected": -2.780874252319336,
1456
- "logps/chosen": -311.51422119140625,
1457
- "logps/rejected": -318.4853210449219,
1458
- "loss": 0.1903,
1459
- "pred_label": 16801.587890625,
1460
- "rewards/accuracies": 0.7093750238418579,
1461
- "rewards/chosen": -3.593315601348877,
1462
- "rewards/margins": 2.630795955657959,
1463
- "rewards/rejected": -6.224111080169678,
1464
- "step": 900,
1465
- "use_label": 11840.412109375
1466
- },
1467
- {
1468
- "epoch": 0.95,
1469
- "learning_rate": 5.238649592549476e-07,
1470
- "logits/chosen": -2.805046558380127,
1471
- "logits/rejected": -2.774458408355713,
1472
- "logps/chosen": -337.86859130859375,
1473
- "logps/rejected": -338.61444091796875,
1474
- "loss": 0.1984,
1475
- "pred_label": 17054.8125,
1476
- "rewards/accuracies": 0.6937500238418579,
1477
- "rewards/chosen": -3.481353282928467,
1478
- "rewards/margins": 3.0629849433898926,
1479
- "rewards/rejected": -6.544338226318359,
1480
- "step": 910,
1481
- "use_label": 11907.1875
1482
- },
1483
- {
1484
- "epoch": 0.96,
1485
- "learning_rate": 4.0745052386495924e-07,
1486
- "logits/chosen": -2.776197671890259,
1487
- "logits/rejected": -2.7662010192871094,
1488
- "logps/chosen": -325.04901123046875,
1489
- "logps/rejected": -332.0552978515625,
1490
- "loss": 0.1684,
1491
- "pred_label": 17301.6875,
1492
- "rewards/accuracies": 0.6968749761581421,
1493
- "rewards/chosen": -3.9256300926208496,
1494
- "rewards/margins": 2.9148411750793457,
1495
- "rewards/rejected": -6.8404717445373535,
1496
- "step": 920,
1497
- "use_label": 11980.3125
1498
- },
1499
- {
1500
- "epoch": 0.97,
1501
- "learning_rate": 2.910360884749709e-07,
1502
- "logits/chosen": -2.7972004413604736,
1503
- "logits/rejected": -2.7847728729248047,
1504
- "logps/chosen": -313.59600830078125,
1505
- "logps/rejected": -332.9956359863281,
1506
- "loss": 0.1698,
1507
- "pred_label": 17560.25,
1508
- "rewards/accuracies": 0.7593749761581421,
1509
- "rewards/chosen": -3.093097686767578,
1510
- "rewards/margins": 4.027865886688232,
1511
- "rewards/rejected": -7.120964050292969,
1512
- "step": 930,
1513
- "use_label": 12041.75
1514
- },
1515
- {
1516
- "epoch": 0.98,
1517
- "learning_rate": 1.7462165308498255e-07,
1518
- "logits/chosen": -2.8208765983581543,
1519
- "logits/rejected": -2.8135807514190674,
1520
- "logps/chosen": -323.46075439453125,
1521
- "logps/rejected": -343.5670471191406,
1522
- "loss": 0.1804,
1523
- "pred_label": 17812.375,
1524
- "rewards/accuracies": 0.7281249761581421,
1525
- "rewards/chosen": -3.7054646015167236,
1526
- "rewards/margins": 3.41404390335083,
1527
- "rewards/rejected": -7.119508266448975,
1528
- "step": 940,
1529
- "use_label": 12109.625
1530
- },
1531
- {
1532
- "epoch": 0.99,
1533
- "learning_rate": 5.8207217694994185e-08,
1534
- "logits/chosen": -2.8133676052093506,
1535
- "logits/rejected": -2.7860281467437744,
1536
- "logps/chosen": -309.82440185546875,
1537
- "logps/rejected": -331.61480712890625,
1538
- "loss": 0.1523,
1539
- "pred_label": 18072.86328125,
1540
- "rewards/accuracies": 0.7250000238418579,
1541
- "rewards/chosen": -3.8211193084716797,
1542
- "rewards/margins": 3.1999752521514893,
1543
- "rewards/rejected": -7.021093845367432,
1544
- "step": 950,
1545
- "use_label": 12169.1376953125
1546
- },
1547
- {
1548
- "epoch": 1.0,
1549
- "eval_logits/chosen": -2.819918155670166,
1550
- "eval_logits/rejected": -2.7931315898895264,
1551
- "eval_logps/chosen": -319.5545654296875,
1552
- "eval_logps/rejected": -329.6432189941406,
1553
- "eval_loss": 0.19092892110347748,
1554
- "eval_pred_label": 18723.943359375,
1555
- "eval_rewards/accuracies": 0.6940000057220459,
1556
- "eval_rewards/chosen": -3.757920265197754,
1557
- "eval_rewards/margins": 2.9776601791381836,
1558
- "eval_rewards/rejected": -6.735579967498779,
1559
- "eval_runtime": 857.7691,
1560
- "eval_samples_per_second": 2.332,
1561
- "eval_steps_per_second": 0.291,
1562
- "eval_use_label": 12338.0556640625,
1563
- "step": 955
1564
  },
1565
  {
1566
  "epoch": 1.0,
1567
- "step": 955,
1568
  "total_flos": 0.0,
1569
- "train_loss": 0.3070014505486214,
1570
- "train_runtime": 47653.0179,
1571
- "train_samples_per_second": 1.283,
1572
- "train_steps_per_second": 0.02
1573
  }
1574
  ],
1575
  "logging_steps": 10,
1576
- "max_steps": 955,
1577
  "num_train_epochs": 1,
1578
- "save_steps": 50,
1579
  "total_flos": 0.0,
1580
  "trial_name": null,
1581
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 1,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 1.0,
13
+ "learning_rate": 0.0,
14
+ "logps/chosen": -242.30880737304688,
15
+ "logps/rejected": -225.008056640625,
 
 
16
  "loss": 0.6931,
 
17
  "rewards/accuracies": 0.0,
18
  "rewards/chosen": 0.0,
19
  "rewards/margins": 0.0,
20
  "rewards/rejected": 0.0,
21
+ "step": 1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  },
23
  {
24
  "epoch": 1.0,
25
+ "step": 1,
26
  "total_flos": 0.0,
27
+ "train_loss": 0.6931471824645996,
28
+ "train_runtime": 27.1859,
29
+ "train_samples_per_second": 2.244,
30
+ "train_steps_per_second": 0.037
31
  }
32
  ],
33
  "logging_steps": 10,
34
+ "max_steps": 1,
35
  "num_train_epochs": 1,
36
+ "save_steps": 100,
37
  "total_flos": 0.0,
38
  "trial_name": null,
39
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3c29657dde208ca196ea18410c6b1a73428e06a2ce35fbe02875332ab53008a6
3
- size 4792
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d299158ded921c7c4a382f6de9a9047ea7879151db50274c82a74d2f19ddaeb9
3
+ size 4664