ketchup123 committed on
Commit a5b0816
1 Parent(s): 528477b

Model save

Files changed (4):
  1. README.md +78 -0
  2. all_results.json +9 -0
  3. train_results.json +9 -0
  4. trainer_state.json +826 -0
README.md ADDED
@@ -0,0 +1,78 @@
+ ---
+ library_name: peft
+ license: apache-2.0
+ base_model: mistralai/Mistral-7B-v0.1
+ tags:
+ - trl
+ - dpo
+ - generated_from_trainer
+ model-index:
+ - name: zephyr-7b-dpo-qlora
+   results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # zephyr-7b-dpo-qlora
+
+ This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on an unknown dataset.
+ It achieves the following results on the evaluation set:
+ - Loss: 0.5035
+ - Rewards/chosen: -2.0213
+ - Rewards/rejected: -3.0170
+ - Rewards/accuracies: 0.7656
+ - Rewards/margins: 0.9957
+ - Logps/rejected: -549.2363
+ - Logps/chosen: -448.5603
+ - Logits/rejected: -1.1850
+ - Logits/chosen: -1.2569
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 5e-06
+ - train_batch_size: 4
+ - eval_batch_size: 8
+ - seed: 42
+ - distributed_type: multi-GPU
+ - num_devices: 8
+ - gradient_accumulation_steps: 4
+ - total_train_batch_size: 128
+ - total_eval_batch_size: 64
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: cosine
+ - lr_scheduler_warmup_ratio: 0.1
+ - num_epochs: 1
+
+ ### Training results
+
+ | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
+ |:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
+ | 0.5646 | 0.2093 | 100 | 0.5739 | -0.9253 | -1.4816 | 0.7188 | 0.5564 | -395.6964 | -338.9565 | -1.9267 | -1.9878 |
+ | 0.5524 | 0.4186 | 200 | 0.5318 | -0.8476 | -1.5395 | 0.7617 | 0.6919 | -401.4810 | -331.1845 | -1.5104 | -1.5801 |
+ | 0.4977 | 0.6279 | 300 | 0.5100 | -1.8821 | -2.8383 | 0.7773 | 0.9562 | -531.3586 | -434.6388 | -1.1156 | -1.1878 |
+ | 0.5096 | 0.8373 | 400 | 0.5035 | -2.0213 | -3.0170 | 0.7656 | 0.9957 | -549.2363 | -448.5603 | -1.1850 | -1.2569 |
+
+
+ ### Framework versions
+
+ - PEFT 0.13.2
+ - Transformers 4.45.2
+ - Pytorch 2.1.2+cu121
+ - Datasets 3.0.1
+ - Tokenizers 0.20.1
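
A minimal sketch of how the hyperparameters listed above could be wired into `trl`'s `DPOTrainer` with a QLoRA adapter follows. This is not the script behind this commit: the preference dataset, the LoRA rank and target modules, the DPO `beta` (left at its library default), and `output_dir` are assumptions the card does not state; only the numeric values mirror the list, and a per-device batch size of 4 across 8 GPUs with 4 accumulation steps reproduces the total train batch size of 128. Keyword names (e.g. `tokenizer=` vs. `processing_class=`) differ between `trl` releases; this assumes one contemporary with Transformers 4.45.2.

```python
# Hedged sketch of a DPO + QLoRA setup mirroring the card's hyperparameters.
# Dataset, LoRA shape, beta, and output_dir are assumptions, not taken from this repo.
import torch
from datasets import Dataset
from peft import LoraConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from trl import DPOConfig, DPOTrainer

base_id = "mistralai/Mistral-7B-v0.1"

bnb = BitsAndBytesConfig(                      # QLoRA: 4-bit NF4 base weights
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)
model = AutoModelForCausalLM.from_pretrained(base_id, quantization_config=bnb)
tokenizer = AutoTokenizer.from_pretrained(base_id)
tokenizer.pad_token = tokenizer.eos_token

peft_config = LoraConfig(                      # adapter shape is an assumption
    r=16, lora_alpha=16, lora_dropout=0.05, task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
)

args = DPOConfig(                              # numeric values mirror the card
    output_dir="zephyr-7b-dpo-qlora",          # assumed
    learning_rate=5e-6,
    per_device_train_batch_size=4,             # 4 x 8 GPUs x 4 accumulation = 128 total
    per_device_eval_batch_size=8,
    gradient_accumulation_steps=4,
    num_train_epochs=1,
    lr_scheduler_type="cosine",
    warmup_ratio=0.1,
    seed=42,
    bf16=True,
    logging_steps=10,
    eval_strategy="steps",
    eval_steps=100,
    save_steps=100,
)

# Placeholder preference data; a real run needs a dataset with
# "prompt" / "chosen" / "rejected" columns (the card's dataset is unknown).
train_dataset = Dataset.from_dict({
    "prompt":   ["What does DPO optimize?"],
    "chosen":   ["A preference objective over chosen vs. rejected responses."],
    "rejected": ["It optimizes nothing."],
})

trainer = DPOTrainer(
    model,
    args=args,
    train_dataset=train_dataset,
    eval_dataset=train_dataset,                # placeholder so eval_strategy="steps" is valid
    tokenizer=tokenizer,
    peft_config=peft_config,
)
trainer.train()
```

For inference, the saved LoRA adapter would typically be loaded back onto the quantized base model with `peft.PeftModel.from_pretrained` rather than merged into full-precision weights.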
all_results.json ADDED
@@ -0,0 +1,9 @@
+ {
+     "epoch": 0.9984301412872841,
+     "total_flos": 0.0,
+     "train_loss": 0.5421361258444796,
+     "train_runtime": 7660.497,
+     "train_samples": 61134,
+     "train_samples_per_second": 7.98,
+     "train_steps_per_second": 0.062
+ }
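
As a quick consistency check on the figures above (pure arithmetic on the reported values; the 477 optimizer steps come from `trainer_state.json` further down):

```python
# Consistency check of the throughput numbers reported in all_results.json.
train_samples = 61134        # training examples reported
train_runtime = 7660.497     # seconds
global_steps  = 477          # optimizer steps, from trainer_state.json

print(round(train_samples / train_runtime, 2))   # 7.98  -> train_samples_per_second
print(round(global_steps / train_runtime, 3))    # 0.062 -> train_steps_per_second
```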
train_results.json ADDED
@@ -0,0 +1,9 @@
+ {
+     "epoch": 0.9984301412872841,
+     "total_flos": 0.0,
+     "train_loss": 0.5421361258444796,
+     "train_runtime": 7660.497,
+     "train_samples": 61134,
+     "train_samples_per_second": 7.98,
+     "train_steps_per_second": 0.062
+ }
trainer_state.json ADDED
@@ -0,0 +1,826 @@
+ {
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 0.9984301412872841,
+   "eval_steps": 100,
+   "global_step": 477,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.0020931449502878076,
+       "grad_norm": 0.8892470002174377,
+       "learning_rate": 1.0416666666666667e-07,
+       "logits/chosen": -2.5115435123443604,
+       "logits/rejected": -2.418633460998535,
+       "logps/chosen": -296.6790771484375,
+       "logps/rejected": -343.70672607421875,
+       "loss": 0.6931,
+       "rewards/accuracies": 0.0,
+       "rewards/chosen": 0.0,
+       "rewards/margins": 0.0,
+       "rewards/rejected": 0.0,
+       "step": 1
+     },
+     {
+       "epoch": 0.020931449502878074,
+       "grad_norm": 0.8503940105438232,
+       "learning_rate": 1.0416666666666667e-06,
+       "logits/chosen": -2.2245330810546875,
+       "logits/rejected": -2.1629724502563477,
+       "logps/chosen": -283.80975341796875,
+       "logps/rejected": -255.17117309570312,
+       "loss": 0.6926,
+       "rewards/accuracies": 0.4861111044883728,
+       "rewards/chosen": 0.008690375834703445,
+       "rewards/margins": 0.0015612257411703467,
+       "rewards/rejected": 0.0071291509084403515,
+       "step": 10
+     },
+     {
+       "epoch": 0.04186289900575615,
+       "grad_norm": 0.8254271149635315,
+       "learning_rate": 2.0833333333333334e-06,
+       "logits/chosen": -2.335510015487671,
+       "logits/rejected": -2.22532057762146,
+       "logps/chosen": -286.57855224609375,
+       "logps/rejected": -261.207763671875,
+       "loss": 0.6874,
+       "rewards/accuracies": 0.65625,
+       "rewards/chosen": 0.03880625218153,
+       "rewards/margins": 0.011353857815265656,
+       "rewards/rejected": 0.027452394366264343,
+       "step": 20
+     },
+     {
+       "epoch": 0.06279434850863422,
+       "grad_norm": 0.8504378795623779,
+       "learning_rate": 3.125e-06,
+       "logits/chosen": -2.2984628677368164,
+       "logits/rejected": -2.184295892715454,
+       "logps/chosen": -261.04315185546875,
+       "logps/rejected": -227.68020629882812,
+       "loss": 0.6805,
+       "rewards/accuracies": 0.699999988079071,
+       "rewards/chosen": 0.06454791873693466,
+       "rewards/margins": 0.028396595269441605,
+       "rewards/rejected": 0.03615132346749306,
+       "step": 30
+     },
+     {
+       "epoch": 0.0837257980115123,
+       "grad_norm": 0.9619700908660889,
+       "learning_rate": 4.166666666666667e-06,
+       "logits/chosen": -2.1841602325439453,
+       "logits/rejected": -2.1042842864990234,
+       "logps/chosen": -244.4292449951172,
+       "logps/rejected": -248.12057495117188,
+       "loss": 0.664,
+       "rewards/accuracies": 0.6875,
+       "rewards/chosen": 0.03205280378460884,
+       "rewards/margins": 0.06447319686412811,
+       "rewards/rejected": -0.03242039680480957,
+       "step": 40
+     },
+     {
+       "epoch": 0.10465724751439037,
+       "grad_norm": 1.198764443397522,
+       "learning_rate": 4.999731868769027e-06,
+       "logits/chosen": -2.2160000801086426,
+       "logits/rejected": -2.09934663772583,
+       "logps/chosen": -269.432861328125,
+       "logps/rejected": -274.974365234375,
+       "loss": 0.6454,
+       "rewards/accuracies": 0.7437499761581421,
+       "rewards/chosen": -0.08960571140050888,
+       "rewards/margins": 0.10663382709026337,
+       "rewards/rejected": -0.19623954594135284,
+       "step": 50
+     },
+     {
+       "epoch": 0.12558869701726844,
+       "grad_norm": 1.695876121520996,
+       "learning_rate": 4.9903533134293035e-06,
+       "logits/chosen": -2.236078977584839,
+       "logits/rejected": -2.1389355659484863,
+       "logps/chosen": -262.9930725097656,
+       "logps/rejected": -276.64984130859375,
+       "loss": 0.6264,
+       "rewards/accuracies": 0.7437499761581421,
+       "rewards/chosen": -0.264637291431427,
+       "rewards/margins": 0.2180159091949463,
+       "rewards/rejected": -0.4826532006263733,
+       "step": 60
+     },
+     {
+       "epoch": 0.14652014652014653,
+       "grad_norm": 2.115797519683838,
+       "learning_rate": 4.967625656594782e-06,
+       "logits/chosen": -2.1270174980163574,
+       "logits/rejected": -2.063481569290161,
+       "logps/chosen": -319.60888671875,
+       "logps/rejected": -334.22705078125,
+       "loss": 0.607,
+       "rewards/accuracies": 0.737500011920929,
+       "rewards/chosen": -0.4378367066383362,
+       "rewards/margins": 0.2902434766292572,
+       "rewards/rejected": -0.728080153465271,
+       "step": 70
+     },
+     {
+       "epoch": 0.1674515960230246,
+       "grad_norm": 2.7295830249786377,
+       "learning_rate": 4.93167072587771e-06,
+       "logits/chosen": -2.275411605834961,
+       "logits/rejected": -2.0854580402374268,
+       "logps/chosen": -358.58154296875,
+       "logps/rejected": -320.63385009765625,
+       "loss": 0.6069,
+       "rewards/accuracies": 0.768750011920929,
+       "rewards/chosen": -0.49991363286972046,
+       "rewards/margins": 0.31041082739830017,
+       "rewards/rejected": -0.8103245496749878,
+       "step": 80
+     },
+     {
+       "epoch": 0.18838304552590268,
+       "grad_norm": 2.6313765048980713,
+       "learning_rate": 4.882681251368549e-06,
+       "logits/chosen": -2.1408169269561768,
+       "logits/rejected": -2.067002058029175,
+       "logps/chosen": -289.2994079589844,
+       "logps/rejected": -331.0093994140625,
+       "loss": 0.576,
+       "rewards/accuracies": 0.699999988079071,
+       "rewards/chosen": -0.5190187692642212,
+       "rewards/margins": 0.3863602876663208,
+       "rewards/rejected": -0.9053791165351868,
+       "step": 90
+     },
+     {
+       "epoch": 0.20931449502878074,
+       "grad_norm": 3.2287309169769287,
+       "learning_rate": 4.8209198325401815e-06,
+       "logits/chosen": -2.0671892166137695,
+       "logits/rejected": -1.958622694015503,
+       "logps/chosen": -370.7698059082031,
+       "logps/rejected": -413.80120849609375,
+       "loss": 0.5646,
+       "rewards/accuracies": 0.7749999761581421,
+       "rewards/chosen": -0.933598518371582,
+       "rewards/margins": 0.5866175889968872,
+       "rewards/rejected": -1.5202162265777588,
+       "step": 100
+     },
+     {
+       "epoch": 0.20931449502878074,
+       "eval_logits/chosen": -1.9878015518188477,
+       "eval_logits/rejected": -1.9267165660858154,
+       "eval_logps/chosen": -338.9564514160156,
+       "eval_logps/rejected": -395.69635009765625,
+       "eval_loss": 0.573878288269043,
+       "eval_rewards/accuracies": 0.71875,
+       "eval_rewards/chosen": -0.9252709150314331,
+       "eval_rewards/margins": 0.5563679933547974,
+       "eval_rewards/rejected": -1.4816389083862305,
+       "eval_runtime": 110.707,
+       "eval_samples_per_second": 18.066,
+       "eval_steps_per_second": 0.289,
+       "step": 100
+     },
+     {
+       "epoch": 0.2302459445316588,
+       "grad_norm": 3.1588852405548096,
+       "learning_rate": 4.746717530629565e-06,
+       "logits/chosen": -2.0595319271087646,
+       "logits/rejected": -1.9567897319793701,
+       "logps/chosen": -360.9635009765625,
+       "logps/rejected": -388.3221130371094,
+       "loss": 0.5789,
+       "rewards/accuracies": 0.65625,
+       "rewards/chosen": -0.860044002532959,
+       "rewards/margins": 0.4338661730289459,
+       "rewards/rejected": -1.2939101457595825,
+       "step": 110
+     },
+     {
+       "epoch": 0.25117739403453687,
+       "grad_norm": 2.8980185985565186,
+       "learning_rate": 4.660472094042121e-06,
+       "logits/chosen": -1.8865253925323486,
+       "logits/rejected": -1.8495429754257202,
+       "logps/chosen": -350.013916015625,
+       "logps/rejected": -394.4359130859375,
+       "loss": 0.5592,
+       "rewards/accuracies": 0.7437499761581421,
+       "rewards/chosen": -0.9938249588012695,
+       "rewards/margins": 0.5278288125991821,
+       "rewards/rejected": -1.5216538906097412,
+       "step": 120
+     },
+     {
+       "epoch": 0.272108843537415,
+       "grad_norm": 3.334541082382202,
+       "learning_rate": 4.5626458262912745e-06,
+       "logits/chosen": -1.8878917694091797,
+       "logits/rejected": -1.8218252658843994,
+       "logps/chosen": -396.32659912109375,
+       "logps/rejected": -439.26513671875,
+       "loss": 0.5373,
+       "rewards/accuracies": 0.7124999761581421,
+       "rewards/chosen": -1.3216043710708618,
+       "rewards/margins": 0.5591233968734741,
+       "rewards/rejected": -1.880727767944336,
+       "step": 130
+     },
+     {
+       "epoch": 0.29304029304029305,
+       "grad_norm": 3.480584144592285,
+       "learning_rate": 4.453763107901676e-06,
+       "logits/chosen": -1.8928722143173218,
+       "logits/rejected": -1.7833354473114014,
+       "logps/chosen": -396.819091796875,
+       "logps/rejected": -430.72674560546875,
+       "loss": 0.549,
+       "rewards/accuracies": 0.75,
+       "rewards/chosen": -1.0090841054916382,
+       "rewards/margins": 0.7491954565048218,
+       "rewards/rejected": -1.75827956199646,
+       "step": 140
+     },
+     {
+       "epoch": 0.3139717425431711,
+       "grad_norm": 3.2153046131134033,
+       "learning_rate": 4.33440758555951e-06,
+       "logits/chosen": -1.8725719451904297,
+       "logits/rejected": -1.7752315998077393,
+       "logps/chosen": -358.0649719238281,
+       "logps/rejected": -379.94464111328125,
+       "loss": 0.5479,
+       "rewards/accuracies": 0.6937500238418579,
+       "rewards/chosen": -0.9543037414550781,
+       "rewards/margins": 0.5243933796882629,
+       "rewards/rejected": -1.4786970615386963,
+       "step": 150
+     },
+     {
+       "epoch": 0.3349031920460492,
+       "grad_norm": 3.572585344314575,
+       "learning_rate": 4.205219043576955e-06,
+       "logits/chosen": -1.8374134302139282,
+       "logits/rejected": -1.6999791860580444,
+       "logps/chosen": -347.92791748046875,
+       "logps/rejected": -397.92236328125,
+       "loss": 0.5582,
+       "rewards/accuracies": 0.737500011920929,
+       "rewards/chosen": -0.8851444125175476,
+       "rewards/margins": 0.7162662744522095,
+       "rewards/rejected": -1.6014108657836914,
+       "step": 160
+     },
+     {
+       "epoch": 0.35583464154892724,
+       "grad_norm": 4.003506183624268,
+       "learning_rate": 4.066889974440757e-06,
+       "logits/chosen": -1.4849274158477783,
+       "logits/rejected": -1.420831561088562,
+       "logps/chosen": -422.58807373046875,
+       "logps/rejected": -454.4142150878906,
+       "loss": 0.5563,
+       "rewards/accuracies": 0.6937500238418579,
+       "rewards/chosen": -1.7905734777450562,
+       "rewards/margins": 0.5325244665145874,
+       "rewards/rejected": -2.3230979442596436,
+       "step": 170
+     },
+     {
+       "epoch": 0.37676609105180536,
+       "grad_norm": 3.9669156074523926,
+       "learning_rate": 3.92016186682789e-06,
+       "logits/chosen": -1.6295009851455688,
+       "logits/rejected": -1.550588846206665,
+       "logps/chosen": -370.55133056640625,
+       "logps/rejected": -407.09735107421875,
+       "loss": 0.5112,
+       "rewards/accuracies": 0.731249988079071,
+       "rewards/chosen": -1.166177749633789,
+       "rewards/margins": 0.6077830195426941,
+       "rewards/rejected": -1.773960828781128,
+       "step": 180
+     },
+     {
+       "epoch": 0.3976975405546834,
+       "grad_norm": 4.69191312789917,
+       "learning_rate": 3.7658212309857576e-06,
+       "logits/chosen": -1.5681777000427246,
+       "logits/rejected": -1.5323166847229004,
+       "logps/chosen": -348.3360900878906,
+       "logps/rejected": -402.2555847167969,
+       "loss": 0.5366,
+       "rewards/accuracies": 0.7250000238418579,
+       "rewards/chosen": -1.0483043193817139,
+       "rewards/margins": 0.5953764915466309,
+       "rewards/rejected": -1.6436808109283447,
+       "step": 190
+     },
+     {
+       "epoch": 0.4186289900575615,
+       "grad_norm": 3.6216626167297363,
+       "learning_rate": 3.604695382782159e-06,
+       "logits/chosen": -1.514801263809204,
+       "logits/rejected": -1.5004067420959473,
+       "logps/chosen": -343.3397216796875,
+       "logps/rejected": -422.3468322753906,
+       "loss": 0.5524,
+       "rewards/accuracies": 0.6812499761581421,
+       "rewards/chosen": -1.099816918373108,
+       "rewards/margins": 0.5951756834983826,
+       "rewards/rejected": -1.6949926614761353,
+       "step": 200
+     },
+     {
+       "epoch": 0.4186289900575615,
+       "eval_logits/chosen": -1.5800749063491821,
+       "eval_logits/rejected": -1.5103871822357178,
+       "eval_logps/chosen": -331.1845397949219,
+       "eval_logps/rejected": -401.48101806640625,
+       "eval_loss": 0.5318315029144287,
+       "eval_rewards/accuracies": 0.76171875,
+       "eval_rewards/chosen": -0.8475518226623535,
+       "eval_rewards/margins": 0.6919333338737488,
+       "eval_rewards/rejected": -1.5394850969314575,
+       "eval_runtime": 110.7567,
+       "eval_samples_per_second": 18.058,
+       "eval_steps_per_second": 0.289,
+       "step": 200
+     },
+     {
+       "epoch": 0.43956043956043955,
+       "grad_norm": 4.658122539520264,
+       "learning_rate": 3.437648009023905e-06,
+       "logits/chosen": -1.5987634658813477,
+       "logits/rejected": -1.4259040355682373,
+       "logps/chosen": -384.1799011230469,
+       "logps/rejected": -397.27679443359375,
+       "loss": 0.5351,
+       "rewards/accuracies": 0.7250000238418579,
+       "rewards/chosen": -0.9452459216117859,
+       "rewards/margins": 0.6696128845214844,
+       "rewards/rejected": -1.614858865737915,
+       "step": 210
+     },
+     {
+       "epoch": 0.4604918890633176,
+       "grad_norm": 4.130067348480225,
+       "learning_rate": 3.265574537815398e-06,
+       "logits/chosen": -1.387083649635315,
+       "logits/rejected": -1.3616085052490234,
+       "logps/chosen": -346.65240478515625,
+       "logps/rejected": -429.26007080078125,
+       "loss": 0.5306,
+       "rewards/accuracies": 0.731249988079071,
+       "rewards/chosen": -1.253117561340332,
+       "rewards/margins": 0.7737840414047241,
+       "rewards/rejected": -2.0269014835357666,
+       "step": 220
+     },
+     {
+       "epoch": 0.48142333856619574,
+       "grad_norm": 3.835031032562256,
+       "learning_rate": 3.089397338773569e-06,
+       "logits/chosen": -1.5156991481781006,
+       "logits/rejected": -1.389380693435669,
+       "logps/chosen": -364.519287109375,
+       "logps/rejected": -443.21649169921875,
+       "loss": 0.5296,
+       "rewards/accuracies": 0.7749999761581421,
+       "rewards/chosen": -1.2139475345611572,
+       "rewards/margins": 0.9700647592544556,
+       "rewards/rejected": -2.1840121746063232,
+       "step": 230
+     },
+     {
+       "epoch": 0.5023547880690737,
+       "grad_norm": 3.9292032718658447,
+       "learning_rate": 2.9100607788275547e-06,
+       "logits/chosen": -1.4860846996307373,
+       "logits/rejected": -1.3620604276657104,
+       "logps/chosen": -396.4831848144531,
+       "logps/rejected": -455.0039978027344,
+       "loss": 0.4992,
+       "rewards/accuracies": 0.731249988079071,
+       "rewards/chosen": -1.2688108682632446,
+       "rewards/margins": 0.8345935940742493,
+       "rewards/rejected": -2.1034045219421387,
+       "step": 240
+     },
+     {
+       "epoch": 0.5232862375719518,
+       "grad_norm": 3.9835152626037598,
+       "learning_rate": 2.72852616010567e-06,
+       "logits/chosen": -1.3683382272720337,
+       "logits/rejected": -1.203263282775879,
+       "logps/chosen": -439.71734619140625,
+       "logps/rejected": -500.26995849609375,
+       "loss": 0.5193,
+       "rewards/accuracies": 0.762499988079071,
+       "rewards/chosen": -1.8103727102279663,
+       "rewards/margins": 0.9737499356269836,
+       "rewards/rejected": -2.7841227054595947,
+       "step": 250
+     },
+     {
+       "epoch": 0.54421768707483,
+       "grad_norm": 4.053774356842041,
+       "learning_rate": 2.5457665670441937e-06,
+       "logits/chosen": -1.3506557941436768,
+       "logits/rejected": -1.3202815055847168,
+       "logps/chosen": -425.1206970214844,
+       "logps/rejected": -482.37060546875,
+       "loss": 0.5157,
+       "rewards/accuracies": 0.7124999761581421,
+       "rewards/chosen": -1.7851899862289429,
+       "rewards/margins": 0.7175502181053162,
+       "rewards/rejected": -2.5027403831481934,
+       "step": 260
+     },
+     {
+       "epoch": 0.565149136577708,
+       "grad_norm": 3.4300835132598877,
+       "learning_rate": 2.3627616503391813e-06,
+       "logits/chosen": -1.3187822103500366,
+       "logits/rejected": -1.2432146072387695,
+       "logps/chosen": -435.5782775878906,
+       "logps/rejected": -497.5511169433594,
+       "loss": 0.5274,
+       "rewards/accuracies": 0.737500011920929,
+       "rewards/chosen": -1.6222509145736694,
+       "rewards/margins": 0.8602613210678101,
+       "rewards/rejected": -2.4825127124786377,
+       "step": 270
+     },
+     {
+       "epoch": 0.5860805860805861,
+       "grad_norm": 4.350644588470459,
+       "learning_rate": 2.1804923757009885e-06,
+       "logits/chosen": -1.173998236656189,
+       "logits/rejected": -1.0935077667236328,
+       "logps/chosen": -396.106201171875,
+       "logps/rejected": -447.62359619140625,
+       "loss": 0.5269,
+       "rewards/accuracies": 0.706250011920929,
+       "rewards/chosen": -1.5709511041641235,
+       "rewards/margins": 0.7292364835739136,
+       "rewards/rejected": -2.300187587738037,
+       "step": 280
+     },
+     {
+       "epoch": 0.6070120355834642,
+       "grad_norm": 3.7417469024658203,
+       "learning_rate": 1.9999357655598894e-06,
+       "logits/chosen": -1.29354727268219,
+       "logits/rejected": -1.1772781610488892,
+       "logps/chosen": -411.03814697265625,
+       "logps/rejected": -484.0711975097656,
+       "loss": 0.5207,
+       "rewards/accuracies": 0.737500011920929,
+       "rewards/chosen": -1.7882314920425415,
+       "rewards/margins": 0.787774920463562,
+       "rewards/rejected": -2.5760064125061035,
+       "step": 290
+     },
+     {
+       "epoch": 0.6279434850863422,
+       "grad_norm": 5.170950889587402,
+       "learning_rate": 1.8220596619089576e-06,
+       "logits/chosen": -1.2801265716552734,
+       "logits/rejected": -1.1654409170150757,
+       "logps/chosen": -476.93023681640625,
+       "logps/rejected": -508.8740234375,
+       "loss": 0.4977,
+       "rewards/accuracies": 0.7124999761581421,
+       "rewards/chosen": -1.7797939777374268,
+       "rewards/margins": 0.7282718420028687,
+       "rewards/rejected": -2.508065700531006,
+       "step": 300
+     },
+     {
+       "epoch": 0.6279434850863422,
+       "eval_logits/chosen": -1.187766194343567,
+       "eval_logits/rejected": -1.1155990362167358,
+       "eval_logps/chosen": -434.6387939453125,
+       "eval_logps/rejected": -531.358642578125,
+       "eval_loss": 0.5100404620170593,
+       "eval_rewards/accuracies": 0.77734375,
+       "eval_rewards/chosen": -1.882094383239746,
+       "eval_rewards/margins": 0.9561675786972046,
+       "eval_rewards/rejected": -2.838261842727661,
+       "eval_runtime": 110.6981,
+       "eval_samples_per_second": 18.067,
+       "eval_steps_per_second": 0.289,
+       "step": 300
+     },
+     {
+       "epoch": 0.6488749345892203,
+       "grad_norm": 4.2551469802856445,
+       "learning_rate": 1.647817538357072e-06,
+       "logits/chosen": -1.2920277118682861,
+       "logits/rejected": -1.1881086826324463,
+       "logps/chosen": -491.37213134765625,
+       "logps/rejected": -541.3331298828125,
+       "loss": 0.4999,
+       "rewards/accuracies": 0.7875000238418579,
+       "rewards/chosen": -2.065791130065918,
+       "rewards/margins": 1.0087900161743164,
+       "rewards/rejected": -3.0745811462402344,
+       "step": 310
+     },
+     {
+       "epoch": 0.6698063840920984,
+       "grad_norm": 5.16890811920166,
+       "learning_rate": 1.4781433892011132e-06,
+       "logits/chosen": -1.2393107414245605,
+       "logits/rejected": -1.1026521921157837,
+       "logps/chosen": -445.18408203125,
+       "logps/rejected": -523.7984619140625,
+       "loss": 0.4901,
+       "rewards/accuracies": 0.800000011920929,
+       "rewards/chosen": -2.0319957733154297,
+       "rewards/margins": 1.0875332355499268,
+       "rewards/rejected": -3.1195292472839355,
+       "step": 320
+     },
+     {
+       "epoch": 0.6907378335949764,
+       "grad_norm": 4.174256324768066,
+       "learning_rate": 1.3139467229135999e-06,
+       "logits/chosen": -1.3440932035446167,
+       "logits/rejected": -1.3159363269805908,
+       "logps/chosen": -418.71905517578125,
+       "logps/rejected": -499.96612548828125,
+       "loss": 0.5117,
+       "rewards/accuracies": 0.7250000238418579,
+       "rewards/chosen": -1.6654974222183228,
+       "rewards/margins": 0.8459057807922363,
+       "rewards/rejected": -2.5114030838012695,
+       "step": 330
+     },
+     {
+       "epoch": 0.7116692830978545,
+       "grad_norm": 4.793274402618408,
+       "learning_rate": 1.1561076868822756e-06,
+       "logits/chosen": -1.2274696826934814,
+       "logits/rejected": -1.1634485721588135,
+       "logps/chosen": -472.03167724609375,
+       "logps/rejected": -538.4515380859375,
+       "loss": 0.5043,
+       "rewards/accuracies": 0.731249988079071,
+       "rewards/chosen": -2.003690719604492,
+       "rewards/margins": 0.8766867518424988,
+       "rewards/rejected": -2.8803775310516357,
+       "step": 340
+     },
+     {
+       "epoch": 0.7326007326007326,
+       "grad_norm": 4.15632963180542,
+       "learning_rate": 1.0054723495346484e-06,
+       "logits/chosen": -1.2941699028015137,
+       "logits/rejected": -1.1815686225891113,
+       "logps/chosen": -464.3369140625,
+       "logps/rejected": -537.1565551757812,
+       "loss": 0.467,
+       "rewards/accuracies": 0.793749988079071,
+       "rewards/chosen": -2.2074246406555176,
+       "rewards/margins": 0.9847939610481262,
+       "rewards/rejected": -3.192218542098999,
+       "step": 350
+     },
+     {
+       "epoch": 0.7535321821036107,
+       "grad_norm": 3.9081270694732666,
+       "learning_rate": 8.628481651367876e-07,
+       "logits/chosen": -1.2783113718032837,
+       "logits/rejected": -1.1381876468658447,
+       "logps/chosen": -522.1004028320312,
+       "logps/rejected": -580.3312377929688,
+       "loss": 0.5229,
+       "rewards/accuracies": 0.762499988079071,
+       "rewards/chosen": -2.310837745666504,
+       "rewards/margins": 1.0041909217834473,
+       "rewards/rejected": -3.315028429031372,
+       "step": 360
+     },
+     {
+       "epoch": 0.7744636316064888,
+       "grad_norm": 4.175972938537598,
+       "learning_rate": 7.289996455765749e-07,
+       "logits/chosen": -1.305639386177063,
+       "logits/rejected": -1.1913295984268188,
+       "logps/chosen": -472.50103759765625,
+       "logps/rejected": -539.5281372070312,
+       "loss": 0.515,
+       "rewards/accuracies": 0.762499988079071,
+       "rewards/chosen": -2.2103240489959717,
+       "rewards/margins": 0.9552016258239746,
+       "rewards/rejected": -3.1655256748199463,
+       "step": 370
+     },
+     {
+       "epoch": 0.7953950811093669,
+       "grad_norm": 4.144151210784912,
+       "learning_rate": 6.046442623320145e-07,
+       "logits/chosen": -1.1470415592193604,
+       "logits/rejected": -1.1134142875671387,
+       "logps/chosen": -463.9144592285156,
+       "logps/rejected": -589.644287109375,
+       "loss": 0.4996,
+       "rewards/accuracies": 0.7749999761581421,
+       "rewards/chosen": -2.329439401626587,
+       "rewards/margins": 1.0926482677459717,
+       "rewards/rejected": -3.4220874309539795,
+       "step": 380
+     },
+     {
+       "epoch": 0.8163265306122449,
+       "grad_norm": 3.9941041469573975,
+       "learning_rate": 4.904486005914027e-07,
+       "logits/chosen": -1.3255889415740967,
+       "logits/rejected": -1.2115201950073242,
+       "logps/chosen": -541.8289794921875,
+       "logps/rejected": -589.1190185546875,
+       "loss": 0.4868,
+       "rewards/accuracies": 0.71875,
+       "rewards/chosen": -2.216815710067749,
+       "rewards/margins": 0.8514670133590698,
+       "rewards/rejected": -3.0682828426361084,
+       "step": 390
+     },
+     {
+       "epoch": 0.837257980115123,
+       "grad_norm": 4.9559149742126465,
+       "learning_rate": 3.8702478614051353e-07,
+       "logits/chosen": -1.2322378158569336,
+       "logits/rejected": -1.1362693309783936,
+       "logps/chosen": -454.14727783203125,
+       "logps/rejected": -534.5038452148438,
+       "loss": 0.5096,
+       "rewards/accuracies": 0.7437499761581421,
+       "rewards/chosen": -2.1453731060028076,
+       "rewards/margins": 0.9312618374824524,
+       "rewards/rejected": -3.0766348838806152,
+       "step": 400
+     },
+     {
+       "epoch": 0.837257980115123,
+       "eval_logits/chosen": -1.2569077014923096,
+       "eval_logits/rejected": -1.1850428581237793,
+       "eval_logps/chosen": -448.560302734375,
+       "eval_logps/rejected": -549.236328125,
+       "eval_loss": 0.5035256743431091,
+       "eval_rewards/accuracies": 0.765625,
+       "eval_rewards/chosen": -2.0213088989257812,
+       "eval_rewards/margins": 0.9957298040390015,
+       "eval_rewards/rejected": -3.017038583755493,
+       "eval_runtime": 110.7002,
+       "eval_samples_per_second": 18.067,
+       "eval_steps_per_second": 0.289,
+       "step": 400
+     },
+     {
+       "epoch": 0.858189429618001,
+       "grad_norm": 4.206878185272217,
+       "learning_rate": 2.9492720416985004e-07,
+       "logits/chosen": -1.3825483322143555,
+       "logits/rejected": -1.2790623903274536,
+       "logps/chosen": -492.615234375,
+       "logps/rejected": -567.6419067382812,
+       "loss": 0.5101,
+       "rewards/accuracies": 0.768750011920929,
+       "rewards/chosen": -2.1133949756622314,
+       "rewards/margins": 1.0342962741851807,
+       "rewards/rejected": -3.147691249847412,
+       "step": 410
+     },
+     {
+       "epoch": 0.8791208791208791,
+       "grad_norm": 4.6710124015808105,
+       "learning_rate": 2.1464952759020857e-07,
+       "logits/chosen": -1.2218900918960571,
+       "logits/rejected": -1.1765925884246826,
+       "logps/chosen": -447.31451416015625,
+       "logps/rejected": -551.3411865234375,
+       "loss": 0.5055,
+       "rewards/accuracies": 0.737500011920929,
+       "rewards/chosen": -2.0722880363464355,
+       "rewards/margins": 0.9839819073677063,
+       "rewards/rejected": -3.056270122528076,
+       "step": 420
+     },
+     {
+       "epoch": 0.9000523286237572,
+       "grad_norm": 3.861823081970215,
+       "learning_rate": 1.4662207078575685e-07,
+       "logits/chosen": -1.2683221101760864,
+       "logits/rejected": -1.1284939050674438,
+       "logps/chosen": -473.9938049316406,
+       "logps/rejected": -547.7625122070312,
+       "loss": 0.478,
+       "rewards/accuracies": 0.768750011920929,
+       "rewards/chosen": -1.9979991912841797,
+       "rewards/margins": 0.9907487630844116,
+       "rewards/rejected": -2.988748073577881,
+       "step": 430
+     },
+     {
+       "epoch": 0.9209837781266352,
+       "grad_norm": 4.48820686340332,
+       "learning_rate": 9.120948298936422e-08,
+       "logits/chosen": -1.4051973819732666,
+       "logits/rejected": -1.2804924249649048,
+       "logps/chosen": -447.3819274902344,
+       "logps/rejected": -505.71221923828125,
+       "loss": 0.4913,
+       "rewards/accuracies": 0.699999988079071,
+       "rewards/chosen": -2.0068001747131348,
+       "rewards/margins": 0.9424559473991394,
+       "rewards/rejected": -2.94925594329834,
+       "step": 440
+     },
+     {
+       "epoch": 0.9419152276295133,
+       "grad_norm": 4.315399169921875,
+       "learning_rate": 4.870879364444109e-08,
+       "logits/chosen": -1.1320288181304932,
+       "logits/rejected": -1.0678789615631104,
+       "logps/chosen": -466.1886291503906,
+       "logps/rejected": -553.8982543945312,
+       "loss": 0.4949,
+       "rewards/accuracies": 0.7437499761581421,
+       "rewards/chosen": -2.0924811363220215,
+       "rewards/margins": 0.9840418696403503,
+       "rewards/rejected": -3.0765230655670166,
+       "step": 450
+     },
+     {
+       "epoch": 0.9628466771323915,
+       "grad_norm": 4.0728840827941895,
+       "learning_rate": 1.93478202307823e-08,
+       "logits/chosen": -1.290541410446167,
+       "logits/rejected": -1.133880376815796,
+       "logps/chosen": -460.96710205078125,
+       "logps/rejected": -513.3797607421875,
+       "loss": 0.5042,
+       "rewards/accuracies": 0.7749999761581421,
+       "rewards/chosen": -2.039780855178833,
+       "rewards/margins": 0.937634289264679,
+       "rewards/rejected": -2.977414608001709,
+       "step": 460
+     },
+     {
+       "epoch": 0.9837781266352695,
+       "grad_norm": 3.986497640609741,
+       "learning_rate": 3.283947088983663e-09,
+       "logits/chosen": -1.2861273288726807,
+       "logits/rejected": -1.1986534595489502,
+       "logps/chosen": -484.498046875,
+       "logps/rejected": -541.9881591796875,
+       "loss": 0.4914,
+       "rewards/accuracies": 0.6875,
+       "rewards/chosen": -2.0398573875427246,
+       "rewards/margins": 0.8533897399902344,
+       "rewards/rejected": -2.893247127532959,
+       "step": 470
+     },
+     {
+       "epoch": 0.9984301412872841,
+       "step": 477,
+       "total_flos": 0.0,
+       "train_loss": 0.5421361258444796,
+       "train_runtime": 7660.497,
+       "train_samples_per_second": 7.98,
+       "train_steps_per_second": 0.062
+     }
+   ],
+   "logging_steps": 10,
+   "max_steps": 477,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 1,
+   "save_steps": 100,
+   "stateful_callbacks": {
+     "TrainerControl": {
+       "args": {
+         "should_epoch_stop": false,
+         "should_evaluate": false,
+         "should_log": false,
+         "should_save": true,
+         "should_training_stop": true
+       },
+       "attributes": {}
+     }
+   },
+   "total_flos": 0.0,
+   "train_batch_size": 4,
+   "trial_name": null,
+   "trial_params": null
+ }
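
The `log_history` above mixes two record shapes: training entries logged every `logging_steps` (keyed by `loss`, `grad_norm`, and `rewards/*`) and evaluation entries logged every `eval_steps` (keyed by `eval_loss` and `eval_rewards/*`), plus one final summary record. A minimal sketch for splitting them out of a local copy of this file (the file path is assumed; nothing below is new data):

```python
# Minimal sketch: separate training and evaluation records in log_history.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

train_logs = [rec for rec in state["log_history"] if "loss" in rec]       # per-logging-step entries
eval_logs  = [rec for rec in state["log_history"] if "eval_loss" in rec]  # every eval_steps=100

for rec in eval_logs:
    print(rec["step"], round(rec["eval_loss"], 4), round(rec["eval_rewards/margins"], 4))
# Expected output, matching the README table:
# 100 0.5739 0.5564
# 200 0.5318 0.6919
# 300 0.51 0.9562
# 400 0.5035 0.9957
```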