sfulay commited on
Commit
715b7e3
1 Parent(s): bef748b

Model save

Browse files
README.md ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
---
license: apache-2.0
base_model: alignment-handbook/zephyr-7b-sft-full
tags:
- trl
- dpo
- generated_from_trainer
model-index:
- name: zephyr-7b-dpo-full-magpi-low-bleu-3-epochs
  results: []
---

<!-- This model card has been generated automatically according to the information the Trainer had access to. You
should probably proofread and complete it, then remove this comment. -->

# zephyr-7b-dpo-full-magpi-low-bleu-3-epochs

This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-full](https://huggingface.co/alignment-handbook/zephyr-7b-sft-full) on an unknown dataset.
It achieves the following results on the evaluation set:
- Loss: 0.0004
- Rewards/chosen: -1.8844
- Rewards/rejected: -46.8077
- Rewards/accuracies: 1.0
- Rewards/margins: 44.9232
- Logps/rejected: -5321.5576
- Logps/chosen: -555.4259
- Logits/rejected: 2.7529
- Logits/chosen: -1.2323

## Model description

More information needed

## Intended uses & limitations

More information needed

## Training and evaluation data

More information needed

## Training procedure

### Training hyperparameters

The following hyperparameters were used during training:
- learning_rate: 5e-07
- train_batch_size: 8
- eval_batch_size: 8
- seed: 55
- distributed_type: multi-GPU
- num_devices: 8
- gradient_accumulation_steps: 2
- total_train_batch_size: 128
- total_eval_batch_size: 64
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
- lr_scheduler_type: cosine
- lr_scheduler_warmup_ratio: 0.1
- num_epochs: 3

### Training results

| Training Loss | Epoch  | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
|:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
| 0.0066        | 0.4739 | 50   | 0.0028          | -1.0908        | -33.9616         | 0.9980             | 32.8709         | -4036.9529     | -476.0595    | -1.2144         | -1.9103       |
| 0.0177        | 0.9479 | 100  | 0.0006          | -1.6117        | -43.9541         | 1.0                | 42.3424         | -5036.1978     | -528.1522    | 1.4562          | -2.1299       |
| 0.0006        | 1.4218 | 150  | 0.0004          | -1.7244        | -46.1666         | 1.0                | 44.4422         | -5257.4517     | -539.4232    | 1.6969          | -1.9837       |
| 0.0002        | 1.8957 | 200  | 0.0005          | -1.7575        | -44.7450         | 1.0                | 42.9875         | -5115.2886     | -542.7341    | 2.1634          | -2.0033       |
| 0.0001        | 2.3697 | 250  | 0.0004          | -1.8985        | -46.5225         | 1.0                | 44.6240         | -5293.0405     | -556.8339    | 2.7114          | -1.2429       |
| 0.0001        | 2.8436 | 300  | 0.0004          | -1.8844        | -46.8077         | 1.0                | 44.9232         | -5321.5576     | -555.4259    | 2.7529          | -1.2323       |


### Framework versions

- Transformers 4.44.0.dev0
- Pytorch 2.1.2
- Datasets 2.20.0
- Tokenizers 0.19.1
all_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
{
    "epoch": 2.985781990521327,
    "total_flos": 0.0,
    "train_loss": 0.04266955489773185,
    "train_runtime": 9090.3989,
    "train_samples": 13500,
    "train_samples_per_second": 4.455,
    "train_steps_per_second": 0.035
}
generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
{
  "_from_model_config": true,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "transformers_version": "4.44.0.dev0"
}
train_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
{
    "epoch": 2.985781990521327,
    "total_flos": 0.0,
    "train_loss": 0.04266955489773185,
    "train_runtime": 9090.3989,
    "train_samples": 13500,
    "train_samples_per_second": 4.455,
    "train_steps_per_second": 0.035
}
trainer_state.json ADDED
@@ -0,0 +1,603 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.985781990521327,
5
+ "eval_steps": 50,
6
+ "global_step": 315,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.0947867298578199,
13
+ "grad_norm": 50.81444347836179,
14
+ "learning_rate": 1.5624999999999999e-07,
15
+ "logits/chosen": -2.8273773193359375,
16
+ "logits/rejected": -2.573636054992676,
17
+ "logps/chosen": -369.3688049316406,
18
+ "logps/rejected": -693.6748046875,
19
+ "loss": 0.6858,
20
+ "rewards/accuracies": 0.6187499761581421,
21
+ "rewards/chosen": 0.0030312505550682545,
22
+ "rewards/margins": 0.015196545049548149,
23
+ "rewards/rejected": -0.012165295891463757,
24
+ "step": 10
25
+ },
26
+ {
27
+ "epoch": 0.1895734597156398,
28
+ "grad_norm": 17.76028531231973,
29
+ "learning_rate": 3.1249999999999997e-07,
30
+ "logits/chosen": -2.8040361404418945,
31
+ "logits/rejected": -2.5344460010528564,
32
+ "logps/chosen": -356.6896057128906,
33
+ "logps/rejected": -714.5941162109375,
34
+ "loss": 0.4666,
35
+ "rewards/accuracies": 1.0,
36
+ "rewards/chosen": 0.1258755326271057,
37
+ "rewards/margins": 0.69224613904953,
38
+ "rewards/rejected": -0.5663706064224243,
39
+ "step": 20
40
+ },
41
+ {
42
+ "epoch": 0.2843601895734597,
43
+ "grad_norm": 3.027708678861818,
44
+ "learning_rate": 4.6874999999999996e-07,
45
+ "logits/chosen": -2.8109562397003174,
46
+ "logits/rejected": -2.5492990016937256,
47
+ "logps/chosen": -299.7785949707031,
48
+ "logps/rejected": -1061.5032958984375,
49
+ "loss": 0.124,
50
+ "rewards/accuracies": 1.0,
51
+ "rewards/chosen": 0.4659281373023987,
52
+ "rewards/margins": 4.801316261291504,
53
+ "rewards/rejected": -4.33538818359375,
54
+ "step": 30
55
+ },
56
+ {
57
+ "epoch": 0.3791469194312796,
58
+ "grad_norm": 4.339445107844336,
59
+ "learning_rate": 4.990147841143461e-07,
60
+ "logits/chosen": -2.6756181716918945,
61
+ "logits/rejected": -2.3431499004364014,
62
+ "logps/chosen": -309.88055419921875,
63
+ "logps/rejected": -2287.322265625,
64
+ "loss": 0.0229,
65
+ "rewards/accuracies": 1.0,
66
+ "rewards/chosen": 0.4024983048439026,
67
+ "rewards/margins": 16.896318435668945,
68
+ "rewards/rejected": -16.493820190429688,
69
+ "step": 40
70
+ },
71
+ {
72
+ "epoch": 0.47393364928909953,
73
+ "grad_norm": 1.4582859394909966,
74
+ "learning_rate": 4.950256493879794e-07,
75
+ "logits/chosen": -2.350309371948242,
76
+ "logits/rejected": -1.7835966348648071,
77
+ "logps/chosen": -439.63720703125,
78
+ "logps/rejected": -3742.43408203125,
79
+ "loss": 0.0066,
80
+ "rewards/accuracies": 1.0,
81
+ "rewards/chosen": -0.5562213659286499,
82
+ "rewards/margins": 29.479211807250977,
83
+ "rewards/rejected": -30.035430908203125,
84
+ "step": 50
85
+ },
86
+ {
87
+ "epoch": 0.47393364928909953,
88
+ "eval_logits/chosen": -1.910264015197754,
89
+ "eval_logits/rejected": -1.2144216299057007,
90
+ "eval_logps/chosen": -476.05950927734375,
91
+ "eval_logps/rejected": -4036.952880859375,
92
+ "eval_loss": 0.00275122607126832,
93
+ "eval_rewards/accuracies": 0.9979838728904724,
94
+ "eval_rewards/chosen": -1.0907776355743408,
95
+ "eval_rewards/margins": 32.87086868286133,
96
+ "eval_rewards/rejected": -33.96164321899414,
97
+ "eval_runtime": 197.0098,
98
+ "eval_samples_per_second": 19.821,
99
+ "eval_steps_per_second": 0.315,
100
+ "step": 50
101
+ },
102
+ {
103
+ "epoch": 0.5687203791469194,
104
+ "grad_norm": 0.2408524061043023,
105
+ "learning_rate": 4.88020090697132e-07,
106
+ "logits/chosen": -1.8117077350616455,
107
+ "logits/rejected": -0.6155702471733093,
108
+ "logps/chosen": -491.74359130859375,
109
+ "logps/rejected": -4422.6455078125,
110
+ "loss": 0.0021,
111
+ "rewards/accuracies": 1.0,
112
+ "rewards/chosen": -1.4004383087158203,
113
+ "rewards/margins": 36.08842086791992,
114
+ "rewards/rejected": -37.48885726928711,
115
+ "step": 60
116
+ },
117
+ {
118
+ "epoch": 0.6635071090047393,
119
+ "grad_norm": 0.047142917188209066,
120
+ "learning_rate": 4.780843509929904e-07,
121
+ "logits/chosen": -1.8627986907958984,
122
+ "logits/rejected": 0.22284331917762756,
123
+ "logps/chosen": -509.3787536621094,
124
+ "logps/rejected": -4690.046875,
125
+ "loss": 0.0014,
126
+ "rewards/accuracies": 1.0,
127
+ "rewards/chosen": -1.4054739475250244,
128
+ "rewards/margins": 39.368186950683594,
129
+ "rewards/rejected": -40.773658752441406,
130
+ "step": 70
131
+ },
132
+ {
133
+ "epoch": 0.7582938388625592,
134
+ "grad_norm": 9.120624486054943,
135
+ "learning_rate": 4.6534074564712217e-07,
136
+ "logits/chosen": -0.7828740477561951,
137
+ "logits/rejected": 1.5713117122650146,
138
+ "logps/chosen": -559.362548828125,
139
+ "logps/rejected": -5071.544921875,
140
+ "loss": 0.0022,
141
+ "rewards/accuracies": 1.0,
142
+ "rewards/chosen": -1.678344488143921,
143
+ "rewards/margins": 42.42586135864258,
144
+ "rewards/rejected": -44.10420227050781,
145
+ "step": 80
146
+ },
147
+ {
148
+ "epoch": 0.8530805687203792,
149
+ "grad_norm": 0.11081628927270178,
150
+ "learning_rate": 4.4994615667026846e-07,
151
+ "logits/chosen": -1.3702471256256104,
152
+ "logits/rejected": 2.505129337310791,
153
+ "logps/chosen": -544.5760498046875,
154
+ "logps/rejected": -5206.58203125,
155
+ "loss": 0.0017,
156
+ "rewards/accuracies": 1.0,
157
+ "rewards/chosen": -1.5564204454421997,
158
+ "rewards/margins": 44.48157501220703,
159
+ "rewards/rejected": -46.03799057006836,
160
+ "step": 90
161
+ },
162
+ {
163
+ "epoch": 0.9478672985781991,
164
+ "grad_norm": 0.3080840911945339,
165
+ "learning_rate": 4.320901013934887e-07,
166
+ "logits/chosen": -2.1306087970733643,
167
+ "logits/rejected": 1.8308042287826538,
168
+ "logps/chosen": -503.85980224609375,
169
+ "logps/rejected": -5392.4296875,
170
+ "loss": 0.0177,
171
+ "rewards/accuracies": 1.0,
172
+ "rewards/chosen": -1.653719186782837,
173
+ "rewards/margins": 45.56684112548828,
174
+ "rewards/rejected": -47.22056198120117,
175
+ "step": 100
176
+ },
177
+ {
178
+ "epoch": 0.9478672985781991,
179
+ "eval_logits/chosen": -2.1299259662628174,
180
+ "eval_logits/rejected": 1.4562028646469116,
181
+ "eval_logps/chosen": -528.1521606445312,
182
+ "eval_logps/rejected": -5036.19775390625,
183
+ "eval_loss": 0.0005923541029915214,
184
+ "eval_rewards/accuracies": 1.0,
185
+ "eval_rewards/chosen": -1.6117043495178223,
186
+ "eval_rewards/margins": 42.342384338378906,
187
+ "eval_rewards/rejected": -43.95408630371094,
188
+ "eval_runtime": 193.3087,
189
+ "eval_samples_per_second": 20.201,
190
+ "eval_steps_per_second": 0.321,
191
+ "step": 100
192
+ },
193
+ {
194
+ "epoch": 1.042654028436019,
195
+ "grad_norm": 18.444713565032494,
196
+ "learning_rate": 4.119923993874379e-07,
197
+ "logits/chosen": -1.6922178268432617,
198
+ "logits/rejected": 2.0268759727478027,
199
+ "logps/chosen": -523.6668090820312,
200
+ "logps/rejected": -5431.6923828125,
201
+ "loss": 0.0056,
202
+ "rewards/accuracies": 0.9937499761581421,
203
+ "rewards/chosen": -1.692284345626831,
204
+ "rewards/margins": 45.80804443359375,
205
+ "rewards/rejected": -47.500328063964844,
206
+ "step": 110
207
+ },
208
+ {
209
+ "epoch": 1.1374407582938388,
210
+ "grad_norm": 0.667964087575959,
211
+ "learning_rate": 3.899004663415083e-07,
212
+ "logits/chosen": -1.945476770401001,
213
+ "logits/rejected": 2.063563346862793,
214
+ "logps/chosen": -512.0524291992188,
215
+ "logps/rejected": -5017.99755859375,
216
+ "loss": 0.0008,
217
+ "rewards/accuracies": 1.0,
218
+ "rewards/chosen": -1.6633787155151367,
219
+ "rewards/margins": 41.92264175415039,
220
+ "rewards/rejected": -43.586021423339844,
221
+ "step": 120
222
+ },
223
+ {
224
+ "epoch": 1.2322274881516588,
225
+ "grad_norm": 0.07609835769363717,
226
+ "learning_rate": 3.6608626821692824e-07,
227
+ "logits/chosen": -1.3765870332717896,
228
+ "logits/rejected": 2.4521493911743164,
229
+ "logps/chosen": -511.48992919921875,
230
+ "logps/rejected": -5102.3095703125,
231
+ "loss": 0.0003,
232
+ "rewards/accuracies": 1.0,
233
+ "rewards/chosen": -1.574892282485962,
234
+ "rewards/margins": 42.903465270996094,
235
+ "rewards/rejected": -44.47835922241211,
236
+ "step": 130
237
+ },
238
+ {
239
+ "epoch": 1.3270142180094786,
240
+ "grad_norm": 0.056132145026876815,
241
+ "learning_rate": 3.408429731701635e-07,
242
+ "logits/chosen": -1.673305869102478,
243
+ "logits/rejected": 2.8528292179107666,
244
+ "logps/chosen": -515.4207763671875,
245
+ "logps/rejected": -5185.7666015625,
246
+ "loss": 0.0021,
247
+ "rewards/accuracies": 1.0,
248
+ "rewards/chosen": -1.6433398723602295,
249
+ "rewards/margins": 44.202003479003906,
250
+ "rewards/rejected": -45.84534454345703,
251
+ "step": 140
252
+ },
253
+ {
254
+ "epoch": 1.4218009478672986,
255
+ "grad_norm": 0.23322034769023395,
256
+ "learning_rate": 3.144813424636031e-07,
257
+ "logits/chosen": -2.0590405464172363,
258
+ "logits/rejected": 2.5171058177948,
259
+ "logps/chosen": -564.9078369140625,
260
+ "logps/rejected": -5421.3935546875,
261
+ "loss": 0.0006,
262
+ "rewards/accuracies": 1.0,
263
+ "rewards/chosen": -1.7184299230575562,
264
+ "rewards/margins": 46.220848083496094,
265
+ "rewards/rejected": -47.93927764892578,
266
+ "step": 150
267
+ },
268
+ {
269
+ "epoch": 1.4218009478672986,
270
+ "eval_logits/chosen": -1.983699083328247,
271
+ "eval_logits/rejected": 1.6969449520111084,
272
+ "eval_logps/chosen": -539.4231567382812,
273
+ "eval_logps/rejected": -5257.45166015625,
274
+ "eval_loss": 0.00044810696272179484,
275
+ "eval_rewards/accuracies": 1.0,
276
+ "eval_rewards/chosen": -1.7244139909744263,
277
+ "eval_rewards/margins": 44.44221878051758,
278
+ "eval_rewards/rejected": -46.166629791259766,
279
+ "eval_runtime": 194.364,
280
+ "eval_samples_per_second": 20.091,
281
+ "eval_steps_per_second": 0.319,
282
+ "step": 150
283
+ },
284
+ {
285
+ "epoch": 1.5165876777251186,
286
+ "grad_norm": 0.12179595633717599,
287
+ "learning_rate": 2.8732590479375165e-07,
288
+ "logits/chosen": -1.6932016611099243,
289
+ "logits/rejected": 3.00923752784729,
290
+ "logps/chosen": -561.7379150390625,
291
+ "logps/rejected": -5284.3369140625,
292
+ "loss": 0.0002,
293
+ "rewards/accuracies": 1.0,
294
+ "rewards/chosen": -1.8306872844696045,
295
+ "rewards/margins": 45.2327995300293,
296
+ "rewards/rejected": -47.06348419189453,
297
+ "step": 160
298
+ },
299
+ {
300
+ "epoch": 1.6113744075829384,
301
+ "grad_norm": 0.0933485824938723,
302
+ "learning_rate": 2.597109611334169e-07,
303
+ "logits/chosen": -1.3871994018554688,
304
+ "logits/rejected": 3.261793613433838,
305
+ "logps/chosen": -551.7293090820312,
306
+ "logps/rejected": -5060.6259765625,
307
+ "loss": 0.0005,
308
+ "rewards/accuracies": 1.0,
309
+ "rewards/chosen": -1.971683144569397,
310
+ "rewards/margins": 42.35912322998047,
311
+ "rewards/rejected": -44.33080291748047,
312
+ "step": 170
313
+ },
314
+ {
315
+ "epoch": 1.7061611374407581,
316
+ "grad_norm": 3.1974087952825787,
317
+ "learning_rate": 2.3197646927086694e-07,
318
+ "logits/chosen": -1.4737141132354736,
319
+ "logits/rejected": 3.022137403488159,
320
+ "logps/chosen": -511.197509765625,
321
+ "logps/rejected": -5292.6005859375,
322
+ "loss": 0.0008,
323
+ "rewards/accuracies": 1.0,
324
+ "rewards/chosen": -1.733986258506775,
325
+ "rewards/margins": 45.09421157836914,
326
+ "rewards/rejected": -46.82819366455078,
327
+ "step": 180
328
+ },
329
+ {
330
+ "epoch": 1.8009478672985781,
331
+ "grad_norm": 0.027408737628248046,
332
+ "learning_rate": 2.0446385870993467e-07,
333
+ "logits/chosen": -2.2061755657196045,
334
+ "logits/rejected": 2.45582914352417,
335
+ "logps/chosen": -533.607177734375,
336
+ "logps/rejected": -5150.46044921875,
337
+ "loss": 0.0003,
338
+ "rewards/accuracies": 1.0,
339
+ "rewards/chosen": -1.7912607192993164,
340
+ "rewards/margins": 43.65951919555664,
341
+ "rewards/rejected": -45.45077896118164,
342
+ "step": 190
343
+ },
344
+ {
345
+ "epoch": 1.8957345971563981,
346
+ "grad_norm": 0.030194576861770926,
347
+ "learning_rate": 1.775118274523545e-07,
348
+ "logits/chosen": -2.1220927238464355,
349
+ "logits/rejected": 2.6763927936553955,
350
+ "logps/chosen": -580.6775512695312,
351
+ "logps/rejected": -5746.35791015625,
352
+ "loss": 0.0002,
353
+ "rewards/accuracies": 1.0,
354
+ "rewards/chosen": -1.894126534461975,
355
+ "rewards/margins": 48.489967346191406,
356
+ "rewards/rejected": -50.38408660888672,
357
+ "step": 200
358
+ },
359
+ {
360
+ "epoch": 1.8957345971563981,
361
+ "eval_logits/chosen": -2.0032970905303955,
362
+ "eval_logits/rejected": 2.1634280681610107,
363
+ "eval_logps/chosen": -542.7340698242188,
364
+ "eval_logps/rejected": -5115.28857421875,
365
+ "eval_loss": 0.0005020965472795069,
366
+ "eval_rewards/accuracies": 1.0,
367
+ "eval_rewards/chosen": -1.7575234174728394,
368
+ "eval_rewards/margins": 42.98747634887695,
369
+ "eval_rewards/rejected": -44.744998931884766,
370
+ "eval_runtime": 192.9625,
371
+ "eval_samples_per_second": 20.237,
372
+ "eval_steps_per_second": 0.321,
373
+ "step": 200
374
+ },
375
+ {
376
+ "epoch": 1.9905213270142181,
377
+ "grad_norm": 0.649351349290712,
378
+ "learning_rate": 1.514521724066537e-07,
379
+ "logits/chosen": -1.6789367198944092,
380
+ "logits/rejected": 3.430915355682373,
381
+ "logps/chosen": -524.8860473632812,
382
+ "logps/rejected": -4889.97998046875,
383
+ "loss": 0.0003,
384
+ "rewards/accuracies": 1.0,
385
+ "rewards/chosen": -1.7625246047973633,
386
+ "rewards/margins": 40.506309509277344,
387
+ "rewards/rejected": -42.268829345703125,
388
+ "step": 210
389
+ },
390
+ {
391
+ "epoch": 2.085308056872038,
392
+ "grad_norm": 0.04035446049031933,
393
+ "learning_rate": 1.266057047539568e-07,
394
+ "logits/chosen": -1.4461164474487305,
395
+ "logits/rejected": 3.0876190662384033,
396
+ "logps/chosen": -525.4357299804688,
397
+ "logps/rejected": -4884.6865234375,
398
+ "loss": 0.0004,
399
+ "rewards/accuracies": 1.0,
400
+ "rewards/chosen": -1.8402020931243896,
401
+ "rewards/margins": 40.36837387084961,
402
+ "rewards/rejected": -42.208580017089844,
403
+ "step": 220
404
+ },
405
+ {
406
+ "epoch": 2.1800947867298577,
407
+ "grad_norm": 0.015128682487430115,
408
+ "learning_rate": 1.032783005551884e-07,
409
+ "logits/chosen": -1.137474775314331,
410
+ "logits/rejected": 3.6721444129943848,
411
+ "logps/chosen": -568.20751953125,
412
+ "logps/rejected": -5129.24658203125,
413
+ "loss": 0.0001,
414
+ "rewards/accuracies": 1.0,
415
+ "rewards/chosen": -1.940818190574646,
416
+ "rewards/margins": 42.981773376464844,
417
+ "rewards/rejected": -44.92259216308594,
418
+ "step": 230
419
+ },
420
+ {
421
+ "epoch": 2.2748815165876777,
422
+ "grad_norm": 0.037321954682957494,
423
+ "learning_rate": 8.175713521924976e-08,
424
+ "logits/chosen": -1.189206838607788,
425
+ "logits/rejected": 3.206519603729248,
426
+ "logps/chosen": -552.6511840820312,
427
+ "logps/rejected": -5748.79541015625,
428
+ "loss": 0.0001,
429
+ "rewards/accuracies": 1.0,
430
+ "rewards/chosen": -1.8525272607803345,
431
+ "rewards/margins": 48.330223083496094,
432
+ "rewards/rejected": -50.1827507019043,
433
+ "step": 240
434
+ },
435
+ {
436
+ "epoch": 2.3696682464454977,
437
+ "grad_norm": 0.006059762174940937,
438
+ "learning_rate": 6.230714818829733e-08,
439
+ "logits/chosen": -0.9478242993354797,
440
+ "logits/rejected": 3.6084961891174316,
441
+ "logps/chosen": -529.2486572265625,
442
+ "logps/rejected": -5455.07421875,
443
+ "loss": 0.0001,
444
+ "rewards/accuracies": 1.0,
445
+ "rewards/chosen": -1.7955585718154907,
446
+ "rewards/margins": 46.23430252075195,
447
+ "rewards/rejected": -48.02985382080078,
448
+ "step": 250
449
+ },
450
+ {
451
+ "epoch": 2.3696682464454977,
452
+ "eval_logits/chosen": -1.2428650856018066,
453
+ "eval_logits/rejected": 2.7113900184631348,
454
+ "eval_logps/chosen": -556.8338623046875,
455
+ "eval_logps/rejected": -5293.04052734375,
456
+ "eval_loss": 0.00038583340938203037,
457
+ "eval_rewards/accuracies": 1.0,
458
+ "eval_rewards/chosen": -1.8985214233398438,
459
+ "eval_rewards/margins": 44.623992919921875,
460
+ "eval_rewards/rejected": -46.522518157958984,
461
+ "eval_runtime": 194.5012,
462
+ "eval_samples_per_second": 20.077,
463
+ "eval_steps_per_second": 0.319,
464
+ "step": 250
465
+ },
466
+ {
467
+ "epoch": 2.4644549763033177,
468
+ "grad_norm": 0.03581328161307903,
469
+ "learning_rate": 4.516778136213037e-08,
470
+ "logits/chosen": -0.6857299208641052,
471
+ "logits/rejected": 4.016716003417969,
472
+ "logps/chosen": -546.9486083984375,
473
+ "logps/rejected": -6013.9873046875,
474
+ "loss": 0.0001,
475
+ "rewards/accuracies": 1.0,
476
+ "rewards/chosen": -2.029759407043457,
477
+ "rewards/margins": 51.3958740234375,
478
+ "rewards/rejected": -53.425636291503906,
479
+ "step": 260
480
+ },
481
+ {
482
+ "epoch": 2.5592417061611377,
483
+ "grad_norm": 0.022468211975921346,
484
+ "learning_rate": 3.055003141378948e-08,
485
+ "logits/chosen": -1.1639906167984009,
486
+ "logits/rejected": 3.84511137008667,
487
+ "logps/chosen": -576.4962158203125,
488
+ "logps/rejected": -5156.3203125,
489
+ "loss": 0.0001,
490
+ "rewards/accuracies": 1.0,
491
+ "rewards/chosen": -1.8559118509292603,
492
+ "rewards/margins": 43.48760223388672,
493
+ "rewards/rejected": -45.34351348876953,
494
+ "step": 270
495
+ },
496
+ {
497
+ "epoch": 2.654028436018957,
498
+ "grad_norm": 0.04077948274793423,
499
+ "learning_rate": 1.8633852284264508e-08,
500
+ "logits/chosen": -0.8976588249206543,
501
+ "logits/rejected": 3.54345440864563,
502
+ "logps/chosen": -536.1121826171875,
503
+ "logps/rejected": -5684.75048828125,
504
+ "loss": 0.0001,
505
+ "rewards/accuracies": 1.0,
506
+ "rewards/chosen": -1.8762388229370117,
507
+ "rewards/margins": 48.596927642822266,
508
+ "rewards/rejected": -50.473167419433594,
509
+ "step": 280
510
+ },
511
+ {
512
+ "epoch": 2.748815165876777,
513
+ "grad_norm": 0.025754086632460663,
514
+ "learning_rate": 9.56593983327919e-09,
515
+ "logits/chosen": -1.061200737953186,
516
+ "logits/rejected": 3.624175548553467,
517
+ "logps/chosen": -544.1278076171875,
518
+ "logps/rejected": -5561.8818359375,
519
+ "loss": 0.0002,
520
+ "rewards/accuracies": 1.0,
521
+ "rewards/chosen": -1.8096107244491577,
522
+ "rewards/margins": 47.35784149169922,
523
+ "rewards/rejected": -49.16745376586914,
524
+ "step": 290
525
+ },
526
+ {
527
+ "epoch": 2.843601895734597,
528
+ "grad_norm": 0.00972787744038685,
529
+ "learning_rate": 3.4579259185321398e-09,
530
+ "logits/chosen": -0.9289520978927612,
531
+ "logits/rejected": 3.9043102264404297,
532
+ "logps/chosen": -559.0933837890625,
533
+ "logps/rejected": -5231.408203125,
534
+ "loss": 0.0001,
535
+ "rewards/accuracies": 1.0,
536
+ "rewards/chosen": -1.9514116048812866,
537
+ "rewards/margins": 43.472190856933594,
538
+ "rewards/rejected": -45.423606872558594,
539
+ "step": 300
540
+ },
541
+ {
542
+ "epoch": 2.843601895734597,
543
+ "eval_logits/chosen": -1.2322728633880615,
544
+ "eval_logits/rejected": 2.752917766571045,
545
+ "eval_logps/chosen": -555.4259033203125,
546
+ "eval_logps/rejected": -5321.5576171875,
547
+ "eval_loss": 0.0003751559997908771,
548
+ "eval_rewards/accuracies": 1.0,
549
+ "eval_rewards/chosen": -1.8844420909881592,
550
+ "eval_rewards/margins": 44.92324447631836,
551
+ "eval_rewards/rejected": -46.80768585205078,
552
+ "eval_runtime": 193.0161,
553
+ "eval_samples_per_second": 20.231,
554
+ "eval_steps_per_second": 0.321,
555
+ "step": 300
556
+ },
557
+ {
558
+ "epoch": 2.938388625592417,
559
+ "grad_norm": 0.07731963429515382,
560
+ "learning_rate": 3.850041354441502e-10,
561
+ "logits/chosen": -0.9526262283325195,
562
+ "logits/rejected": 3.7660250663757324,
563
+ "logps/chosen": -527.6392822265625,
564
+ "logps/rejected": -5758.5341796875,
565
+ "loss": 0.0001,
566
+ "rewards/accuracies": 1.0,
567
+ "rewards/chosen": -1.9126548767089844,
568
+ "rewards/margins": 49.44573211669922,
569
+ "rewards/rejected": -51.3583869934082,
570
+ "step": 310
571
+ },
572
+ {
573
+ "epoch": 2.985781990521327,
574
+ "step": 315,
575
+ "total_flos": 0.0,
576
+ "train_loss": 0.04266955489773185,
577
+ "train_runtime": 9090.3989,
578
+ "train_samples_per_second": 4.455,
579
+ "train_steps_per_second": 0.035
580
+ }
581
+ ],
582
+ "logging_steps": 10,
583
+ "max_steps": 315,
584
+ "num_input_tokens_seen": 0,
585
+ "num_train_epochs": 3,
586
+ "save_steps": 100,
587
+ "stateful_callbacks": {
588
+ "TrainerControl": {
589
+ "args": {
590
+ "should_epoch_stop": false,
591
+ "should_evaluate": false,
592
+ "should_log": false,
593
+ "should_save": true,
594
+ "should_training_stop": true
595
+ },
596
+ "attributes": {}
597
+ }
598
+ },
599
+ "total_flos": 0.0,
600
+ "train_batch_size": 8,
601
+ "trial_name": null,
602
+ "trial_params": null
603
+ }