jikaixuan committed on
Commit
dc83d7c
1 Parent(s): b550397

Model save

Browse files
README.md CHANGED
@@ -15,15 +15,15 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-full](https://huggingface.co/alignment-handbook/zephyr-7b-sft-full) on the None dataset.
17
  It achieves the following results on the evaluation set:
18
- - Loss: 0.5656
19
- - Rewards/chosen: 0.0673
20
- - Rewards/rejected: -0.5344
21
- - Rewards/accuracies: 0.7000
22
- - Rewards/margins: 0.6018
23
- - Logps/rejected: -264.6442
24
- - Logps/chosen: -283.5841
25
- - Logits/rejected: -2.8292
26
- - Logits/chosen: -2.8303
27
  - Use Label: 0.0
28
  - Pred Label: 0.0
29
 
@@ -44,7 +44,7 @@ More information needed
44
  ### Training hyperparameters
45
 
46
  The following hyperparameters were used during training:
47
- - learning_rate: 5e-06
48
  - train_batch_size: 4
49
  - eval_batch_size: 4
50
  - seed: 42
@@ -62,7 +62,7 @@ The following hyperparameters were used during training:
62
 
63
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen | Use Label | Pred Label |
64
  |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|:---------:|:----------:|
65
- | 0.5689 | 1.0 | 955 | 0.5656 | 0.0673 | -0.5344 | 0.7000 | 0.6018 | -264.6442 | -283.5841 | -2.8292 | -2.8303 | 0.0 | 0.0 |
66
 
67
 
68
  ### Framework versions
 
15
 
16
  This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-full](https://huggingface.co/alignment-handbook/zephyr-7b-sft-full) on the None dataset.
17
  It achieves the following results on the evaluation set:
18
+ - Loss: 0.5460
19
+ - Rewards/chosen: 0.0878
20
+ - Rewards/rejected: -0.6697
21
+ - Rewards/accuracies: 0.7140
22
+ - Rewards/margins: 0.7575
23
+ - Logps/rejected: -265.9970
24
+ - Logps/chosen: -283.3795
25
+ - Logits/rejected: -2.8187
26
+ - Logits/chosen: -2.8274
27
  - Use Label: 0.0
28
  - Pred Label: 0.0
29
 
 
44
  ### Training hyperparameters
45
 
46
  The following hyperparameters were used during training:
47
+ - learning_rate: 1e-05
48
  - train_batch_size: 4
49
  - eval_batch_size: 4
50
  - seed: 42
 
62
 
63
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen | Use Label | Pred Label |
64
  |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|:---------:|:----------:|
65
+ | 0.545 | 1.0 | 955 | 0.5460 | 0.0878 | -0.6697 | 0.7140 | 0.7575 | -265.9970 | -283.3795 | -2.8187 | -2.8274 | 0.0 | 0.0 |
66
 
67
 
68
  ### Framework versions
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:acfc333285ecd363f21ac9a422d31780339b22dc96a535dfa323fa71da01e6dd
3
  size 218138576
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee44cce906c7a6f93e762633279c05b8cb98e432f3c9ac83bd6f6213cf55c20a
3
  size 218138576
all_results.json CHANGED
@@ -1,23 +1,23 @@
1
  {
2
  "epoch": 1.0,
3
- "eval_logits/chosen": -2.8302695751190186,
4
- "eval_logits/rejected": -2.8291618824005127,
5
- "eval_logps/chosen": -283.5841369628906,
6
- "eval_logps/rejected": -264.64422607421875,
7
- "eval_loss": 0.5655555129051208,
8
  "eval_pred_label": 0.0,
9
- "eval_rewards/accuracies": 0.699999988079071,
10
- "eval_rewards/chosen": 0.06732505559921265,
11
- "eval_rewards/margins": 0.6017746329307556,
12
- "eval_rewards/rejected": -0.534449577331543,
13
- "eval_runtime": 457.1465,
14
  "eval_samples": 2000,
15
- "eval_samples_per_second": 4.375,
16
- "eval_steps_per_second": 0.273,
17
  "eval_use_label": 0.0,
18
- "train_loss": 0.5870625535855118,
19
- "train_runtime": 25384.4334,
20
  "train_samples": 61135,
21
- "train_samples_per_second": 2.408,
22
- "train_steps_per_second": 0.038
23
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "eval_logits/chosen": -2.827404737472534,
4
+ "eval_logits/rejected": -2.818655014038086,
5
+ "eval_logps/chosen": -283.37945556640625,
6
+ "eval_logps/rejected": -265.9969787597656,
7
+ "eval_loss": 0.5459502935409546,
8
  "eval_pred_label": 0.0,
9
+ "eval_rewards/accuracies": 0.7139999866485596,
10
+ "eval_rewards/chosen": 0.08778975158929825,
11
+ "eval_rewards/margins": 0.7575166821479797,
12
+ "eval_rewards/rejected": -0.6697269678115845,
13
+ "eval_runtime": 478.4263,
14
  "eval_samples": 2000,
15
+ "eval_samples_per_second": 4.18,
16
+ "eval_steps_per_second": 0.261,
17
  "eval_use_label": 0.0,
18
+ "train_loss": 0.5628191218950361,
19
+ "train_runtime": 25746.1298,
20
  "train_samples": 61135,
21
+ "train_samples_per_second": 2.375,
22
+ "train_steps_per_second": 0.037
23
  }
eval_results.json CHANGED
@@ -1,18 +1,18 @@
1
  {
2
  "epoch": 1.0,
3
- "eval_logits/chosen": -2.8302695751190186,
4
- "eval_logits/rejected": -2.8291618824005127,
5
- "eval_logps/chosen": -283.5841369628906,
6
- "eval_logps/rejected": -264.64422607421875,
7
- "eval_loss": 0.5655555129051208,
8
  "eval_pred_label": 0.0,
9
- "eval_rewards/accuracies": 0.699999988079071,
10
- "eval_rewards/chosen": 0.06732505559921265,
11
- "eval_rewards/margins": 0.6017746329307556,
12
- "eval_rewards/rejected": -0.534449577331543,
13
- "eval_runtime": 457.1465,
14
  "eval_samples": 2000,
15
- "eval_samples_per_second": 4.375,
16
- "eval_steps_per_second": 0.273,
17
  "eval_use_label": 0.0
18
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "eval_logits/chosen": -2.827404737472534,
4
+ "eval_logits/rejected": -2.818655014038086,
5
+ "eval_logps/chosen": -283.37945556640625,
6
+ "eval_logps/rejected": -265.9969787597656,
7
+ "eval_loss": 0.5459502935409546,
8
  "eval_pred_label": 0.0,
9
+ "eval_rewards/accuracies": 0.7139999866485596,
10
+ "eval_rewards/chosen": 0.08778975158929825,
11
+ "eval_rewards/margins": 0.7575166821479797,
12
+ "eval_rewards/rejected": -0.6697269678115845,
13
+ "eval_runtime": 478.4263,
14
  "eval_samples": 2000,
15
+ "eval_samples_per_second": 4.18,
16
+ "eval_steps_per_second": 0.261,
17
  "eval_use_label": 0.0
18
  }
runs/Jan21_22-53-06_uclaml04.cs.ucla.edu/events.out.tfevents.1705906443.uclaml04.cs.ucla.edu.843444.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f92826a808e2be9dfed130fa67ad1a0eea1c11f95fe17ca655d1df5bd174f43
3
- size 11837
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70cef13045c99f0b2e0174d81d676e33577c08d8132d6a7dd5fe8a82051a671a
3
+ size 13038
runs/Jan21_22-53-06_uclaml04.cs.ucla.edu/events.out.tfevents.1705932667.uclaml04.cs.ucla.edu.843444.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e4a8aa0de3f53d33d6f9f03d407834c788dfffa1cfc429c328ae7af5f7003e3
3
+ size 935
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 0.5870625535855118,
4
- "train_runtime": 25384.4334,
5
  "train_samples": 61135,
6
- "train_samples_per_second": 2.408,
7
- "train_steps_per_second": 0.038
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 0.5628191218950361,
4
+ "train_runtime": 25746.1298,
5
  "train_samples": 61135,
6
+ "train_samples_per_second": 2.375,
7
+ "train_steps_per_second": 0.037
8
  }
trainer_state.json CHANGED
@@ -10,7 +10,7 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.0,
13
- "learning_rate": 5.208333333333333e-08,
14
  "logits/chosen": -2.980285167694092,
15
  "logits/rejected": -2.87275767326355,
16
  "logps/chosen": -313.4390563964844,
@@ -26,163 +26,163 @@
26
  },
27
  {
28
  "epoch": 0.1,
29
- "learning_rate": 4.9767171129220025e-06,
30
- "logits/chosen": -2.8189077377319336,
31
- "logits/rejected": -2.828260660171509,
32
- "logps/chosen": -285.1784362792969,
33
- "logps/rejected": -271.1445617675781,
34
- "loss": 0.6803,
35
  "pred_label": 0.0,
36
- "rewards/accuracies": 0.5738636255264282,
37
- "rewards/chosen": 0.01021653600037098,
38
- "rewards/margins": 0.029710056260228157,
39
- "rewards/rejected": -0.019493522122502327,
40
  "step": 100,
41
  "use_label": 0.0
42
  },
43
  {
44
  "epoch": 0.21,
45
- "learning_rate": 4.394644935972061e-06,
46
- "logits/chosen": -2.8255398273468018,
47
- "logits/rejected": -2.813203811645508,
48
- "logps/chosen": -278.2582092285156,
49
- "logps/rejected": -262.1144714355469,
50
- "loss": 0.6236,
51
  "pred_label": 0.0,
52
- "rewards/accuracies": 0.6868749856948853,
53
- "rewards/chosen": 0.05237884446978569,
54
- "rewards/margins": 0.20995216071605682,
55
- "rewards/rejected": -0.15757331252098083,
56
  "step": 200,
57
  "use_label": 0.0
58
  },
59
  {
60
  "epoch": 0.31,
61
- "learning_rate": 3.812572759022119e-06,
62
- "logits/chosen": -2.809633255004883,
63
- "logits/rejected": -2.80940580368042,
64
- "logps/chosen": -283.6641540527344,
65
- "logps/rejected": -257.23779296875,
66
- "loss": 0.5901,
67
  "pred_label": 0.0,
68
- "rewards/accuracies": 0.6956250071525574,
69
- "rewards/chosen": 0.04034877195954323,
70
- "rewards/margins": 0.3768764138221741,
71
- "rewards/rejected": -0.33652764558792114,
72
  "step": 300,
73
  "use_label": 0.0
74
  },
75
  {
76
  "epoch": 0.42,
77
- "learning_rate": 3.2305005820721774e-06,
78
- "logits/chosen": -2.821267604827881,
79
- "logits/rejected": -2.8011868000030518,
80
- "logps/chosen": -284.2022399902344,
81
- "logps/rejected": -267.48358154296875,
82
- "loss": 0.5757,
83
  "pred_label": 0.0,
84
- "rewards/accuracies": 0.7081249952316284,
85
- "rewards/chosen": 0.022827474400401115,
86
- "rewards/margins": 0.4984941780567169,
87
- "rewards/rejected": -0.47566673159599304,
88
  "step": 400,
89
  "use_label": 0.0
90
  },
91
  {
92
  "epoch": 0.52,
93
- "learning_rate": 2.6484284051222353e-06,
94
- "logits/chosen": -2.7993886470794678,
95
- "logits/rejected": -2.7845959663391113,
96
- "logps/chosen": -270.09637451171875,
97
- "logps/rejected": -256.13458251953125,
98
- "loss": 0.5798,
99
  "pred_label": 0.0,
100
- "rewards/accuracies": 0.6612499952316284,
101
- "rewards/chosen": -0.004339172504842281,
102
- "rewards/margins": 0.4106566905975342,
103
- "rewards/rejected": -0.41499578952789307,
104
  "step": 500,
105
  "use_label": 0.0
106
  },
107
  {
108
  "epoch": 0.63,
109
- "learning_rate": 2.0663562281722936e-06,
110
- "logits/chosen": -2.824444532394409,
111
- "logits/rejected": -2.808506965637207,
112
- "logps/chosen": -284.5413513183594,
113
- "logps/rejected": -270.1356201171875,
114
- "loss": 0.5643,
115
  "pred_label": 0.0,
116
- "rewards/accuracies": 0.706250011920929,
117
- "rewards/chosen": 0.06895674020051956,
118
- "rewards/margins": 0.5562920570373535,
119
- "rewards/rejected": -0.4873352348804474,
120
  "step": 600,
121
  "use_label": 0.0
122
  },
123
  {
124
  "epoch": 0.73,
125
- "learning_rate": 1.4842840512223516e-06,
126
- "logits/chosen": -2.8160746097564697,
127
- "logits/rejected": -2.795208692550659,
128
- "logps/chosen": -280.79766845703125,
129
- "logps/rejected": -253.41058349609375,
130
- "loss": 0.5631,
131
  "pred_label": 0.0,
132
- "rewards/accuracies": 0.7231249809265137,
133
- "rewards/chosen": 0.07620371133089066,
134
- "rewards/margins": 0.5511507391929626,
135
- "rewards/rejected": -0.474947065114975,
136
  "step": 700,
137
  "use_label": 0.0
138
  },
139
  {
140
  "epoch": 0.84,
141
- "learning_rate": 9.022118742724098e-07,
142
- "logits/chosen": -2.8168938159942627,
143
- "logits/rejected": -2.8180816173553467,
144
- "logps/chosen": -287.77679443359375,
145
- "logps/rejected": -266.3511657714844,
146
- "loss": 0.5548,
147
  "pred_label": 0.0,
148
- "rewards/accuracies": 0.7112500071525574,
149
- "rewards/chosen": 0.09802371263504028,
150
- "rewards/margins": 0.5919383764266968,
151
- "rewards/rejected": -0.49391472339630127,
152
  "step": 800,
153
  "use_label": 0.0
154
  },
155
  {
156
  "epoch": 0.94,
157
- "learning_rate": 3.2013969732246806e-07,
158
- "logits/chosen": -2.808311700820923,
159
- "logits/rejected": -2.812298536300659,
160
- "logps/chosen": -276.976806640625,
161
- "logps/rejected": -261.3187561035156,
162
- "loss": 0.5689,
163
  "pred_label": 0.0,
164
- "rewards/accuracies": 0.7093750238418579,
165
- "rewards/chosen": 0.05313897505402565,
166
- "rewards/margins": 0.551435649394989,
167
- "rewards/rejected": -0.4982966482639313,
168
  "step": 900,
169
  "use_label": 0.0
170
  },
171
  {
172
  "epoch": 1.0,
173
- "eval_logits/chosen": -2.8302695751190186,
174
- "eval_logits/rejected": -2.8291618824005127,
175
- "eval_logps/chosen": -283.5841369628906,
176
- "eval_logps/rejected": -264.64422607421875,
177
- "eval_loss": 0.5655555129051208,
178
  "eval_pred_label": 0.0,
179
- "eval_rewards/accuracies": 0.699999988079071,
180
- "eval_rewards/chosen": 0.06732505559921265,
181
- "eval_rewards/margins": 0.6017746329307556,
182
- "eval_rewards/rejected": -0.534449577331543,
183
- "eval_runtime": 457.335,
184
- "eval_samples_per_second": 4.373,
185
- "eval_steps_per_second": 0.273,
186
  "eval_use_label": 0.0,
187
  "step": 955
188
  },
@@ -190,16 +190,16 @@
190
  "epoch": 1.0,
191
  "step": 955,
192
  "total_flos": 0.0,
193
- "train_loss": 0.5870625535855118,
194
- "train_runtime": 25384.4334,
195
- "train_samples_per_second": 2.408,
196
- "train_steps_per_second": 0.038
197
  }
198
  ],
199
  "logging_steps": 100,
200
  "max_steps": 955,
201
  "num_train_epochs": 1,
202
- "save_steps": 100,
203
  "total_flos": 0.0,
204
  "trial_name": null,
205
  "trial_params": null
 
10
  "log_history": [
11
  {
12
  "epoch": 0.0,
13
+ "learning_rate": 1.0416666666666667e-07,
14
  "logits/chosen": -2.980285167694092,
15
  "logits/rejected": -2.87275767326355,
16
  "logps/chosen": -313.4390563964844,
 
26
  },
27
  {
28
  "epoch": 0.1,
29
+ "learning_rate": 9.953434225844005e-06,
30
+ "logits/chosen": -2.8180909156799316,
31
+ "logits/rejected": -2.8273613452911377,
32
+ "logps/chosen": -285.13623046875,
33
+ "logps/rejected": -271.66839599609375,
34
+ "loss": 0.6624,
35
  "pred_label": 0.0,
36
+ "rewards/accuracies": 0.6041666865348816,
37
+ "rewards/chosen": 0.014434419572353363,
38
+ "rewards/margins": 0.08631344139575958,
39
+ "rewards/rejected": -0.07187902927398682,
40
  "step": 100,
41
  "use_label": 0.0
42
  },
43
  {
44
  "epoch": 0.21,
45
+ "learning_rate": 8.789289871944122e-06,
46
+ "logits/chosen": -2.8230364322662354,
47
+ "logits/rejected": -2.8086395263671875,
48
+ "logps/chosen": -278.2524108886719,
49
+ "logps/rejected": -263.9921569824219,
50
+ "loss": 0.5868,
51
  "pred_label": 0.0,
52
+ "rewards/accuracies": 0.6949999928474426,
53
+ "rewards/chosen": 0.05295524746179581,
54
+ "rewards/margins": 0.39829620718955994,
55
+ "rewards/rejected": -0.3453409671783447,
56
  "step": 200,
57
  "use_label": 0.0
58
  },
59
  {
60
  "epoch": 0.31,
61
+ "learning_rate": 7.625145518044238e-06,
62
+ "logits/chosen": -2.803905725479126,
63
+ "logits/rejected": -2.802032232284546,
64
+ "logps/chosen": -284.01385498046875,
65
+ "logps/rejected": -259.5546569824219,
66
+ "loss": 0.562,
67
  "pred_label": 0.0,
68
+ "rewards/accuracies": 0.7056249976158142,
69
+ "rewards/chosen": 0.005377008114010096,
70
+ "rewards/margins": 0.5735920667648315,
71
+ "rewards/rejected": -0.5682151317596436,
72
  "step": 300,
73
  "use_label": 0.0
74
  },
75
  {
76
  "epoch": 0.42,
77
+ "learning_rate": 6.461001164144355e-06,
78
+ "logits/chosen": -2.8141045570373535,
79
+ "logits/rejected": -2.7911813259124756,
80
+ "logps/chosen": -284.3139953613281,
81
+ "logps/rejected": -269.4837951660156,
82
+ "loss": 0.5527,
83
  "pred_label": 0.0,
84
+ "rewards/accuracies": 0.7250000238418579,
85
+ "rewards/chosen": 0.011648621410131454,
86
+ "rewards/margins": 0.6873368620872498,
87
+ "rewards/rejected": -0.6756882667541504,
88
  "step": 400,
89
  "use_label": 0.0
90
  },
91
  {
92
  "epoch": 0.52,
93
+ "learning_rate": 5.2968568102444705e-06,
94
+ "logits/chosen": -2.7915823459625244,
95
+ "logits/rejected": -2.776299476623535,
96
+ "logps/chosen": -269.73016357421875,
97
+ "logps/rejected": -257.2498474121094,
98
+ "loss": 0.5556,
99
  "pred_label": 0.0,
100
+ "rewards/accuracies": 0.6862499713897705,
101
+ "rewards/chosen": 0.03227977454662323,
102
+ "rewards/margins": 0.5588020086288452,
103
+ "rewards/rejected": -0.5265222191810608,
104
  "step": 500,
105
  "use_label": 0.0
106
  },
107
  {
108
  "epoch": 0.63,
109
+ "learning_rate": 4.132712456344587e-06,
110
+ "logits/chosen": -2.8199498653411865,
111
+ "logits/rejected": -2.8022332191467285,
112
+ "logps/chosen": -284.0947265625,
113
+ "logps/rejected": -271.3774108886719,
114
+ "loss": 0.5422,
115
  "pred_label": 0.0,
116
+ "rewards/accuracies": 0.7143750190734863,
117
+ "rewards/chosen": 0.11361943930387497,
118
+ "rewards/margins": 0.7251341938972473,
119
+ "rewards/rejected": -0.6115147471427917,
120
  "step": 600,
121
  "use_label": 0.0
122
  },
123
  {
124
  "epoch": 0.73,
125
+ "learning_rate": 2.9685681024447033e-06,
126
+ "logits/chosen": -2.8110527992248535,
127
+ "logits/rejected": -2.788975477218628,
128
+ "logps/chosen": -280.3959045410156,
129
+ "logps/rejected": -254.49673461914062,
130
+ "loss": 0.5404,
131
  "pred_label": 0.0,
132
+ "rewards/accuracies": 0.7212499976158142,
133
+ "rewards/chosen": 0.11637673527002335,
134
+ "rewards/margins": 0.6999369263648987,
135
+ "rewards/rejected": -0.5835601687431335,
136
  "step": 700,
137
  "use_label": 0.0
138
  },
139
  {
140
  "epoch": 0.84,
141
+ "learning_rate": 1.8044237485448196e-06,
142
+ "logits/chosen": -2.8146722316741943,
143
+ "logits/rejected": -2.812129020690918,
144
+ "logps/chosen": -287.4331359863281,
145
+ "logps/rejected": -267.59161376953125,
146
+ "loss": 0.5343,
147
  "pred_label": 0.0,
148
+ "rewards/accuracies": 0.721875011920929,
149
+ "rewards/chosen": 0.13239255547523499,
150
+ "rewards/margins": 0.7503484487533569,
151
+ "rewards/rejected": -0.6179558634757996,
152
  "step": 800,
153
  "use_label": 0.0
154
  },
155
  {
156
  "epoch": 0.94,
157
+ "learning_rate": 6.402793946449361e-07,
158
+ "logits/chosen": -2.8043179512023926,
159
+ "logits/rejected": -2.8079681396484375,
160
+ "logps/chosen": -276.8100891113281,
161
+ "logps/rejected": -262.690673828125,
162
+ "loss": 0.545,
163
  "pred_label": 0.0,
164
+ "rewards/accuracies": 0.7193750143051147,
165
+ "rewards/chosen": 0.06981150805950165,
166
+ "rewards/margins": 0.7052963972091675,
167
+ "rewards/rejected": -0.6354848742485046,
168
  "step": 900,
169
  "use_label": 0.0
170
  },
171
  {
172
  "epoch": 1.0,
173
+ "eval_logits/chosen": -2.827404737472534,
174
+ "eval_logits/rejected": -2.818655014038086,
175
+ "eval_logps/chosen": -283.37945556640625,
176
+ "eval_logps/rejected": -265.9969787597656,
177
+ "eval_loss": 0.5459502935409546,
178
  "eval_pred_label": 0.0,
179
+ "eval_rewards/accuracies": 0.7139999866485596,
180
+ "eval_rewards/chosen": 0.08778975158929825,
181
+ "eval_rewards/margins": 0.7575166821479797,
182
+ "eval_rewards/rejected": -0.6697269678115845,
183
+ "eval_runtime": 479.5351,
184
+ "eval_samples_per_second": 4.171,
185
+ "eval_steps_per_second": 0.261,
186
  "eval_use_label": 0.0,
187
  "step": 955
188
  },
 
190
  "epoch": 1.0,
191
  "step": 955,
192
  "total_flos": 0.0,
193
+ "train_loss": 0.5628191218950361,
194
+ "train_runtime": 25746.1298,
195
+ "train_samples_per_second": 2.375,
196
+ "train_steps_per_second": 0.037
197
  }
198
  ],
199
  "logging_steps": 100,
200
  "max_steps": 955,
201
  "num_train_epochs": 1,
202
+ "save_steps": 50,
203
  "total_flos": 0.0,
204
  "trial_name": null,
205
  "trial_params": null