jikaixuan committed on
Commit
b039b8d
1 Parent(s): f939fa6

Model save

Browse files
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
  license: apache-2.0
3
- base_model: mistralai/Mistral-7B-v0.1
4
  tags:
5
  - generated_from_trainer
6
  model-index:
@@ -13,19 +13,19 @@ should probably proofread and complete it, then remove this comment. -->
13
 
14
  # zephyr-ds
15
 
16
- This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on the None dataset.
17
  It achieves the following results on the evaluation set:
18
- - Loss: 0.6366
19
- - Rewards/chosen: 0.0331
20
- - Rewards/rejected: -0.0356
21
- - Rewards/accuracies: 0.6320
22
- - Rewards/margins: 0.0687
23
- - Logps/rejected: -250.3080
24
- - Logps/chosen: -272.9035
25
- - Logits/rejected: -2.5200
26
- - Logits/chosen: -2.5064
27
- - Use Label: 9174.8564
28
- - Pred Label: 6857.1440
29
 
30
  ## Model description
31
 
@@ -60,9 +60,9 @@ The following hyperparameters were used during training:
60
 
61
  ### Training results
62
 
63
- | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen | Use Label | Pred Label |
64
- |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|:---------:|:----------:|
65
- | 0.6393 | 1.0 | 955 | 0.6366 | 0.0331 | -0.0356 | 0.6320 | 0.0687 | -250.3080 | -272.9035 | -2.5200 | -2.5064 | 8966.8564 | 6565.1440 |
66
 
67
 
68
  ### Framework versions
 
1
  ---
2
  license: apache-2.0
3
+ base_model: alignment-handbook/zephyr-7b-sft-full
4
  tags:
5
  - generated_from_trainer
6
  model-index:
 
13
 
14
  # zephyr-ds
15
 
16
+ This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-full](https://huggingface.co/alignment-handbook/zephyr-7b-sft-full) on the None dataset.
17
  It achieves the following results on the evaluation set:
18
+ - Loss: 0.6636
19
+ - Rewards/chosen: 0.0135
20
+ - Rewards/rejected: -0.0160
21
+ - Rewards/accuracies: 0.6280
22
+ - Rewards/margins: 0.0295
23
+ - Logps/rejected: -259.4594
24
+ - Logps/chosen: -284.1223
25
+ - Logits/rejected: -2.8462
26
+ - Logits/chosen: -2.8424
27
+ - Use Label: 11234.4961
28
+ - Pred Label: 4797.5039
29
 
30
  ## Model description
31
 
 
60
 
61
  ### Training results
62
 
63
+ | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen | Use Label | Pred Label |
64
+ |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|:----------:|:----------:|
65
+ | 0.6641 | 1.0 | 955 | 0.6636 | 0.0135 | -0.0160 | 0.6280 | 0.0295 | -259.4594 | -284.1223 | -2.8462 | -2.8424 | 10931.4961 | 4600.5039 |
66
 
67
 
68
  ### Framework versions
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:400a7361981dd0e63d715116385fd010bda670be4c1e8ed3c62d1e28e1b63a78
3
  size 109086672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1de4c5f8773caabb175e8e9302697663a01cfa6d9a23a5a3c3f8587fa010e133
3
  size 109086672
all_results.json CHANGED
@@ -1,23 +1,23 @@
1
  {
2
  "epoch": 1.0,
3
- "eval_logits/chosen": -2.5063796043395996,
4
- "eval_logits/rejected": -2.5199685096740723,
5
- "eval_logps/chosen": -272.9034729003906,
6
- "eval_logps/rejected": -250.30796813964844,
7
- "eval_loss": 0.6366299986839294,
8
- "eval_pred_label": 6857.14404296875,
9
- "eval_rewards/accuracies": 0.6320000290870667,
10
- "eval_rewards/chosen": 0.03307868540287018,
11
- "eval_rewards/margins": 0.06870328634977341,
12
- "eval_rewards/rejected": -0.03562460467219353,
13
- "eval_runtime": 469.8823,
14
  "eval_samples": 2000,
15
- "eval_samples_per_second": 4.256,
16
- "eval_steps_per_second": 0.266,
17
- "eval_use_label": 9174.8564453125,
18
- "train_loss": 0.6567496789063458,
19
- "train_runtime": 24286.6837,
20
  "train_samples": 61135,
21
- "train_samples_per_second": 2.517,
22
  "train_steps_per_second": 0.039
23
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "eval_logits/chosen": -2.842418670654297,
4
+ "eval_logits/rejected": -2.846235752105713,
5
+ "eval_logps/chosen": -284.122314453125,
6
+ "eval_logps/rejected": -259.4594421386719,
7
+ "eval_loss": 0.6635700464248657,
8
+ "eval_pred_label": 4797.50390625,
9
+ "eval_rewards/accuracies": 0.628000020980835,
10
+ "eval_rewards/chosen": 0.013506044633686543,
11
+ "eval_rewards/margins": 0.029479000717401505,
12
+ "eval_rewards/rejected": -0.015972958877682686,
13
+ "eval_runtime": 439.3222,
14
  "eval_samples": 2000,
15
+ "eval_samples_per_second": 4.552,
16
+ "eval_steps_per_second": 0.285,
17
+ "eval_use_label": 11234.49609375,
18
+ "train_loss": 0.6728555943953429,
19
+ "train_runtime": 24272.064,
20
  "train_samples": 61135,
21
+ "train_samples_per_second": 2.519,
22
  "train_steps_per_second": 0.039
23
  }
eval_results.json CHANGED
@@ -1,18 +1,18 @@
1
  {
2
  "epoch": 1.0,
3
- "eval_logits/chosen": -2.5063796043395996,
4
- "eval_logits/rejected": -2.5199685096740723,
5
- "eval_logps/chosen": -272.9034729003906,
6
- "eval_logps/rejected": -250.30796813964844,
7
- "eval_loss": 0.6366299986839294,
8
- "eval_pred_label": 6857.14404296875,
9
- "eval_rewards/accuracies": 0.6320000290870667,
10
- "eval_rewards/chosen": 0.03307868540287018,
11
- "eval_rewards/margins": 0.06870328634977341,
12
- "eval_rewards/rejected": -0.03562460467219353,
13
- "eval_runtime": 469.8823,
14
  "eval_samples": 2000,
15
- "eval_samples_per_second": 4.256,
16
- "eval_steps_per_second": 0.266,
17
- "eval_use_label": 9174.8564453125
18
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "eval_logits/chosen": -2.842418670654297,
4
+ "eval_logits/rejected": -2.846235752105713,
5
+ "eval_logps/chosen": -284.122314453125,
6
+ "eval_logps/rejected": -259.4594421386719,
7
+ "eval_loss": 0.6635700464248657,
8
+ "eval_pred_label": 4797.50390625,
9
+ "eval_rewards/accuracies": 0.628000020980835,
10
+ "eval_rewards/chosen": 0.013506044633686543,
11
+ "eval_rewards/margins": 0.029479000717401505,
12
+ "eval_rewards/rejected": -0.015972958877682686,
13
+ "eval_runtime": 439.3222,
14
  "eval_samples": 2000,
15
+ "eval_samples_per_second": 4.552,
16
+ "eval_steps_per_second": 0.285,
17
+ "eval_use_label": 11234.49609375
18
  }
runs/Jan19_16-29-15_uclaml03.cs.ucla.edu/events.out.tfevents.1705710616.uclaml03.cs.ucla.edu.481426.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:963e934d462d48b1a6122736d4c918dbeac5b3da110a29533f949b7bab7228bc
3
- size 11822
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2af8bfe864f62deac06832b4305bb07472b81b906426793d0f60344cf9a3377d
3
+ size 13023
runs/Jan19_16-29-15_uclaml03.cs.ucla.edu/events.out.tfevents.1705735327.uclaml03.cs.ucla.edu.481426.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a44e6c569c89adfe90d242bf101f53dc16b8c2dfea8e5ded66786852a199ed0c
3
+ size 935
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 0.6567496789063458,
4
- "train_runtime": 24286.6837,
5
  "train_samples": 61135,
6
- "train_samples_per_second": 2.517,
7
  "train_steps_per_second": 0.039
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 0.6728555943953429,
4
+ "train_runtime": 24272.064,
5
  "train_samples": 61135,
6
+ "train_samples_per_second": 2.519,
7
  "train_steps_per_second": 0.039
8
  }
trainer_state.json CHANGED
@@ -11,10 +11,10 @@
11
  {
12
  "epoch": 0.0,
13
  "learning_rate": 5.208333333333333e-09,
14
- "logits/chosen": -2.676934003829956,
15
- "logits/rejected": -2.509021043777466,
16
- "logps/chosen": -304.709228515625,
17
- "logps/rejected": -229.49505615234375,
18
  "loss": 0.6931,
19
  "pred_label": 0.0,
20
  "rewards/accuracies": 0.0,
@@ -27,172 +27,172 @@
27
  {
28
  "epoch": 0.1,
29
  "learning_rate": 4.976717112922002e-07,
30
- "logits/chosen": -2.4758388996124268,
31
- "logits/rejected": -2.4836206436157227,
32
- "logps/chosen": -273.62322998046875,
33
- "logps/rejected": -258.89813232421875,
34
- "loss": 0.6823,
35
- "pred_label": 156.05050659179688,
36
- "rewards/accuracies": 0.4886363744735718,
37
- "rewards/chosen": -0.002133187372237444,
38
- "rewards/margins": -0.0010164172854274511,
39
- "rewards/rejected": -0.0011167696211487055,
40
  "step": 100,
41
- "use_label": 653.9495239257812
42
  },
43
  {
44
  "epoch": 0.21,
45
  "learning_rate": 4.3946449359720607e-07,
46
- "logits/chosen": -2.4878945350646973,
47
- "logits/rejected": -2.4750781059265137,
48
- "logps/chosen": -267.72540283203125,
49
- "logps/rejected": -250.30291748046875,
50
- "loss": 0.6801,
51
- "pred_label": 512.7750244140625,
52
- "rewards/accuracies": 0.5575000047683716,
53
- "rewards/chosen": 0.003957623615860939,
54
- "rewards/margins": 0.008218127302825451,
55
- "rewards/rejected": -0.0042605032213032246,
56
  "step": 200,
57
- "use_label": 1889.2249755859375
58
  },
59
  {
60
  "epoch": 0.31,
61
  "learning_rate": 3.812572759022118e-07,
62
- "logits/chosen": -2.47322940826416,
63
- "logits/rejected": -2.4768893718719482,
64
- "logps/chosen": -272.0399169921875,
65
- "logps/rejected": -244.90228271484375,
66
- "loss": 0.6733,
67
- "pred_label": 943.1199951171875,
68
- "rewards/accuracies": 0.6075000166893005,
69
- "rewards/chosen": 0.010850328952074051,
70
- "rewards/margins": 0.01955023780465126,
71
- "rewards/rejected": -0.008699909783899784,
72
  "step": 300,
73
- "use_label": 3058.8798828125
74
  },
75
  {
76
  "epoch": 0.42,
77
  "learning_rate": 3.230500582072177e-07,
78
- "logits/chosen": -2.486250400543213,
79
- "logits/rejected": -2.466102361679077,
80
- "logps/chosen": -270.7090759277344,
81
- "logps/rejected": -252.9911346435547,
82
- "loss": 0.6635,
83
- "pred_label": 1496.31005859375,
84
- "rewards/accuracies": 0.628125011920929,
85
- "rewards/chosen": 0.01578013226389885,
86
- "rewards/margins": 0.0331539586186409,
87
- "rewards/rejected": -0.01737382635474205,
88
  "step": 400,
89
- "use_label": 4105.68994140625
90
  },
91
  {
92
  "epoch": 0.52,
93
  "learning_rate": 2.648428405122235e-07,
94
- "logits/chosen": -2.46098256111145,
95
- "logits/rejected": -2.446549654006958,
96
- "logps/chosen": -259.1031799316406,
97
- "logps/rejected": -241.98345947265625,
98
- "loss": 0.6543,
99
- "pred_label": 2180.89990234375,
100
- "rewards/accuracies": 0.6175000071525574,
101
- "rewards/chosen": 0.019046209752559662,
102
- "rewards/margins": 0.03530467674136162,
103
- "rewards/rejected": -0.016258466988801956,
104
  "step": 500,
105
- "use_label": 5021.10009765625
106
  },
107
  {
108
  "epoch": 0.63,
109
  "learning_rate": 2.0663562281722933e-07,
110
- "logits/chosen": -2.4896275997161865,
111
- "logits/rejected": -2.4663710594177246,
112
- "logps/chosen": -271.8394470214844,
113
- "logps/rejected": -252.83351135253906,
114
- "loss": 0.646,
115
- "pred_label": 2942.87255859375,
116
- "rewards/accuracies": 0.6600000262260437,
117
- "rewards/chosen": 0.02863229252398014,
118
- "rewards/margins": 0.05473264306783676,
119
- "rewards/rejected": -0.02610035613179207,
120
  "step": 600,
121
- "use_label": 5859.12744140625
122
  },
123
  {
124
  "epoch": 0.73,
125
  "learning_rate": 1.4842840512223514e-07,
126
- "logits/chosen": -2.485153913497925,
127
- "logits/rejected": -2.472170352935791,
128
- "logps/chosen": -269.14117431640625,
129
- "logps/rejected": -238.4977569580078,
130
- "loss": 0.6421,
131
- "pred_label": 3756.925048828125,
132
- "rewards/accuracies": 0.6418750286102295,
133
- "rewards/chosen": 0.03320219740271568,
134
- "rewards/margins": 0.05917687341570854,
135
- "rewards/rejected": -0.02597467601299286,
136
  "step": 700,
137
- "use_label": 6645.0751953125
138
  },
139
  {
140
  "epoch": 0.84,
141
  "learning_rate": 9.022118742724097e-08,
142
- "logits/chosen": -2.4880199432373047,
143
- "logits/rejected": -2.5014524459838867,
144
- "logps/chosen": -276.52313232421875,
145
- "logps/rejected": -252.34681701660156,
146
- "loss": 0.6386,
147
- "pred_label": 4625.42236328125,
148
- "rewards/accuracies": 0.640625,
149
- "rewards/chosen": 0.03754829242825508,
150
- "rewards/margins": 0.06682833284139633,
151
- "rewards/rejected": -0.02928004413843155,
152
  "step": 800,
153
- "use_label": 7376.57763671875
154
  },
155
  {
156
  "epoch": 0.94,
157
  "learning_rate": 3.20139697322468e-08,
158
- "logits/chosen": -2.4699513912200928,
159
- "logits/rejected": -2.485213279724121,
160
- "logps/chosen": -266.04229736328125,
161
- "logps/rejected": -247.93341064453125,
162
- "loss": 0.6393,
163
- "pred_label": 5488.7001953125,
164
- "rewards/accuracies": 0.6587499976158142,
165
- "rewards/chosen": 0.03272656351327896,
166
- "rewards/margins": 0.06201673671603203,
167
- "rewards/rejected": -0.02929016388952732,
168
  "step": 900,
169
- "use_label": 8113.2998046875
170
  },
171
  {
172
  "epoch": 1.0,
173
- "eval_logits/chosen": -2.5063796043395996,
174
- "eval_logits/rejected": -2.5199685096740723,
175
- "eval_logps/chosen": -272.9034729003906,
176
- "eval_logps/rejected": -250.30796813964844,
177
- "eval_loss": 0.6366299986839294,
178
- "eval_pred_label": 6565.14404296875,
179
- "eval_rewards/accuracies": 0.6320000290870667,
180
- "eval_rewards/chosen": 0.03307868540287018,
181
- "eval_rewards/margins": 0.06870328634977341,
182
- "eval_rewards/rejected": -0.03562460467219353,
183
- "eval_runtime": 474.2549,
184
- "eval_samples_per_second": 4.217,
185
- "eval_steps_per_second": 0.264,
186
- "eval_use_label": 8966.8564453125,
187
  "step": 955
188
  },
189
  {
190
  "epoch": 1.0,
191
  "step": 955,
192
  "total_flos": 0.0,
193
- "train_loss": 0.6567496789063458,
194
- "train_runtime": 24286.6837,
195
- "train_samples_per_second": 2.517,
196
  "train_steps_per_second": 0.039
197
  }
198
  ],
 
11
  {
12
  "epoch": 0.0,
13
  "learning_rate": 5.208333333333333e-09,
14
+ "logits/chosen": -2.980285167694092,
15
+ "logits/rejected": -2.87275767326355,
16
+ "logps/chosen": -313.4390563964844,
17
+ "logps/rejected": -236.1754150390625,
18
  "loss": 0.6931,
19
  "pred_label": 0.0,
20
  "rewards/accuracies": 0.0,
 
27
  {
28
  "epoch": 0.1,
29
  "learning_rate": 4.976717112922002e-07,
30
+ "logits/chosen": -2.8194870948791504,
31
+ "logits/rejected": -2.8288567066192627,
32
+ "logps/chosen": -285.2724304199219,
33
+ "logps/rejected": -270.956298828125,
34
+ "loss": 0.6838,
35
+ "pred_label": 150.7020263671875,
36
+ "rewards/accuracies": 0.5050504803657532,
37
+ "rewards/chosen": 0.000817809603177011,
38
+ "rewards/margins": 0.0014873194741085172,
39
+ "rewards/rejected": -0.0006695101037621498,
40
  "step": 100,
41
+ "use_label": 659.2979736328125
42
  },
43
  {
44
  "epoch": 0.21,
45
  "learning_rate": 4.3946449359720607e-07,
46
+ "logits/chosen": -2.828075647354126,
47
+ "logits/rejected": -2.816530227661133,
48
+ "logps/chosen": -278.7549133300781,
49
+ "logps/rejected": -260.5694274902344,
50
+ "loss": 0.683,
51
+ "pred_label": 452.552490234375,
52
+ "rewards/accuracies": 0.534375011920929,
53
+ "rewards/chosen": 0.002707230392843485,
54
+ "rewards/margins": 0.005774380639195442,
55
+ "rewards/rejected": -0.003067150479182601,
56
  "step": 200,
57
+ "use_label": 1949.447509765625
58
  },
59
  {
60
  "epoch": 0.31,
61
  "learning_rate": 3.812572759022118e-07,
62
+ "logits/chosen": -2.8141846656799316,
63
+ "logits/rejected": -2.8159701824188232,
64
+ "logps/chosen": -284.0125732421875,
65
+ "logps/rejected": -253.9112091064453,
66
+ "loss": 0.6807,
67
+ "pred_label": 775.85498046875,
68
+ "rewards/accuracies": 0.5575000047683716,
69
+ "rewards/chosen": 0.005504029802978039,
70
+ "rewards/margins": 0.009370613843202591,
71
+ "rewards/rejected": -0.0038665838073939085,
72
  "step": 300,
73
+ "use_label": 3226.14501953125
74
  },
75
  {
76
  "epoch": 0.42,
77
  "learning_rate": 3.230500582072177e-07,
78
+ "logits/chosen": -2.826817512512207,
79
+ "logits/rejected": -2.8094358444213867,
80
+ "logps/chosen": -284.3566589355469,
81
+ "logps/rejected": -262.80731201171875,
82
+ "loss": 0.6769,
83
+ "pred_label": 1149.0574951171875,
84
+ "rewards/accuracies": 0.5774999856948853,
85
+ "rewards/chosen": 0.007384983357042074,
86
+ "rewards/margins": 0.015422500669956207,
87
+ "rewards/rejected": -0.008037514984607697,
88
  "step": 400,
89
+ "use_label": 4452.9423828125
90
  },
91
  {
92
  "epoch": 0.52,
93
  "learning_rate": 2.648428405122235e-07,
94
+ "logits/chosen": -2.807734966278076,
95
+ "logits/rejected": -2.796409845352173,
96
+ "logps/chosen": -269.9852600097656,
97
+ "logps/rejected": -252.07232666015625,
98
+ "loss": 0.6728,
99
+ "pred_label": 1592.5675048828125,
100
+ "rewards/accuracies": 0.5756250023841858,
101
+ "rewards/chosen": 0.006774631794542074,
102
+ "rewards/margins": 0.01554279588162899,
103
+ "rewards/rejected": -0.008768163621425629,
104
  "step": 500,
105
+ "use_label": 5609.4326171875
106
  },
107
  {
108
  "epoch": 0.63,
109
  "learning_rate": 2.0663562281722933e-07,
110
+ "logits/chosen": -2.8339650630950928,
111
+ "logits/rejected": -2.82075572013855,
112
+ "logps/chosen": -285.0927734375,
113
+ "logps/rejected": -265.4134826660156,
114
+ "loss": 0.6681,
115
+ "pred_label": 2111.6650390625,
116
+ "rewards/accuracies": 0.6206250190734863,
117
+ "rewards/chosen": 0.013815036043524742,
118
+ "rewards/margins": 0.0289370846003294,
119
+ "rewards/rejected": -0.015122047625482082,
120
  "step": 600,
121
+ "use_label": 6690.3349609375
122
  },
123
  {
124
  "epoch": 0.73,
125
  "learning_rate": 1.4842840512223514e-07,
126
+ "logits/chosen": -2.827232599258423,
127
+ "logits/rejected": -2.811751127243042,
128
+ "logps/chosen": -281.4178771972656,
129
+ "logps/rejected": -248.81068420410156,
130
+ "loss": 0.6659,
131
+ "pred_label": 2680.2724609375,
132
+ "rewards/accuracies": 0.6200000047683716,
133
+ "rewards/chosen": 0.01417633332312107,
134
+ "rewards/margins": 0.029135096818208694,
135
+ "rewards/rejected": -0.014958759769797325,
136
  "step": 700,
137
+ "use_label": 7721.7275390625
138
  },
139
  {
140
  "epoch": 0.84,
141
  "learning_rate": 9.022118742724097e-08,
142
+ "logits/chosen": -2.8300516605377197,
143
+ "logits/rejected": -2.835542678833008,
144
+ "logps/chosen": -288.608642578125,
145
+ "logps/rejected": -261.5773010253906,
146
+ "loss": 0.6646,
147
+ "pred_label": 3286.232421875,
148
+ "rewards/accuracies": 0.6200000047683716,
149
+ "rewards/chosen": 0.014839441515505314,
150
+ "rewards/margins": 0.03136582300066948,
151
+ "rewards/rejected": -0.01652638241648674,
152
  "step": 800,
153
+ "use_label": 8715.767578125
154
  },
155
  {
156
  "epoch": 0.94,
157
  "learning_rate": 3.20139697322468e-08,
158
+ "logits/chosen": -2.8211710453033447,
159
+ "logits/rejected": -2.8280835151672363,
160
+ "logps/chosen": -277.363525390625,
161
+ "logps/rejected": -256.4843444824219,
162
+ "loss": 0.6641,
163
+ "pred_label": 3882.75244140625,
164
+ "rewards/accuracies": 0.6331250071525574,
165
+ "rewards/chosen": 0.01446867547929287,
166
+ "rewards/margins": 0.02932187356054783,
167
+ "rewards/rejected": -0.014853193424642086,
168
  "step": 900,
169
+ "use_label": 9719.2470703125
170
  },
171
  {
172
  "epoch": 1.0,
173
+ "eval_logits/chosen": -2.842418670654297,
174
+ "eval_logits/rejected": -2.846235752105713,
175
+ "eval_logps/chosen": -284.122314453125,
176
+ "eval_logps/rejected": -259.4594421386719,
177
+ "eval_loss": 0.6635700464248657,
178
+ "eval_pred_label": 4600.50390625,
179
+ "eval_rewards/accuracies": 0.628000020980835,
180
+ "eval_rewards/chosen": 0.013506044633686543,
181
+ "eval_rewards/margins": 0.029479000717401505,
182
+ "eval_rewards/rejected": -0.015972958877682686,
183
+ "eval_runtime": 438.8322,
184
+ "eval_samples_per_second": 4.558,
185
+ "eval_steps_per_second": 0.285,
186
+ "eval_use_label": 10931.49609375,
187
  "step": 955
188
  },
189
  {
190
  "epoch": 1.0,
191
  "step": 955,
192
  "total_flos": 0.0,
193
+ "train_loss": 0.6728555943953429,
194
+ "train_runtime": 24272.064,
195
+ "train_samples_per_second": 2.519,
196
  "train_steps_per_second": 0.039
197
  }
198
  ],