jikaixuan committed
Commit 8c6b41c
1 Parent(s): bdda380

Model save

README.md CHANGED
@@ -15,17 +15,17 @@ should probably proofread and complete it, then remove this comment. -->
 
  This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on the None dataset.
  It achieves the following results on the evaluation set:
- - Loss: 0.6366
- - Rewards/chosen: 0.0331
- - Rewards/rejected: -0.0356
- - Rewards/accuracies: 0.6320
- - Rewards/margins: 0.0687
- - Logps/rejected: -250.3080
- - Logps/chosen: -272.9035
- - Logits/rejected: -2.5200
- - Logits/chosen: -2.5064
- - Use Label: 9174.8564
- - Pred Label: 6857.1440
+ - Loss: 0.6354
+ - Rewards/chosen: 0.0271
+ - Rewards/rejected: -0.0297
+ - Rewards/accuracies: 0.6260
+ - Rewards/margins: 0.0568
+ - Logps/rejected: -253.2359
+ - Logps/chosen: -269.2855
+ - Logits/rejected: -2.4958
+ - Logits/chosen: -2.4939
+ - Use Label: 18265.6758
+ - Pred Label: 13796.3242
 
  ## Model description
 
@@ -45,14 +45,14 @@ More information needed
 
  The following hyperparameters were used during training:
  - learning_rate: 5e-07
- - train_batch_size: 4
+ - train_batch_size: 2
  - eval_batch_size: 4
  - seed: 42
  - distributed_type: multi-GPU
- - num_devices: 4
- - gradient_accumulation_steps: 4
+ - num_devices: 2
+ - gradient_accumulation_steps: 16
  - total_train_batch_size: 64
- - total_eval_batch_size: 16
+ - total_eval_batch_size: 8
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
  - lr_scheduler_type: linear
  - lr_scheduler_warmup_ratio: 0.1
@@ -60,9 +60,9 @@ The following hyperparameters were used during training:
 
  ### Training results
 
- | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen | Use Label | Pred Label |
- |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|:---------:|:----------:|
- | 0.6393 | 1.0 | 955 | 0.6366 | 0.0331 | -0.0356 | 0.6320 | 0.0687 | -250.3080 | -272.9035 | -2.5200 | -2.5064 | 8966.8564 | 6565.1440 |
+ | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen | Use Label | Pred Label |
+ |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|:----------:|:----------:|
+ | 0.6377 | 1.0 | 955 | 0.6354 | 0.0271 | -0.0297 | 0.6260 | 0.0568 | -253.2359 | -269.2855 | -2.4958 | -2.4939 | 17827.6758 | 13234.3242 |
 
 
  ### Framework versions
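
The hyperparameter diff above keeps the effective batch size unchanged: the reported total_train_batch_size is the per-device batch size times the number of devices times the gradient accumulation steps, which comes out to 64 in both configurations. A minimal sketch of that arithmetic (the helper function is illustrative, not part of this repository):

```python
# Effective (total) train batch size:
# per_device_train_batch_size * num_devices * gradient_accumulation_steps.
def total_train_batch_size(per_device: int, num_devices: int, grad_accum: int) -> int:
    return per_device * num_devices * grad_accum

# Old run: 4 per device on 4 GPUs with 4 accumulation steps.
assert total_train_batch_size(4, 4, 4) == 64
# New run: 2 per device on 2 GPUs with 16 accumulation steps -> same 64.
assert total_train_batch_size(2, 2, 16) == 64
```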
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:687f28b2c569851076b1156cac98b07b2e15e82c56032251b902e8d7a7a2eb57
+ oid sha256:93eff2aaaece374224573b008ce1ce472e09be6ef9c4a68d282729c77e326912
  size 109086672
all_results.json CHANGED
@@ -1,23 +1,23 @@
  {
  "epoch": 1.0,
- "eval_logits/chosen": -2.5063796043395996,
- "eval_logits/rejected": -2.5199685096740723,
- "eval_logps/chosen": -272.9034729003906,
- "eval_logps/rejected": -250.30796813964844,
- "eval_loss": 0.6366299986839294,
- "eval_pred_label": 6857.14404296875,
- "eval_rewards/accuracies": 0.6320000290870667,
- "eval_rewards/chosen": 0.03307868540287018,
- "eval_rewards/margins": 0.06870328634977341,
- "eval_rewards/rejected": -0.03562460467219353,
- "eval_runtime": 441.6525,
+ "eval_logits/chosen": -2.4939169883728027,
+ "eval_logits/rejected": -2.495774507522583,
+ "eval_logps/chosen": -269.28546142578125,
+ "eval_logps/rejected": -253.23594665527344,
+ "eval_loss": 0.6354129910469055,
+ "eval_pred_label": 13796.32421875,
+ "eval_rewards/accuracies": 0.6259999871253967,
+ "eval_rewards/chosen": 0.027118388563394547,
+ "eval_rewards/margins": 0.056793875992298126,
+ "eval_rewards/rejected": -0.029675481840968132,
+ "eval_runtime": 1013.6049,
  "eval_samples": 2000,
- "eval_samples_per_second": 4.528,
- "eval_steps_per_second": 0.283,
- "eval_use_label": 9174.8564453125,
- "train_loss": 0.6567496789063458,
- "train_runtime": 24439.922,
+ "eval_samples_per_second": 1.973,
+ "eval_steps_per_second": 0.247,
+ "eval_use_label": 18265.67578125,
+ "train_loss": 0.6554346030919339,
+ "train_runtime": 50166.5495,
  "train_samples": 61135,
- "train_samples_per_second": 2.501,
- "train_steps_per_second": 0.039
+ "train_samples_per_second": 1.219,
+ "train_steps_per_second": 0.019
  }
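
As a sanity check, the throughput fields in these result files are consistent with samples divided by runtime; a small illustrative sketch using the updated values above:

```python
# samples_per_second fields are samples / runtime, rounded to three decimals.
eval_samples, eval_runtime = 2000, 1013.6049
train_samples, train_runtime = 61135, 50166.5495

print(round(eval_samples / eval_runtime, 3))    # 1.973 == eval_samples_per_second
print(round(train_samples / train_runtime, 3))  # 1.219 == train_samples_per_second
```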
eval_results.json CHANGED
@@ -1,18 +1,18 @@
  {
  "epoch": 1.0,
- "eval_logits/chosen": -2.5063796043395996,
- "eval_logits/rejected": -2.5199685096740723,
- "eval_logps/chosen": -272.9034729003906,
- "eval_logps/rejected": -250.30796813964844,
- "eval_loss": 0.6366299986839294,
- "eval_pred_label": 6857.14404296875,
- "eval_rewards/accuracies": 0.6320000290870667,
- "eval_rewards/chosen": 0.03307868540287018,
- "eval_rewards/margins": 0.06870328634977341,
- "eval_rewards/rejected": -0.03562460467219353,
- "eval_runtime": 441.6525,
+ "eval_logits/chosen": -2.4939169883728027,
+ "eval_logits/rejected": -2.495774507522583,
+ "eval_logps/chosen": -269.28546142578125,
+ "eval_logps/rejected": -253.23594665527344,
+ "eval_loss": 0.6354129910469055,
+ "eval_pred_label": 13796.32421875,
+ "eval_rewards/accuracies": 0.6259999871253967,
+ "eval_rewards/chosen": 0.027118388563394547,
+ "eval_rewards/margins": 0.056793875992298126,
+ "eval_rewards/rejected": -0.029675481840968132,
+ "eval_runtime": 1013.6049,
  "eval_samples": 2000,
- "eval_samples_per_second": 4.528,
- "eval_steps_per_second": 0.283,
- "eval_use_label": 9174.8564453125
+ "eval_samples_per_second": 1.973,
+ "eval_steps_per_second": 0.247,
+ "eval_use_label": 18265.67578125
  }
runs/Jan17_10-24-46_uclaml03.cs.ucla.edu/events.out.tfevents.1705516166.uclaml03.cs.ucla.edu.3614264.0 CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:c2cdcea9253154718e626c44be407e4ceee6445a33899a4484e6c8c20846bb33
- size 11768
+ oid sha256:2736f15944d801784c31bc0b45a9d5a501f9144d3a985e1feac4fd8716584c23
+ size 12969
runs/Jan17_10-24-46_uclaml03.cs.ucla.edu/events.out.tfevents.1705567345.uclaml03.cs.ucla.edu.3614264.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:eae34ab475ac4656727c96107ce229d954f9a60d4530ec3a4557a0a514c79326
+ size 935
train_results.json CHANGED
@@ -1,8 +1,8 @@
  {
  "epoch": 1.0,
- "train_loss": 0.6567496789063458,
- "train_runtime": 24439.922,
+ "train_loss": 0.6554346030919339,
+ "train_runtime": 50166.5495,
  "train_samples": 61135,
- "train_samples_per_second": 2.501,
- "train_steps_per_second": 0.039
+ "train_samples_per_second": 1.219,
+ "train_steps_per_second": 0.019
  }
trainer_state.json CHANGED
@@ -11,10 +11,10 @@
  {
  "epoch": 0.0,
  "learning_rate": 5.208333333333333e-09,
- "logits/chosen": -2.676934003829956,
- "logits/rejected": -2.509021043777466,
- "logps/chosen": -304.709228515625,
- "logps/rejected": -229.49505615234375,
+ "logits/chosen": -2.7525930404663086,
+ "logits/rejected": -2.6732418537139893,
+ "logps/chosen": -297.177001953125,
+ "logps/rejected": -236.72621154785156,
  "loss": 0.6931,
  "pred_label": 0.0,
  "rewards/accuracies": 0.0,
@@ -22,184 +22,184 @@
  "rewards/margins": 0.0,
  "rewards/rejected": 0.0,
  "step": 1,
- "use_label": 10.0
+ "use_label": 17.0
  },
  {
  "epoch": 0.1,
  "learning_rate": 4.976717112922002e-07,
- "logits/chosen": -2.4758388996124268,
- "logits/rejected": -2.4836206436157227,
- "logps/chosen": -273.62322998046875,
- "logps/rejected": -258.89813232421875,
- "loss": 0.6823,
- "pred_label": 156.05050659179688,
- "rewards/accuracies": 0.4886363744735718,
- "rewards/chosen": -0.002133187372237444,
- "rewards/margins": -0.0010164172854274511,
- "rewards/rejected": -0.0011167696211487055,
+ "logits/chosen": -2.6616106033325195,
+ "logits/rejected": -2.6597719192504883,
+ "logps/chosen": -270.4000244140625,
+ "logps/rejected": -249.33827209472656,
+ "loss": 0.6829,
+ "pred_label": 333.43182373046875,
+ "rewards/accuracies": 0.4965277910232544,
+ "rewards/chosen": 0.0011782451765611768,
+ "rewards/margins": 0.001073930412530899,
+ "rewards/rejected": 0.00010431456030346453,
  "step": 100,
- "use_label": 653.9495239257812
+ "use_label": 1283.5682373046875
  },
  {
  "epoch": 0.21,
  "learning_rate": 4.3946449359720607e-07,
- "logits/chosen": -2.4878945350646973,
- "logits/rejected": -2.4750781059265137,
- "logps/chosen": -267.72540283203125,
- "logps/rejected": -250.30291748046875,
- "loss": 0.6801,
- "pred_label": 512.7750244140625,
- "rewards/accuracies": 0.5575000047683716,
- "rewards/chosen": 0.003957623615860939,
- "rewards/margins": 0.008218127302825451,
- "rewards/rejected": -0.0042605032213032246,
+ "logits/chosen": -2.6945221424102783,
+ "logits/rejected": -2.678621530532837,
+ "logps/chosen": -271.6979064941406,
+ "logps/rejected": -254.37026977539062,
+ "loss": 0.6799,
+ "pred_label": 1038.7462158203125,
+ "rewards/accuracies": 0.5350000262260437,
+ "rewards/chosen": 0.004888341296464205,
+ "rewards/margins": 0.007898561656475067,
+ "rewards/rejected": -0.0030102210585027933,
  "step": 200,
- "use_label": 1889.2249755859375
+ "use_label": 3762.253662109375
  },
  {
  "epoch": 0.31,
  "learning_rate": 3.812572759022118e-07,
- "logits/chosen": -2.47322940826416,
- "logits/rejected": -2.4768893718719482,
- "logps/chosen": -272.0399169921875,
- "logps/rejected": -244.90228271484375,
- "loss": 0.6733,
- "pred_label": 943.1199951171875,
- "rewards/accuracies": 0.6075000166893005,
- "rewards/chosen": 0.010850328952074051,
- "rewards/margins": 0.01955023780465126,
- "rewards/rejected": -0.008699909783899784,
+ "logits/chosen": -2.6708526611328125,
+ "logits/rejected": -2.6628105640411377,
+ "logps/chosen": -272.3077392578125,
+ "logps/rejected": -253.75027465820312,
+ "loss": 0.6728,
+ "pred_label": 1884.596923828125,
+ "rewards/accuracies": 0.5653125047683716,
+ "rewards/chosen": 0.010109632275998592,
+ "rewards/margins": 0.016557401046156883,
+ "rewards/rejected": -0.006447767838835716,
  "step": 300,
- "use_label": 3058.8798828125
+ "use_label": 6116.4033203125
  },
  {
  "epoch": 0.42,
  "learning_rate": 3.230500582072177e-07,
- "logits/chosen": -2.486250400543213,
- "logits/rejected": -2.466102361679077,
- "logps/chosen": -270.7090759277344,
- "logps/rejected": -252.9911346435547,
- "loss": 0.6635,
- "pred_label": 1496.31005859375,
- "rewards/accuracies": 0.628125011920929,
- "rewards/chosen": 0.01578013226389885,
- "rewards/margins": 0.0331539586186409,
- "rewards/rejected": -0.01737382635474205,
+ "logits/chosen": -2.668009042739868,
+ "logits/rejected": -2.650494337081909,
+ "logps/chosen": -267.6447448730469,
+ "logps/rejected": -253.59107971191406,
+ "loss": 0.6616,
+ "pred_label": 3012.675537109375,
+ "rewards/accuracies": 0.6193749904632568,
+ "rewards/chosen": 0.017754318192601204,
+ "rewards/margins": 0.030351871624588966,
+ "rewards/rejected": -0.012597555294632912,
  "step": 400,
- "use_label": 4105.68994140625
+ "use_label": 8188.32421875
  },
  {
  "epoch": 0.52,
  "learning_rate": 2.648428405122235e-07,
- "logits/chosen": -2.46098256111145,
- "logits/rejected": -2.446549654006958,
- "logps/chosen": -259.1031799316406,
- "logps/rejected": -241.98345947265625,
- "loss": 0.6543,
- "pred_label": 2180.89990234375,
- "rewards/accuracies": 0.6175000071525574,
- "rewards/chosen": 0.019046209752559662,
- "rewards/margins": 0.03530467674136162,
- "rewards/rejected": -0.016258466988801956,
+ "logits/chosen": -2.6697680950164795,
+ "logits/rejected": -2.6707708835601807,
+ "logps/chosen": -271.2095642089844,
+ "logps/rejected": -247.21224975585938,
+ "loss": 0.6528,
+ "pred_label": 4377.916015625,
+ "rewards/accuracies": 0.625,
+ "rewards/chosen": 0.024391591548919678,
+ "rewards/margins": 0.04303843528032303,
+ "rewards/rejected": -0.01864684373140335,
  "step": 500,
- "use_label": 5021.10009765625
+ "use_label": 10023.083984375
  },
  {
  "epoch": 0.63,
  "learning_rate": 2.0663562281722933e-07,
- "logits/chosen": -2.4896275997161865,
- "logits/rejected": -2.4663710594177246,
- "logps/chosen": -271.8394470214844,
- "logps/rejected": -252.83351135253906,
- "loss": 0.646,
- "pred_label": 2942.87255859375,
- "rewards/accuracies": 0.6600000262260437,
- "rewards/chosen": 0.02863229252398014,
- "rewards/margins": 0.05473264306783676,
- "rewards/rejected": -0.02610035613179207,
+ "logits/chosen": -2.659043073654175,
+ "logits/rejected": -2.6555004119873047,
+ "logps/chosen": -272.95050048828125,
+ "logps/rejected": -251.1392364501953,
+ "loss": 0.6442,
+ "pred_label": 5962.0673828125,
+ "rewards/accuracies": 0.6553124785423279,
+ "rewards/chosen": 0.030743848532438278,
+ "rewards/margins": 0.0554736964404583,
+ "rewards/rejected": -0.024729840457439423,
  "step": 600,
- "use_label": 5859.12744140625
+ "use_label": 11638.9326171875
  },
  {
  "epoch": 0.73,
  "learning_rate": 1.4842840512223514e-07,
- "logits/chosen": -2.485153913497925,
- "logits/rejected": -2.472170352935791,
- "logps/chosen": -269.14117431640625,
- "logps/rejected": -238.4977569580078,
- "loss": 0.6421,
- "pred_label": 3756.925048828125,
- "rewards/accuracies": 0.6418750286102295,
- "rewards/chosen": 0.03320219740271568,
- "rewards/margins": 0.05917687341570854,
- "rewards/rejected": -0.02597467601299286,
+ "logits/chosen": -2.6591668128967285,
+ "logits/rejected": -2.6622869968414307,
+ "logps/chosen": -269.9889221191406,
+ "logps/rejected": -245.4040985107422,
+ "loss": 0.64,
+ "pred_label": 7640.8505859375,
+ "rewards/accuracies": 0.6478124856948853,
+ "rewards/chosen": 0.03263993561267853,
+ "rewards/margins": 0.061180587857961655,
+ "rewards/rejected": -0.02854064851999283,
  "step": 700,
- "use_label": 6645.0751953125
+ "use_label": 13160.150390625
  },
  {
  "epoch": 0.84,
  "learning_rate": 9.022118742724097e-08,
- "logits/chosen": -2.4880199432373047,
- "logits/rejected": -2.5014524459838867,
- "logps/chosen": -276.52313232421875,
- "logps/rejected": -252.34681701660156,
- "loss": 0.6386,
- "pred_label": 4625.42236328125,
- "rewards/accuracies": 0.640625,
- "rewards/chosen": 0.03754829242825508,
- "rewards/margins": 0.06682833284139633,
- "rewards/rejected": -0.02928004413843155,
+ "logits/chosen": -2.650268793106079,
+ "logits/rejected": -2.6555473804473877,
+ "logps/chosen": -272.705322265625,
+ "logps/rejected": -252.30169677734375,
+ "loss": 0.6368,
+ "pred_label": 9366.9609375,
+ "rewards/accuracies": 0.6415625214576721,
+ "rewards/chosen": 0.031398553401231766,
+ "rewards/margins": 0.06083739921450615,
+ "rewards/rejected": -0.029438842087984085,
  "step": 800,
- "use_label": 7376.57763671875
+ "use_label": 14634.0390625
  },
  {
  "epoch": 0.94,
  "learning_rate": 3.20139697322468e-08,
- "logits/chosen": -2.4699513912200928,
- "logits/rejected": -2.485213279724121,
- "logps/chosen": -266.04229736328125,
- "logps/rejected": -247.93341064453125,
- "loss": 0.6393,
- "pred_label": 5488.7001953125,
- "rewards/accuracies": 0.6587499976158142,
- "rewards/chosen": 0.03272656351327896,
- "rewards/margins": 0.06201673671603203,
- "rewards/rejected": -0.02929016388952732,
+ "logits/chosen": -2.6563680171966553,
+ "logits/rejected": -2.6590001583099365,
+ "logps/chosen": -269.04559326171875,
+ "logps/rejected": -253.2301025390625,
+ "loss": 0.6377,
+ "pred_label": 11126.677734375,
+ "rewards/accuracies": 0.6418750286102295,
+ "rewards/chosen": 0.02964354306459427,
+ "rewards/margins": 0.05687180534005165,
+ "rewards/rejected": -0.027228260412812233,
  "step": 900,
- "use_label": 8113.2998046875
+ "use_label": 16074.322265625
  },
  {
  "epoch": 1.0,
- "eval_logits/chosen": -2.5063796043395996,
- "eval_logits/rejected": -2.5199685096740723,
- "eval_logps/chosen": -272.9034729003906,
- "eval_logps/rejected": -250.30796813964844,
- "eval_loss": 0.6366299986839294,
- "eval_pred_label": 6565.14404296875,
- "eval_rewards/accuracies": 0.6320000290870667,
- "eval_rewards/chosen": 0.03307868540287018,
- "eval_rewards/margins": 0.06870328634977341,
- "eval_rewards/rejected": -0.03562460467219353,
- "eval_runtime": 443.034,
- "eval_samples_per_second": 4.514,
- "eval_steps_per_second": 0.282,
- "eval_use_label": 8966.8564453125,
+ "eval_logits/chosen": -2.4939169883728027,
+ "eval_logits/rejected": -2.495774507522583,
+ "eval_logps/chosen": -269.28546142578125,
+ "eval_logps/rejected": -253.23594665527344,
+ "eval_loss": 0.6354129910469055,
+ "eval_pred_label": 13234.32421875,
+ "eval_rewards/accuracies": 0.6259999871253967,
+ "eval_rewards/chosen": 0.027118388563394547,
+ "eval_rewards/margins": 0.056793875992298126,
+ "eval_rewards/rejected": -0.029675481840968132,
+ "eval_runtime": 1016.337,
+ "eval_samples_per_second": 1.968,
+ "eval_steps_per_second": 0.246,
+ "eval_use_label": 17827.67578125,
  "step": 955
  },
  {
  "epoch": 1.0,
  "step": 955,
  "total_flos": 0.0,
- "train_loss": 0.6567496789063458,
- "train_runtime": 24439.922,
- "train_samples_per_second": 2.501,
- "train_steps_per_second": 0.039
+ "train_loss": 0.6554346030919339,
+ "train_runtime": 50166.5495,
+ "train_samples_per_second": 1.219,
+ "train_steps_per_second": 0.019
  }
  ],
  "logging_steps": 100,
  "max_steps": 955,
  "num_train_epochs": 1,
- "save_steps": 500,
+ "save_steps": 10,
  "total_flos": 0.0,
  "trial_name": null,
  "trial_params": null