jikaixuan committed on
Commit
68a6137
1 Parent(s): 12c64ec

Model save

README.md CHANGED
@@ -15,17 +15,17 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-full](https://huggingface.co/alignment-handbook/zephyr-7b-sft-full) on the None dataset.
17
  It achieves the following results on the evaluation set:
18
- - Loss: 0.0096
19
- - Rewards/chosen: -886.8282
20
- - Rewards/rejected: -758.4788
21
- - Rewards/accuracies: 0.4600
22
- - Rewards/margins: -128.3494
23
- - Logps/rejected: -7844.0879
24
- - Logps/chosen: -9152.5400
25
- - Logits/rejected: 1.1413
26
- - Logits/chosen: 1.1882
27
- - Use Label: 755.2240
28
- - Pred Label: 15276.7764
29
 
30
  ## Model description
31
 
@@ -49,10 +49,10 @@ The following hyperparameters were used during training:
49
  - eval_batch_size: 4
50
  - seed: 42
51
  - distributed_type: multi-GPU
52
- - num_devices: 4
53
- - gradient_accumulation_steps: 4
54
  - total_train_batch_size: 64
55
- - total_eval_batch_size: 16
56
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
57
  - lr_scheduler_type: linear
58
  - lr_scheduler_warmup_ratio: 0.1
@@ -60,9 +60,9 @@ The following hyperparameters were used during training:
60
 
61
  ### Training results
62
 
63
- | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen | Use Label | Pred Label |
64
- |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|:---------:|:----------:|
65
- | 0.0116 | 1.0 | 955 | 0.0096 | -886.8282 | -758.4788 | 0.4600 | -128.3494 | -7844.0879 | -9152.5400 | 1.1413 | 1.1882 | 753.2240 | 14778.7764 |
66
 
67
 
68
  ### Framework versions
 
15
 
16
  This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-full](https://huggingface.co/alignment-handbook/zephyr-7b-sft-full) on the None dataset.
17
  It achieves the following results on the evaluation set:
18
+ - Loss: 0.6688
19
+ - Rewards/chosen: -0.0034
20
+ - Rewards/rejected: -0.6444
21
+ - Rewards/accuracies: 0.7210
22
+ - Rewards/margins: 0.6410
23
+ - Logps/rejected: -268.7319
24
+ - Logps/chosen: -282.0097
25
+ - Logits/rejected: -2.8242
26
+ - Logits/chosen: -2.8346
27
+ - Use Label: 21379.7969
28
+ - Pred Label: 10682.2041
29
 
30
  ## Model description
31
 
 
49
  - eval_batch_size: 4
50
  - seed: 42
51
  - distributed_type: multi-GPU
52
+ - num_devices: 2
53
+ - gradient_accumulation_steps: 8
54
  - total_train_batch_size: 64
55
+ - total_eval_batch_size: 8
56
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
57
  - lr_scheduler_type: linear
58
  - lr_scheduler_warmup_ratio: 0.1
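As a quick sanity check on the updated hyperparameters above, the effective batch sizes still factor as per-device batch size × num_devices × gradient_accumulation_steps. A minimal sketch (the per-device train batch size of 4 is inferred from the totals, not stated in the card):

```python
# Sanity check on the effective batch sizes implied by the updated hyperparameters.
num_devices = 2
gradient_accumulation_steps = 8
per_device_train_batch_size = 4   # inferred: 64 / (2 * 8); not listed in the hunk
per_device_eval_batch_size = 4    # listed as eval_batch_size

total_train = per_device_train_batch_size * num_devices * gradient_accumulation_steps
total_eval = per_device_eval_batch_size * num_devices

assert total_train == 64  # matches total_train_batch_size
assert total_eval == 8    # matches total_eval_batch_size
```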
 
60
 
61
  ### Training results
62
 
63
+ | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen | Use Label | Pred Label |
64
+ |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|:----------:|:----------:|
65
+ | 0.6708 | 1.0 | 955 | 0.6688 | -0.0034 | -0.6444 | 0.7210 | 0.6410 | -268.7319 | -282.0097 | -2.8242 | -2.8346 | 20769.7969 | 10292.2041 |
66
 
67
 
68
  ### Framework versions
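Since the card describes a PEFT/LoRA adapter trained on top of alignment-handbook/zephyr-7b-sft-full, here is a minimal sketch of loading the saved adapter for inference; the adapter id is a hypothetical placeholder, and `transformers`, `peft`, and `accelerate` are assumed to be installed at the versions listed under Framework versions.

```python
# Minimal sketch: load the base SFT model and apply the saved LoRA adapter.
# The adapter id below is a placeholder, not the actual repo id.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "alignment-handbook/zephyr-7b-sft-full"
adapter_id = "path/or/repo-of-this-adapter"  # hypothetical placeholder

tokenizer = AutoTokenizer.from_pretrained(base_id)
base_model = AutoModelForCausalLM.from_pretrained(base_id, device_map="auto")  # device_map needs accelerate
model = PeftModel.from_pretrained(base_model, adapter_id)

prompt = "What is preference optimization?"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
print(tokenizer.decode(model.generate(**inputs, max_new_tokens=64)[0]))
```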
adapter_config.json CHANGED
@@ -16,9 +16,9 @@
16
  "rank_pattern": {},
17
  "revision": null,
18
  "target_modules": [
19
- "v_proj",
20
- "q_proj",
21
  "k_proj",
 
 
22
  "o_proj"
23
  ],
24
  "task_type": "CAUSAL_LM"
 
16
  "rank_pattern": {},
17
  "revision": null,
18
  "target_modules": [
 
 
19
  "k_proj",
20
+ "q_proj",
21
+ "v_proj",
22
  "o_proj"
23
  ],
24
  "task_type": "CAUSAL_LM"
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0db5c9f90efa4e2c8a7c5688d464cbf070f37a4a09055b7b97ea6ac294486bce
3
  size 218138576
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f58fe82b34998250bf2df2e040ef6472804071478f8eb7dacddfd8fae011c2a3
3
  size 218138576
all_results.json CHANGED
@@ -1,23 +1,23 @@
1
  {
2
  "epoch": 1.0,
3
- "eval_logits/chosen": 1.1882163286209106,
4
- "eval_logits/rejected": 1.1412570476531982,
5
- "eval_logps/chosen": -9152.5400390625,
6
- "eval_logps/rejected": -7844.087890625,
7
- "eval_loss": 0.00963310431689024,
8
- "eval_pred_label": 15276.7763671875,
9
- "eval_rewards/accuracies": 0.46000000834465027,
10
- "eval_rewards/chosen": -886.8281860351562,
11
- "eval_rewards/margins": -128.34939575195312,
12
- "eval_rewards/rejected": -758.4788208007812,
13
- "eval_runtime": 456.0072,
14
  "eval_samples": 2000,
15
- "eval_samples_per_second": 4.386,
16
- "eval_steps_per_second": 0.274,
17
- "eval_use_label": 755.2239990234375,
18
- "train_loss": 0.08153048697566487,
19
- "train_runtime": 25447.1701,
20
  "train_samples": 61135,
21
- "train_samples_per_second": 2.402,
22
- "train_steps_per_second": 0.038
23
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "eval_logits/chosen": -2.83455753326416,
4
+ "eval_logits/rejected": -2.824216604232788,
5
+ "eval_logps/chosen": -282.00970458984375,
6
+ "eval_logps/rejected": -268.7319030761719,
7
+ "eval_loss": 0.6687781810760498,
8
+ "eval_pred_label": 10682.2041015625,
9
+ "eval_rewards/accuracies": 0.7210000157356262,
10
+ "eval_rewards/chosen": -0.0034403554163873196,
11
+ "eval_rewards/margins": 0.6410075426101685,
12
+ "eval_rewards/rejected": -0.6444479823112488,
13
+ "eval_runtime": 855.5169,
14
  "eval_samples": 2000,
15
+ "eval_samples_per_second": 2.338,
16
+ "eval_steps_per_second": 0.292,
17
+ "eval_use_label": 21379.796875,
18
+ "train_loss": 0.6626948830969046,
19
+ "train_runtime": 47570.4937,
20
  "train_samples": 61135,
21
+ "train_samples_per_second": 1.285,
22
+ "train_steps_per_second": 0.02
23
  }
eval_results.json CHANGED
@@ -1,18 +1,18 @@
1
  {
2
  "epoch": 1.0,
3
- "eval_logits/chosen": 1.1882163286209106,
4
- "eval_logits/rejected": 1.1412570476531982,
5
- "eval_logps/chosen": -9152.5400390625,
6
- "eval_logps/rejected": -7844.087890625,
7
- "eval_loss": 0.00963310431689024,
8
- "eval_pred_label": 15276.7763671875,
9
- "eval_rewards/accuracies": 0.46000000834465027,
10
- "eval_rewards/chosen": -886.8281860351562,
11
- "eval_rewards/margins": -128.34939575195312,
12
- "eval_rewards/rejected": -758.4788208007812,
13
- "eval_runtime": 456.0072,
14
  "eval_samples": 2000,
15
- "eval_samples_per_second": 4.386,
16
- "eval_steps_per_second": 0.274,
17
- "eval_use_label": 755.2239990234375
18
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "eval_logits/chosen": -2.83455753326416,
4
+ "eval_logits/rejected": -2.824216604232788,
5
+ "eval_logps/chosen": -282.00970458984375,
6
+ "eval_logps/rejected": -268.7319030761719,
7
+ "eval_loss": 0.6687781810760498,
8
+ "eval_pred_label": 10682.2041015625,
9
+ "eval_rewards/accuracies": 0.7210000157356262,
10
+ "eval_rewards/chosen": -0.0034403554163873196,
11
+ "eval_rewards/margins": 0.6410075426101685,
12
+ "eval_rewards/rejected": -0.6444479823112488,
13
+ "eval_runtime": 855.5169,
14
  "eval_samples": 2000,
15
+ "eval_samples_per_second": 2.338,
16
+ "eval_steps_per_second": 0.292,
17
+ "eval_use_label": 21379.796875
18
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 0.08153048697566487,
4
- "train_runtime": 25447.1701,
5
  "train_samples": 61135,
6
- "train_samples_per_second": 2.402,
7
- "train_steps_per_second": 0.038
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 0.6626948830969046,
4
+ "train_runtime": 47570.4937,
5
  "train_samples": 61135,
6
+ "train_samples_per_second": 1.285,
7
+ "train_steps_per_second": 0.02
8
  }
trainer_state.json CHANGED
@@ -11,10 +11,10 @@
11
  {
12
  "epoch": 0.0,
13
  "learning_rate": 1.0416666666666667e-07,
14
- "logits/chosen": -2.980285167694092,
15
- "logits/rejected": -2.87275767326355,
16
- "logps/chosen": -313.4390563964844,
17
- "logps/rejected": -236.1754150390625,
18
  "loss": 0.6931,
19
  "pred_label": 0.0,
20
  "rewards/accuracies": 0.0,
@@ -22,181 +22,1557 @@
22
  "rewards/margins": 0.0,
23
  "rewards/rejected": 0.0,
24
  "step": 1,
25
- "use_label": 10.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  },
27
  {
28
  "epoch": 0.1,
29
  "learning_rate": 9.953434225844005e-06,
30
- "logits/chosen": -2.817742347717285,
31
- "logits/rejected": -2.827914237976074,
32
- "logps/chosen": -286.3696594238281,
33
- "logps/rejected": -274.020751953125,
34
- "loss": 0.5688,
35
- "pred_label": 371.8611145019531,
36
- "rewards/accuracies": 0.5864899158477783,
37
- "rewards/chosen": -0.10890861600637436,
38
- "rewards/margins": 0.19820529222488403,
39
- "rewards/rejected": -0.307113915681839,
40
  "step": 100,
41
- "use_label": 438.1388854980469
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  },
43
  {
44
  "epoch": 0.21,
45
  "learning_rate": 8.789289871944122e-06,
46
- "logits/chosen": -2.6729955673217773,
47
- "logits/rejected": -2.6674561500549316,
48
- "logps/chosen": -501.3514099121094,
49
- "logps/rejected": -504.921142578125,
50
- "loss": 0.0955,
51
- "pred_label": 1766.35498046875,
52
- "rewards/accuracies": 0.5368750095367432,
53
- "rewards/chosen": -22.256940841674805,
54
- "rewards/margins": 2.181300401687622,
55
- "rewards/rejected": -24.438241958618164,
56
  "step": 200,
57
- "use_label": 635.64501953125
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  },
59
  {
60
  "epoch": 0.31,
61
  "learning_rate": 7.625145518044238e-06,
62
- "logits/chosen": -2.2953169345855713,
63
- "logits/rejected": -2.2711739540100098,
64
- "logps/chosen": -912.6821899414062,
65
- "logps/rejected": -962.4336547851562,
66
- "loss": 0.0344,
67
- "pred_label": 3347.25244140625,
68
- "rewards/accuracies": 0.5299999713897705,
69
- "rewards/chosen": -62.8614616394043,
70
- "rewards/margins": 7.994655609130859,
71
- "rewards/rejected": -70.85610961914062,
72
  "step": 300,
73
- "use_label": 654.7474975585938
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  },
75
  {
76
  "epoch": 0.42,
77
  "learning_rate": 6.461001164144355e-06,
78
- "logits/chosen": -1.0074330568313599,
79
- "logits/rejected": -0.970811665058136,
80
- "logps/chosen": -2976.061279296875,
81
- "logps/rejected": -2806.461669921875,
82
- "loss": 0.0188,
83
- "pred_label": 4933.72998046875,
84
- "rewards/accuracies": 0.4650000035762787,
85
- "rewards/chosen": -269.1630554199219,
86
- "rewards/margins": -14.789560317993164,
87
- "rewards/rejected": -254.3734893798828,
88
  "step": 400,
89
- "use_label": 668.27001953125
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  },
91
  {
92
  "epoch": 0.52,
93
  "learning_rate": 5.2968568102444705e-06,
94
- "logits/chosen": -0.2576097249984741,
95
- "logits/rejected": -0.2523376941680908,
96
- "logps/chosen": -5626.99072265625,
97
- "logps/rejected": -5072.259765625,
98
- "loss": 0.0136,
99
- "pred_label": 6517.34765625,
100
- "rewards/accuracies": 0.45500001311302185,
101
- "rewards/chosen": -535.6937866210938,
102
- "rewards/margins": -53.666282653808594,
103
- "rewards/rejected": -482.0274963378906,
104
  "step": 500,
105
- "use_label": 684.6525268554688
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  },
107
  {
108
  "epoch": 0.63,
109
  "learning_rate": 4.132712456344587e-06,
110
- "logits/chosen": -0.003637822810560465,
111
- "logits/rejected": 0.014684724621474743,
112
- "logps/chosen": -6158.3154296875,
113
- "logps/rejected": -5607.77392578125,
114
- "loss": 0.0103,
115
- "pred_label": 8102.7001953125,
116
- "rewards/accuracies": 0.4650000035762787,
117
- "rewards/chosen": -587.3085327148438,
118
- "rewards/margins": -53.05733871459961,
119
- "rewards/rejected": -534.2510986328125,
120
  "step": 600,
121
- "use_label": 699.2999877929688
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  },
123
  {
124
  "epoch": 0.73,
125
  "learning_rate": 2.9685681024447033e-06,
126
- "logits/chosen": 0.1506887823343277,
127
- "logits/rejected": 0.15534333884716034,
128
- "logps/chosen": -6708.4775390625,
129
- "logps/rejected": -5900.45166015625,
130
- "loss": 0.0106,
131
- "pred_label": 9685.60546875,
132
- "rewards/accuracies": 0.4518750011920929,
133
- "rewards/chosen": -642.6917724609375,
134
- "rewards/margins": -77.51276397705078,
135
- "rewards/rejected": -565.1790771484375,
136
  "step": 700,
137
- "use_label": 716.39501953125
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
  },
139
  {
140
  "epoch": 0.84,
141
  "learning_rate": 1.8044237485448196e-06,
142
- "logits/chosen": 0.5320289134979248,
143
- "logits/rejected": 0.5197857022285461,
144
- "logps/chosen": -8210.8046875,
145
- "logps/rejected": -7387.4423828125,
146
- "loss": 0.0088,
147
- "pred_label": 11275.4423828125,
148
- "rewards/accuracies": 0.45124998688697815,
149
- "rewards/chosen": -792.204833984375,
150
- "rewards/margins": -79.6017837524414,
151
- "rewards/rejected": -712.6029663085938,
152
  "step": 800,
153
- "use_label": 726.5574951171875
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
154
  },
155
  {
156
  "epoch": 0.94,
157
  "learning_rate": 6.402793946449361e-07,
158
- "logits/chosen": 0.9689835906028748,
159
- "logits/rejected": 0.9425008296966553,
160
- "logps/chosen": -8653.7041015625,
161
- "logps/rejected": -7719.83642578125,
162
- "loss": 0.0116,
163
- "pred_label": 12861.46484375,
164
- "rewards/accuracies": 0.4625000059604645,
165
- "rewards/chosen": -837.61962890625,
166
- "rewards/margins": -91.26953125,
167
- "rewards/rejected": -746.3499755859375,
168
  "step": 900,
169
- "use_label": 740.5349731445312
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
170
  },
171
  {
172
  "epoch": 1.0,
173
- "eval_logits/chosen": 1.1882163286209106,
174
- "eval_logits/rejected": 1.1412570476531982,
175
- "eval_logps/chosen": -9152.5400390625,
176
- "eval_logps/rejected": -7844.087890625,
177
- "eval_loss": 0.00963310431689024,
178
- "eval_pred_label": 14778.7763671875,
179
- "eval_rewards/accuracies": 0.46000000834465027,
180
- "eval_rewards/chosen": -886.8281860351562,
181
- "eval_rewards/margins": -128.34939575195312,
182
- "eval_rewards/rejected": -758.4788208007812,
183
- "eval_runtime": 456.8983,
184
- "eval_samples_per_second": 4.377,
185
- "eval_steps_per_second": 0.274,
186
- "eval_use_label": 753.2239990234375,
187
  "step": 955
188
  },
189
  {
190
  "epoch": 1.0,
191
  "step": 955,
192
  "total_flos": 0.0,
193
- "train_loss": 0.08153048697566487,
194
- "train_runtime": 25447.1701,
195
- "train_samples_per_second": 2.402,
196
- "train_steps_per_second": 0.038
197
  }
198
  ],
199
- "logging_steps": 100,
200
  "max_steps": 955,
201
  "num_train_epochs": 1,
202
  "save_steps": 50,
 
11
  {
12
  "epoch": 0.0,
13
  "learning_rate": 1.0416666666666667e-07,
14
+ "logits/chosen": -2.899709463119507,
15
+ "logits/rejected": -2.879509687423706,
16
+ "logps/chosen": -314.8815612792969,
17
+ "logps/rejected": -239.785888671875,
18
  "loss": 0.6931,
19
  "pred_label": 0.0,
20
  "rewards/accuracies": 0.0,
 
22
  "rewards/margins": 0.0,
23
  "rewards/rejected": 0.0,
24
  "step": 1,
25
+ "use_label": 18.0
26
+ },
27
+ {
28
+ "epoch": 0.01,
29
+ "learning_rate": 1.0416666666666667e-06,
30
+ "logits/chosen": -2.871338129043579,
31
+ "logits/rejected": -2.8671977519989014,
32
+ "logps/chosen": -304.6894226074219,
33
+ "logps/rejected": -284.7349853515625,
34
+ "loss": 0.6939,
35
+ "pred_label": 0.0,
36
+ "rewards/accuracies": 0.4131944477558136,
37
+ "rewards/chosen": 0.0006341927801258862,
38
+ "rewards/margins": -0.0011922286357730627,
39
+ "rewards/rejected": 0.0018264217069372535,
40
+ "step": 10,
41
+ "use_label": 178.0
42
+ },
43
+ {
44
+ "epoch": 0.02,
45
+ "learning_rate": 2.0833333333333334e-06,
46
+ "logits/chosen": -2.761018991470337,
47
+ "logits/rejected": -2.7763421535491943,
48
+ "logps/chosen": -255.852783203125,
49
+ "logps/rejected": -260.04364013671875,
50
+ "loss": 0.6902,
51
+ "pred_label": 0.0,
52
+ "rewards/accuracies": 0.5531250238418579,
53
+ "rewards/chosen": 0.0032608681358397007,
54
+ "rewards/margins": 0.006093679927289486,
55
+ "rewards/rejected": -0.002832812489941716,
56
+ "step": 20,
57
+ "use_label": 482.0
58
+ },
59
+ {
60
+ "epoch": 0.03,
61
+ "learning_rate": 3.125e-06,
62
+ "logits/chosen": -2.85542631149292,
63
+ "logits/rejected": -2.8387763500213623,
64
+ "logps/chosen": -274.9126892089844,
65
+ "logps/rejected": -252.97470092773438,
66
+ "loss": 0.6909,
67
+ "pred_label": 0.0,
68
+ "rewards/accuracies": 0.565625011920929,
69
+ "rewards/chosen": 0.0062743439339101315,
70
+ "rewards/margins": 0.007127248682081699,
71
+ "rewards/rejected": -0.0008529046317562461,
72
+ "step": 30,
73
+ "use_label": 802.0
74
+ },
75
+ {
76
+ "epoch": 0.04,
77
+ "learning_rate": 4.166666666666667e-06,
78
+ "logits/chosen": -2.829822301864624,
79
+ "logits/rejected": -2.8364169597625732,
80
+ "logps/chosen": -279.52288818359375,
81
+ "logps/rejected": -273.93243408203125,
82
+ "loss": 0.6846,
83
+ "pred_label": 0.0,
84
+ "rewards/accuracies": 0.5687500238418579,
85
+ "rewards/chosen": 0.0057884035632014275,
86
+ "rewards/margins": 0.016727477312088013,
87
+ "rewards/rejected": -0.010939070954918861,
88
+ "step": 40,
89
+ "use_label": 1122.0
90
+ },
91
+ {
92
+ "epoch": 0.05,
93
+ "learning_rate": 5.208333333333334e-06,
94
+ "logits/chosen": -2.8475687503814697,
95
+ "logits/rejected": -2.8291072845458984,
96
+ "logps/chosen": -266.10870361328125,
97
+ "logps/rejected": -257.93243408203125,
98
+ "loss": 0.6748,
99
+ "pred_label": 0.0,
100
+ "rewards/accuracies": 0.5874999761581421,
101
+ "rewards/chosen": 0.009051208384335041,
102
+ "rewards/margins": 0.03577073663473129,
103
+ "rewards/rejected": -0.026719529181718826,
104
+ "step": 50,
105
+ "use_label": 1442.0
106
+ },
107
+ {
108
+ "epoch": 0.06,
109
+ "learning_rate": 6.25e-06,
110
+ "logits/chosen": -2.8435542583465576,
111
+ "logits/rejected": -2.857710838317871,
112
+ "logps/chosen": -308.6976623535156,
113
+ "logps/rejected": -283.2618103027344,
114
+ "loss": 0.6705,
115
+ "pred_label": 0.0,
116
+ "rewards/accuracies": 0.609375,
117
+ "rewards/chosen": 0.015249615535140038,
118
+ "rewards/margins": 0.05724747106432915,
119
+ "rewards/rejected": -0.04199784994125366,
120
+ "step": 60,
121
+ "use_label": 1762.0
122
+ },
123
+ {
124
+ "epoch": 0.07,
125
+ "learning_rate": 7.291666666666667e-06,
126
+ "logits/chosen": -2.8543224334716797,
127
+ "logits/rejected": -2.8573386669158936,
128
+ "logps/chosen": -284.1624755859375,
129
+ "logps/rejected": -268.72540283203125,
130
+ "loss": 0.6493,
131
+ "pred_label": 0.0,
132
+ "rewards/accuracies": 0.6812499761581421,
133
+ "rewards/chosen": 0.021748105064034462,
134
+ "rewards/margins": 0.10904034227132797,
135
+ "rewards/rejected": -0.08729223161935806,
136
+ "step": 70,
137
+ "use_label": 2082.0
138
+ },
139
+ {
140
+ "epoch": 0.08,
141
+ "learning_rate": 8.333333333333334e-06,
142
+ "logits/chosen": -2.8060083389282227,
143
+ "logits/rejected": -2.8031229972839355,
144
+ "logps/chosen": -310.87384033203125,
145
+ "logps/rejected": -279.0975341796875,
146
+ "loss": 0.6288,
147
+ "pred_label": 2.575000047683716,
148
+ "rewards/accuracies": 0.706250011920929,
149
+ "rewards/chosen": 0.010100151412189007,
150
+ "rewards/margins": 0.17576415836811066,
151
+ "rewards/rejected": -0.16566403210163116,
152
+ "step": 80,
153
+ "use_label": 2399.425048828125
154
+ },
155
+ {
156
+ "epoch": 0.09,
157
+ "learning_rate": 9.375000000000001e-06,
158
+ "logits/chosen": -2.79826283454895,
159
+ "logits/rejected": -2.7996926307678223,
160
+ "logps/chosen": -295.39630126953125,
161
+ "logps/rejected": -255.96920776367188,
162
+ "loss": 0.6425,
163
+ "pred_label": 18.225000381469727,
164
+ "rewards/accuracies": 0.6937500238418579,
165
+ "rewards/chosen": 0.03907310217618942,
166
+ "rewards/margins": 0.2187824696302414,
167
+ "rewards/rejected": -0.17970936000347137,
168
+ "step": 90,
169
+ "use_label": 2703.77490234375
170
  },
171
  {
172
  "epoch": 0.1,
173
  "learning_rate": 9.953434225844005e-06,
174
+ "logits/chosen": -2.806579351425171,
175
+ "logits/rejected": -2.791835308074951,
176
+ "logps/chosen": -275.96881103515625,
177
+ "logps/rejected": -271.619140625,
178
+ "loss": 0.6514,
179
+ "pred_label": 45.287498474121094,
180
+ "rewards/accuracies": 0.668749988079071,
181
+ "rewards/chosen": 0.03931427747011185,
182
+ "rewards/margins": 0.21040363609790802,
183
+ "rewards/rejected": -0.17108935117721558,
184
  "step": 100,
185
+ "use_label": 2996.71240234375
186
+ },
187
+ {
188
+ "epoch": 0.12,
189
+ "learning_rate": 9.837019790454017e-06,
190
+ "logits/chosen": -2.802504062652588,
191
+ "logits/rejected": -2.794646739959717,
192
+ "logps/chosen": -273.3879699707031,
193
+ "logps/rejected": -256.89385986328125,
194
+ "loss": 0.6432,
195
+ "pred_label": 74.8375015258789,
196
+ "rewards/accuracies": 0.6937500238418579,
197
+ "rewards/chosen": 0.011993913911283016,
198
+ "rewards/margins": 0.2596678137779236,
199
+ "rewards/rejected": -0.24767383933067322,
200
+ "step": 110,
201
+ "use_label": 3287.16259765625
202
+ },
203
+ {
204
+ "epoch": 0.13,
205
+ "learning_rate": 9.72060535506403e-06,
206
+ "logits/chosen": -2.8181800842285156,
207
+ "logits/rejected": -2.8214244842529297,
208
+ "logps/chosen": -295.31146240234375,
209
+ "logps/rejected": -268.0743713378906,
210
+ "loss": 0.6446,
211
+ "pred_label": 118.8499984741211,
212
+ "rewards/accuracies": 0.6781250238418579,
213
+ "rewards/chosen": 0.035851169377565384,
214
+ "rewards/margins": 0.26491057872772217,
215
+ "rewards/rejected": -0.22905941307544708,
216
+ "step": 120,
217
+ "use_label": 3563.14990234375
218
+ },
219
+ {
220
+ "epoch": 0.14,
221
+ "learning_rate": 9.60419091967404e-06,
222
+ "logits/chosen": -2.8419888019561768,
223
+ "logits/rejected": -2.830252170562744,
224
+ "logps/chosen": -295.4115295410156,
225
+ "logps/rejected": -277.87567138671875,
226
+ "loss": 0.6481,
227
+ "pred_label": 160.16250610351562,
228
+ "rewards/accuracies": 0.6781250238418579,
229
+ "rewards/chosen": 0.014184275642037392,
230
+ "rewards/margins": 0.24397656321525574,
231
+ "rewards/rejected": -0.22979231178760529,
232
+ "step": 130,
233
+ "use_label": 3841.83740234375
234
+ },
235
+ {
236
+ "epoch": 0.15,
237
+ "learning_rate": 9.487776484284052e-06,
238
+ "logits/chosen": -2.814854145050049,
239
+ "logits/rejected": -2.818312644958496,
240
+ "logps/chosen": -267.75347900390625,
241
+ "logps/rejected": -255.11355590820312,
242
+ "loss": 0.6374,
243
+ "pred_label": 208.9875030517578,
244
+ "rewards/accuracies": 0.7093750238418579,
245
+ "rewards/chosen": -0.0010342694586142898,
246
+ "rewards/margins": 0.3312947750091553,
247
+ "rewards/rejected": -0.33232906460762024,
248
+ "step": 140,
249
+ "use_label": 4113.0126953125
250
+ },
251
+ {
252
+ "epoch": 0.16,
253
+ "learning_rate": 9.371362048894065e-06,
254
+ "logits/chosen": -2.8179469108581543,
255
+ "logits/rejected": -2.81597900390625,
256
+ "logps/chosen": -301.8087463378906,
257
+ "logps/rejected": -279.8531799316406,
258
+ "loss": 0.6613,
259
+ "pred_label": 270.86248779296875,
260
+ "rewards/accuracies": 0.659375011920929,
261
+ "rewards/chosen": 0.009522153064608574,
262
+ "rewards/margins": 0.33681994676589966,
263
+ "rewards/rejected": -0.3272978365421295,
264
+ "step": 150,
265
+ "use_label": 4371.1376953125
266
+ },
267
+ {
268
+ "epoch": 0.17,
269
+ "learning_rate": 9.254947613504075e-06,
270
+ "logits/chosen": -2.828568458557129,
271
+ "logits/rejected": -2.789271354675293,
272
+ "logps/chosen": -288.80511474609375,
273
+ "logps/rejected": -278.41839599609375,
274
+ "loss": 0.6349,
275
+ "pred_label": 344.1625061035156,
276
+ "rewards/accuracies": 0.7406250238418579,
277
+ "rewards/chosen": 0.11308002471923828,
278
+ "rewards/margins": 0.451881468296051,
279
+ "rewards/rejected": -0.33880144357681274,
280
+ "step": 160,
281
+ "use_label": 4617.83740234375
282
+ },
283
+ {
284
+ "epoch": 0.18,
285
+ "learning_rate": 9.138533178114087e-06,
286
+ "logits/chosen": -2.8365304470062256,
287
+ "logits/rejected": -2.822173595428467,
288
+ "logps/chosen": -289.2629089355469,
289
+ "logps/rejected": -254.9129180908203,
290
+ "loss": 0.6585,
291
+ "pred_label": 416.25,
292
+ "rewards/accuracies": 0.706250011920929,
293
+ "rewards/chosen": 0.056967057287693024,
294
+ "rewards/margins": 0.447601854801178,
295
+ "rewards/rejected": -0.39063477516174316,
296
+ "step": 170,
297
+ "use_label": 4865.75
298
+ },
299
+ {
300
+ "epoch": 0.19,
301
+ "learning_rate": 9.022118742724098e-06,
302
+ "logits/chosen": -2.8115687370300293,
303
+ "logits/rejected": -2.8006014823913574,
304
+ "logps/chosen": -246.71243286132812,
305
+ "logps/rejected": -266.5823059082031,
306
+ "loss": 0.6515,
307
+ "pred_label": 510.7749938964844,
308
+ "rewards/accuracies": 0.675000011920929,
309
+ "rewards/chosen": 0.02543136477470398,
310
+ "rewards/margins": 0.4450693726539612,
311
+ "rewards/rejected": -0.4196379780769348,
312
+ "step": 180,
313
+ "use_label": 5091.22509765625
314
+ },
315
+ {
316
+ "epoch": 0.2,
317
+ "learning_rate": 8.90570430733411e-06,
318
+ "logits/chosen": -2.8512680530548096,
319
+ "logits/rejected": -2.8256707191467285,
320
+ "logps/chosen": -304.07403564453125,
321
+ "logps/rejected": -274.03021240234375,
322
+ "loss": 0.643,
323
+ "pred_label": 602.5,
324
+ "rewards/accuracies": 0.7562500238418579,
325
+ "rewards/chosen": 0.13308081030845642,
326
+ "rewards/margins": 0.5033096671104431,
327
+ "rewards/rejected": -0.3702288568019867,
328
+ "step": 190,
329
+ "use_label": 5319.5
330
  },
331
  {
332
  "epoch": 0.21,
333
  "learning_rate": 8.789289871944122e-06,
334
+ "logits/chosen": -2.8375296592712402,
335
+ "logits/rejected": -2.8491904735565186,
336
+ "logps/chosen": -268.386962890625,
337
+ "logps/rejected": -265.6180725097656,
338
+ "loss": 0.6502,
339
+ "pred_label": 694.9249877929688,
340
+ "rewards/accuracies": 0.715624988079071,
341
+ "rewards/chosen": 0.07495273649692535,
342
+ "rewards/margins": 0.48230332136154175,
343
+ "rewards/rejected": -0.4073505997657776,
344
  "step": 200,
345
+ "use_label": 5547.0751953125
346
+ },
347
+ {
348
+ "epoch": 0.22,
349
+ "learning_rate": 8.672875436554133e-06,
350
+ "logits/chosen": -2.8025362491607666,
351
+ "logits/rejected": -2.813215494155884,
352
+ "logps/chosen": -282.7426452636719,
353
+ "logps/rejected": -268.80078125,
354
+ "loss": 0.65,
355
+ "pred_label": 789.7625122070312,
356
+ "rewards/accuracies": 0.715624988079071,
357
+ "rewards/chosen": 0.07436653971672058,
358
+ "rewards/margins": 0.490271657705307,
359
+ "rewards/rejected": -0.41590508818626404,
360
+ "step": 210,
361
+ "use_label": 5772.2373046875
362
+ },
363
+ {
364
+ "epoch": 0.23,
365
+ "learning_rate": 8.556461001164145e-06,
366
+ "logits/chosen": -2.8301258087158203,
367
+ "logits/rejected": -2.8380088806152344,
368
+ "logps/chosen": -277.80615234375,
369
+ "logps/rejected": -272.68377685546875,
370
+ "loss": 0.6534,
371
+ "pred_label": 899.75,
372
+ "rewards/accuracies": 0.71875,
373
+ "rewards/chosen": 0.07161492854356766,
374
+ "rewards/margins": 0.5100919604301453,
375
+ "rewards/rejected": -0.4384769797325134,
376
+ "step": 220,
377
+ "use_label": 5982.25
378
+ },
379
+ {
380
+ "epoch": 0.24,
381
+ "learning_rate": 8.440046565774158e-06,
382
+ "logits/chosen": -2.8242669105529785,
383
+ "logits/rejected": -2.835315465927124,
384
+ "logps/chosen": -309.482177734375,
385
+ "logps/rejected": -273.8406677246094,
386
+ "loss": 0.6632,
387
+ "pred_label": 1013.2999877929688,
388
+ "rewards/accuracies": 0.668749988079071,
389
+ "rewards/chosen": 0.012543338350951672,
390
+ "rewards/margins": 0.49393802881240845,
391
+ "rewards/rejected": -0.4813947081565857,
392
+ "step": 230,
393
+ "use_label": 6188.7001953125
394
+ },
395
+ {
396
+ "epoch": 0.25,
397
+ "learning_rate": 8.323632130384168e-06,
398
+ "logits/chosen": -2.7957725524902344,
399
+ "logits/rejected": -2.776923894882202,
400
+ "logps/chosen": -299.4118957519531,
401
+ "logps/rejected": -264.6926574707031,
402
+ "loss": 0.6664,
403
+ "pred_label": 1130.2249755859375,
404
+ "rewards/accuracies": 0.731249988079071,
405
+ "rewards/chosen": 0.08671466261148453,
406
+ "rewards/margins": 0.5532404780387878,
407
+ "rewards/rejected": -0.4665258526802063,
408
+ "step": 240,
409
+ "use_label": 6391.77490234375
410
+ },
411
+ {
412
+ "epoch": 0.26,
413
+ "learning_rate": 8.20721769499418e-06,
414
+ "logits/chosen": -2.7934088706970215,
415
+ "logits/rejected": -2.7692036628723145,
416
+ "logps/chosen": -300.4549255371094,
417
+ "logps/rejected": -288.0910949707031,
418
+ "loss": 0.6608,
419
+ "pred_label": 1240.574951171875,
420
+ "rewards/accuracies": 0.737500011920929,
421
+ "rewards/chosen": 0.06411644071340561,
422
+ "rewards/margins": 0.5871235132217407,
423
+ "rewards/rejected": -0.5230070948600769,
424
+ "step": 250,
425
+ "use_label": 6601.4248046875
426
+ },
427
+ {
428
+ "epoch": 0.27,
429
+ "learning_rate": 8.090803259604193e-06,
430
+ "logits/chosen": -2.8178822994232178,
431
+ "logits/rejected": -2.797152519226074,
432
+ "logps/chosen": -278.56048583984375,
433
+ "logps/rejected": -266.2746887207031,
434
+ "loss": 0.6506,
435
+ "pred_label": 1338.4625244140625,
436
+ "rewards/accuracies": 0.6781250238418579,
437
+ "rewards/chosen": -0.02512388862669468,
438
+ "rewards/margins": 0.4762639105319977,
439
+ "rewards/rejected": -0.5013878345489502,
440
+ "step": 260,
441
+ "use_label": 6823.53759765625
442
+ },
443
+ {
444
+ "epoch": 0.28,
445
+ "learning_rate": 7.974388824214203e-06,
446
+ "logits/chosen": -2.7823479175567627,
447
+ "logits/rejected": -2.7818832397460938,
448
+ "logps/chosen": -274.42913818359375,
449
+ "logps/rejected": -266.7970886230469,
450
+ "loss": 0.6585,
451
+ "pred_label": 1452.3499755859375,
452
+ "rewards/accuracies": 0.7281249761581421,
453
+ "rewards/chosen": 0.012837971560657024,
454
+ "rewards/margins": 0.6269992589950562,
455
+ "rewards/rejected": -0.6141613125801086,
456
+ "step": 270,
457
+ "use_label": 7029.64990234375
458
+ },
459
+ {
460
+ "epoch": 0.29,
461
+ "learning_rate": 7.857974388824214e-06,
462
+ "logits/chosen": -2.872814178466797,
463
+ "logits/rejected": -2.85591459274292,
464
+ "logps/chosen": -293.08001708984375,
465
+ "logps/rejected": -255.07369995117188,
466
+ "loss": 0.6564,
467
+ "pred_label": 1567.074951171875,
468
+ "rewards/accuracies": 0.7437499761581421,
469
+ "rewards/chosen": 0.08200586587190628,
470
+ "rewards/margins": 0.6136232614517212,
471
+ "rewards/rejected": -0.5316173434257507,
472
+ "step": 280,
473
+ "use_label": 7234.9248046875
474
+ },
475
+ {
476
+ "epoch": 0.3,
477
+ "learning_rate": 7.741559953434226e-06,
478
+ "logits/chosen": -2.829739809036255,
479
+ "logits/rejected": -2.848804473876953,
480
+ "logps/chosen": -289.7538146972656,
481
+ "logps/rejected": -243.77880859375,
482
+ "loss": 0.6518,
483
+ "pred_label": 1677.5999755859375,
484
+ "rewards/accuracies": 0.734375,
485
+ "rewards/chosen": 0.04277036339044571,
486
+ "rewards/margins": 0.530608057975769,
487
+ "rewards/rejected": -0.48783770203590393,
488
+ "step": 290,
489
+ "use_label": 7444.39990234375
490
  },
491
  {
492
  "epoch": 0.31,
493
  "learning_rate": 7.625145518044238e-06,
494
+ "logits/chosen": -2.827993154525757,
495
+ "logits/rejected": -2.820228338241577,
496
+ "logps/chosen": -291.6603088378906,
497
+ "logps/rejected": -252.08853149414062,
498
+ "loss": 0.6598,
499
+ "pred_label": 1791.0250244140625,
500
+ "rewards/accuracies": 0.6968749761581421,
501
+ "rewards/chosen": -0.019189920276403427,
502
+ "rewards/margins": 0.5760209560394287,
503
+ "rewards/rejected": -0.5952108502388,
504
  "step": 300,
505
+ "use_label": 7650.97509765625
506
+ },
507
+ {
508
+ "epoch": 0.32,
509
+ "learning_rate": 7.50873108265425e-06,
510
+ "logits/chosen": -2.8010551929473877,
511
+ "logits/rejected": -2.775717258453369,
512
+ "logps/chosen": -275.4491271972656,
513
+ "logps/rejected": -273.45440673828125,
514
+ "loss": 0.6603,
515
+ "pred_label": 1909.6875,
516
+ "rewards/accuracies": 0.7250000238418579,
517
+ "rewards/chosen": -0.04083142429590225,
518
+ "rewards/margins": 0.6963170766830444,
519
+ "rewards/rejected": -0.7371484041213989,
520
+ "step": 310,
521
+ "use_label": 7852.3125
522
+ },
523
+ {
524
+ "epoch": 0.33,
525
+ "learning_rate": 7.392316647264262e-06,
526
+ "logits/chosen": -2.834115505218506,
527
+ "logits/rejected": -2.8193936347961426,
528
+ "logps/chosen": -300.43115234375,
529
+ "logps/rejected": -270.88232421875,
530
+ "loss": 0.6711,
531
+ "pred_label": 2038.449951171875,
532
+ "rewards/accuracies": 0.746874988079071,
533
+ "rewards/chosen": 0.041024383157491684,
534
+ "rewards/margins": 0.695600688457489,
535
+ "rewards/rejected": -0.654576301574707,
536
+ "step": 320,
537
+ "use_label": 8043.5498046875
538
+ },
539
+ {
540
+ "epoch": 0.35,
541
+ "learning_rate": 7.275902211874272e-06,
542
+ "logits/chosen": -2.810176134109497,
543
+ "logits/rejected": -2.7961533069610596,
544
+ "logps/chosen": -261.0621032714844,
545
+ "logps/rejected": -249.53463745117188,
546
+ "loss": 0.6833,
547
+ "pred_label": 2174.97509765625,
548
+ "rewards/accuracies": 0.703125,
549
+ "rewards/chosen": -0.025768589228391647,
550
+ "rewards/margins": 0.5799206495285034,
551
+ "rewards/rejected": -0.605689287185669,
552
+ "step": 330,
553
+ "use_label": 8227.025390625
554
+ },
555
+ {
556
+ "epoch": 0.36,
557
+ "learning_rate": 7.1594877764842855e-06,
558
+ "logits/chosen": -2.831383466720581,
559
+ "logits/rejected": -2.8242664337158203,
560
+ "logps/chosen": -295.96441650390625,
561
+ "logps/rejected": -265.889404296875,
562
+ "loss": 0.6752,
563
+ "pred_label": 2300.237548828125,
564
+ "rewards/accuracies": 0.6468750238418579,
565
+ "rewards/chosen": 0.007702559232711792,
566
+ "rewards/margins": 0.4926396310329437,
567
+ "rewards/rejected": -0.48493701219558716,
568
+ "step": 340,
569
+ "use_label": 8421.7626953125
570
+ },
571
+ {
572
+ "epoch": 0.37,
573
+ "learning_rate": 7.043073341094296e-06,
574
+ "logits/chosen": -2.8328559398651123,
575
+ "logits/rejected": -2.8223798274993896,
576
+ "logps/chosen": -288.3695983886719,
577
+ "logps/rejected": -274.58697509765625,
578
+ "loss": 0.6644,
579
+ "pred_label": 2420.324951171875,
580
+ "rewards/accuracies": 0.721875011920929,
581
+ "rewards/chosen": 0.017813527956604958,
582
+ "rewards/margins": 0.5609097480773926,
583
+ "rewards/rejected": -0.5430961847305298,
584
+ "step": 350,
585
+ "use_label": 8621.6748046875
586
+ },
587
+ {
588
+ "epoch": 0.38,
589
+ "learning_rate": 6.9266589057043075e-06,
590
+ "logits/chosen": -2.87721586227417,
591
+ "logits/rejected": -2.87174654006958,
592
+ "logps/chosen": -293.4356689453125,
593
+ "logps/rejected": -276.60235595703125,
594
+ "loss": 0.6666,
595
+ "pred_label": 2544.78759765625,
596
+ "rewards/accuracies": 0.7250000238418579,
597
+ "rewards/chosen": 0.006689542438834906,
598
+ "rewards/margins": 0.6315399408340454,
599
+ "rewards/rejected": -0.6248503923416138,
600
+ "step": 360,
601
+ "use_label": 8817.212890625
602
+ },
603
+ {
604
+ "epoch": 0.39,
605
+ "learning_rate": 6.81024447031432e-06,
606
+ "logits/chosen": -2.8437862396240234,
607
+ "logits/rejected": -2.791590690612793,
608
+ "logps/chosen": -286.032470703125,
609
+ "logps/rejected": -276.44036865234375,
610
+ "loss": 0.6545,
611
+ "pred_label": 2656.925048828125,
612
+ "rewards/accuracies": 0.7437499761581421,
613
+ "rewards/chosen": 0.07002071291208267,
614
+ "rewards/margins": 0.6711146235466003,
615
+ "rewards/rejected": -0.6010940074920654,
616
+ "step": 370,
617
+ "use_label": 9025.0751953125
618
+ },
619
+ {
620
+ "epoch": 0.4,
621
+ "learning_rate": 6.693830034924331e-06,
622
+ "logits/chosen": -2.8383095264434814,
623
+ "logits/rejected": -2.820664644241333,
624
+ "logps/chosen": -281.7953796386719,
625
+ "logps/rejected": -268.8168640136719,
626
+ "loss": 0.6536,
627
+ "pred_label": 2769.987548828125,
628
+ "rewards/accuracies": 0.7406250238418579,
629
+ "rewards/chosen": 0.04205373674631119,
630
+ "rewards/margins": 0.6800293326377869,
631
+ "rewards/rejected": -0.6379756331443787,
632
+ "step": 380,
633
+ "use_label": 9232.0126953125
634
+ },
635
+ {
636
+ "epoch": 0.41,
637
+ "learning_rate": 6.5774155995343425e-06,
638
+ "logits/chosen": -2.7942054271698,
639
+ "logits/rejected": -2.785630702972412,
640
+ "logps/chosen": -272.2677917480469,
641
+ "logps/rejected": -255.5617218017578,
642
+ "loss": 0.6655,
643
+ "pred_label": 2901.1875,
644
+ "rewards/accuracies": 0.699999988079071,
645
+ "rewards/chosen": 0.003805020358413458,
646
+ "rewards/margins": 0.6898488998413086,
647
+ "rewards/rejected": -0.6860438585281372,
648
+ "step": 390,
649
+ "use_label": 9420.8125
650
  },
651
  {
652
  "epoch": 0.42,
653
  "learning_rate": 6.461001164144355e-06,
654
+ "logits/chosen": -2.833627223968506,
655
+ "logits/rejected": -2.8359274864196777,
656
+ "logps/chosen": -292.16717529296875,
657
+ "logps/rejected": -257.1654968261719,
658
+ "loss": 0.6586,
659
+ "pred_label": 3028.52490234375,
660
+ "rewards/accuracies": 0.737500011920929,
661
+ "rewards/chosen": 0.10834367573261261,
662
+ "rewards/margins": 0.6510626077651978,
663
+ "rewards/rejected": -0.5427189469337463,
664
  "step": 400,
665
+ "use_label": 9613.474609375
666
+ },
667
+ {
668
+ "epoch": 0.43,
669
+ "learning_rate": 6.344586728754366e-06,
670
+ "logits/chosen": -2.8230559825897217,
671
+ "logits/rejected": -2.8199567794799805,
672
+ "logps/chosen": -283.50323486328125,
673
+ "logps/rejected": -262.53216552734375,
674
+ "loss": 0.6703,
675
+ "pred_label": 3146.237548828125,
676
+ "rewards/accuracies": 0.7250000238418579,
677
+ "rewards/chosen": 0.06537099182605743,
678
+ "rewards/margins": 0.6661577820777893,
679
+ "rewards/rejected": -0.6007868051528931,
680
+ "step": 410,
681
+ "use_label": 9815.7626953125
682
+ },
683
+ {
684
+ "epoch": 0.44,
685
+ "learning_rate": 6.228172293364378e-06,
686
+ "logits/chosen": -2.81683087348938,
687
+ "logits/rejected": -2.7953038215637207,
688
+ "logps/chosen": -268.87017822265625,
689
+ "logps/rejected": -258.7505798339844,
690
+ "loss": 0.6553,
691
+ "pred_label": 3272.28759765625,
692
+ "rewards/accuracies": 0.7124999761581421,
693
+ "rewards/chosen": 0.03196418285369873,
694
+ "rewards/margins": 0.5871380567550659,
695
+ "rewards/rejected": -0.5551738739013672,
696
+ "step": 420,
697
+ "use_label": 10009.712890625
698
+ },
699
+ {
700
+ "epoch": 0.45,
701
+ "learning_rate": 6.11175785797439e-06,
702
+ "logits/chosen": -2.838977575302124,
703
+ "logits/rejected": -2.8169591426849365,
704
+ "logps/chosen": -290.03753662109375,
705
+ "logps/rejected": -266.868896484375,
706
+ "loss": 0.6758,
707
+ "pred_label": 3398.512451171875,
708
+ "rewards/accuracies": 0.6656249761581421,
709
+ "rewards/chosen": 0.028419841080904007,
710
+ "rewards/margins": 0.4861460328102112,
711
+ "rewards/rejected": -0.4577261805534363,
712
+ "step": 430,
713
+ "use_label": 10203.4873046875
714
+ },
715
+ {
716
+ "epoch": 0.46,
717
+ "learning_rate": 5.995343422584401e-06,
718
+ "logits/chosen": -2.849961996078491,
719
+ "logits/rejected": -2.8381643295288086,
720
+ "logps/chosen": -282.6123046875,
721
+ "logps/rejected": -246.48464965820312,
722
+ "loss": 0.6602,
723
+ "pred_label": 3515.35009765625,
724
+ "rewards/accuracies": 0.703125,
725
+ "rewards/chosen": 0.038983773440122604,
726
+ "rewards/margins": 0.5515373945236206,
727
+ "rewards/rejected": -0.5125535726547241,
728
+ "step": 440,
729
+ "use_label": 10406.650390625
730
+ },
731
+ {
732
+ "epoch": 0.47,
733
+ "learning_rate": 5.878928987194412e-06,
734
+ "logits/chosen": -2.749311923980713,
735
+ "logits/rejected": -2.7318615913391113,
736
+ "logps/chosen": -257.08990478515625,
737
+ "logps/rejected": -258.7293395996094,
738
+ "loss": 0.6612,
739
+ "pred_label": 3629.699951171875,
740
+ "rewards/accuracies": 0.734375,
741
+ "rewards/chosen": 0.028371259570121765,
742
+ "rewards/margins": 0.5704750418663025,
743
+ "rewards/rejected": -0.5421038866043091,
744
+ "step": 450,
745
+ "use_label": 10612.2998046875
746
+ },
747
+ {
748
+ "epoch": 0.48,
749
+ "learning_rate": 5.762514551804425e-06,
750
+ "logits/chosen": -2.8554000854492188,
751
+ "logits/rejected": -2.8365120887756348,
752
+ "logps/chosen": -305.9751892089844,
753
+ "logps/rejected": -270.2017517089844,
754
+ "loss": 0.6574,
755
+ "pred_label": 3751.949951171875,
756
+ "rewards/accuracies": 0.7250000238418579,
757
+ "rewards/chosen": -0.021648898720741272,
758
+ "rewards/margins": 0.4913213849067688,
759
+ "rewards/rejected": -0.5129703283309937,
760
+ "step": 460,
761
+ "use_label": 10810.0498046875
762
+ },
763
+ {
764
+ "epoch": 0.49,
765
+ "learning_rate": 5.6461001164144355e-06,
766
+ "logits/chosen": -2.8250269889831543,
767
+ "logits/rejected": -2.8220317363739014,
768
+ "logps/chosen": -295.07611083984375,
769
+ "logps/rejected": -271.61090087890625,
770
+ "loss": 0.6658,
771
+ "pred_label": 3857.625,
772
+ "rewards/accuracies": 0.7281249761581421,
773
+ "rewards/chosen": 0.03160310536623001,
774
+ "rewards/margins": 0.5609144568443298,
775
+ "rewards/rejected": -0.5293112993240356,
776
+ "step": 470,
777
+ "use_label": 11024.375
778
+ },
779
+ {
780
+ "epoch": 0.5,
781
+ "learning_rate": 5.529685681024447e-06,
782
+ "logits/chosen": -2.765904188156128,
783
+ "logits/rejected": -2.7620699405670166,
784
+ "logps/chosen": -253.2978973388672,
785
+ "logps/rejected": -264.97259521484375,
786
+ "loss": 0.674,
787
+ "pred_label": 3982.0,
788
+ "rewards/accuracies": 0.625,
789
+ "rewards/chosen": -0.05188765376806259,
790
+ "rewards/margins": 0.46525877714157104,
791
+ "rewards/rejected": -0.5171464085578918,
792
+ "step": 480,
793
+ "use_label": 11220.0
794
+ },
795
+ {
796
+ "epoch": 0.51,
797
+ "learning_rate": 5.413271245634459e-06,
798
+ "logits/chosen": -2.80226993560791,
799
+ "logits/rejected": -2.7974660396575928,
800
+ "logps/chosen": -284.6823425292969,
801
+ "logps/rejected": -252.20889282226562,
802
+ "loss": 0.6631,
803
+ "pred_label": 4095.4375,
804
+ "rewards/accuracies": 0.706250011920929,
805
+ "rewards/chosen": 0.0527660958468914,
806
+ "rewards/margins": 0.6084800958633423,
807
+ "rewards/rejected": -0.5557140707969666,
808
+ "step": 490,
809
+ "use_label": 11426.5625
810
  },
811
  {
812
  "epoch": 0.52,
813
  "learning_rate": 5.2968568102444705e-06,
814
+ "logits/chosen": -2.8293509483337402,
815
+ "logits/rejected": -2.798262119293213,
816
+ "logps/chosen": -289.0420837402344,
817
+ "logps/rejected": -272.00714111328125,
818
+ "loss": 0.6644,
819
+ "pred_label": 4224.21240234375,
820
+ "rewards/accuracies": 0.721875011920929,
821
+ "rewards/chosen": 0.059389661997556686,
822
+ "rewards/margins": 0.6478527784347534,
823
+ "rewards/rejected": -0.5884631276130676,
824
  "step": 500,
825
+ "use_label": 11617.787109375
826
+ },
827
+ {
828
+ "epoch": 0.53,
829
+ "learning_rate": 5.180442374854482e-06,
830
+ "logits/chosen": -2.844891309738159,
831
+ "logits/rejected": -2.833529233932495,
832
+ "logps/chosen": -275.63726806640625,
833
+ "logps/rejected": -260.7143249511719,
834
+ "loss": 0.6628,
835
+ "pred_label": 4350.9873046875,
836
+ "rewards/accuracies": 0.71875,
837
+ "rewards/chosen": 0.03330535814166069,
838
+ "rewards/margins": 0.6962288618087769,
839
+ "rewards/rejected": -0.6629236340522766,
840
+ "step": 510,
841
+ "use_label": 11811.0126953125
842
+ },
843
+ {
844
+ "epoch": 0.54,
845
+ "learning_rate": 5.064027939464494e-06,
846
+ "logits/chosen": -2.854224681854248,
847
+ "logits/rejected": -2.816175937652588,
848
+ "logps/chosen": -282.9071044921875,
849
+ "logps/rejected": -263.75927734375,
850
+ "loss": 0.6631,
851
+ "pred_label": 4478.27490234375,
852
+ "rewards/accuracies": 0.737500011920929,
853
+ "rewards/chosen": 0.11818108707666397,
854
+ "rewards/margins": 0.7176607251167297,
855
+ "rewards/rejected": -0.5994796752929688,
856
+ "step": 520,
857
+ "use_label": 12003.724609375
858
+ },
859
+ {
860
+ "epoch": 0.55,
861
+ "learning_rate": 4.947613504074506e-06,
862
+ "logits/chosen": -2.861614942550659,
863
+ "logits/rejected": -2.8368430137634277,
864
+ "logps/chosen": -289.9693603515625,
865
+ "logps/rejected": -271.81524658203125,
866
+ "loss": 0.6546,
867
+ "pred_label": 4613.4873046875,
868
+ "rewards/accuracies": 0.7124999761581421,
869
+ "rewards/chosen": 0.12808291614055634,
870
+ "rewards/margins": 0.721964955329895,
871
+ "rewards/rejected": -0.5938820838928223,
872
+ "step": 530,
873
+ "use_label": 12188.5126953125
874
+ },
875
+ {
876
+ "epoch": 0.57,
877
+ "learning_rate": 4.831199068684517e-06,
878
+ "logits/chosen": -2.8578391075134277,
879
+ "logits/rejected": -2.835608959197998,
880
+ "logps/chosen": -290.48114013671875,
881
+ "logps/rejected": -267.905029296875,
882
+ "loss": 0.6684,
883
+ "pred_label": 4741.5751953125,
884
+ "rewards/accuracies": 0.737500011920929,
885
+ "rewards/chosen": 0.10645272582769394,
886
+ "rewards/margins": 0.6809743642807007,
887
+ "rewards/rejected": -0.5745216608047485,
888
+ "step": 540,
889
+ "use_label": 12380.4248046875
890
+ },
891
+ {
892
+ "epoch": 0.58,
893
+ "learning_rate": 4.714784633294529e-06,
894
+ "logits/chosen": -2.83345627784729,
895
+ "logits/rejected": -2.812713623046875,
896
+ "logps/chosen": -281.4727783203125,
897
+ "logps/rejected": -274.65338134765625,
898
+ "loss": 0.682,
899
+ "pred_label": 4869.96240234375,
900
+ "rewards/accuracies": 0.6968749761581421,
901
+ "rewards/chosen": 0.036335308104753494,
902
+ "rewards/margins": 0.6714037656784058,
903
+ "rewards/rejected": -0.6350685358047485,
904
+ "step": 550,
905
+ "use_label": 12572.037109375
906
+ },
907
+ {
908
+ "epoch": 0.59,
909
+ "learning_rate": 4.598370197904541e-06,
910
+ "logits/chosen": -2.788679838180542,
911
+ "logits/rejected": -2.7916810512542725,
912
+ "logps/chosen": -296.66680908203125,
913
+ "logps/rejected": -242.8921661376953,
914
+ "loss": 0.6684,
915
+ "pred_label": 4992.08740234375,
916
+ "rewards/accuracies": 0.668749988079071,
917
+ "rewards/chosen": 0.01064818911254406,
918
+ "rewards/margins": 0.577103316783905,
919
+ "rewards/rejected": -0.5664551854133606,
920
+ "step": 560,
921
+ "use_label": 12769.912109375
922
+ },
923
+ {
924
+ "epoch": 0.6,
925
+ "learning_rate": 4.481955762514552e-06,
926
+ "logits/chosen": -2.789961814880371,
927
+ "logits/rejected": -2.769895553588867,
928
+ "logps/chosen": -268.1661071777344,
929
+ "logps/rejected": -256.2967834472656,
930
+ "loss": 0.6717,
931
+ "pred_label": 5114.22509765625,
932
+ "rewards/accuracies": 0.7093750238418579,
933
+ "rewards/chosen": 0.04825209826231003,
934
+ "rewards/margins": 0.5761987566947937,
935
+ "rewards/rejected": -0.5279466509819031,
936
+ "step": 570,
937
+ "use_label": 12967.775390625
938
+ },
939
+ {
940
+ "epoch": 0.61,
941
+ "learning_rate": 4.365541327124564e-06,
942
+ "logits/chosen": -2.8300013542175293,
943
+ "logits/rejected": -2.822327136993408,
944
+ "logps/chosen": -273.7094421386719,
945
+ "logps/rejected": -265.09967041015625,
946
+ "loss": 0.6591,
947
+ "pred_label": 5237.35009765625,
948
+ "rewards/accuracies": 0.7437499761581421,
949
+ "rewards/chosen": 0.062391918152570724,
950
+ "rewards/margins": 0.5977689623832703,
951
+ "rewards/rejected": -0.535377025604248,
952
+ "step": 580,
953
+ "use_label": 13164.650390625
954
+ },
955
+ {
956
+ "epoch": 0.62,
957
+ "learning_rate": 4.249126891734576e-06,
958
+ "logits/chosen": -2.795945644378662,
959
+ "logits/rejected": -2.7982325553894043,
960
+ "logps/chosen": -287.38848876953125,
961
+ "logps/rejected": -279.43511962890625,
962
+ "loss": 0.6574,
963
+ "pred_label": 5347.78759765625,
964
+ "rewards/accuracies": 0.703125,
965
+ "rewards/chosen": 0.05194039270281792,
966
+ "rewards/margins": 0.595386266708374,
967
+ "rewards/rejected": -0.5434459447860718,
968
+ "step": 590,
969
+ "use_label": 13374.212890625
970
  },
971
  {
972
  "epoch": 0.63,
973
  "learning_rate": 4.132712456344587e-06,
974
+ "logits/chosen": -2.8118224143981934,
975
+ "logits/rejected": -2.8233461380004883,
976
+ "logps/chosen": -295.39080810546875,
977
+ "logps/rejected": -294.5987854003906,
978
+ "loss": 0.6624,
979
+ "pred_label": 5470.6376953125,
980
+ "rewards/accuracies": 0.753125011920929,
981
+ "rewards/chosen": 0.13325130939483643,
982
+ "rewards/margins": 0.7225804924964905,
983
+ "rewards/rejected": -0.5893291234970093,
984
  "step": 600,
985
+ "use_label": 13571.3623046875
986
+ },
987
+ {
988
+ "epoch": 0.64,
989
+ "learning_rate": 4.0162980209545985e-06,
990
+ "logits/chosen": -2.838010311126709,
991
+ "logits/rejected": -2.822805166244507,
992
+ "logps/chosen": -297.541259765625,
993
+ "logps/rejected": -260.2327575683594,
994
+ "loss": 0.6611,
995
+ "pred_label": 5597.2373046875,
996
+ "rewards/accuracies": 0.699999988079071,
997
+ "rewards/chosen": 0.11663957685232162,
998
+ "rewards/margins": 0.5966584086418152,
999
+ "rewards/rejected": -0.48001885414123535,
1000
+ "step": 610,
1001
+ "use_label": 13764.7626953125
1002
+ },
1003
+ {
1004
+ "epoch": 0.65,
1005
+ "learning_rate": 3.899883585564611e-06,
1006
+ "logits/chosen": -2.8419346809387207,
1007
+ "logits/rejected": -2.7907586097717285,
1008
+ "logps/chosen": -282.9375,
1009
+ "logps/rejected": -255.97988891601562,
1010
+ "loss": 0.6553,
1011
+ "pred_label": 5717.875,
1012
+ "rewards/accuracies": 0.7281249761581421,
1013
+ "rewards/chosen": 0.10196954011917114,
1014
+ "rewards/margins": 0.6096292734146118,
1015
+ "rewards/rejected": -0.5076597332954407,
1016
+ "step": 620,
1017
+ "use_label": 13964.125
1018
+ },
1019
+ {
1020
+ "epoch": 0.66,
1021
+ "learning_rate": 3.7834691501746217e-06,
1022
+ "logits/chosen": -2.7976930141448975,
1023
+ "logits/rejected": -2.79984974861145,
1024
+ "logps/chosen": -277.3227844238281,
1025
+ "logps/rejected": -275.438720703125,
1026
+ "loss": 0.6624,
1027
+ "pred_label": 5844.72509765625,
1028
+ "rewards/accuracies": 0.7124999761581421,
1029
+ "rewards/chosen": 0.0894940048456192,
1030
+ "rewards/margins": 0.6564904451370239,
1031
+ "rewards/rejected": -0.5669963955879211,
1032
+ "step": 630,
1033
+ "use_label": 14157.275390625
1034
+ },
1035
+ {
1036
+ "epoch": 0.67,
1037
+ "learning_rate": 3.6670547147846336e-06,
1038
+ "logits/chosen": -2.8346762657165527,
1039
+ "logits/rejected": -2.7844390869140625,
1040
+ "logps/chosen": -263.27545166015625,
1041
+ "logps/rejected": -244.00167846679688,
1042
+ "loss": 0.6712,
1043
+ "pred_label": 5966.8251953125,
1044
+ "rewards/accuracies": 0.715624988079071,
1045
+ "rewards/chosen": 0.057781945914030075,
1046
+ "rewards/margins": 0.6433111429214478,
1047
+ "rewards/rejected": -0.5855292081832886,
1048
+ "step": 640,
1049
+ "use_label": 14355.1748046875
1050
+ },
1051
+ {
1052
+ "epoch": 0.68,
1053
+ "learning_rate": 3.5506402793946454e-06,
1054
+ "logits/chosen": -2.7912142276763916,
1055
+ "logits/rejected": -2.781743049621582,
1056
+ "logps/chosen": -287.99041748046875,
1057
+ "logps/rejected": -280.1361389160156,
1058
+ "loss": 0.6597,
1059
+ "pred_label": 6101.3876953125,
1060
+ "rewards/accuracies": 0.7437499761581421,
1061
+ "rewards/chosen": 0.06794126331806183,
1062
+ "rewards/margins": 0.7323317527770996,
1063
+ "rewards/rejected": -0.6643904447555542,
1064
+ "step": 650,
1065
+ "use_label": 14540.6123046875
1066
+ },
1067
+ {
1068
+ "epoch": 0.69,
1069
+ "learning_rate": 3.434225844004657e-06,
1070
+ "logits/chosen": -2.828040599822998,
1071
+ "logits/rejected": -2.811732530593872,
1072
+ "logps/chosen": -277.9667663574219,
1073
+ "logps/rejected": -246.4541015625,
1074
+ "loss": 0.659,
1075
+ "pred_label": 6221.6875,
1076
+ "rewards/accuracies": 0.6937500238418579,
1077
+ "rewards/chosen": -0.013715244829654694,
1078
+ "rewards/margins": 0.5462251901626587,
1079
+ "rewards/rejected": -0.5599404573440552,
1080
+ "step": 660,
1081
+ "use_label": 14740.3125
1082
+ },
1083
+ {
1084
+ "epoch": 0.7,
1085
+ "learning_rate": 3.3178114086146686e-06,
1086
+ "logits/chosen": -2.8130388259887695,
1087
+ "logits/rejected": -2.787423610687256,
1088
+ "logps/chosen": -279.0962829589844,
1089
+ "logps/rejected": -251.55233764648438,
1090
+ "loss": 0.6513,
1091
+ "pred_label": 6336.22509765625,
1092
+ "rewards/accuracies": 0.731249988079071,
1093
+ "rewards/chosen": 0.028571803122758865,
1094
+ "rewards/margins": 0.6603085398674011,
1095
+ "rewards/rejected": -0.6317366361618042,
1096
+ "step": 670,
1097
+ "use_label": 14945.775390625
1098
+ },
1099
+ {
1100
+ "epoch": 0.71,
1101
+ "learning_rate": 3.20139697322468e-06,
1102
+ "logits/chosen": -2.8303864002227783,
1103
+ "logits/rejected": -2.8300769329071045,
1104
+ "logps/chosen": -299.0453186035156,
1105
+ "logps/rejected": -273.7367858886719,
1106
+ "loss": 0.6679,
1107
+ "pred_label": 6456.0498046875,
1108
+ "rewards/accuracies": 0.6656249761581421,
1109
+ "rewards/chosen": 0.049853820353746414,
1110
+ "rewards/margins": 0.5102404356002808,
1111
+ "rewards/rejected": -0.46038660407066345,
1112
+ "step": 680,
1113
+ "use_label": 15145.9501953125
1114
+ },
1115
+ {
1116
+ "epoch": 0.72,
1117
+ "learning_rate": 3.0849825378346914e-06,
1118
+ "logits/chosen": -2.812546968460083,
1119
+ "logits/rejected": -2.7978675365448,
1120
+ "logps/chosen": -281.13677978515625,
1121
+ "logps/rejected": -241.3180694580078,
1122
+ "loss": 0.6575,
1123
+ "pred_label": 6579.16259765625,
1124
+ "rewards/accuracies": 0.746874988079071,
1125
+ "rewards/chosen": 0.095861054956913,
1126
+ "rewards/margins": 0.6724832653999329,
1127
+ "rewards/rejected": -0.5766221880912781,
1128
+ "step": 690,
1129
+ "use_label": 15342.837890625
1130
  },
1131
  {
1132
  "epoch": 0.73,
1133
  "learning_rate": 2.9685681024447033e-06,
1134
+ "logits/chosen": -2.8043007850646973,
1135
+ "logits/rejected": -2.7992827892303467,
1136
+ "logps/chosen": -268.5847473144531,
1137
+ "logps/rejected": -251.1324462890625,
1138
+ "loss": 0.6649,
1139
+ "pred_label": 6708.8623046875,
1140
+ "rewards/accuracies": 0.7124999761581421,
1141
+ "rewards/chosen": 0.051186300814151764,
1142
+ "rewards/margins": 0.675644040107727,
1143
+ "rewards/rejected": -0.6244576573371887,
1144
  "step": 700,
1145
+ "use_label": 15533.1376953125
1146
+ },
1147
+ {
1148
+ "epoch": 0.74,
1149
+ "learning_rate": 2.852153667054715e-06,
1150
+ "logits/chosen": -2.8468000888824463,
1151
+ "logits/rejected": -2.833977222442627,
1152
+ "logps/chosen": -274.0541076660156,
1153
+ "logps/rejected": -243.0079345703125,
1154
+ "loss": 0.6775,
1155
+ "pred_label": 6838.0126953125,
1156
+ "rewards/accuracies": 0.715624988079071,
1157
+ "rewards/chosen": 0.1383003294467926,
1158
+ "rewards/margins": 0.6733654737472534,
1159
+ "rewards/rejected": -0.535065233707428,
1160
+ "step": 710,
1161
+ "use_label": 15723.9873046875
1162
+ },
1163
+ {
1164
+ "epoch": 0.75,
1165
+ "learning_rate": 2.735739231664727e-06,
1166
+ "logits/chosen": -2.7949423789978027,
1167
+ "logits/rejected": -2.8083877563476562,
1168
+ "logps/chosen": -275.5536804199219,
1169
+ "logps/rejected": -253.4557342529297,
1170
+ "loss": 0.6622,
1171
+ "pred_label": 6984.41259765625,
1172
+ "rewards/accuracies": 0.7593749761581421,
1173
+ "rewards/chosen": -0.009852488525211811,
1174
+ "rewards/margins": 0.679956316947937,
1175
+ "rewards/rejected": -0.6898088455200195,
1176
+ "step": 720,
1177
+ "use_label": 15897.587890625
1178
+ },
1179
+ {
1180
+ "epoch": 0.76,
1181
+ "learning_rate": 2.6193247962747383e-06,
1182
+ "logits/chosen": -2.8305153846740723,
1183
+ "logits/rejected": -2.8256735801696777,
1184
+ "logps/chosen": -292.13153076171875,
1185
+ "logps/rejected": -250.5773468017578,
1186
+ "loss": 0.658,
1187
+ "pred_label": 7112.52490234375,
1188
+ "rewards/accuracies": 0.7250000238418579,
1189
+ "rewards/chosen": 0.06651445478200912,
1190
+ "rewards/margins": 0.6194807887077332,
1191
+ "rewards/rejected": -0.5529662370681763,
1192
+ "step": 730,
1193
+ "use_label": 16089.474609375
1194
+ },
1195
+ {
1196
+ "epoch": 0.77,
1197
+ "learning_rate": 2.5029103608847497e-06,
1198
+ "logits/chosen": -2.8071470260620117,
1199
+ "logits/rejected": -2.826627016067505,
1200
+ "logps/chosen": -299.8040771484375,
1201
+ "logps/rejected": -278.0760192871094,
1202
+ "loss": 0.6779,
1203
+ "pred_label": 7236.35009765625,
1204
+ "rewards/accuracies": 0.7124999761581421,
1205
+ "rewards/chosen": 0.08306702226400375,
1206
+ "rewards/margins": 0.7367380261421204,
1207
+ "rewards/rejected": -0.6536709070205688,
1208
+ "step": 740,
1209
+ "use_label": 16285.650390625
1210
+ },
1211
+ {
1212
+ "epoch": 0.79,
1213
+ "learning_rate": 2.3864959254947616e-06,
1214
+ "logits/chosen": -2.8130180835723877,
1215
+ "logits/rejected": -2.7900407314300537,
1216
+ "logps/chosen": -262.660888671875,
1217
+ "logps/rejected": -266.95574951171875,
1218
+ "loss": 0.6657,
1219
+ "pred_label": 7375.4375,
1220
+ "rewards/accuracies": 0.746874988079071,
1221
+ "rewards/chosen": 0.02680896781384945,
1222
+ "rewards/margins": 0.7038629651069641,
1223
+ "rewards/rejected": -0.6770539879798889,
1224
+ "step": 750,
1225
+ "use_label": 16466.5625
1226
+ },
1227
+ {
1228
+ "epoch": 0.8,
1229
+ "learning_rate": 2.2700814901047734e-06,
1230
+ "logits/chosen": -2.8128421306610107,
1231
+ "logits/rejected": -2.8296356201171875,
1232
+ "logps/chosen": -295.57342529296875,
1233
+ "logps/rejected": -269.14324951171875,
1234
+ "loss": 0.6628,
1235
+ "pred_label": 7496.8623046875,
1236
+ "rewards/accuracies": 0.7406250238418579,
1237
+ "rewards/chosen": 0.08986032009124756,
1238
+ "rewards/margins": 0.7447289228439331,
1239
+ "rewards/rejected": -0.6548686027526855,
1240
+ "step": 760,
1241
+ "use_label": 16665.13671875
1242
+ },
1243
+ {
1244
+ "epoch": 0.81,
1245
+ "learning_rate": 2.153667054714785e-06,
1246
+ "logits/chosen": -2.8838798999786377,
1247
+ "logits/rejected": -2.8680777549743652,
1248
+ "logps/chosen": -316.2966003417969,
1249
+ "logps/rejected": -290.4427490234375,
1250
+ "loss": 0.6869,
1251
+ "pred_label": 7625.2373046875,
1252
+ "rewards/accuracies": 0.7093750238418579,
1253
+ "rewards/chosen": 0.10155443847179413,
1254
+ "rewards/margins": 0.6839195489883423,
1255
+ "rewards/rejected": -0.5823651552200317,
1256
+ "step": 770,
1257
+ "use_label": 16856.76171875
1258
+ },
1259
+ {
1260
+ "epoch": 0.82,
1261
+ "learning_rate": 2.0372526193247966e-06,
1262
+ "logits/chosen": -2.8313162326812744,
1263
+ "logits/rejected": -2.8232970237731934,
1264
+ "logps/chosen": -285.3306579589844,
1265
+ "logps/rejected": -265.0252990722656,
1266
+ "loss": 0.6586,
1267
+ "pred_label": 7761.66259765625,
1268
+ "rewards/accuracies": 0.706250011920929,
1269
+ "rewards/chosen": 0.05353979393839836,
1270
+ "rewards/margins": 0.6505471467971802,
1271
+ "rewards/rejected": -0.5970073342323303,
1272
+ "step": 780,
1273
+ "use_label": 17040.337890625
1274
+ },
1275
+ {
1276
+ "epoch": 0.83,
1277
+ "learning_rate": 1.920838183934808e-06,
1278
+ "logits/chosen": -2.834827423095703,
1279
+ "logits/rejected": -2.8339152336120605,
1280
+ "logps/chosen": -263.69171142578125,
1281
+ "logps/rejected": -264.4107666015625,
1282
+ "loss": 0.6639,
1283
+ "pred_label": 7894.5,
1284
+ "rewards/accuracies": 0.7124999761581421,
1285
+ "rewards/chosen": -0.022974440827965736,
1286
+ "rewards/margins": 0.6350787878036499,
1287
+ "rewards/rejected": -0.6580532193183899,
1288
+ "step": 790,
1289
+ "use_label": 17227.5
1290
  },
1291
  {
1292
  "epoch": 0.84,
1293
  "learning_rate": 1.8044237485448196e-06,
1294
+ "logits/chosen": -2.804044485092163,
1295
+ "logits/rejected": -2.772721529006958,
1296
+ "logps/chosen": -286.9491271972656,
1297
+ "logps/rejected": -278.8674011230469,
1298
+ "loss": 0.6627,
1299
+ "pred_label": 8027.97509765625,
1300
+ "rewards/accuracies": 0.75,
1301
+ "rewards/chosen": 0.03967855125665665,
1302
+ "rewards/margins": 0.6997824907302856,
1303
+ "rewards/rejected": -0.6601039171218872,
1304
  "step": 800,
1305
+ "use_label": 17414.025390625
1306
+ },
1307
+ {
1308
+ "epoch": 0.85,
1309
+ "learning_rate": 1.6880093131548315e-06,
1310
+ "logits/chosen": -2.8020050525665283,
1311
+ "logits/rejected": -2.8072879314422607,
1312
+ "logps/chosen": -281.62799072265625,
1313
+ "logps/rejected": -244.6085205078125,
1314
+ "loss": 0.6643,
1315
+ "pred_label": 8165.77490234375,
1316
+ "rewards/accuracies": 0.7124999761581421,
1317
+ "rewards/chosen": 0.05538611859083176,
1318
+ "rewards/margins": 0.6941258907318115,
1319
+ "rewards/rejected": -0.6387397050857544,
1320
+ "step": 810,
1321
+ "use_label": 17596.224609375
1322
+ },
1323
+ {
1324
+ "epoch": 0.86,
1325
+ "learning_rate": 1.5715948777648429e-06,
1326
+ "logits/chosen": -2.816628932952881,
1327
+ "logits/rejected": -2.8157966136932373,
1328
+ "logps/chosen": -295.4468078613281,
1329
+ "logps/rejected": -261.2769775390625,
1330
+ "loss": 0.673,
1331
+ "pred_label": 8300.3623046875,
1332
+ "rewards/accuracies": 0.721875011920929,
1333
+ "rewards/chosen": 0.07940875738859177,
1334
+ "rewards/margins": 0.7328697443008423,
1335
+ "rewards/rejected": -0.6534609794616699,
1336
+ "step": 820,
1337
+ "use_label": 17781.63671875
1338
+ },
1339
+ {
1340
+ "epoch": 0.87,
1341
+ "learning_rate": 1.4551804423748547e-06,
1342
+ "logits/chosen": -2.825873851776123,
1343
+ "logits/rejected": -2.812453508377075,
1344
+ "logps/chosen": -259.86407470703125,
1345
+ "logps/rejected": -299.1662902832031,
1346
+ "loss": 0.6584,
1347
+ "pred_label": 8431.2373046875,
1348
+ "rewards/accuracies": 0.71875,
1349
+ "rewards/chosen": -0.014992868527770042,
1350
+ "rewards/margins": 0.7412894368171692,
1351
+ "rewards/rejected": -0.7562823295593262,
1352
+ "step": 830,
1353
+ "use_label": 17970.76171875
1354
+ },
1355
+ {
1356
+ "epoch": 0.88,
1357
+ "learning_rate": 1.3387660069848663e-06,
1358
+ "logits/chosen": -2.8066563606262207,
1359
+ "logits/rejected": -2.819666624069214,
1360
+ "logps/chosen": -279.8387451171875,
1361
+ "logps/rejected": -275.2579650878906,
1362
+ "loss": 0.673,
1363
+ "pred_label": 8558.837890625,
1364
+ "rewards/accuracies": 0.671875,
1365
+ "rewards/chosen": 0.007683378644287586,
1366
+ "rewards/margins": 0.5629515647888184,
1367
+ "rewards/rejected": -0.5552681088447571,
1368
+ "step": 840,
1369
+ "use_label": 18163.162109375
1370
+ },
1371
+ {
1372
+ "epoch": 0.89,
1373
+ "learning_rate": 1.222351571594878e-06,
1374
+ "logits/chosen": -2.817474365234375,
1375
+ "logits/rejected": -2.848310947418213,
1376
+ "logps/chosen": -273.4720764160156,
1377
+ "logps/rejected": -254.59060668945312,
1378
+ "loss": 0.6512,
1379
+ "pred_label": 8672.349609375,
1380
+ "rewards/accuracies": 0.7406250238418579,
1381
+ "rewards/chosen": 0.02653767168521881,
1382
+ "rewards/margins": 0.6314449906349182,
1383
+ "rewards/rejected": -0.6049073338508606,
1384
+ "step": 850,
1385
+ "use_label": 18369.650390625
1386
+ },
1387
+ {
1388
+ "epoch": 0.9,
1389
+ "learning_rate": 1.1059371362048893e-06,
1390
+ "logits/chosen": -2.7932801246643066,
1391
+ "logits/rejected": -2.800811767578125,
1392
+ "logps/chosen": -294.1841735839844,
1393
+ "logps/rejected": -282.7134704589844,
1394
+ "loss": 0.6757,
1395
+ "pred_label": 8782.8125,
1396
+ "rewards/accuracies": 0.653124988079071,
1397
+ "rewards/chosen": -0.07215853035449982,
1398
+ "rewards/margins": 0.5342829823493958,
1399
+ "rewards/rejected": -0.6064414978027344,
1400
+ "step": 860,
1401
+ "use_label": 18579.1875
1402
+ },
1403
+ {
1404
+ "epoch": 0.91,
1405
+ "learning_rate": 9.895227008149012e-07,
1406
+ "logits/chosen": -2.7996864318847656,
1407
+ "logits/rejected": -2.8061373233795166,
1408
+ "logps/chosen": -263.4325256347656,
1409
+ "logps/rejected": -266.0005798339844,
1410
+ "loss": 0.6697,
1411
+ "pred_label": 8905.3125,
1412
+ "rewards/accuracies": 0.6937500238418579,
1413
+ "rewards/chosen": 0.041508838534355164,
1414
+ "rewards/margins": 0.653960108757019,
1415
+ "rewards/rejected": -0.6124512553215027,
1416
+ "step": 870,
1417
+ "use_label": 18776.6875
1418
+ },
1419
+ {
1420
+ "epoch": 0.92,
1421
+ "learning_rate": 8.731082654249128e-07,
1422
+ "logits/chosen": -2.8358547687530518,
1423
+ "logits/rejected": -2.820361614227295,
1424
+ "logps/chosen": -271.4715576171875,
1425
+ "logps/rejected": -266.07562255859375,
1426
+ "loss": 0.6717,
1427
+ "pred_label": 9048.2998046875,
1428
+ "rewards/accuracies": 0.6937500238418579,
1429
+ "rewards/chosen": 0.06802918016910553,
1430
+ "rewards/margins": 0.6590049862861633,
1431
+ "rewards/rejected": -0.5909757018089294,
1432
+ "step": 880,
1433
+ "use_label": 18953.69921875
1434
+ },
1435
+ {
1436
+ "epoch": 0.93,
1437
+ "learning_rate": 7.566938300349244e-07,
1438
+ "logits/chosen": -2.8539083003997803,
1439
+ "logits/rejected": -2.8629262447357178,
1440
+ "logps/chosen": -264.7303771972656,
1441
+ "logps/rejected": -274.3258056640625,
1442
+ "loss": 0.6724,
1443
+ "pred_label": 9178.8251953125,
1444
+ "rewards/accuracies": 0.6937500238418579,
1445
+ "rewards/chosen": 0.018511313945055008,
1446
+ "rewards/margins": 0.5970341563224792,
1447
+ "rewards/rejected": -0.578522801399231,
1448
+ "step": 890,
1449
+ "use_label": 19143.17578125
1450
  },
1451
  {
1452
  "epoch": 0.94,
1453
  "learning_rate": 6.402793946449361e-07,
1454
+ "logits/chosen": -2.8083693981170654,
1455
+ "logits/rejected": -2.8180294036865234,
1456
+ "logps/chosen": -275.43511962890625,
1457
+ "logps/rejected": -261.9901428222656,
1458
+ "loss": 0.6688,
1459
+ "pred_label": 9309.8623046875,
1460
+ "rewards/accuracies": 0.7281249761581421,
1461
+ "rewards/chosen": 0.014594699256122112,
1462
+ "rewards/margins": 0.5891886949539185,
1463
+ "rewards/rejected": -0.5745939612388611,
1464
  "step": 900,
1465
+ "use_label": 19332.13671875
1466
+ },
1467
+ {
1468
+ "epoch": 0.95,
1469
+ "learning_rate": 5.238649592549476e-07,
1470
+ "logits/chosen": -2.8189098834991455,
1471
+ "logits/rejected": -2.8110015392303467,
1472
+ "logps/chosen": -302.4877014160156,
1473
+ "logps/rejected": -278.4508972167969,
1474
+ "loss": 0.6586,
1475
+ "pred_label": 9430.1748046875,
1476
+ "rewards/accuracies": 0.699999988079071,
1477
+ "rewards/chosen": 0.05673975870013237,
1478
+ "rewards/margins": 0.5847252607345581,
1479
+ "rewards/rejected": -0.5279855132102966,
1480
+ "step": 910,
1481
+ "use_label": 19531.82421875
1482
+ },
1483
+ {
1484
+ "epoch": 0.96,
1485
+ "learning_rate": 4.0745052386495924e-07,
1486
+ "logits/chosen": -2.805025100708008,
1487
+ "logits/rejected": -2.8007519245147705,
1488
+ "logps/chosen": -286.05731201171875,
1489
+ "logps/rejected": -270.36279296875,
1490
+ "loss": 0.6555,
1491
+ "pred_label": 9553.6376953125,
1492
+ "rewards/accuracies": 0.7406250238418579,
1493
+ "rewards/chosen": -0.026457080617547035,
1494
+ "rewards/margins": 0.6447644233703613,
1495
+ "rewards/rejected": -0.6712215542793274,
1496
+ "step": 920,
1497
+ "use_label": 19728.36328125
1498
+ },
1499
+ {
1500
+ "epoch": 0.97,
1501
+ "learning_rate": 2.910360884749709e-07,
1502
+ "logits/chosen": -2.805614471435547,
1503
+ "logits/rejected": -2.798377513885498,
1504
+ "logps/chosen": -281.56890869140625,
1505
+ "logps/rejected": -268.83734130859375,
1506
+ "loss": 0.6677,
1507
+ "pred_label": 9692.5126953125,
1508
+ "rewards/accuracies": 0.721875011920929,
1509
+ "rewards/chosen": 0.1096206083893776,
1510
+ "rewards/margins": 0.8147541284561157,
1511
+ "rewards/rejected": -0.7051334977149963,
1512
+ "step": 930,
1513
+ "use_label": 19909.48828125
1514
+ },
1515
+ {
1516
+ "epoch": 0.98,
1517
+ "learning_rate": 1.7462165308498255e-07,
1518
+ "logits/chosen": -2.8316524028778076,
1519
+ "logits/rejected": -2.833402156829834,
1520
+ "logps/chosen": -286.48126220703125,
1521
+ "logps/rejected": -278.9422302246094,
1522
+ "loss": 0.6568,
1523
+ "pred_label": 9830.4873046875,
1524
+ "rewards/accuracies": 0.7437499761581421,
1525
+ "rewards/chosen": -0.0075108022429049015,
1526
+ "rewards/margins": 0.649519145488739,
1527
+ "rewards/rejected": -0.6570299863815308,
1528
+ "step": 940,
1529
+ "use_label": 20091.51171875
1530
+ },
1531
+ {
1532
+ "epoch": 0.99,
1533
+ "learning_rate": 5.8207217694994185e-08,
1534
+ "logits/chosen": -2.8337855339050293,
1535
+ "logits/rejected": -2.82015323638916,
1536
+ "logps/chosen": -271.77117919921875,
1537
+ "logps/rejected": -268.10894775390625,
1538
+ "loss": 0.6708,
1539
+ "pred_label": 9960.287109375,
1540
+ "rewards/accuracies": 0.6968749761581421,
1541
+ "rewards/chosen": -0.015800004824995995,
1542
+ "rewards/margins": 0.6547096371650696,
1543
+ "rewards/rejected": -0.6705096364021301,
1544
+ "step": 950,
1545
+ "use_label": 20281.712890625
1546
  },
1547
  {
1548
  "epoch": 1.0,
1549
+ "eval_logits/chosen": -2.83455753326416,
1550
+ "eval_logits/rejected": -2.824216604232788,
1551
+ "eval_logps/chosen": -282.00970458984375,
1552
+ "eval_logps/rejected": -268.7319030761719,
1553
+ "eval_loss": 0.6687781810760498,
1554
+ "eval_pred_label": 10292.2041015625,
1555
+ "eval_rewards/accuracies": 0.7210000157356262,
1556
+ "eval_rewards/chosen": -0.0034403554163873196,
1557
+ "eval_rewards/margins": 0.6410075426101685,
1558
+ "eval_rewards/rejected": -0.6444479823112488,
1559
+ "eval_runtime": 855.4886,
1560
+ "eval_samples_per_second": 2.338,
1561
+ "eval_steps_per_second": 0.292,
1562
+ "eval_use_label": 20769.796875,
1563
  "step": 955
1564
  },
1565
  {
1566
  "epoch": 1.0,
1567
  "step": 955,
1568
  "total_flos": 0.0,
1569
+ "train_loss": 0.6626948830969046,
1570
+ "train_runtime": 47570.4937,
1571
+ "train_samples_per_second": 1.285,
1572
+ "train_steps_per_second": 0.02
1573
  }
1574
  ],
1575
+ "logging_steps": 10,
1576
  "max_steps": 955,
1577
  "num_train_epochs": 1,
1578
  "save_steps": 50,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:edce2e11210317a4a76d82d8a7c2b37941adda1b1fa556f5298400e5a86069ba
3
  size 4792
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7596c88734ee5efa7881796813f15d453e7e29fcd0afd3f76fd123c7078f7a1f
3
  size 4792
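
A note on reading the log above: the added entries are the `log_history` records that the trainer writes to `trainer_state.json` every `logging_steps` (10) steps, ending with the epoch-1.0 eval record and the final train summary. The snippet below is a minimal sketch, not part of this commit, of how one might plot a few of these metrics after downloading the file locally; the key names (`step`, `rewards/margins`, `use_label`, `pred_label`) are taken from the entries shown in the diff, and the use of matplotlib is an assumption for illustration.

```python
# Minimal sketch (assumes trainer_state.json has been downloaded locally).
import json

import matplotlib.pyplot as plt

with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only the per-step training records; the eval and summary entries
# use different keys ("eval_loss", "train_loss", ...).
train_logs = [e for e in state["log_history"] if "rewards/margins" in e]

steps = [e["step"] for e in train_logs]
margins = [e["rewards/margins"] for e in train_logs]
use_label = [e["use_label"] for e in train_logs]
pred_label = [e["pred_label"] for e in train_logs]

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
ax1.plot(steps, margins)
ax1.set_xlabel("step")
ax1.set_ylabel("rewards/margins")
ax2.plot(steps, use_label, label="use_label")
ax2.plot(steps, pred_label, label="pred_label")
ax2.set_xlabel("step")
ax2.set_ylabel("cumulative count")
ax2.legend()
fig.tight_layout()
plt.show()
```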