davidberenstein1957 HF staff committed on
Commit
5dea528
1 Parent(s): cc8baed

Model save

Browse files
README.md ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: cc-by-nc-4.0
3
+ base_model: davidberenstein1957/ultra-feedback-dutch-cleaned-hq-spin-geitje-7b-ultra-sft_iter1
4
+ tags:
5
+ - generated_from_trainer
6
+ model-index:
7
+ - name: ultra-feedback-dutch-cleaned-hq-spin-geitje-7b-ultra-sft_iter2
8
+ results: []
9
+ ---
10
+
11
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
12
+ should probably proofread and complete it, then remove this comment. -->
13
+
14
+ # ultra-feedback-dutch-cleaned-hq-spin-geitje-7b-ultra-sft_iter2
15
+
16
+ This model is a fine-tuned version of [davidberenstein1957/ultra-feedback-dutch-cleaned-hq-spin-geitje-7b-ultra-sft_iter1](https://huggingface.co/davidberenstein1957/ultra-feedback-dutch-cleaned-hq-spin-geitje-7b-ultra-sft_iter1) on an unspecified dataset (no dataset name was recorded by the Trainer).
17
+ It achieves the following results on the evaluation set:
18
+ - Loss: 0.0162
19
+ - Rewards/real: -8.1731
20
+ - Rewards/generated: -31.3826
21
+ - Rewards/accuracies: 0.9917
22
+ - Rewards/margins: 23.2095
23
+ - Logps/generated: -956.3063
24
+ - Logps/real: -525.1735
25
+ - Logits/generated: -1.5719
26
+ - Logits/real: -1.7813
27
+
28
+ ## Model description
29
+
30
+ More information needed
31
+
32
+ ## Intended uses & limitations
33
+
34
+ More information needed
35
+
36
+ ## Training and evaluation data
37
+
38
+ More information needed
39
+
40
+ ## Training procedure
41
+
42
+ ### Training hyperparameters
43
+
44
+ The following hyperparameters were used during training:
45
+ - learning_rate: 1e-07
46
+ - train_batch_size: 8
47
+ - eval_batch_size: 8
48
+ - seed: 42
49
+ - distributed_type: multi-GPU
50
+ - num_devices: 4
51
+ - gradient_accumulation_steps: 2
52
+ - total_train_batch_size: 64
53
+ - total_eval_batch_size: 32
54
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
55
+ - lr_scheduler_type: linear
56
+ - lr_scheduler_warmup_ratio: 0.1
57
+ - num_epochs: 2
58
+
59
+ ### Training results
60
+
61
+ | Training Loss | Epoch | Step | Validation Loss | Rewards/real | Rewards/generated | Rewards/accuracies | Rewards/margins | Logps/generated | Logps/real | Logits/generated | Logits/real |
62
+ |:-------------:|:-----:|:----:|:---------------:|:------------:|:-----------------:|:------------------:|:---------------:|:---------------:|:----------:|:----------------:|:-----------:|
63
+ | 0.6097 | 0.04 | 25 | 0.4147 | -0.6192 | -1.4312 | 0.9250 | 0.8120 | -656.7919 | -449.6341 | -2.0004 | -2.0773 |
64
+ | 0.2137 | 0.08 | 50 | 0.1745 | -2.0300 | -5.0060 | 0.9519 | 2.9761 | -692.5404 | -463.7422 | -1.9306 | -2.0237 |
65
+ | 0.1292 | 0.12 | 75 | 0.1012 | -2.8227 | -7.4967 | 0.9685 | 4.6740 | -717.4471 | -471.6697 | -1.8843 | -1.9887 |
66
+ | 0.0665 | 0.16 | 100 | 0.0676 | -3.2936 | -9.3177 | 0.9778 | 6.0240 | -735.6567 | -476.3786 | -1.8508 | -1.9628 |
67
+ | 0.0429 | 0.21 | 125 | 0.0477 | -3.7328 | -11.2722 | 0.9824 | 7.5395 | -755.2025 | -480.7701 | -1.8123 | -1.9332 |
68
+ | 0.0299 | 0.25 | 150 | 0.0369 | -4.2161 | -13.2599 | 0.9870 | 9.0437 | -775.0787 | -485.6039 | -1.7938 | -1.9226 |
69
+ | 0.0252 | 0.29 | 175 | 0.0320 | -4.7201 | -15.0489 | 0.9880 | 10.3288 | -792.9691 | -490.6432 | -1.7758 | -1.9116 |
70
+ | 0.0249 | 0.33 | 200 | 0.0301 | -5.0757 | -16.3570 | 0.9880 | 11.2813 | -806.0497 | -494.1995 | -1.7515 | -1.8923 |
71
+ | 0.0175 | 0.37 | 225 | 0.0273 | -5.4299 | -17.6751 | 0.9880 | 12.2451 | -819.2310 | -497.7419 | -1.7362 | -1.8821 |
72
+ | 0.0183 | 0.41 | 250 | 0.0254 | -5.4183 | -18.3899 | 0.9889 | 12.9715 | -826.3791 | -497.6259 | -1.7300 | -1.8793 |
73
+ | 0.0182 | 0.45 | 275 | 0.0245 | -6.0900 | -20.5760 | 0.9889 | 14.4860 | -848.2401 | -504.3426 | -1.6961 | -1.8564 |
74
+ | 0.0253 | 0.49 | 300 | 0.0224 | -5.9239 | -20.7184 | 0.9898 | 14.7944 | -849.6640 | -502.6819 | -1.6938 | -1.8573 |
75
+ | 0.0075 | 0.53 | 325 | 0.0234 | -7.0436 | -24.1126 | 0.9898 | 17.0691 | -883.6064 | -513.8781 | -1.6522 | -1.8252 |
76
+ | 0.0141 | 0.58 | 350 | 0.0212 | -5.5696 | -20.9714 | 0.9898 | 15.4017 | -852.1937 | -499.1387 | -1.7082 | -1.8693 |
77
+ | 0.0135 | 0.62 | 375 | 0.0182 | -5.2646 | -20.3901 | 0.9907 | 15.1254 | -846.3809 | -496.0890 | -1.7285 | -1.8897 |
78
+ | 0.014 | 0.66 | 400 | 0.0182 | -5.5057 | -21.1579 | 0.9907 | 15.6522 | -854.0594 | -498.4994 | -1.7137 | -1.8783 |
79
+ | 0.0122 | 0.7 | 425 | 0.0172 | -5.3398 | -20.7520 | 0.9907 | 15.4122 | -849.9997 | -496.8405 | -1.7231 | -1.8857 |
80
+ | 0.0144 | 0.74 | 450 | 0.0164 | -4.6606 | -19.3766 | 0.9917 | 14.7160 | -836.2463 | -490.0483 | -1.7465 | -1.9042 |
81
+ | 0.0103 | 0.78 | 475 | 0.0160 | -4.8739 | -20.1058 | 0.9907 | 15.2319 | -843.5385 | -492.1819 | -1.7445 | -1.9064 |
82
+ | 0.0147 | 0.82 | 500 | 0.0156 | -5.1220 | -20.9607 | 0.9917 | 15.8387 | -852.0875 | -494.6623 | -1.7434 | -1.9092 |
83
+ | 0.0154 | 0.86 | 525 | 0.0155 | -5.1481 | -21.3994 | 0.9917 | 16.2513 | -856.4740 | -494.9235 | -1.7357 | -1.9040 |
84
+ | 0.0158 | 0.91 | 550 | 0.0151 | -5.6088 | -22.9532 | 0.9917 | 17.3444 | -872.0123 | -499.5304 | -1.7139 | -1.8881 |
85
+ | 0.0053 | 0.95 | 575 | 0.0149 | -5.7209 | -23.5217 | 0.9917 | 17.8008 | -877.6972 | -500.6515 | -1.7113 | -1.8888 |
86
+ | 0.008 | 0.99 | 600 | 0.0147 | -5.7523 | -23.7474 | 0.9917 | 17.9952 | -879.9544 | -500.9651 | -1.7086 | -1.8878 |
87
+ | 0.0049 | 1.03 | 625 | 0.0154 | -6.1839 | -24.8883 | 0.9907 | 18.7044 | -891.3632 | -505.2818 | -1.6731 | -1.8585 |
88
+ | 0.0057 | 1.07 | 650 | 0.0155 | -6.4947 | -25.8924 | 0.9917 | 19.3977 | -901.4037 | -508.3892 | -1.6592 | -1.8484 |
89
+ | 0.0076 | 1.11 | 675 | 0.0158 | -6.8543 | -26.9217 | 0.9917 | 20.0674 | -911.6970 | -511.9859 | -1.6407 | -1.8339 |
90
+ | 0.004 | 1.15 | 700 | 0.0158 | -7.1325 | -27.7743 | 0.9917 | 20.6418 | -920.2236 | -514.7678 | -1.6269 | -1.8236 |
91
+ | 0.0168 | 1.19 | 725 | 0.0157 | -6.9019 | -26.2791 | 0.9917 | 19.3772 | -905.2711 | -512.4611 | -1.6566 | -1.8448 |
92
+ | 0.0022 | 1.23 | 750 | 0.0163 | -6.9586 | -26.5145 | 0.9917 | 19.5559 | -907.6251 | -513.0281 | -1.6533 | -1.8423 |
93
+ | 0.0039 | 1.28 | 775 | 0.0165 | -7.5386 | -28.2224 | 0.9917 | 20.6837 | -924.7038 | -518.8289 | -1.6369 | -1.8327 |
94
+ | 0.002 | 1.32 | 800 | 0.0165 | -7.6568 | -28.6441 | 0.9907 | 20.9872 | -928.9208 | -520.0109 | -1.6365 | -1.8344 |
95
+ | 0.002 | 1.36 | 825 | 0.0165 | -7.7989 | -29.2028 | 0.9917 | 21.4038 | -934.5078 | -521.4318 | -1.6348 | -1.8352 |
96
+ | 0.0019 | 1.4 | 850 | 0.0165 | -7.8978 | -29.5958 | 0.9917 | 21.6980 | -938.4382 | -522.4203 | -1.6166 | -1.8169 |
97
+ | 0.0041 | 1.44 | 875 | 0.0162 | -7.9696 | -29.7930 | 0.9917 | 21.8234 | -940.4100 | -523.1380 | -1.6165 | -1.8176 |
98
+ | 0.0023 | 1.48 | 900 | 0.0164 | -8.2086 | -30.6909 | 0.9917 | 22.4823 | -949.3892 | -525.5286 | -1.6045 | -1.8093 |
99
+ | 0.0038 | 1.52 | 925 | 0.0166 | -8.1217 | -30.6727 | 0.9917 | 22.5510 | -949.2076 | -524.6597 | -1.5919 | -1.7978 |
100
+ | 0.0096 | 1.56 | 950 | 0.0162 | -7.8257 | -30.1144 | 0.9917 | 22.2887 | -943.6237 | -521.6992 | -1.5909 | -1.7956 |
101
+ | 0.0057 | 1.6 | 975 | 0.0166 | -8.0335 | -30.6654 | 0.9917 | 22.6319 | -949.1342 | -523.7775 | -1.5854 | -1.7919 |
102
+ | 0.0046 | 1.65 | 1000 | 0.0165 | -8.1757 | -31.0139 | 0.9917 | 22.8382 | -952.6191 | -525.2000 | -1.5768 | -1.7852 |
103
+ | 0.0009 | 1.69 | 1025 | 0.0165 | -8.0553 | -30.7565 | 0.9917 | 22.7012 | -950.0453 | -523.9951 | -1.5757 | -1.7830 |
104
+ | 0.002 | 1.73 | 1050 | 0.0164 | -8.1838 | -31.3365 | 0.9917 | 23.1528 | -955.8453 | -525.2800 | -1.5692 | -1.7790 |
105
+ | 0.0069 | 1.77 | 1075 | 0.0163 | -8.1908 | -31.4118 | 0.9917 | 23.2210 | -956.5981 | -525.3508 | -1.5749 | -1.7850 |
106
+ | 0.0029 | 1.81 | 1100 | 0.0166 | -8.4138 | -32.0830 | 0.9917 | 23.6692 | -963.3098 | -527.5802 | -1.5624 | -1.7752 |
107
+ | 0.0047 | 1.85 | 1125 | 0.0166 | -8.4223 | -32.1526 | 0.9917 | 23.7304 | -964.0065 | -527.6652 | -1.5631 | -1.7759 |
108
+ | 0.0037 | 1.89 | 1150 | 0.0163 | -8.1563 | -31.3209 | 0.9917 | 23.1646 | -955.6895 | -525.0057 | -1.5739 | -1.7832 |
109
+ | 0.0026 | 1.93 | 1175 | 0.0163 | -8.2107 | -31.5009 | 0.9917 | 23.2901 | -957.4888 | -525.5498 | -1.5708 | -1.7807 |
110
+ | 0.0058 | 1.98 | 1200 | 0.0162 | -8.1731 | -31.3826 | 0.9917 | 23.2095 | -956.3063 | -525.1735 | -1.5719 | -1.7813 |
111
+
112
+
113
+ ### Framework versions
114
+
115
+ - Transformers 4.37.0
116
+ - PyTorch 2.1.2+cu121
117
+ - Datasets 2.14.6
118
+ - Tokenizers 0.15.2
all_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.0,
3
+ "train_loss": 0.03410133493748145,
4
+ "train_runtime": 146707.6169,
5
+ "train_samples": 38852,
6
+ "train_samples_per_second": 0.53,
7
+ "train_steps_per_second": 0.008
8
+ }
generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "eos_token_id": 2,
5
+ "transformers_version": "4.37.0"
6
+ }
model-00001-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57e9857df887a6b1b144af1585279a23a0ae3430070e68075346aac5217ec00a
3
+ size 4943162336
model-00002-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1bf75c356656ff403f00a3d4462060172061182e728559442f2c2f45a7c1f2b
3
+ size 4999819336
model-00003-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33bccce882dc0d2b9b0e2bbe44e0af7477fcac4847cd055067f458c0a1d66068
3
+ size 4540516344
model.safetensors.index.json ADDED
@@ -0,0 +1,298 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "total_size": 14483464192
4
+ },
5
+ "weight_map": {
6
+ "lm_head.weight": "model-00003-of-00003.safetensors",
7
+ "model.embed_tokens.weight": "model-00001-of-00003.safetensors",
8
+ "model.layers.0.input_layernorm.weight": "model-00001-of-00003.safetensors",
9
+ "model.layers.0.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
10
+ "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
11
+ "model.layers.0.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
12
+ "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
13
+ "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
14
+ "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
15
+ "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
16
+ "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
17
+ "model.layers.1.input_layernorm.weight": "model-00001-of-00003.safetensors",
18
+ "model.layers.1.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
19
+ "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
20
+ "model.layers.1.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
21
+ "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
22
+ "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
23
+ "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
24
+ "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
25
+ "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
26
+ "model.layers.10.input_layernorm.weight": "model-00002-of-00003.safetensors",
27
+ "model.layers.10.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
28
+ "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
29
+ "model.layers.10.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
30
+ "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
31
+ "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
32
+ "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
33
+ "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
34
+ "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
35
+ "model.layers.11.input_layernorm.weight": "model-00002-of-00003.safetensors",
36
+ "model.layers.11.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
37
+ "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
38
+ "model.layers.11.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
39
+ "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
40
+ "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
41
+ "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
42
+ "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
43
+ "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
44
+ "model.layers.12.input_layernorm.weight": "model-00002-of-00003.safetensors",
45
+ "model.layers.12.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
46
+ "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
47
+ "model.layers.12.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
48
+ "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
49
+ "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
50
+ "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
51
+ "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
52
+ "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
53
+ "model.layers.13.input_layernorm.weight": "model-00002-of-00003.safetensors",
54
+ "model.layers.13.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
55
+ "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
56
+ "model.layers.13.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
57
+ "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
58
+ "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
59
+ "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
60
+ "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
61
+ "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
62
+ "model.layers.14.input_layernorm.weight": "model-00002-of-00003.safetensors",
63
+ "model.layers.14.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
64
+ "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
65
+ "model.layers.14.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
66
+ "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
67
+ "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
68
+ "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
69
+ "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
70
+ "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
71
+ "model.layers.15.input_layernorm.weight": "model-00002-of-00003.safetensors",
72
+ "model.layers.15.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
73
+ "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
74
+ "model.layers.15.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
75
+ "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
76
+ "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
77
+ "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
78
+ "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
79
+ "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
80
+ "model.layers.16.input_layernorm.weight": "model-00002-of-00003.safetensors",
81
+ "model.layers.16.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
82
+ "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
83
+ "model.layers.16.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
84
+ "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
85
+ "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
86
+ "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
87
+ "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
88
+ "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
89
+ "model.layers.17.input_layernorm.weight": "model-00002-of-00003.safetensors",
90
+ "model.layers.17.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
91
+ "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
92
+ "model.layers.17.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
93
+ "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
94
+ "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
95
+ "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
96
+ "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
97
+ "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
98
+ "model.layers.18.input_layernorm.weight": "model-00002-of-00003.safetensors",
99
+ "model.layers.18.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
100
+ "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
101
+ "model.layers.18.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
102
+ "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
103
+ "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
104
+ "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
105
+ "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
106
+ "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
107
+ "model.layers.19.input_layernorm.weight": "model-00002-of-00003.safetensors",
108
+ "model.layers.19.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
109
+ "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
110
+ "model.layers.19.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
111
+ "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
112
+ "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
113
+ "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
114
+ "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
115
+ "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
116
+ "model.layers.2.input_layernorm.weight": "model-00001-of-00003.safetensors",
117
+ "model.layers.2.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
118
+ "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
119
+ "model.layers.2.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
120
+ "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
121
+ "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
122
+ "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
123
+ "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
124
+ "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
125
+ "model.layers.20.input_layernorm.weight": "model-00002-of-00003.safetensors",
126
+ "model.layers.20.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
127
+ "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
128
+ "model.layers.20.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
129
+ "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
130
+ "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
131
+ "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
132
+ "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
133
+ "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
134
+ "model.layers.21.input_layernorm.weight": "model-00002-of-00003.safetensors",
135
+ "model.layers.21.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
136
+ "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
137
+ "model.layers.21.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
138
+ "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
139
+ "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
140
+ "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
141
+ "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
142
+ "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
143
+ "model.layers.22.input_layernorm.weight": "model-00003-of-00003.safetensors",
144
+ "model.layers.22.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
145
+ "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
146
+ "model.layers.22.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
147
+ "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
148
+ "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
149
+ "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
150
+ "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
151
+ "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
152
+ "model.layers.23.input_layernorm.weight": "model-00003-of-00003.safetensors",
153
+ "model.layers.23.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
154
+ "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
155
+ "model.layers.23.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
156
+ "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
157
+ "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
158
+ "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
159
+ "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
160
+ "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
161
+ "model.layers.24.input_layernorm.weight": "model-00003-of-00003.safetensors",
162
+ "model.layers.24.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
163
+ "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
164
+ "model.layers.24.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
165
+ "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
166
+ "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
167
+ "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
168
+ "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
169
+ "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
170
+ "model.layers.25.input_layernorm.weight": "model-00003-of-00003.safetensors",
171
+ "model.layers.25.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
172
+ "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
173
+ "model.layers.25.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
174
+ "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
175
+ "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
176
+ "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
177
+ "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
178
+ "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
179
+ "model.layers.26.input_layernorm.weight": "model-00003-of-00003.safetensors",
180
+ "model.layers.26.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
181
+ "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
182
+ "model.layers.26.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
183
+ "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
184
+ "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
185
+ "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
186
+ "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
187
+ "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
188
+ "model.layers.27.input_layernorm.weight": "model-00003-of-00003.safetensors",
189
+ "model.layers.27.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
190
+ "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
191
+ "model.layers.27.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
192
+ "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
193
+ "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
194
+ "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
195
+ "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
196
+ "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
197
+ "model.layers.28.input_layernorm.weight": "model-00003-of-00003.safetensors",
198
+ "model.layers.28.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
199
+ "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
200
+ "model.layers.28.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
201
+ "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
202
+ "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
203
+ "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
204
+ "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
205
+ "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
206
+ "model.layers.29.input_layernorm.weight": "model-00003-of-00003.safetensors",
207
+ "model.layers.29.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
208
+ "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
209
+ "model.layers.29.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
210
+ "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
211
+ "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
212
+ "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
213
+ "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
214
+ "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
215
+ "model.layers.3.input_layernorm.weight": "model-00001-of-00003.safetensors",
216
+ "model.layers.3.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
217
+ "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
218
+ "model.layers.3.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
219
+ "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
220
+ "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
221
+ "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
222
+ "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
223
+ "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
224
+ "model.layers.30.input_layernorm.weight": "model-00003-of-00003.safetensors",
225
+ "model.layers.30.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
226
+ "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
227
+ "model.layers.30.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
228
+ "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
229
+ "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
230
+ "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
231
+ "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
232
+ "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
233
+ "model.layers.31.input_layernorm.weight": "model-00003-of-00003.safetensors",
234
+ "model.layers.31.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
235
+ "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
236
+ "model.layers.31.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
237
+ "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
238
+ "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
239
+ "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
240
+ "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
241
+ "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
242
+ "model.layers.4.input_layernorm.weight": "model-00001-of-00003.safetensors",
243
+ "model.layers.4.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
244
+ "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
245
+ "model.layers.4.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
246
+ "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
247
+ "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
248
+ "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
249
+ "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
250
+ "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
251
+ "model.layers.5.input_layernorm.weight": "model-00001-of-00003.safetensors",
252
+ "model.layers.5.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
253
+ "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
254
+ "model.layers.5.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
255
+ "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
256
+ "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
257
+ "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
258
+ "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
259
+ "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
260
+ "model.layers.6.input_layernorm.weight": "model-00001-of-00003.safetensors",
261
+ "model.layers.6.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
262
+ "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
263
+ "model.layers.6.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
264
+ "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
265
+ "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
266
+ "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
267
+ "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
268
+ "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
269
+ "model.layers.7.input_layernorm.weight": "model-00001-of-00003.safetensors",
270
+ "model.layers.7.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
271
+ "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
272
+ "model.layers.7.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
273
+ "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
274
+ "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
275
+ "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
276
+ "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
277
+ "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
278
+ "model.layers.8.input_layernorm.weight": "model-00001-of-00003.safetensors",
279
+ "model.layers.8.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
280
+ "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
281
+ "model.layers.8.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
282
+ "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
283
+ "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
284
+ "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
285
+ "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
286
+ "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
287
+ "model.layers.9.input_layernorm.weight": "model-00001-of-00003.safetensors",
288
+ "model.layers.9.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
289
+ "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
290
+ "model.layers.9.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
291
+ "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
292
+ "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
293
+ "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
294
+ "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
295
+ "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
296
+ "model.norm.weight": "model-00003-of-00003.safetensors"
297
+ }
298
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.0,
3
+ "train_loss": 0.03410133493748145,
4
+ "train_runtime": 146707.6169,
5
+ "train_samples": 38852,
6
+ "train_samples_per_second": 0.53,
7
+ "train_steps_per_second": 0.008
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,2506 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.9983539094650205,
5
+ "eval_steps": 25,
6
+ "global_step": 1214,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.0,
13
+ "learning_rate": 8.196721311475409e-10,
14
+ "logits/generated": -2.0642459392547607,
15
+ "logits/real": -2.1011667251586914,
16
+ "logps/generated": -767.111328125,
17
+ "logps/real": -424.18878173828125,
18
+ "loss": 0.6931,
19
+ "rewards/accuracies": 0.0,
20
+ "rewards/generated": 0.0,
21
+ "rewards/margins": 0.0,
22
+ "rewards/real": 0.0,
23
+ "step": 1
24
+ },
25
+ {
26
+ "epoch": 0.02,
27
+ "learning_rate": 8.196721311475408e-09,
28
+ "logits/generated": -2.0099620819091797,
29
+ "logits/real": -2.1245546340942383,
30
+ "logps/generated": -645.1455688476562,
31
+ "logps/real": -425.1603698730469,
32
+ "loss": 0.6885,
33
+ "rewards/accuracies": 0.4791666567325592,
34
+ "rewards/generated": -0.022459693253040314,
35
+ "rewards/margins": 0.015254557132720947,
36
+ "rewards/real": -0.007205137051641941,
37
+ "step": 10
38
+ },
39
+ {
40
+ "epoch": 0.03,
41
+ "learning_rate": 1.6393442622950816e-08,
42
+ "logits/generated": -2.017244338989258,
43
+ "logits/real": -2.1224846839904785,
44
+ "logps/generated": -626.304443359375,
45
+ "logps/real": -415.47955322265625,
46
+ "loss": 0.6097,
47
+ "rewards/accuracies": 0.78125,
48
+ "rewards/generated": -0.31143561005592346,
49
+ "rewards/margins": 0.1948009729385376,
50
+ "rewards/real": -0.11663466691970825,
51
+ "step": 20
52
+ },
53
+ {
54
+ "epoch": 0.04,
55
+ "eval_logits/generated": -2.0004239082336426,
56
+ "eval_logits/real": -2.0772550106048584,
57
+ "eval_logps/generated": -656.7918701171875,
58
+ "eval_logps/real": -449.63409423828125,
59
+ "eval_loss": 0.4146920442581177,
60
+ "eval_rewards/accuracies": 0.925000011920929,
61
+ "eval_rewards/generated": -1.4311684370040894,
62
+ "eval_rewards/margins": 0.8120061159133911,
63
+ "eval_rewards/real": -0.6191622018814087,
64
+ "eval_runtime": 1777.8699,
65
+ "eval_samples_per_second": 2.429,
66
+ "eval_steps_per_second": 0.076,
67
+ "step": 25
68
+ },
69
+ {
70
+ "epoch": 0.05,
71
+ "learning_rate": 2.459016393442623e-08,
72
+ "logits/generated": -2.018745183944702,
73
+ "logits/real": -2.120075225830078,
74
+ "logps/generated": -681.8663940429688,
75
+ "logps/real": -443.0894470214844,
76
+ "loss": 0.4439,
77
+ "rewards/accuracies": 0.887499988079071,
78
+ "rewards/generated": -1.3025258779525757,
79
+ "rewards/margins": 0.753677487373352,
80
+ "rewards/real": -0.5488484501838684,
81
+ "step": 30
82
+ },
83
+ {
84
+ "epoch": 0.07,
85
+ "learning_rate": 3.278688524590163e-08,
86
+ "logits/generated": -2.003990411758423,
87
+ "logits/real": -2.0808629989624023,
88
+ "logps/generated": -633.3893432617188,
89
+ "logps/real": -416.35333251953125,
90
+ "loss": 0.3191,
91
+ "rewards/accuracies": 0.9624999761581421,
92
+ "rewards/generated": -2.1262094974517822,
93
+ "rewards/margins": 1.220593810081482,
94
+ "rewards/real": -0.9056156277656555,
95
+ "step": 40
96
+ },
97
+ {
98
+ "epoch": 0.08,
99
+ "learning_rate": 4.0983606557377046e-08,
100
+ "logits/generated": -1.9361705780029297,
101
+ "logits/real": -2.0647199153900146,
102
+ "logps/generated": -706.2001342773438,
103
+ "logps/real": -468.58807373046875,
104
+ "loss": 0.2137,
105
+ "rewards/accuracies": 0.949999988079071,
106
+ "rewards/generated": -3.8283188343048096,
107
+ "rewards/margins": 2.2455239295959473,
108
+ "rewards/real": -1.582794427871704,
109
+ "step": 50
110
+ },
111
+ {
112
+ "epoch": 0.08,
113
+ "eval_logits/generated": -1.9305709600448608,
114
+ "eval_logits/real": -2.0236809253692627,
115
+ "eval_logps/generated": -692.5404052734375,
116
+ "eval_logps/real": -463.74224853515625,
117
+ "eval_loss": 0.1745266169309616,
118
+ "eval_rewards/accuracies": 0.9518518447875977,
119
+ "eval_rewards/generated": -5.006031036376953,
120
+ "eval_rewards/margins": 2.9760546684265137,
121
+ "eval_rewards/real": -2.029975652694702,
122
+ "eval_runtime": 1800.9154,
123
+ "eval_samples_per_second": 2.398,
124
+ "eval_steps_per_second": 0.075,
125
+ "step": 50
126
+ },
127
+ {
128
+ "epoch": 0.1,
129
+ "learning_rate": 4.918032786885246e-08,
130
+ "logits/generated": -1.953768014907837,
131
+ "logits/real": -2.022987127304077,
132
+ "logps/generated": -717.5247192382812,
133
+ "logps/real": -424.16162109375,
134
+ "loss": 0.1354,
135
+ "rewards/accuracies": 0.987500011920929,
136
+ "rewards/generated": -6.01259708404541,
137
+ "rewards/margins": 3.8680121898651123,
138
+ "rewards/real": -2.144585371017456,
139
+ "step": 60
140
+ },
141
+ {
142
+ "epoch": 0.12,
143
+ "learning_rate": 5.7377049180327866e-08,
144
+ "logits/generated": -1.8369897603988647,
145
+ "logits/real": -1.9798635244369507,
146
+ "logps/generated": -716.9135131835938,
147
+ "logps/real": -459.76275634765625,
148
+ "loss": 0.1292,
149
+ "rewards/accuracies": 0.987500011920929,
150
+ "rewards/generated": -6.8557868003845215,
151
+ "rewards/margins": 4.214533805847168,
152
+ "rewards/real": -2.641252040863037,
153
+ "step": 70
154
+ },
155
+ {
156
+ "epoch": 0.12,
157
+ "eval_logits/generated": -1.884318470954895,
158
+ "eval_logits/real": -1.9886623620986938,
159
+ "eval_logps/generated": -717.4470825195312,
160
+ "eval_logps/real": -471.66973876953125,
161
+ "eval_loss": 0.10119830071926117,
162
+ "eval_rewards/accuracies": 0.9685184955596924,
163
+ "eval_rewards/generated": -7.496694087982178,
164
+ "eval_rewards/margins": 4.673972129821777,
165
+ "eval_rewards/real": -2.8227217197418213,
166
+ "eval_runtime": 1800.5623,
167
+ "eval_samples_per_second": 2.398,
168
+ "eval_steps_per_second": 0.075,
169
+ "step": 75
170
+ },
171
+ {
172
+ "epoch": 0.13,
173
+ "learning_rate": 6.557377049180327e-08,
174
+ "logits/generated": -1.8934190273284912,
175
+ "logits/real": -2.0053441524505615,
176
+ "logps/generated": -735.2626953125,
177
+ "logps/real": -467.66961669921875,
178
+ "loss": 0.0881,
179
+ "rewards/accuracies": 1.0,
180
+ "rewards/generated": -7.727712154388428,
181
+ "rewards/margins": 4.887805461883545,
182
+ "rewards/real": -2.8399062156677246,
183
+ "step": 80
184
+ },
185
+ {
186
+ "epoch": 0.15,
187
+ "learning_rate": 7.377049180327869e-08,
188
+ "logits/generated": -1.8700984716415405,
189
+ "logits/real": -1.931532859802246,
190
+ "logps/generated": -777.4097900390625,
191
+ "logps/real": -457.1133728027344,
192
+ "loss": 0.075,
193
+ "rewards/accuracies": 0.981249988079071,
194
+ "rewards/generated": -9.006689071655273,
195
+ "rewards/margins": 5.990359306335449,
196
+ "rewards/real": -3.0163300037384033,
197
+ "step": 90
198
+ },
199
+ {
200
+ "epoch": 0.16,
201
+ "learning_rate": 8.196721311475409e-08,
202
+ "logits/generated": -1.8540890216827393,
203
+ "logits/real": -1.952444076538086,
204
+ "logps/generated": -760.2003784179688,
205
+ "logps/real": -471.71221923828125,
206
+ "loss": 0.0665,
207
+ "rewards/accuracies": 0.981249988079071,
208
+ "rewards/generated": -8.933283805847168,
209
+ "rewards/margins": 5.90076208114624,
210
+ "rewards/real": -3.0325207710266113,
211
+ "step": 100
212
+ },
213
+ {
214
+ "epoch": 0.16,
215
+ "eval_logits/generated": -1.8507987260818481,
216
+ "eval_logits/real": -1.9628313779830933,
217
+ "eval_logps/generated": -735.65673828125,
218
+ "eval_logps/real": -476.3786315917969,
219
+ "eval_loss": 0.0675550326704979,
220
+ "eval_rewards/accuracies": 0.9777777791023254,
221
+ "eval_rewards/generated": -9.317663192749023,
222
+ "eval_rewards/margins": 6.0240478515625,
223
+ "eval_rewards/real": -3.2936155796051025,
224
+ "eval_runtime": 1798.5965,
225
+ "eval_samples_per_second": 2.401,
226
+ "eval_steps_per_second": 0.075,
227
+ "step": 100
228
+ },
229
+ {
230
+ "epoch": 0.18,
231
+ "learning_rate": 9.01639344262295e-08,
232
+ "logits/generated": -1.7943336963653564,
233
+ "logits/real": -1.9300905466079712,
234
+ "logps/generated": -762.0491943359375,
235
+ "logps/real": -434.4507751464844,
236
+ "loss": 0.0579,
237
+ "rewards/accuracies": 0.987500011920929,
238
+ "rewards/generated": -9.933004379272461,
239
+ "rewards/margins": 6.8074140548706055,
240
+ "rewards/real": -3.1255910396575928,
241
+ "step": 110
242
+ },
243
+ {
244
+ "epoch": 0.2,
245
+ "learning_rate": 9.836065573770492e-08,
246
+ "logits/generated": -1.8353208303451538,
247
+ "logits/real": -1.9718765020370483,
248
+ "logps/generated": -750.9710693359375,
249
+ "logps/real": -449.0281677246094,
250
+ "loss": 0.0429,
251
+ "rewards/accuracies": 0.9937499761581421,
252
+ "rewards/generated": -10.487658500671387,
253
+ "rewards/margins": 7.143439292907715,
254
+ "rewards/real": -3.3442184925079346,
255
+ "step": 120
256
+ },
257
+ {
258
+ "epoch": 0.21,
259
+ "eval_logits/generated": -1.8123193979263306,
260
+ "eval_logits/real": -1.9332078695297241,
261
+ "eval_logps/generated": -755.2024536132812,
262
+ "eval_logps/real": -480.7701110839844,
263
+ "eval_loss": 0.04767724126577377,
264
+ "eval_rewards/accuracies": 0.9824073910713196,
265
+ "eval_rewards/generated": -11.27223014831543,
266
+ "eval_rewards/margins": 7.53946590423584,
267
+ "eval_rewards/real": -3.73276424407959,
268
+ "eval_runtime": 1803.3715,
269
+ "eval_samples_per_second": 2.394,
270
+ "eval_steps_per_second": 0.075,
271
+ "step": 125
272
+ },
273
+ {
274
+ "epoch": 0.21,
275
+ "learning_rate": 9.926739926739926e-08,
276
+ "logits/generated": -1.8151371479034424,
277
+ "logits/real": -1.9583898782730103,
278
+ "logps/generated": -810.3426513671875,
279
+ "logps/real": -501.2919921875,
280
+ "loss": 0.0431,
281
+ "rewards/accuracies": 1.0,
282
+ "rewards/generated": -11.520541191101074,
283
+ "rewards/margins": 7.8352227210998535,
284
+ "rewards/real": -3.6853184700012207,
285
+ "step": 130
286
+ },
287
+ {
288
+ "epoch": 0.23,
289
+ "learning_rate": 9.835164835164835e-08,
290
+ "logits/generated": -1.8159958124160767,
291
+ "logits/real": -1.9128528833389282,
292
+ "logps/generated": -802.4890747070312,
293
+ "logps/real": -464.2137756347656,
294
+ "loss": 0.0511,
295
+ "rewards/accuracies": 0.981249988079071,
296
+ "rewards/generated": -12.06971263885498,
297
+ "rewards/margins": 8.350500106811523,
298
+ "rewards/real": -3.719212055206299,
299
+ "step": 140
300
+ },
301
+ {
302
+ "epoch": 0.25,
303
+ "learning_rate": 9.743589743589743e-08,
304
+ "logits/generated": -1.754547119140625,
305
+ "logits/real": -1.9312493801116943,
306
+ "logps/generated": -814.783935546875,
307
+ "logps/real": -467.99609375,
308
+ "loss": 0.0299,
309
+ "rewards/accuracies": 0.9937499761581421,
310
+ "rewards/generated": -13.577325820922852,
311
+ "rewards/margins": 9.742910385131836,
312
+ "rewards/real": -3.83441424369812,
313
+ "step": 150
314
+ },
315
+ {
316
+ "epoch": 0.25,
317
+ "eval_logits/generated": -1.7938494682312012,
318
+ "eval_logits/real": -1.9225581884384155,
319
+ "eval_logps/generated": -775.0787353515625,
320
+ "eval_logps/real": -485.6038818359375,
321
+ "eval_loss": 0.036931850016117096,
322
+ "eval_rewards/accuracies": 0.9870370626449585,
323
+ "eval_rewards/generated": -13.259866714477539,
324
+ "eval_rewards/margins": 9.043731689453125,
325
+ "eval_rewards/real": -4.216136932373047,
326
+ "eval_runtime": 1778.5818,
327
+ "eval_samples_per_second": 2.428,
328
+ "eval_steps_per_second": 0.076,
329
+ "step": 150
330
+ },
331
+ {
332
+ "epoch": 0.26,
333
+ "learning_rate": 9.652014652014652e-08,
334
+ "logits/generated": -1.7989473342895508,
335
+ "logits/real": -1.9711263179779053,
336
+ "logps/generated": -816.1602783203125,
337
+ "logps/real": -429.8924865722656,
338
+ "loss": 0.0275,
339
+ "rewards/accuracies": 0.987500011920929,
340
+ "rewards/generated": -14.095451354980469,
341
+ "rewards/margins": 10.274964332580566,
342
+ "rewards/real": -3.820486545562744,
343
+ "step": 160
344
+ },
345
+ {
346
+ "epoch": 0.28,
347
+ "learning_rate": 9.56043956043956e-08,
348
+ "logits/generated": -1.8312492370605469,
349
+ "logits/real": -1.9516799449920654,
350
+ "logps/generated": -803.2518920898438,
351
+ "logps/real": -467.13983154296875,
352
+ "loss": 0.0252,
353
+ "rewards/accuracies": 0.9937499761581421,
354
+ "rewards/generated": -13.788885116577148,
355
+ "rewards/margins": 9.561357498168945,
356
+ "rewards/real": -4.227527618408203,
357
+ "step": 170
358
+ },
359
+ {
360
+ "epoch": 0.29,
361
+ "eval_logits/generated": -1.7758067846298218,
362
+ "eval_logits/real": -1.9115736484527588,
363
+ "eval_logps/generated": -792.9690551757812,
364
+ "eval_logps/real": -490.6431579589844,
365
+ "eval_loss": 0.03204120323061943,
366
+ "eval_rewards/accuracies": 0.9879629611968994,
367
+ "eval_rewards/generated": -15.048893928527832,
368
+ "eval_rewards/margins": 10.328824043273926,
369
+ "eval_rewards/real": -4.720070838928223,
370
+ "eval_runtime": 1779.5528,
371
+ "eval_samples_per_second": 2.426,
372
+ "eval_steps_per_second": 0.076,
373
+ "step": 175
374
+ },
375
+ {
376
+ "epoch": 0.3,
377
+ "learning_rate": 9.468864468864468e-08,
378
+ "logits/generated": -1.8363538980484009,
379
+ "logits/real": -1.9760059118270874,
380
+ "logps/generated": -798.1632690429688,
381
+ "logps/real": -465.4309997558594,
382
+ "loss": 0.018,
383
+ "rewards/accuracies": 1.0,
384
+ "rewards/generated": -13.945306777954102,
385
+ "rewards/margins": 9.537097930908203,
386
+ "rewards/real": -4.40820837020874,
387
+ "step": 180
388
+ },
389
+ {
390
+ "epoch": 0.31,
391
+ "learning_rate": 9.377289377289377e-08,
392
+ "logits/generated": -1.7771434783935547,
393
+ "logits/real": -1.918859839439392,
394
+ "logps/generated": -778.4981689453125,
395
+ "logps/real": -445.9978942871094,
396
+ "loss": 0.0154,
397
+ "rewards/accuracies": 0.9937499761581421,
398
+ "rewards/generated": -15.705945014953613,
399
+ "rewards/margins": 11.227587699890137,
400
+ "rewards/real": -4.478354454040527,
401
+ "step": 190
402
+ },
403
+ {
404
+ "epoch": 0.33,
405
+ "learning_rate": 9.285714285714286e-08,
406
+ "logits/generated": -1.7843818664550781,
407
+ "logits/real": -1.9129893779754639,
408
+ "logps/generated": -816.643310546875,
409
+ "logps/real": -449.44171142578125,
410
+ "loss": 0.0249,
411
+ "rewards/accuracies": 0.987500011920929,
412
+ "rewards/generated": -16.523761749267578,
413
+ "rewards/margins": 11.881709098815918,
414
+ "rewards/real": -4.642051696777344,
415
+ "step": 200
416
+ },
417
+ {
418
+ "epoch": 0.33,
419
+ "eval_logits/generated": -1.7515002489089966,
420
+ "eval_logits/real": -1.8923099040985107,
421
+ "eval_logps/generated": -806.0497436523438,
422
+ "eval_logps/real": -494.1994934082031,
423
+ "eval_loss": 0.030071575194597244,
424
+ "eval_rewards/accuracies": 0.9879629611968994,
425
+ "eval_rewards/generated": -16.356964111328125,
426
+ "eval_rewards/margins": 11.28126049041748,
427
+ "eval_rewards/real": -5.0757036209106445,
428
+ "eval_runtime": 1798.4488,
429
+ "eval_samples_per_second": 2.401,
430
+ "eval_steps_per_second": 0.075,
431
+ "step": 200
432
+ },
433
+ {
434
+ "epoch": 0.35,
435
+ "learning_rate": 9.194139194139193e-08,
436
+ "logits/generated": -1.7697616815567017,
437
+ "logits/real": -1.9165000915527344,
438
+ "logps/generated": -853.5462646484375,
439
+ "logps/real": -462.77484130859375,
440
+ "loss": 0.0245,
441
+ "rewards/accuracies": 0.987500011920929,
442
+ "rewards/generated": -18.052305221557617,
443
+ "rewards/margins": 13.306139945983887,
444
+ "rewards/real": -4.746166229248047,
445
+ "step": 210
446
+ },
447
+ {
448
+ "epoch": 0.36,
449
+ "learning_rate": 9.102564102564102e-08,
450
+ "logits/generated": -1.7060960531234741,
451
+ "logits/real": -1.8868176937103271,
452
+ "logps/generated": -852.2977294921875,
453
+ "logps/real": -465.27099609375,
454
+ "loss": 0.0175,
455
+ "rewards/accuracies": 1.0,
456
+ "rewards/generated": -19.251428604125977,
457
+ "rewards/margins": 14.104260444641113,
458
+ "rewards/real": -5.1471662521362305,
459
+ "step": 220
460
+ },
461
+ {
462
+ "epoch": 0.37,
463
+ "eval_logits/generated": -1.7361782789230347,
464
+ "eval_logits/real": -1.8820877075195312,
465
+ "eval_logps/generated": -819.2310180664062,
466
+ "eval_logps/real": -497.7419128417969,
467
+ "eval_loss": 0.027269212529063225,
468
+ "eval_rewards/accuracies": 0.9879629611968994,
469
+ "eval_rewards/generated": -17.67508888244629,
470
+ "eval_rewards/margins": 12.245142936706543,
471
+ "eval_rewards/real": -5.4299445152282715,
472
+ "eval_runtime": 1798.3015,
473
+ "eval_samples_per_second": 2.401,
474
+ "eval_steps_per_second": 0.075,
475
+ "step": 225
476
+ },
477
+ {
478
+ "epoch": 0.38,
479
+ "learning_rate": 9.010989010989011e-08,
480
+ "logits/generated": -1.7464491128921509,
481
+ "logits/real": -1.911118507385254,
482
+ "logps/generated": -843.2506103515625,
483
+ "logps/real": -463.8089294433594,
484
+ "loss": 0.0137,
485
+ "rewards/accuracies": 0.9937499761581421,
486
+ "rewards/generated": -18.588809967041016,
487
+ "rewards/margins": 13.585103034973145,
488
+ "rewards/real": -5.0037055015563965,
489
+ "step": 230
490
+ },
491
+ {
492
+ "epoch": 0.4,
493
+ "learning_rate": 8.91941391941392e-08,
494
+ "logits/generated": -1.7304248809814453,
495
+ "logits/real": -1.868173360824585,
496
+ "logps/generated": -819.3607177734375,
497
+ "logps/real": -445.7488708496094,
498
+ "loss": 0.0219,
499
+ "rewards/accuracies": 0.981249988079071,
500
+ "rewards/generated": -17.680866241455078,
501
+ "rewards/margins": 12.903231620788574,
502
+ "rewards/real": -4.777635097503662,
503
+ "step": 240
504
+ },
505
+ {
506
+ "epoch": 0.41,
507
+ "learning_rate": 8.827838827838827e-08,
508
+ "logits/generated": -1.7114464044570923,
509
+ "logits/real": -1.8701032400131226,
510
+ "logps/generated": -823.8060302734375,
511
+ "logps/real": -483.9420471191406,
512
+ "loss": 0.0183,
513
+ "rewards/accuracies": 0.9937499761581421,
514
+ "rewards/generated": -17.834732055664062,
515
+ "rewards/margins": 12.7833251953125,
516
+ "rewards/real": -5.051407814025879,
517
+ "step": 250
518
+ },
519
+ {
520
+ "epoch": 0.41,
521
+ "eval_logits/generated": -1.729956865310669,
522
+ "eval_logits/real": -1.8793208599090576,
523
+ "eval_logps/generated": -826.3790893554688,
524
+ "eval_logps/real": -497.6258544921875,
525
+ "eval_loss": 0.025423016399145126,
526
+ "eval_rewards/accuracies": 0.9888888597488403,
527
+ "eval_rewards/generated": -18.389890670776367,
528
+ "eval_rewards/margins": 12.971549034118652,
529
+ "eval_rewards/real": -5.418341159820557,
530
+ "eval_runtime": 1801.3119,
531
+ "eval_samples_per_second": 2.397,
532
+ "eval_steps_per_second": 0.075,
533
+ "step": 250
534
+ },
535
+ {
536
+ "epoch": 0.43,
537
+ "learning_rate": 8.736263736263736e-08,
538
+ "logits/generated": -1.7352432012557983,
539
+ "logits/real": -1.9060261249542236,
540
+ "logps/generated": -870.6500854492188,
541
+ "logps/real": -461.4039611816406,
542
+ "loss": 0.0111,
543
+ "rewards/accuracies": 1.0,
544
+ "rewards/generated": -18.913497924804688,
545
+ "rewards/margins": 14.00297737121582,
546
+ "rewards/real": -4.910521030426025,
547
+ "step": 260
548
+ },
549
+ {
550
+ "epoch": 0.44,
551
+ "learning_rate": 8.644688644688645e-08,
552
+ "logits/generated": -1.784847617149353,
553
+ "logits/real": -1.9353469610214233,
554
+ "logps/generated": -850.8590087890625,
555
+ "logps/real": -482.2189025878906,
556
+ "loss": 0.0182,
557
+ "rewards/accuracies": 0.981249988079071,
558
+ "rewards/generated": -19.183507919311523,
559
+ "rewards/margins": 13.948068618774414,
560
+ "rewards/real": -5.235440731048584,
561
+ "step": 270
562
+ },
563
+ {
564
+ "epoch": 0.45,
565
+ "eval_logits/generated": -1.696116328239441,
566
+ "eval_logits/real": -1.8563601970672607,
567
+ "eval_logps/generated": -848.2400512695312,
568
+ "eval_logps/real": -504.34259033203125,
569
+ "eval_loss": 0.024484921246767044,
570
+ "eval_rewards/accuracies": 0.9888888597488403,
571
+ "eval_rewards/generated": -20.575990676879883,
572
+ "eval_rewards/margins": 14.485980033874512,
573
+ "eval_rewards/real": -6.09001350402832,
574
+ "eval_runtime": 1801.2175,
575
+ "eval_samples_per_second": 2.397,
576
+ "eval_steps_per_second": 0.075,
577
+ "step": 275
578
+ },
579
+ {
580
+ "epoch": 0.46,
581
+ "learning_rate": 8.553113553113552e-08,
582
+ "logits/generated": -1.6885887384414673,
583
+ "logits/real": -1.873110055923462,
584
+ "logps/generated": -874.4225463867188,
585
+ "logps/real": -485.9969177246094,
586
+ "loss": 0.0225,
587
+ "rewards/accuracies": 0.981249988079071,
588
+ "rewards/generated": -21.401386260986328,
589
+ "rewards/margins": 15.265310287475586,
590
+ "rewards/real": -6.136077404022217,
591
+ "step": 280
592
+ },
593
+ {
594
+ "epoch": 0.48,
595
+ "learning_rate": 8.461538461538461e-08,
596
+ "logits/generated": -1.7388379573822021,
597
+ "logits/real": -1.8577735424041748,
598
+ "logps/generated": -902.2374267578125,
599
+ "logps/real": -491.22247314453125,
600
+ "loss": 0.0217,
601
+ "rewards/accuracies": 0.9750000238418579,
602
+ "rewards/generated": -20.343700408935547,
603
+ "rewards/margins": 14.47362232208252,
604
+ "rewards/real": -5.870078086853027,
605
+ "step": 290
606
+ },
607
+ {
608
+ "epoch": 0.49,
609
+ "learning_rate": 8.36996336996337e-08,
610
+ "logits/generated": -1.760310173034668,
611
+ "logits/real": -1.9213718175888062,
612
+ "logps/generated": -855.2713623046875,
613
+ "logps/real": -482.796875,
614
+ "loss": 0.0253,
615
+ "rewards/accuracies": 0.9937499761581421,
616
+ "rewards/generated": -20.608051300048828,
617
+ "rewards/margins": 15.096084594726562,
618
+ "rewards/real": -5.511966228485107,
619
+ "step": 300
620
+ },
621
+ {
622
+ "epoch": 0.49,
623
+ "eval_logits/generated": -1.693785309791565,
624
+ "eval_logits/real": -1.8572747707366943,
625
+ "eval_logps/generated": -849.6640014648438,
626
+ "eval_logps/real": -502.681884765625,
627
+ "eval_loss": 0.02240588143467903,
628
+ "eval_rewards/accuracies": 0.989814817905426,
629
+ "eval_rewards/generated": -20.7183895111084,
630
+ "eval_rewards/margins": 14.79444694519043,
631
+ "eval_rewards/real": -5.923939228057861,
632
+ "eval_runtime": 1798.8833,
633
+ "eval_samples_per_second": 2.4,
634
+ "eval_steps_per_second": 0.075,
635
+ "step": 300
636
+ },
637
+ {
638
+ "epoch": 0.51,
639
+ "learning_rate": 8.278388278388278e-08,
640
+ "logits/generated": -1.6374238729476929,
641
+ "logits/real": -1.8183997869491577,
642
+ "logps/generated": -923.6209106445312,
643
+ "logps/real": -475.9380798339844,
644
+ "loss": 0.1301,
645
+ "rewards/accuracies": 0.987500011920929,
646
+ "rewards/generated": -25.12307357788086,
647
+ "rewards/margins": 18.438941955566406,
648
+ "rewards/real": -6.684133052825928,
649
+ "step": 310
650
+ },
651
+ {
652
+ "epoch": 0.53,
653
+ "learning_rate": 8.186813186813186e-08,
654
+ "logits/generated": -1.6634056568145752,
655
+ "logits/real": -1.8855922222137451,
656
+ "logps/generated": -906.6611328125,
657
+ "logps/real": -488.73858642578125,
658
+ "loss": 0.0075,
659
+ "rewards/accuracies": 1.0,
660
+ "rewards/generated": -24.581295013427734,
661
+ "rewards/margins": 18.07442283630371,
662
+ "rewards/real": -6.50687313079834,
663
+ "step": 320
664
+ },
665
+ {
666
+ "epoch": 0.53,
667
+ "eval_logits/generated": -1.6521793603897095,
668
+ "eval_logits/real": -1.8252357244491577,
669
+ "eval_logps/generated": -883.6064453125,
670
+ "eval_logps/real": -513.8781127929688,
671
+ "eval_loss": 0.023403111845254898,
672
+ "eval_rewards/accuracies": 0.989814817905426,
673
+ "eval_rewards/generated": -24.112627029418945,
674
+ "eval_rewards/margins": 17.069059371948242,
675
+ "eval_rewards/real": -7.043565273284912,
676
+ "eval_runtime": 1801.6344,
677
+ "eval_samples_per_second": 2.397,
678
+ "eval_steps_per_second": 0.075,
679
+ "step": 325
680
+ },
681
+ {
682
+ "epoch": 0.54,
683
+ "learning_rate": 8.095238095238095e-08,
684
+ "logits/generated": -1.585889458656311,
685
+ "logits/real": -1.804424524307251,
686
+ "logps/generated": -845.4251708984375,
687
+ "logps/real": -472.7271423339844,
688
+ "loss": 0.0545,
689
+ "rewards/accuracies": 0.96875,
690
+ "rewards/generated": -23.844438552856445,
691
+ "rewards/margins": 17.44953727722168,
692
+ "rewards/real": -6.39490270614624,
693
+ "step": 330
694
+ },
695
+ {
696
+ "epoch": 0.56,
697
+ "learning_rate": 8.003663003663003e-08,
698
+ "logits/generated": -1.6383155584335327,
699
+ "logits/real": -1.8644497394561768,
700
+ "logps/generated": -858.1883544921875,
701
+ "logps/real": -480.23931884765625,
702
+ "loss": 0.0129,
703
+ "rewards/accuracies": 1.0,
704
+ "rewards/generated": -20.641630172729492,
705
+ "rewards/margins": 15.211410522460938,
706
+ "rewards/real": -5.4302215576171875,
707
+ "step": 340
708
+ },
709
+ {
710
+ "epoch": 0.58,
711
+ "learning_rate": 7.912087912087911e-08,
712
+ "logits/generated": -1.7638896703720093,
713
+ "logits/real": -1.9181245565414429,
714
+ "logps/generated": -808.9601440429688,
715
+ "logps/real": -457.7825622558594,
716
+ "loss": 0.0141,
717
+ "rewards/accuracies": 0.987500011920929,
718
+ "rewards/generated": -19.34470558166504,
719
+ "rewards/margins": 14.01134204864502,
720
+ "rewards/real": -5.333361625671387,
721
+ "step": 350
722
+ },
723
+ {
724
+ "epoch": 0.58,
725
+ "eval_logits/generated": -1.7082347869873047,
726
+ "eval_logits/real": -1.8693056106567383,
727
+ "eval_logps/generated": -852.1936645507812,
728
+ "eval_logps/real": -499.138671875,
729
+ "eval_loss": 0.021183772012591362,
730
+ "eval_rewards/accuracies": 0.989814817905426,
731
+ "eval_rewards/generated": -20.971355438232422,
732
+ "eval_rewards/margins": 15.401734352111816,
733
+ "eval_rewards/real": -5.569622039794922,
734
+ "eval_runtime": 1777.6314,
735
+ "eval_samples_per_second": 2.429,
736
+ "eval_steps_per_second": 0.076,
737
+ "step": 350
738
+ },
739
+ {
740
+ "epoch": 0.59,
741
+ "learning_rate": 7.82051282051282e-08,
742
+ "logits/generated": -1.7445008754730225,
743
+ "logits/real": -1.909597396850586,
744
+ "logps/generated": -885.7131958007812,
745
+ "logps/real": -475.4231872558594,
746
+ "loss": 0.0247,
747
+ "rewards/accuracies": 1.0,
748
+ "rewards/generated": -21.5659236907959,
749
+ "rewards/margins": 16.31867218017578,
750
+ "rewards/real": -5.247251033782959,
751
+ "step": 360
752
+ },
753
+ {
754
+ "epoch": 0.61,
755
+ "learning_rate": 7.72893772893773e-08,
756
+ "logits/generated": -1.7469732761383057,
757
+ "logits/real": -1.8687480688095093,
758
+ "logps/generated": -842.2507934570312,
759
+ "logps/real": -467.54583740234375,
760
+ "loss": 0.0135,
761
+ "rewards/accuracies": 0.9937499761581421,
762
+ "rewards/generated": -19.440731048583984,
763
+ "rewards/margins": 14.80817985534668,
764
+ "rewards/real": -4.632552146911621,
765
+ "step": 370
766
+ },
767
+ {
768
+ "epoch": 0.62,
769
+ "eval_logits/generated": -1.7284820079803467,
770
+ "eval_logits/real": -1.8896727561950684,
771
+ "eval_logps/generated": -846.3809204101562,
772
+ "eval_logps/real": -496.0889587402344,
773
+ "eval_loss": 0.018172312527894974,
774
+ "eval_rewards/accuracies": 0.9907407164573669,
775
+ "eval_rewards/generated": -20.39007568359375,
776
+ "eval_rewards/margins": 15.125428199768066,
777
+ "eval_rewards/real": -5.264645099639893,
778
+ "eval_runtime": 1804.3242,
779
+ "eval_samples_per_second": 2.393,
780
+ "eval_steps_per_second": 0.075,
781
+ "step": 375
782
+ },
783
+ {
784
+ "epoch": 0.63,
785
+ "learning_rate": 7.637362637362636e-08,
786
+ "logits/generated": -1.7371108531951904,
787
+ "logits/real": -1.9044015407562256,
788
+ "logps/generated": -852.88427734375,
789
+ "logps/real": -489.7041015625,
790
+ "loss": 0.0123,
791
+ "rewards/accuracies": 0.9937499761581421,
792
+ "rewards/generated": -20.25876235961914,
793
+ "rewards/margins": 15.011631965637207,
794
+ "rewards/real": -5.247129917144775,
795
+ "step": 380
796
+ },
797
+ {
798
+ "epoch": 0.64,
799
+ "learning_rate": 7.545787545787545e-08,
800
+ "logits/generated": -1.7183958292007446,
801
+ "logits/real": -1.8460171222686768,
802
+ "logps/generated": -853.3533935546875,
803
+ "logps/real": -476.1839904785156,
804
+ "loss": 0.0069,
805
+ "rewards/accuracies": 0.9937499761581421,
806
+ "rewards/generated": -21.06509017944336,
807
+ "rewards/margins": 15.901025772094727,
808
+ "rewards/real": -5.164063453674316,
809
+ "step": 390
810
+ },
811
+ {
812
+ "epoch": 0.66,
813
+ "learning_rate": 7.454212454212454e-08,
814
+ "logits/generated": -1.7633212804794312,
815
+ "logits/real": -1.9220634698867798,
816
+ "logps/generated": -842.59765625,
817
+ "logps/real": -468.2982482910156,
818
+ "loss": 0.014,
819
+ "rewards/accuracies": 0.9750000238418579,
820
+ "rewards/generated": -20.100887298583984,
821
+ "rewards/margins": 15.023529052734375,
822
+ "rewards/real": -5.077359199523926,
823
+ "step": 400
824
+ },
825
+ {
826
+ "epoch": 0.66,
827
+ "eval_logits/generated": -1.7137374877929688,
828
+ "eval_logits/real": -1.8782566785812378,
829
+ "eval_logps/generated": -854.0593872070312,
830
+ "eval_logps/real": -498.4993591308594,
831
+ "eval_loss": 0.01818298175930977,
832
+ "eval_rewards/accuracies": 0.9907407164573669,
833
+ "eval_rewards/generated": -21.157926559448242,
834
+ "eval_rewards/margins": 15.652240753173828,
835
+ "eval_rewards/real": -5.505686283111572,
836
+ "eval_runtime": 1801.4399,
837
+ "eval_samples_per_second": 2.397,
838
+ "eval_steps_per_second": 0.075,
839
+ "step": 400
840
+ },
841
+ {
842
+ "epoch": 0.67,
843
+ "learning_rate": 7.362637362637363e-08,
844
+ "logits/generated": -1.7656316757202148,
845
+ "logits/real": -1.9041885137557983,
846
+ "logps/generated": -824.7591552734375,
847
+ "logps/real": -444.58935546875,
848
+ "loss": 0.0229,
849
+ "rewards/accuracies": 0.9937499761581421,
850
+ "rewards/generated": -20.36834144592285,
851
+ "rewards/margins": 15.581771850585938,
852
+ "rewards/real": -4.786566734313965,
853
+ "step": 410
854
+ },
855
+ {
856
+ "epoch": 0.69,
857
+ "learning_rate": 7.27106227106227e-08,
858
+ "logits/generated": -1.6781879663467407,
859
+ "logits/real": -1.8786585330963135,
860
+ "logps/generated": -863.8435668945312,
861
+ "logps/real": -460.38494873046875,
862
+ "loss": 0.0122,
863
+ "rewards/accuracies": 0.9937499761581421,
864
+ "rewards/generated": -22.69711685180664,
865
+ "rewards/margins": 17.59657859802246,
866
+ "rewards/real": -5.100537300109863,
867
+ "step": 420
868
+ },
869
+ {
870
+ "epoch": 0.7,
871
+ "eval_logits/generated": -1.7230830192565918,
872
+ "eval_logits/real": -1.8856515884399414,
873
+ "eval_logps/generated": -849.9996948242188,
874
+ "eval_logps/real": -496.84051513671875,
875
+ "eval_loss": 0.017169104889035225,
876
+ "eval_rewards/accuracies": 0.9907407164573669,
877
+ "eval_rewards/generated": -20.751964569091797,
878
+ "eval_rewards/margins": 15.412163734436035,
879
+ "eval_rewards/real": -5.33980131149292,
880
+ "eval_runtime": 1779.5809,
881
+ "eval_samples_per_second": 2.426,
882
+ "eval_steps_per_second": 0.076,
883
+ "step": 425
884
+ },
885
+ {
886
+ "epoch": 0.71,
887
+ "learning_rate": 7.17948717948718e-08,
888
+ "logits/generated": -1.7307789325714111,
889
+ "logits/real": -1.8954929113388062,
890
+ "logps/generated": -858.5565185546875,
891
+ "logps/real": -445.24554443359375,
892
+ "loss": 0.018,
893
+ "rewards/accuracies": 0.987500011920929,
894
+ "rewards/generated": -20.846343994140625,
895
+ "rewards/margins": 16.011089324951172,
896
+ "rewards/real": -4.835254669189453,
897
+ "step": 430
898
+ },
899
+ {
900
+ "epoch": 0.72,
901
+ "learning_rate": 7.087912087912088e-08,
902
+ "logits/generated": -1.7022396326065063,
903
+ "logits/real": -1.8817275762557983,
904
+ "logps/generated": -864.0067138671875,
905
+ "logps/real": -429.09539794921875,
906
+ "loss": 0.0169,
907
+ "rewards/accuracies": 0.987500011920929,
908
+ "rewards/generated": -20.980777740478516,
909
+ "rewards/margins": 16.663347244262695,
910
+ "rewards/real": -4.317431449890137,
911
+ "step": 440
912
+ },
913
+ {
914
+ "epoch": 0.74,
915
+ "learning_rate": 6.996336996336996e-08,
916
+ "logits/generated": -1.8108078241348267,
917
+ "logits/real": -1.9502532482147217,
918
+ "logps/generated": -838.1130981445312,
919
+ "logps/real": -445.90008544921875,
920
+ "loss": 0.0144,
921
+ "rewards/accuracies": 0.987500011920929,
922
+ "rewards/generated": -19.511409759521484,
923
+ "rewards/margins": 15.326568603515625,
924
+ "rewards/real": -4.184841632843018,
925
+ "step": 450
926
+ },
927
+ {
928
+ "epoch": 0.74,
929
+ "eval_logits/generated": -1.7465310096740723,
930
+ "eval_logits/real": -1.9042091369628906,
931
+ "eval_logps/generated": -836.2462768554688,
932
+ "eval_logps/real": -490.04827880859375,
933
+ "eval_loss": 0.016420260071754456,
934
+ "eval_rewards/accuracies": 0.9916666746139526,
935
+ "eval_rewards/generated": -19.37661361694336,
936
+ "eval_rewards/margins": 14.716034889221191,
937
+ "eval_rewards/real": -4.660578727722168,
938
+ "eval_runtime": 1791.4683,
939
+ "eval_samples_per_second": 2.41,
940
+ "eval_steps_per_second": 0.075,
941
+ "step": 450
942
+ },
943
+ {
944
+ "epoch": 0.76,
945
+ "learning_rate": 6.904761904761905e-08,
946
+ "logits/generated": -1.7178394794464111,
947
+ "logits/real": -1.9164073467254639,
948
+ "logps/generated": -814.5615844726562,
949
+ "logps/real": -447.05029296875,
950
+ "loss": 0.0206,
951
+ "rewards/accuracies": 1.0,
952
+ "rewards/generated": -18.99026870727539,
953
+ "rewards/margins": 14.712194442749023,
954
+ "rewards/real": -4.278077125549316,
955
+ "step": 460
956
+ },
957
+ {
958
+ "epoch": 0.77,
959
+ "learning_rate": 6.813186813186813e-08,
960
+ "logits/generated": -1.7678531408309937,
961
+ "logits/real": -1.9162557125091553,
962
+ "logps/generated": -866.0538330078125,
963
+ "logps/real": -465.385986328125,
964
+ "loss": 0.0103,
965
+ "rewards/accuracies": 1.0,
966
+ "rewards/generated": -20.878631591796875,
967
+ "rewards/margins": 16.432056427001953,
968
+ "rewards/real": -4.44657564163208,
969
+ "step": 470
970
+ },
971
+ {
972
+ "epoch": 0.78,
973
+ "eval_logits/generated": -1.744537353515625,
974
+ "eval_logits/real": -1.9063953161239624,
975
+ "eval_logps/generated": -843.5385131835938,
976
+ "eval_logps/real": -492.18194580078125,
977
+ "eval_loss": 0.015999892726540565,
978
+ "eval_rewards/accuracies": 0.9907407164573669,
979
+ "eval_rewards/generated": -20.1058349609375,
980
+ "eval_rewards/margins": 15.231893539428711,
981
+ "eval_rewards/real": -4.873941421508789,
982
+ "eval_runtime": 1802.1278,
983
+ "eval_samples_per_second": 2.396,
984
+ "eval_steps_per_second": 0.075,
985
+ "step": 475
986
+ },
987
+ {
988
+ "epoch": 0.79,
989
+ "learning_rate": 6.721611721611721e-08,
990
+ "logits/generated": -1.7018417119979858,
991
+ "logits/real": -1.8882300853729248,
992
+ "logps/generated": -894.1951293945312,
993
+ "logps/real": -452.98052978515625,
994
+ "loss": 0.0082,
995
+ "rewards/accuracies": 0.987500011920929,
996
+ "rewards/generated": -22.061939239501953,
997
+ "rewards/margins": 17.689193725585938,
998
+ "rewards/real": -4.372746467590332,
999
+ "step": 480
1000
+ },
1001
+ {
1002
+ "epoch": 0.81,
1003
+ "learning_rate": 6.63003663003663e-08,
1004
+ "logits/generated": -1.7523149251937866,
1005
+ "logits/real": -1.9084774255752563,
1006
+ "logps/generated": -886.0391845703125,
1007
+ "logps/real": -463.77880859375,
1008
+ "loss": 0.0116,
1009
+ "rewards/accuracies": 0.9937499761581421,
1010
+ "rewards/generated": -21.94992446899414,
1011
+ "rewards/margins": 17.210201263427734,
1012
+ "rewards/real": -4.739726543426514,
1013
+ "step": 490
1014
+ },
1015
+ {
1016
+ "epoch": 0.82,
1017
+ "learning_rate": 6.538461538461538e-08,
1018
+ "logits/generated": -1.7328628301620483,
1019
+ "logits/real": -1.9376299381256104,
1020
+ "logps/generated": -842.7811279296875,
1021
+ "logps/real": -459.4143981933594,
1022
+ "loss": 0.0147,
1023
+ "rewards/accuracies": 0.987500011920929,
1024
+ "rewards/generated": -21.233800888061523,
1025
+ "rewards/margins": 16.3835391998291,
1026
+ "rewards/real": -4.850262641906738,
1027
+ "step": 500
1028
+ },
1029
+ {
1030
+ "epoch": 0.82,
1031
+ "eval_logits/generated": -1.7434035539627075,
1032
+ "eval_logits/real": -1.9092177152633667,
1033
+ "eval_logps/generated": -852.0874633789062,
1034
+ "eval_logps/real": -494.6623229980469,
1035
+ "eval_loss": 0.015602019615471363,
1036
+ "eval_rewards/accuracies": 0.9916666746139526,
1037
+ "eval_rewards/generated": -20.96072769165039,
1038
+ "eval_rewards/margins": 15.838738441467285,
1039
+ "eval_rewards/real": -5.121987342834473,
1040
+ "eval_runtime": 1801.3586,
1041
+ "eval_samples_per_second": 2.397,
1042
+ "eval_steps_per_second": 0.075,
1043
+ "step": 500
1044
+ },
1045
+ {
1046
+ "epoch": 0.84,
1047
+ "learning_rate": 6.446886446886448e-08,
1048
+ "logits/generated": -1.7443310022354126,
1049
+ "logits/real": -1.906089186668396,
1050
+ "logps/generated": -855.3189697265625,
1051
+ "logps/real": -479.55206298828125,
1052
+ "loss": 0.017,
1053
+ "rewards/accuracies": 0.9750000238418579,
1054
+ "rewards/generated": -21.070411682128906,
1055
+ "rewards/margins": 15.907896041870117,
1056
+ "rewards/real": -5.162516117095947,
1057
+ "step": 510
1058
+ },
1059
+ {
1060
+ "epoch": 0.86,
1061
+ "learning_rate": 6.355311355311355e-08,
1062
+ "logits/generated": -1.7915077209472656,
1063
+ "logits/real": -1.96005117893219,
1064
+ "logps/generated": -909.4000854492188,
1065
+ "logps/real": -510.1407165527344,
1066
+ "loss": 0.0154,
1067
+ "rewards/accuracies": 0.9937499761581421,
1068
+ "rewards/generated": -22.668399810791016,
1069
+ "rewards/margins": 17.420974731445312,
1070
+ "rewards/real": -5.247425079345703,
1071
+ "step": 520
1072
+ },
1073
+ {
1074
+ "epoch": 0.86,
1075
+ "eval_logits/generated": -1.7357203960418701,
1076
+ "eval_logits/real": -1.903997778892517,
1077
+ "eval_logps/generated": -856.4739990234375,
1078
+ "eval_logps/real": -494.9234924316406,
1079
+ "eval_loss": 0.015464075841009617,
1080
+ "eval_rewards/accuracies": 0.9916666746139526,
1081
+ "eval_rewards/generated": -21.399391174316406,
1082
+ "eval_rewards/margins": 16.25129508972168,
1083
+ "eval_rewards/real": -5.148096561431885,
1084
+ "eval_runtime": 1799.2565,
1085
+ "eval_samples_per_second": 2.4,
1086
+ "eval_steps_per_second": 0.075,
1087
+ "step": 525
1088
+ },
1089
+ {
1090
+ "epoch": 0.87,
1091
+ "learning_rate": 6.263736263736263e-08,
1092
+ "logits/generated": -1.7220814228057861,
1093
+ "logits/real": -1.896211862564087,
1094
+ "logps/generated": -864.8518676757812,
1095
+ "logps/real": -481.7032775878906,
1096
+ "loss": 0.0081,
1097
+ "rewards/accuracies": 1.0,
1098
+ "rewards/generated": -21.411062240600586,
1099
+ "rewards/margins": 16.603487014770508,
1100
+ "rewards/real": -4.807575702667236,
1101
+ "step": 530
1102
+ },
1103
+ {
1104
+ "epoch": 0.89,
1105
+ "learning_rate": 6.172161172161173e-08,
1106
+ "logits/generated": -1.7355706691741943,
1107
+ "logits/real": -1.9386104345321655,
1108
+ "logps/generated": -856.8909301757812,
1109
+ "logps/real": -455.5501403808594,
1110
+ "loss": 0.0102,
1111
+ "rewards/accuracies": 0.987500011920929,
1112
+ "rewards/generated": -21.787107467651367,
1113
+ "rewards/margins": 16.66501808166504,
1114
+ "rewards/real": -5.122087001800537,
1115
+ "step": 540
1116
+ },
1117
+ {
1118
+ "epoch": 0.91,
1119
+ "learning_rate": 6.08058608058608e-08,
1120
+ "logits/generated": -1.6525169610977173,
1121
+ "logits/real": -1.872809648513794,
1122
+ "logps/generated": -870.7745361328125,
1123
+ "logps/real": -459.68572998046875,
1124
+ "loss": 0.0158,
1125
+ "rewards/accuracies": 0.9937499761581421,
1126
+ "rewards/generated": -21.89577865600586,
1127
+ "rewards/margins": 16.658113479614258,
1128
+ "rewards/real": -5.237664222717285,
1129
+ "step": 550
1130
+ },
1131
+ {
1132
+ "epoch": 0.91,
1133
+ "eval_logits/generated": -1.713934302330017,
1134
+ "eval_logits/real": -1.8881142139434814,
1135
+ "eval_logps/generated": -872.0122680664062,
1136
+ "eval_logps/real": -499.5303955078125,
1137
+ "eval_loss": 0.015055526979267597,
1138
+ "eval_rewards/accuracies": 0.9916666746139526,
1139
+ "eval_rewards/generated": -22.95322036743164,
1140
+ "eval_rewards/margins": 17.34442710876465,
1141
+ "eval_rewards/real": -5.608795166015625,
1142
+ "eval_runtime": 1796.1592,
1143
+ "eval_samples_per_second": 2.404,
1144
+ "eval_steps_per_second": 0.075,
1145
+ "step": 550
1146
+ },
1147
+ {
1148
+ "epoch": 0.92,
1149
+ "learning_rate": 5.989010989010988e-08,
1150
+ "logits/generated": -1.7358171939849854,
1151
+ "logits/real": -1.907268762588501,
1152
+ "logps/generated": -841.7224731445312,
1153
+ "logps/real": -471.75689697265625,
1154
+ "loss": 0.0081,
1155
+ "rewards/accuracies": 1.0,
1156
+ "rewards/generated": -21.611263275146484,
1157
+ "rewards/margins": 16.323144912719727,
1158
+ "rewards/real": -5.288116455078125,
1159
+ "step": 560
1160
+ },
1161
+ {
1162
+ "epoch": 0.94,
1163
+ "learning_rate": 5.897435897435897e-08,
1164
+ "logits/generated": -1.7383735179901123,
1165
+ "logits/real": -1.929496169090271,
1166
+ "logps/generated": -876.6693115234375,
1167
+ "logps/real": -483.697998046875,
1168
+ "loss": 0.0053,
1169
+ "rewards/accuracies": 1.0,
1170
+ "rewards/generated": -23.30849838256836,
1171
+ "rewards/margins": 17.91643714904785,
1172
+ "rewards/real": -5.392062664031982,
1173
+ "step": 570
1174
+ },
1175
+ {
1176
+ "epoch": 0.95,
1177
+ "eval_logits/generated": -1.7112655639648438,
1178
+ "eval_logits/real": -1.8888392448425293,
1179
+ "eval_logps/generated": -877.6972045898438,
1180
+ "eval_logps/real": -500.6514587402344,
1181
+ "eval_loss": 0.01491004228591919,
1182
+ "eval_rewards/accuracies": 0.9916666746139526,
1183
+ "eval_rewards/generated": -23.521709442138672,
1184
+ "eval_rewards/margins": 17.800806045532227,
1185
+ "eval_rewards/real": -5.7208991050720215,
1186
+ "eval_runtime": 1799.8979,
1187
+ "eval_samples_per_second": 2.399,
1188
+ "eval_steps_per_second": 0.075,
1189
+ "step": 575
1190
+ },
1191
+ {
1192
+ "epoch": 0.95,
1193
+ "learning_rate": 5.805860805860806e-08,
1194
+ "logits/generated": -1.7410743236541748,
1195
+ "logits/real": -1.901346206665039,
1196
+ "logps/generated": -881.9417724609375,
1197
+ "logps/real": -473.64892578125,
1198
+ "loss": 0.0153,
1199
+ "rewards/accuracies": 0.9937499761581421,
1200
+ "rewards/generated": -23.52071762084961,
1201
+ "rewards/margins": 17.951566696166992,
1202
+ "rewards/real": -5.569148540496826,
1203
+ "step": 580
1204
+ },
1205
+ {
1206
+ "epoch": 0.97,
1207
+ "learning_rate": 5.714285714285714e-08,
1208
+ "logits/generated": -1.7443621158599854,
1209
+ "logits/real": -1.8728523254394531,
1210
+ "logps/generated": -892.2806396484375,
1211
+ "logps/real": -453.60552978515625,
1212
+ "loss": 0.015,
1213
+ "rewards/accuracies": 0.987500011920929,
1214
+ "rewards/generated": -23.209545135498047,
1215
+ "rewards/margins": 18.014381408691406,
1216
+ "rewards/real": -5.195165157318115,
1217
+ "step": 590
1218
+ },
1219
+ {
1220
+ "epoch": 0.99,
1221
+ "learning_rate": 5.622710622710623e-08,
1222
+ "logits/generated": -1.7170673608779907,
1223
+ "logits/real": -1.889995813369751,
1224
+ "logps/generated": -875.4261474609375,
1225
+ "logps/real": -450.47576904296875,
1226
+ "loss": 0.008,
1227
+ "rewards/accuracies": 0.987500011920929,
1228
+ "rewards/generated": -23.910694122314453,
1229
+ "rewards/margins": 18.570858001708984,
1230
+ "rewards/real": -5.339831352233887,
1231
+ "step": 600
1232
+ },
1233
+ {
1234
+ "epoch": 0.99,
1235
+ "eval_logits/generated": -1.708635926246643,
1236
+ "eval_logits/real": -1.8878159523010254,
1237
+ "eval_logps/generated": -879.9544067382812,
1238
+ "eval_logps/real": -500.9651184082031,
1239
+ "eval_loss": 0.01472516916692257,
1240
+ "eval_rewards/accuracies": 0.9916666746139526,
1241
+ "eval_rewards/generated": -23.74742317199707,
1242
+ "eval_rewards/margins": 17.995161056518555,
1243
+ "eval_rewards/real": -5.752264022827148,
1244
+ "eval_runtime": 1800.102,
1245
+ "eval_samples_per_second": 2.399,
1246
+ "eval_steps_per_second": 0.075,
1247
+ "step": 600
1248
+ },
1249
+ {
1250
+ "epoch": 1.0,
1251
+ "learning_rate": 5.531135531135531e-08,
1252
+ "logits/generated": -1.6815847158432007,
1253
+ "logits/real": -1.917245626449585,
1254
+ "logps/generated": -922.5784912109375,
1255
+ "logps/real": -475.2225646972656,
1256
+ "loss": 0.0094,
1257
+ "rewards/accuracies": 1.0,
1258
+ "rewards/generated": -25.03234100341797,
1259
+ "rewards/margins": 19.452983856201172,
1260
+ "rewards/real": -5.579358100891113,
1261
+ "step": 610
1262
+ },
1263
+ {
1264
+ "epoch": 1.02,
1265
+ "learning_rate": 5.439560439560439e-08,
1266
+ "logits/generated": -1.7002710103988647,
1267
+ "logits/real": -1.886687994003296,
1268
+ "logps/generated": -922.806640625,
1269
+ "logps/real": -481.56787109375,
1270
+ "loss": 0.0049,
1271
+ "rewards/accuracies": 1.0,
1272
+ "rewards/generated": -25.185983657836914,
1273
+ "rewards/margins": 19.83902359008789,
1274
+ "rewards/real": -5.346956729888916,
1275
+ "step": 620
1276
+ },
1277
+ {
1278
+ "epoch": 1.03,
1279
+ "eval_logits/generated": -1.6730928421020508,
1280
+ "eval_logits/real": -1.8584686517715454,
1281
+ "eval_logps/generated": -891.3632202148438,
1282
+ "eval_logps/real": -505.2818298339844,
1283
+ "eval_loss": 0.015368033200502396,
1284
+ "eval_rewards/accuracies": 0.9907407164573669,
1285
+ "eval_rewards/generated": -24.888301849365234,
1286
+ "eval_rewards/margins": 18.704362869262695,
1287
+ "eval_rewards/real": -6.183938026428223,
1288
+ "eval_runtime": 1782.9432,
1289
+ "eval_samples_per_second": 2.422,
1290
+ "eval_steps_per_second": 0.076,
1291
+ "step": 625
1292
+ },
1293
+ {
1294
+ "epoch": 1.04,
1295
+ "learning_rate": 5.347985347985348e-08,
1296
+ "logits/generated": -1.6648222208023071,
1297
+ "logits/real": -1.8549429178237915,
1298
+ "logps/generated": -912.20654296875,
1299
+ "logps/real": -520.7745361328125,
1300
+ "loss": 0.004,
1301
+ "rewards/accuracies": 1.0,
1302
+ "rewards/generated": -24.70351791381836,
1303
+ "rewards/margins": 18.5965518951416,
1304
+ "rewards/real": -6.106965065002441,
1305
+ "step": 630
1306
+ },
1307
+ {
1308
+ "epoch": 1.05,
1309
+ "learning_rate": 5.256410256410256e-08,
1310
+ "logits/generated": -1.704904556274414,
1311
+ "logits/real": -1.8857839107513428,
1312
+ "logps/generated": -927.8040771484375,
1313
+ "logps/real": -481.84820556640625,
1314
+ "loss": 0.0069,
1315
+ "rewards/accuracies": 0.9937499761581421,
1316
+ "rewards/generated": -26.38739585876465,
1317
+ "rewards/margins": 20.759992599487305,
1318
+ "rewards/real": -5.627402305603027,
1319
+ "step": 640
1320
+ },
1321
+ {
1322
+ "epoch": 1.07,
1323
+ "learning_rate": 5.164835164835165e-08,
1324
+ "logits/generated": -1.7228724956512451,
1325
+ "logits/real": -1.8705856800079346,
1326
+ "logps/generated": -894.1580810546875,
1327
+ "logps/real": -462.95697021484375,
1328
+ "loss": 0.0057,
1329
+ "rewards/accuracies": 0.9937499761581421,
1330
+ "rewards/generated": -24.506175994873047,
1331
+ "rewards/margins": 18.540332794189453,
1332
+ "rewards/real": -5.96584415435791,
1333
+ "step": 650
1334
+ },
1335
+ {
1336
+ "epoch": 1.07,
1337
+ "eval_logits/generated": -1.6592012643814087,
1338
+ "eval_logits/real": -1.848427176475525,
1339
+ "eval_logps/generated": -901.4036865234375,
1340
+ "eval_logps/real": -508.3891906738281,
1341
+ "eval_loss": 0.015495581552386284,
1342
+ "eval_rewards/accuracies": 0.9916666746139526,
1343
+ "eval_rewards/generated": -25.892351150512695,
1344
+ "eval_rewards/margins": 19.397686004638672,
1345
+ "eval_rewards/real": -6.494665145874023,
1346
+ "eval_runtime": 1785.6862,
1347
+ "eval_samples_per_second": 2.418,
1348
+ "eval_steps_per_second": 0.076,
1349
+ "step": 650
1350
+ },
1351
+ {
1352
+ "epoch": 1.09,
1353
+ "learning_rate": 5.073260073260073e-08,
1354
+ "logits/generated": -1.6706949472427368,
1355
+ "logits/real": -1.8376855850219727,
1356
+ "logps/generated": -927.1456298828125,
1357
+ "logps/real": -447.51123046875,
1358
+ "loss": 0.0031,
1359
+ "rewards/accuracies": 0.9937499761581421,
1360
+ "rewards/generated": -27.89125633239746,
1361
+ "rewards/margins": 21.775976181030273,
1362
+ "rewards/real": -6.1152777671813965,
1363
+ "step": 660
1364
+ },
1365
+ {
1366
+ "epoch": 1.1,
1367
+ "learning_rate": 4.981684981684982e-08,
1368
+ "logits/generated": -1.6462090015411377,
1369
+ "logits/real": -1.8371574878692627,
1370
+ "logps/generated": -882.9945068359375,
1371
+ "logps/real": -455.0235290527344,
1372
+ "loss": 0.0076,
1373
+ "rewards/accuracies": 0.9937499761581421,
1374
+ "rewards/generated": -25.57509994506836,
1375
+ "rewards/margins": 19.432373046875,
1376
+ "rewards/real": -6.142725944519043,
1377
+ "step": 670
1378
+ },
1379
+ {
1380
+ "epoch": 1.11,
1381
+ "eval_logits/generated": -1.640711784362793,
1382
+ "eval_logits/real": -1.8339245319366455,
1383
+ "eval_logps/generated": -911.697021484375,
1384
+ "eval_logps/real": -511.9859313964844,
1385
+ "eval_loss": 0.01578509621322155,
1386
+ "eval_rewards/accuracies": 0.9916666746139526,
1387
+ "eval_rewards/generated": -26.921693801879883,
1388
+ "eval_rewards/margins": 20.067354202270508,
1389
+ "eval_rewards/real": -6.854339599609375,
1390
+ "eval_runtime": 1782.1365,
1391
+ "eval_samples_per_second": 2.423,
1392
+ "eval_steps_per_second": 0.076,
1393
+ "step": 675
1394
+ },
1395
+ {
1396
+ "epoch": 1.12,
1397
+ "learning_rate": 4.8901098901098895e-08,
1398
+ "logits/generated": -1.6573280096054077,
1399
+ "logits/real": -1.885148286819458,
1400
+ "logps/generated": -923.4420776367188,
1401
+ "logps/real": -483.80621337890625,
1402
+ "loss": 0.0057,
1403
+ "rewards/accuracies": 0.9937499761581421,
1404
+ "rewards/generated": -28.48358154296875,
1405
+ "rewards/margins": 21.583927154541016,
1406
+ "rewards/real": -6.899654388427734,
1407
+ "step": 680
1408
+ },
1409
+ {
1410
+ "epoch": 1.14,
1411
+ "learning_rate": 4.7985347985347985e-08,
1412
+ "logits/generated": -1.6559168100357056,
1413
+ "logits/real": -1.8097467422485352,
1414
+ "logps/generated": -922.6828002929688,
1415
+ "logps/real": -488.87664794921875,
1416
+ "loss": 0.005,
1417
+ "rewards/accuracies": 1.0,
1418
+ "rewards/generated": -26.857372283935547,
1419
+ "rewards/margins": 20.4537353515625,
1420
+ "rewards/real": -6.403636932373047,
1421
+ "step": 690
1422
+ },
1423
+ {
1424
+ "epoch": 1.15,
1425
+ "learning_rate": 4.706959706959707e-08,
1426
+ "logits/generated": -1.6271326541900635,
1427
+ "logits/real": -1.840662956237793,
1428
+ "logps/generated": -901.0564575195312,
1429
+ "logps/real": -491.31146240234375,
1430
+ "loss": 0.004,
1431
+ "rewards/accuracies": 1.0,
1432
+ "rewards/generated": -27.303613662719727,
1433
+ "rewards/margins": 20.737079620361328,
1434
+ "rewards/real": -6.566534996032715,
1435
+ "step": 700
1436
+ },
1437
+ {
1438
+ "epoch": 1.15,
1439
+ "eval_logits/generated": -1.6268597841262817,
1440
+ "eval_logits/real": -1.8235687017440796,
1441
+ "eval_logps/generated": -920.2236328125,
1442
+ "eval_logps/real": -514.767822265625,
1443
+ "eval_loss": 0.015848280861973763,
1444
+ "eval_rewards/accuracies": 0.9916666746139526,
1445
+ "eval_rewards/generated": -27.77434730529785,
1446
+ "eval_rewards/margins": 20.641807556152344,
1447
+ "eval_rewards/real": -7.132537841796875,
1448
+ "eval_runtime": 1781.0614,
1449
+ "eval_samples_per_second": 2.424,
1450
+ "eval_steps_per_second": 0.076,
1451
+ "step": 700
1452
+ },
1453
+ {
1454
+ "epoch": 1.17,
1455
+ "learning_rate": 4.615384615384615e-08,
1456
+ "logits/generated": -1.6111915111541748,
1457
+ "logits/real": -1.8123859167099,
1458
+ "logps/generated": -929.34326171875,
1459
+ "logps/real": -504.7548828125,
1460
+ "loss": 0.0041,
1461
+ "rewards/accuracies": 1.0,
1462
+ "rewards/generated": -27.69466781616211,
1463
+ "rewards/margins": 20.481304168701172,
1464
+ "rewards/real": -7.213364601135254,
1465
+ "step": 710
1466
+ },
1467
+ {
1468
+ "epoch": 1.19,
1469
+ "learning_rate": 4.5238095238095236e-08,
1470
+ "logits/generated": -1.6448142528533936,
1471
+ "logits/real": -1.8344615697860718,
1472
+ "logps/generated": -938.2579345703125,
1473
+ "logps/real": -477.796630859375,
1474
+ "loss": 0.0168,
1475
+ "rewards/accuracies": 0.987500011920929,
1476
+ "rewards/generated": -28.318180084228516,
1477
+ "rewards/margins": 21.399818420410156,
1478
+ "rewards/real": -6.918364524841309,
1479
+ "step": 720
1480
+ },
1481
+ {
1482
+ "epoch": 1.19,
1483
+ "eval_logits/generated": -1.656567931175232,
1484
+ "eval_logits/real": -1.8447872400283813,
1485
+ "eval_logps/generated": -905.2711181640625,
1486
+ "eval_logps/real": -512.4611206054688,
1487
+ "eval_loss": 0.015721740201115608,
1488
+ "eval_rewards/accuracies": 0.9916666746139526,
1489
+ "eval_rewards/generated": -26.27910041809082,
1490
+ "eval_rewards/margins": 19.37723731994629,
1491
+ "eval_rewards/real": -6.901863098144531,
1492
+ "eval_runtime": 1781.2515,
1493
+ "eval_samples_per_second": 2.424,
1494
+ "eval_steps_per_second": 0.076,
1495
+ "step": 725
1496
+ },
1497
+ {
1498
+ "epoch": 1.2,
1499
+ "learning_rate": 4.432234432234432e-08,
1500
+ "logits/generated": -1.6806806325912476,
1501
+ "logits/real": -1.909649133682251,
1502
+ "logps/generated": -898.3150634765625,
1503
+ "logps/real": -469.5130920410156,
1504
+ "loss": 0.0053,
1505
+ "rewards/accuracies": 1.0,
1506
+ "rewards/generated": -25.730077743530273,
1507
+ "rewards/margins": 19.823253631591797,
1508
+ "rewards/real": -5.906826019287109,
1509
+ "step": 730
1510
+ },
1511
+ {
1512
+ "epoch": 1.22,
1513
+ "learning_rate": 4.34065934065934e-08,
1514
+ "logits/generated": -1.734819769859314,
1515
+ "logits/real": -1.9214661121368408,
1516
+ "logps/generated": -965.8170776367188,
1517
+ "logps/real": -494.10760498046875,
1518
+ "loss": 0.0053,
1519
+ "rewards/accuracies": 1.0,
1520
+ "rewards/generated": -27.31577491760254,
1521
+ "rewards/margins": 20.62114906311035,
1522
+ "rewards/real": -6.6946234703063965,
1523
+ "step": 740
1524
+ },
1525
+ {
1526
+ "epoch": 1.23,
1527
+ "learning_rate": 4.2490842490842486e-08,
1528
+ "logits/generated": -1.7267796993255615,
1529
+ "logits/real": -1.929030179977417,
1530
+ "logps/generated": -886.3689575195312,
1531
+ "logps/real": -481.1717834472656,
1532
+ "loss": 0.0022,
1533
+ "rewards/accuracies": 1.0,
1534
+ "rewards/generated": -24.81966781616211,
1535
+ "rewards/margins": 18.82332992553711,
1536
+ "rewards/real": -5.996334552764893,
1537
+ "step": 750
1538
+ },
1539
+ {
1540
+ "epoch": 1.23,
1541
+ "eval_logits/generated": -1.6533170938491821,
1542
+ "eval_logits/real": -1.8422995805740356,
1543
+ "eval_logps/generated": -907.6251220703125,
1544
+ "eval_logps/real": -513.0281372070312,
1545
+ "eval_loss": 0.016253722831606865,
1546
+ "eval_rewards/accuracies": 0.9916666746139526,
1547
+ "eval_rewards/generated": -26.514497756958008,
1548
+ "eval_rewards/margins": 19.55593490600586,
1549
+ "eval_rewards/real": -6.958561897277832,
1550
+ "eval_runtime": 1784.393,
1551
+ "eval_samples_per_second": 2.42,
1552
+ "eval_steps_per_second": 0.076,
1553
+ "step": 750
1554
+ },
1555
+ {
1556
+ "epoch": 1.25,
1557
+ "learning_rate": 4.1575091575091576e-08,
1558
+ "logits/generated": -1.6823298931121826,
1559
+ "logits/real": -1.8801406621932983,
1560
+ "logps/generated": -974.8401489257812,
1561
+ "logps/real": -505.0538635253906,
1562
+ "loss": 0.0032,
1563
+ "rewards/accuracies": 1.0,
1564
+ "rewards/generated": -28.211589813232422,
1565
+ "rewards/margins": 21.735652923583984,
1566
+ "rewards/real": -6.4759368896484375,
1567
+ "step": 760
1568
+ },
1569
+ {
1570
+ "epoch": 1.27,
1571
+ "learning_rate": 4.065934065934066e-08,
1572
+ "logits/generated": -1.6613868474960327,
1573
+ "logits/real": -1.8679672479629517,
1574
+ "logps/generated": -923.1788330078125,
1575
+ "logps/real": -530.2198486328125,
1576
+ "loss": 0.0039,
1577
+ "rewards/accuracies": 0.987500011920929,
1578
+ "rewards/generated": -26.568191528320312,
1579
+ "rewards/margins": 19.437541961669922,
1580
+ "rewards/real": -7.130646705627441,
1581
+ "step": 770
1582
+ },
1583
+ {
1584
+ "epoch": 1.28,
1585
+ "eval_logits/generated": -1.6368576288223267,
1586
+ "eval_logits/real": -1.8327449560165405,
1587
+ "eval_logps/generated": -924.7037963867188,
1588
+ "eval_logps/real": -518.8289184570312,
1589
+ "eval_loss": 0.0164579376578331,
1590
+ "eval_rewards/accuracies": 0.9916666746139526,
1591
+ "eval_rewards/generated": -28.222370147705078,
1592
+ "eval_rewards/margins": 20.6837215423584,
1593
+ "eval_rewards/real": -7.538645267486572,
1594
+ "eval_runtime": 1783.8825,
1595
+ "eval_samples_per_second": 2.421,
1596
+ "eval_steps_per_second": 0.076,
1597
+ "step": 775
1598
+ },
1599
+ {
1600
+ "epoch": 1.28,
1601
+ "learning_rate": 3.9743589743589737e-08,
1602
+ "logits/generated": -1.635840654373169,
1603
+ "logits/real": -1.838230848312378,
1604
+ "logps/generated": -982.92529296875,
1605
+ "logps/real": -524.0345458984375,
1606
+ "loss": 0.0073,
1607
+ "rewards/accuracies": 0.9937499761581421,
1608
+ "rewards/generated": -28.60614585876465,
1609
+ "rewards/margins": 21.019775390625,
1610
+ "rewards/real": -7.586370944976807,
1611
+ "step": 780
1612
+ },
1613
+ {
1614
+ "epoch": 1.3,
1615
+ "learning_rate": 3.8827838827838827e-08,
1616
+ "logits/generated": -1.558452844619751,
1617
+ "logits/real": -1.8335201740264893,
1618
+ "logps/generated": -916.1337890625,
1619
+ "logps/real": -474.8262634277344,
1620
+ "loss": 0.0073,
1621
+ "rewards/accuracies": 0.9937499761581421,
1622
+ "rewards/generated": -27.614761352539062,
1623
+ "rewards/margins": 20.65240478515625,
1624
+ "rewards/real": -6.9623517990112305,
1625
+ "step": 790
1626
+ },
1627
+ {
1628
+ "epoch": 1.32,
1629
+ "learning_rate": 3.791208791208791e-08,
1630
+ "logits/generated": -1.59113347530365,
1631
+ "logits/real": -1.853981614112854,
1632
+ "logps/generated": -958.3170166015625,
1633
+ "logps/real": -483.369140625,
1634
+ "loss": 0.002,
1635
+ "rewards/accuracies": 1.0,
1636
+ "rewards/generated": -30.300689697265625,
1637
+ "rewards/margins": 23.347179412841797,
1638
+ "rewards/real": -6.9535112380981445,
1639
+ "step": 800
1640
+ },
1641
+ {
1642
+ "epoch": 1.32,
1643
+ "eval_logits/generated": -1.6365333795547485,
1644
+ "eval_logits/real": -1.8344322443008423,
1645
+ "eval_logps/generated": -928.9208374023438,
1646
+ "eval_logps/real": -520.0109252929688,
1647
+ "eval_loss": 0.016453638672828674,
1648
+ "eval_rewards/accuracies": 0.9907407164573669,
1649
+ "eval_rewards/generated": -28.644060134887695,
1650
+ "eval_rewards/margins": 20.987220764160156,
1651
+ "eval_rewards/real": -7.6568403244018555,
1652
+ "eval_runtime": 1804.1661,
1653
+ "eval_samples_per_second": 2.393,
1654
+ "eval_steps_per_second": 0.075,
1655
+ "step": 800
1656
+ },
1657
+ {
1658
+ "epoch": 1.33,
1659
+ "learning_rate": 3.6996336996336994e-08,
1660
+ "logits/generated": -1.6491447687149048,
1661
+ "logits/real": -1.8126541376113892,
1662
+ "logps/generated": -940.8536376953125,
1663
+ "logps/real": -472.17559814453125,
1664
+ "loss": 0.0032,
1665
+ "rewards/accuracies": 1.0,
1666
+ "rewards/generated": -29.954341888427734,
1667
+ "rewards/margins": 23.000102996826172,
1668
+ "rewards/real": -6.954239845275879,
1669
+ "step": 810
1670
+ },
1671
+ {
1672
+ "epoch": 1.35,
1673
+ "learning_rate": 3.608058608058608e-08,
1674
+ "logits/generated": -1.6780191659927368,
1675
+ "logits/real": -1.888399362564087,
1676
+ "logps/generated": -910.19189453125,
1677
+ "logps/real": -508.4088439941406,
1678
+ "loss": 0.002,
1679
+ "rewards/accuracies": 1.0,
1680
+ "rewards/generated": -28.487747192382812,
1681
+ "rewards/margins": 20.76497459411621,
1682
+ "rewards/real": -7.722770690917969,
1683
+ "step": 820
1684
+ },
1685
+ {
1686
+ "epoch": 1.36,
1687
+ "eval_logits/generated": -1.6348390579223633,
1688
+ "eval_logits/real": -1.835233449935913,
1689
+ "eval_logps/generated": -934.5078125,
1690
+ "eval_logps/real": -521.4318237304688,
1691
+ "eval_loss": 0.016549235209822655,
1692
+ "eval_rewards/accuracies": 0.9916666746139526,
1693
+ "eval_rewards/generated": -29.202777862548828,
1694
+ "eval_rewards/margins": 21.403844833374023,
1695
+ "eval_rewards/real": -7.7989301681518555,
1696
+ "eval_runtime": 1798.7077,
1697
+ "eval_samples_per_second": 2.401,
1698
+ "eval_steps_per_second": 0.075,
1699
+ "step": 825
1700
+ },
1701
+ {
1702
+ "epoch": 1.37,
1703
+ "learning_rate": 3.516483516483517e-08,
1704
+ "logits/generated": -1.6131916046142578,
1705
+ "logits/real": -1.8359510898590088,
1706
+ "logps/generated": -923.1203002929688,
1707
+ "logps/real": -487.068359375,
1708
+ "loss": 0.0039,
1709
+ "rewards/accuracies": 0.9937499761581421,
1710
+ "rewards/generated": -29.090587615966797,
1711
+ "rewards/margins": 21.882305145263672,
1712
+ "rewards/real": -7.208279609680176,
1713
+ "step": 830
1714
+ },
1715
+ {
1716
+ "epoch": 1.38,
1717
+ "learning_rate": 3.424908424908425e-08,
1718
+ "logits/generated": -1.6657575368881226,
1719
+ "logits/real": -1.8384662866592407,
1720
+ "logps/generated": -914.0133056640625,
1721
+ "logps/real": -461.9369201660156,
1722
+ "loss": 0.0057,
1723
+ "rewards/accuracies": 0.9937499761581421,
1724
+ "rewards/generated": -27.760547637939453,
1725
+ "rewards/margins": 21.031766891479492,
1726
+ "rewards/real": -6.7287774085998535,
1727
+ "step": 840
1728
+ },
1729
+ {
1730
+ "epoch": 1.4,
1731
+ "learning_rate": 3.333333333333333e-08,
1732
+ "logits/generated": -1.6930701732635498,
1733
+ "logits/real": -1.907292366027832,
1734
+ "logps/generated": -965.4385986328125,
1735
+ "logps/real": -523.5211181640625,
1736
+ "loss": 0.0019,
1737
+ "rewards/accuracies": 1.0,
1738
+ "rewards/generated": -30.622669219970703,
1739
+ "rewards/margins": 22.505613327026367,
1740
+ "rewards/real": -8.117053031921387,
1741
+ "step": 850
1742
+ },
1743
+ {
1744
+ "epoch": 1.4,
1745
+ "eval_logits/generated": -1.6166415214538574,
1746
+ "eval_logits/real": -1.8168882131576538,
1747
+ "eval_logps/generated": -938.438232421875,
1748
+ "eval_logps/real": -522.4202880859375,
1749
+ "eval_loss": 0.016505062580108643,
1750
+ "eval_rewards/accuracies": 0.9916666746139526,
1751
+ "eval_rewards/generated": -29.59580421447754,
1752
+ "eval_rewards/margins": 21.69802474975586,
1753
+ "eval_rewards/real": -7.897781848907471,
1754
+ "eval_runtime": 1800.0221,
1755
+ "eval_samples_per_second": 2.399,
1756
+ "eval_steps_per_second": 0.075,
1757
+ "step": 850
1758
+ },
1759
+ {
1760
+ "epoch": 1.42,
1761
+ "learning_rate": 3.241758241758242e-08,
1762
+ "logits/generated": -1.605548620223999,
1763
+ "logits/real": -1.788865089416504,
1764
+ "logps/generated": -1042.3509521484375,
1765
+ "logps/real": -527.3464965820312,
1766
+ "loss": 0.0018,
1767
+ "rewards/accuracies": 1.0,
1768
+ "rewards/generated": -32.53047561645508,
1769
+ "rewards/margins": 24.470928192138672,
1770
+ "rewards/real": -8.059545516967773,
1771
+ "step": 860
1772
+ },
1773
+ {
1774
+ "epoch": 1.43,
1775
+ "learning_rate": 3.15018315018315e-08,
1776
+ "logits/generated": -1.6185451745986938,
1777
+ "logits/real": -1.8139030933380127,
1778
+ "logps/generated": -965.8375854492188,
1779
+ "logps/real": -508.0126953125,
1780
+ "loss": 0.0041,
1781
+ "rewards/accuracies": 1.0,
1782
+ "rewards/generated": -30.107463836669922,
1783
+ "rewards/margins": 22.463966369628906,
1784
+ "rewards/real": -7.643497467041016,
1785
+ "step": 870
1786
+ },
1787
+ {
1788
+ "epoch": 1.44,
1789
+ "eval_logits/generated": -1.61648428440094,
1790
+ "eval_logits/real": -1.81755793094635,
1791
+ "eval_logps/generated": -940.4099731445312,
1792
+ "eval_logps/real": -523.1380004882812,
1793
+ "eval_loss": 0.016207309439778328,
1794
+ "eval_rewards/accuracies": 0.9916666746139526,
1795
+ "eval_rewards/generated": -29.792985916137695,
1796
+ "eval_rewards/margins": 21.823434829711914,
1797
+ "eval_rewards/real": -7.969552993774414,
1798
+ "eval_runtime": 1801.8606,
1799
+ "eval_samples_per_second": 2.396,
1800
+ "eval_steps_per_second": 0.075,
1801
+ "step": 875
1802
+ },
1803
+ {
1804
+ "epoch": 1.45,
1805
+ "learning_rate": 3.0586080586080584e-08,
1806
+ "logits/generated": -1.56507408618927,
1807
+ "logits/real": -1.8616406917572021,
1808
+ "logps/generated": -976.12548828125,
1809
+ "logps/real": -496.3408203125,
1810
+ "loss": 0.0063,
1811
+ "rewards/accuracies": 1.0,
1812
+ "rewards/generated": -31.310409545898438,
1813
+ "rewards/margins": 23.59577178955078,
1814
+ "rewards/real": -7.714636325836182,
1815
+ "step": 880
1816
+ },
1817
+ {
1818
+ "epoch": 1.47,
1819
+ "learning_rate": 2.9670329670329668e-08,
1820
+ "logits/generated": -1.5671743154525757,
1821
+ "logits/real": -1.733432412147522,
1822
+ "logps/generated": -920.916015625,
1823
+ "logps/real": -522.0253295898438,
1824
+ "loss": 0.0071,
1825
+ "rewards/accuracies": 1.0,
1826
+ "rewards/generated": -28.688098907470703,
1827
+ "rewards/margins": 20.804473876953125,
1828
+ "rewards/real": -7.883625030517578,
1829
+ "step": 890
1830
+ },
1831
+ {
1832
+ "epoch": 1.48,
1833
+ "learning_rate": 2.875457875457875e-08,
1834
+ "logits/generated": -1.6324392557144165,
1835
+ "logits/real": -1.8402057886123657,
1836
+ "logps/generated": -949.7025146484375,
1837
+ "logps/real": -491.1188049316406,
1838
+ "loss": 0.0023,
1839
+ "rewards/accuracies": 1.0,
1840
+ "rewards/generated": -31.04391098022461,
1841
+ "rewards/margins": 23.250308990478516,
1842
+ "rewards/real": -7.793595790863037,
1843
+ "step": 900
1844
+ },
1845
+ {
1846
+ "epoch": 1.48,
1847
+ "eval_logits/generated": -1.6044836044311523,
1848
+ "eval_logits/real": -1.809339165687561,
1849
+ "eval_logps/generated": -949.38916015625,
1850
+ "eval_logps/real": -525.528564453125,
1851
+ "eval_loss": 0.01638590730726719,
1852
+ "eval_rewards/accuracies": 0.9916666746139526,
1853
+ "eval_rewards/generated": -30.69091033935547,
1854
+ "eval_rewards/margins": 22.482301712036133,
1855
+ "eval_rewards/real": -8.208609580993652,
1856
+ "eval_runtime": 1798.0882,
1857
+ "eval_samples_per_second": 2.401,
1858
+ "eval_steps_per_second": 0.075,
1859
+ "step": 900
1860
+ },
1861
+ {
1862
+ "epoch": 1.5,
1863
+ "learning_rate": 2.7838827838827838e-08,
1864
+ "logits/generated": -1.596328854560852,
1865
+ "logits/real": -1.8236808776855469,
1866
+ "logps/generated": -962.3810424804688,
1867
+ "logps/real": -511.0006408691406,
1868
+ "loss": 0.0061,
1869
+ "rewards/accuracies": 0.9937499761581421,
1870
+ "rewards/generated": -31.28286361694336,
1871
+ "rewards/margins": 23.244314193725586,
1872
+ "rewards/real": -8.038549423217773,
1873
+ "step": 910
1874
+ },
1875
+ {
1876
+ "epoch": 1.51,
1877
+ "learning_rate": 2.692307692307692e-08,
1878
+ "logits/generated": -1.608758568763733,
1879
+ "logits/real": -1.8891884088516235,
1880
+ "logps/generated": -1000.3331298828125,
1881
+ "logps/real": -517.9403076171875,
1882
+ "loss": 0.0038,
1883
+ "rewards/accuracies": 1.0,
1884
+ "rewards/generated": -32.935813903808594,
1885
+ "rewards/margins": 25.521175384521484,
1886
+ "rewards/real": -7.414637565612793,
1887
+ "step": 920
1888
+ },
1889
+ {
1890
+ "epoch": 1.52,
1891
+ "eval_logits/generated": -1.591917634010315,
1892
+ "eval_logits/real": -1.7978274822235107,
1893
+ "eval_logps/generated": -949.2075805664062,
1894
+ "eval_logps/real": -524.6597290039062,
1895
+ "eval_loss": 0.016565019264817238,
1896
+ "eval_rewards/accuracies": 0.9916666746139526,
1897
+ "eval_rewards/generated": -30.672739028930664,
1898
+ "eval_rewards/margins": 22.55101776123047,
1899
+ "eval_rewards/real": -8.121725082397461,
1900
+ "eval_runtime": 1802.0893,
1901
+ "eval_samples_per_second": 2.396,
1902
+ "eval_steps_per_second": 0.075,
1903
+ "step": 925
1904
+ },
1905
+ {
1906
+ "epoch": 1.53,
1907
+ "learning_rate": 2.600732600732601e-08,
1908
+ "logits/generated": -1.537630319595337,
1909
+ "logits/real": -1.7378448247909546,
1910
+ "logps/generated": -968.7054443359375,
1911
+ "logps/real": -489.5370178222656,
1912
+ "loss": 0.0073,
1913
+ "rewards/accuracies": 1.0,
1914
+ "rewards/generated": -32.376976013183594,
1915
+ "rewards/margins": 24.455623626708984,
1916
+ "rewards/real": -7.921347141265869,
1917
+ "step": 930
1918
+ },
1919
+ {
1920
+ "epoch": 1.55,
1921
+ "learning_rate": 2.509157509157509e-08,
1922
+ "logits/generated": -1.604174017906189,
1923
+ "logits/real": -1.8114948272705078,
1924
+ "logps/generated": -937.0480346679688,
1925
+ "logps/real": -495.5521545410156,
1926
+ "loss": 0.0009,
1927
+ "rewards/accuracies": 1.0,
1928
+ "rewards/generated": -30.041767120361328,
1929
+ "rewards/margins": 22.23421859741211,
1930
+ "rewards/real": -7.807549953460693,
1931
+ "step": 940
1932
+ },
1933
+ {
1934
+ "epoch": 1.56,
1935
+ "learning_rate": 2.4175824175824175e-08,
1936
+ "logits/generated": -1.5735671520233154,
1937
+ "logits/real": -1.7788879871368408,
1938
+ "logps/generated": -938.4112548828125,
1939
+ "logps/real": -486.3994140625,
1940
+ "loss": 0.0096,
1941
+ "rewards/accuracies": 0.987500011920929,
1942
+ "rewards/generated": -29.847030639648438,
1943
+ "rewards/margins": 22.341915130615234,
1944
+ "rewards/real": -7.505116939544678,
1945
+ "step": 950
1946
+ },
1947
+ {
1948
+ "epoch": 1.56,
1949
+ "eval_logits/generated": -1.5908763408660889,
1950
+ "eval_logits/real": -1.7955536842346191,
1951
+ "eval_logps/generated": -943.6237182617188,
1952
+ "eval_logps/real": -521.6991577148438,
1953
+ "eval_loss": 0.016153085976839066,
1954
+ "eval_rewards/accuracies": 0.9916666746139526,
1955
+ "eval_rewards/generated": -30.114360809326172,
1956
+ "eval_rewards/margins": 22.2886962890625,
1957
+ "eval_rewards/real": -7.825665473937988,
1958
+ "eval_runtime": 1801.6388,
1959
+ "eval_samples_per_second": 2.397,
1960
+ "eval_steps_per_second": 0.075,
1961
+ "step": 950
1962
+ },
1963
+ {
1964
+ "epoch": 1.58,
1965
+ "learning_rate": 2.326007326007326e-08,
1966
+ "logits/generated": -1.5542490482330322,
1967
+ "logits/real": -1.7995363473892212,
1968
+ "logps/generated": -1011.2404174804688,
1969
+ "logps/real": -517.3983764648438,
1970
+ "loss": 0.0043,
1971
+ "rewards/accuracies": 0.987500011920929,
1972
+ "rewards/generated": -32.60115432739258,
1973
+ "rewards/margins": 24.621551513671875,
1974
+ "rewards/real": -7.979601860046387,
1975
+ "step": 960
1976
+ },
1977
+ {
1978
+ "epoch": 1.6,
1979
+ "learning_rate": 2.2344322344322346e-08,
1980
+ "logits/generated": -1.5683870315551758,
1981
+ "logits/real": -1.7601861953735352,
1982
+ "logps/generated": -916.2017822265625,
1983
+ "logps/real": -493.31494140625,
1984
+ "loss": 0.0057,
1985
+ "rewards/accuracies": 1.0,
1986
+ "rewards/generated": -29.836261749267578,
1987
+ "rewards/margins": 22.397926330566406,
1988
+ "rewards/real": -7.438332557678223,
1989
+ "step": 970
1990
+ },
1991
+ {
1992
+ "epoch": 1.6,
1993
+ "eval_logits/generated": -1.5854144096374512,
1994
+ "eval_logits/real": -1.7919222116470337,
1995
+ "eval_logps/generated": -949.1341552734375,
1996
+ "eval_logps/real": -523.7774658203125,
1997
+ "eval_loss": 0.0166173093020916,
1998
+ "eval_rewards/accuracies": 0.9916666746139526,
1999
+ "eval_rewards/generated": -30.665393829345703,
2000
+ "eval_rewards/margins": 22.631893157958984,
2001
+ "eval_rewards/real": -8.033498764038086,
2002
+ "eval_runtime": 1798.076,
2003
+ "eval_samples_per_second": 2.401,
2004
+ "eval_steps_per_second": 0.075,
2005
+ "step": 975
2006
+ },
2007
+ {
2008
+ "epoch": 1.61,
2009
+ "learning_rate": 2.1428571428571426e-08,
2010
+ "logits/generated": -1.6264305114746094,
2011
+ "logits/real": -1.851205825805664,
2012
+ "logps/generated": -929.2615356445312,
2013
+ "logps/real": -481.2755432128906,
2014
+ "loss": 0.0019,
2015
+ "rewards/accuracies": 1.0,
2016
+ "rewards/generated": -30.502222061157227,
2017
+ "rewards/margins": 23.29401206970215,
2018
+ "rewards/real": -7.2082085609436035,
2019
+ "step": 980
2020
+ },
2021
+ {
2022
+ "epoch": 1.63,
2023
+ "learning_rate": 2.0512820512820512e-08,
2024
+ "logits/generated": -1.6349788904190063,
2025
+ "logits/real": -1.8103811740875244,
2026
+ "logps/generated": -930.9240112304688,
2027
+ "logps/real": -486.3633728027344,
2028
+ "loss": 0.002,
2029
+ "rewards/accuracies": 0.9937499761581421,
2030
+ "rewards/generated": -28.78774642944336,
2031
+ "rewards/margins": 21.5825138092041,
2032
+ "rewards/real": -7.205234527587891,
2033
+ "step": 990
2034
+ },
2035
+ {
2036
+ "epoch": 1.65,
2037
+ "learning_rate": 1.9597069597069596e-08,
2038
+ "logits/generated": -1.5818378925323486,
2039
+ "logits/real": -1.8051410913467407,
2040
+ "logps/generated": -994.4307861328125,
2041
+ "logps/real": -506.43048095703125,
2042
+ "loss": 0.0046,
2043
+ "rewards/accuracies": 0.9937499761581421,
2044
+ "rewards/generated": -32.12641143798828,
2045
+ "rewards/margins": 24.237791061401367,
2046
+ "rewards/real": -7.888618469238281,
2047
+ "step": 1000
2048
+ },
2049
+ {
2050
+ "epoch": 1.65,
2051
+ "eval_logits/generated": -1.5768269300460815,
2052
+ "eval_logits/real": -1.7851576805114746,
2053
+ "eval_logps/generated": -952.6190795898438,
2054
+ "eval_logps/real": -525.199951171875,
2055
+ "eval_loss": 0.016495853662490845,
2056
+ "eval_rewards/accuracies": 0.9916666746139526,
2057
+ "eval_rewards/generated": -31.013896942138672,
2058
+ "eval_rewards/margins": 22.838150024414062,
2059
+ "eval_rewards/real": -8.175748825073242,
2060
+ "eval_runtime": 1801.7659,
2061
+ "eval_samples_per_second": 2.397,
2062
+ "eval_steps_per_second": 0.075,
2063
+ "step": 1000
2064
+ },
2065
+ {
2066
+ "epoch": 1.66,
2067
+ "learning_rate": 1.868131868131868e-08,
2068
+ "logits/generated": -1.5746369361877441,
2069
+ "logits/real": -1.8138281106948853,
2070
+ "logps/generated": -935.8997192382812,
2071
+ "logps/real": -524.5531616210938,
2072
+ "loss": 0.0043,
2073
+ "rewards/accuracies": 1.0,
2074
+ "rewards/generated": -29.7835750579834,
2075
+ "rewards/margins": 22.015628814697266,
2076
+ "rewards/real": -7.767943382263184,
2077
+ "step": 1010
2078
+ },
2079
+ {
2080
+ "epoch": 1.68,
2081
+ "learning_rate": 1.7765567765567766e-08,
2082
+ "logits/generated": -1.617248296737671,
2083
+ "logits/real": -1.8158845901489258,
2084
+ "logps/generated": -975.7345581054688,
2085
+ "logps/real": -520.7113037109375,
2086
+ "loss": 0.0009,
2087
+ "rewards/accuracies": 1.0,
2088
+ "rewards/generated": -31.075618743896484,
2089
+ "rewards/margins": 23.180437088012695,
2090
+ "rewards/real": -7.895182132720947,
2091
+ "step": 1020
2092
+ },
2093
+ {
2094
+ "epoch": 1.69,
2095
+ "eval_logits/generated": -1.5756635665893555,
2096
+ "eval_logits/real": -1.7830266952514648,
2097
+ "eval_logps/generated": -950.0453491210938,
2098
+ "eval_logps/real": -523.9951171875,
2099
+ "eval_loss": 0.01654692552983761,
2100
+ "eval_rewards/accuracies": 0.9916666746139526,
2101
+ "eval_rewards/generated": -30.75650978088379,
2102
+ "eval_rewards/margins": 22.701244354248047,
2103
+ "eval_rewards/real": -8.055268287658691,
2104
+ "eval_runtime": 1788.1081,
2105
+ "eval_samples_per_second": 2.415,
2106
+ "eval_steps_per_second": 0.075,
2107
+ "step": 1025
2108
+ },
2109
+ {
2110
+ "epoch": 1.7,
2111
+ "learning_rate": 1.684981684981685e-08,
2112
+ "logits/generated": -1.591524362564087,
2113
+ "logits/real": -1.7995145320892334,
2114
+ "logps/generated": -919.4166259765625,
2115
+ "logps/real": -488.1844787597656,
2116
+ "loss": 0.0037,
2117
+ "rewards/accuracies": 0.9937499761581421,
2118
+ "rewards/generated": -29.463176727294922,
2119
+ "rewards/margins": 22.12551498413086,
2120
+ "rewards/real": -7.337666988372803,
2121
+ "step": 1030
2122
+ },
2123
+ {
2124
+ "epoch": 1.71,
2125
+ "learning_rate": 1.5934065934065933e-08,
2126
+ "logits/generated": -1.5764684677124023,
2127
+ "logits/real": -1.7932662963867188,
2128
+ "logps/generated": -967.7091064453125,
2129
+ "logps/real": -521.2521362304688,
2130
+ "loss": 0.0034,
2131
+ "rewards/accuracies": 1.0,
2132
+ "rewards/generated": -31.743621826171875,
2133
+ "rewards/margins": 24.00382423400879,
2134
+ "rewards/real": -7.739800453186035,
2135
+ "step": 1040
2136
+ },
2137
+ {
2138
+ "epoch": 1.73,
2139
+ "learning_rate": 1.5018315018315017e-08,
2140
+ "logits/generated": -1.5970559120178223,
2141
+ "logits/real": -1.8183799982070923,
2142
+ "logps/generated": -954.7509765625,
2143
+ "logps/real": -505.82757568359375,
2144
+ "loss": 0.002,
2145
+ "rewards/accuracies": 1.0,
2146
+ "rewards/generated": -31.225833892822266,
2147
+ "rewards/margins": 23.92045021057129,
2148
+ "rewards/real": -7.305386543273926,
2149
+ "step": 1050
2150
+ },
2151
+ {
2152
+ "epoch": 1.73,
2153
+ "eval_logits/generated": -1.5691884756088257,
2154
+ "eval_logits/real": -1.7789667844772339,
2155
+ "eval_logps/generated": -955.8453369140625,
2156
+ "eval_logps/real": -525.2799682617188,
2157
+ "eval_loss": 0.01644195057451725,
2158
+ "eval_rewards/accuracies": 0.9916666746139526,
2159
+ "eval_rewards/generated": -31.336515426635742,
2160
+ "eval_rewards/margins": 23.152767181396484,
2161
+ "eval_rewards/real": -8.18375015258789,
2162
+ "eval_runtime": 1807.2715,
2163
+ "eval_samples_per_second": 2.389,
2164
+ "eval_steps_per_second": 0.075,
2165
+ "step": 1050
2166
+ },
2167
+ {
2168
+ "epoch": 1.74,
2169
+ "learning_rate": 1.4102564102564102e-08,
2170
+ "logits/generated": -1.5513131618499756,
2171
+ "logits/real": -1.7797822952270508,
2172
+ "logps/generated": -938.0900268554688,
2173
+ "logps/real": -503.44921875,
2174
+ "loss": 0.0041,
2175
+ "rewards/accuracies": 0.987500011920929,
2176
+ "rewards/generated": -31.36539649963379,
2177
+ "rewards/margins": 23.386436462402344,
2178
+ "rewards/real": -7.978959083557129,
2179
+ "step": 1060
2180
+ },
2181
+ {
2182
+ "epoch": 1.76,
2183
+ "learning_rate": 1.3186813186813187e-08,
2184
+ "logits/generated": -1.6085302829742432,
2185
+ "logits/real": -1.7643792629241943,
2186
+ "logps/generated": -978.6404418945312,
2187
+ "logps/real": -512.4838256835938,
2188
+ "loss": 0.0069,
2189
+ "rewards/accuracies": 0.9937499761581421,
2190
+ "rewards/generated": -31.299551010131836,
2191
+ "rewards/margins": 23.147233963012695,
2192
+ "rewards/real": -8.152318000793457,
2193
+ "step": 1070
2194
+ },
2195
+ {
2196
+ "epoch": 1.77,
2197
+ "eval_logits/generated": -1.57485032081604,
2198
+ "eval_logits/real": -1.7849942445755005,
2199
+ "eval_logps/generated": -956.59814453125,
2200
+ "eval_logps/real": -525.350830078125,
2201
+ "eval_loss": 0.01633109152317047,
2202
+ "eval_rewards/accuracies": 0.9916666746139526,
2203
+ "eval_rewards/generated": -31.411802291870117,
2204
+ "eval_rewards/margins": 23.220966339111328,
2205
+ "eval_rewards/real": -8.190834999084473,
2206
+ "eval_runtime": 1798.0611,
2207
+ "eval_samples_per_second": 2.401,
2208
+ "eval_steps_per_second": 0.075,
2209
+ "step": 1075
2210
+ },
2211
+ {
2212
+ "epoch": 1.78,
2213
+ "learning_rate": 1.227106227106227e-08,
2214
+ "logits/generated": -1.548825979232788,
2215
+ "logits/real": -1.7612594366073608,
2216
+ "logps/generated": -1017.5808715820312,
2217
+ "logps/real": -520.19384765625,
2218
+ "loss": 0.0018,
2219
+ "rewards/accuracies": 1.0,
2220
+ "rewards/generated": -33.57468795776367,
2221
+ "rewards/margins": 25.4267520904541,
2222
+ "rewards/real": -8.147936820983887,
2223
+ "step": 1080
2224
+ },
2225
+ {
2226
+ "epoch": 1.79,
2227
+ "learning_rate": 1.1355311355311355e-08,
2228
+ "logits/generated": -1.556921362876892,
2229
+ "logits/real": -1.7643944025039673,
2230
+ "logps/generated": -938.5661010742188,
2231
+ "logps/real": -503.1206970214844,
2232
+ "loss": 0.0034,
2233
+ "rewards/accuracies": 0.987500011920929,
2234
+ "rewards/generated": -29.682641983032227,
2235
+ "rewards/margins": 21.84661102294922,
2236
+ "rewards/real": -7.83603048324585,
2237
+ "step": 1090
2238
+ },
2239
+ {
2240
+ "epoch": 1.81,
2241
+ "learning_rate": 1.0439560439560439e-08,
2242
+ "logits/generated": -1.5458358526229858,
2243
+ "logits/real": -1.7758142948150635,
2244
+ "logps/generated": -942.8790893554688,
2245
+ "logps/real": -487.7559509277344,
2246
+ "loss": 0.0029,
2247
+ "rewards/accuracies": 0.9937499761581421,
2248
+ "rewards/generated": -31.93533706665039,
2249
+ "rewards/margins": 24.24590492248535,
2250
+ "rewards/real": -7.689431667327881,
2251
+ "step": 1100
2252
+ },
2253
+ {
2254
+ "epoch": 1.81,
2255
+ "eval_logits/generated": -1.5624111890792847,
2256
+ "eval_logits/real": -1.7751930952072144,
2257
+ "eval_logps/generated": -963.309814453125,
2258
+ "eval_logps/real": -527.5802001953125,
2259
+ "eval_loss": 0.016566824167966843,
2260
+ "eval_rewards/accuracies": 0.9916666746139526,
2261
+ "eval_rewards/generated": -32.08296585083008,
2262
+ "eval_rewards/margins": 23.669187545776367,
2263
+ "eval_rewards/real": -8.413775444030762,
2264
+ "eval_runtime": 1800.7952,
2265
+ "eval_samples_per_second": 2.398,
2266
+ "eval_steps_per_second": 0.075,
2267
+ "step": 1100
2268
+ },
2269
+ {
2270
+ "epoch": 1.83,
2271
+ "learning_rate": 9.523809523809522e-09,
2272
+ "logits/generated": -1.5966811180114746,
2273
+ "logits/real": -1.791329026222229,
2274
+ "logps/generated": -976.7927856445312,
2275
+ "logps/real": -493.8138122558594,
2276
+ "loss": 0.0047,
2277
+ "rewards/accuracies": 0.9937499761581421,
2278
+ "rewards/generated": -32.71385955810547,
2279
+ "rewards/margins": 24.73776626586914,
2280
+ "rewards/real": -7.9760942459106445,
2281
+ "step": 1110
2282
+ },
2283
+ {
2284
+ "epoch": 1.84,
2285
+ "learning_rate": 8.608058608058607e-09,
2286
+ "logits/generated": -1.5438224077224731,
2287
+ "logits/real": -1.7942355871200562,
2288
+ "logps/generated": -1013.4166870117188,
2289
+ "logps/real": -529.5133056640625,
2290
+ "loss": 0.0047,
2291
+ "rewards/accuracies": 1.0,
2292
+ "rewards/generated": -34.10750961303711,
2293
+ "rewards/margins": 25.782058715820312,
2294
+ "rewards/real": -8.325451850891113,
2295
+ "step": 1120
2296
+ },
2297
+ {
2298
+ "epoch": 1.85,
2299
+ "eval_logits/generated": -1.5631078481674194,
2300
+ "eval_logits/real": -1.775943398475647,
2301
+ "eval_logps/generated": -964.0065307617188,
2302
+ "eval_logps/real": -527.6651611328125,
2303
+ "eval_loss": 0.016596974804997444,
2304
+ "eval_rewards/accuracies": 0.9916666746139526,
2305
+ "eval_rewards/generated": -32.15264129638672,
2306
+ "eval_rewards/margins": 23.730371475219727,
2307
+ "eval_rewards/real": -8.422268867492676,
2308
+ "eval_runtime": 1805.5605,
2309
+ "eval_samples_per_second": 2.392,
2310
+ "eval_steps_per_second": 0.075,
2311
+ "step": 1125
2312
+ },
2313
+ {
2314
+ "epoch": 1.86,
2315
+ "learning_rate": 7.692307692307693e-09,
2316
+ "logits/generated": -1.6013424396514893,
2317
+ "logits/real": -1.8322757482528687,
2318
+ "logps/generated": -1004.1285400390625,
2319
+ "logps/real": -520.826416015625,
2320
+ "loss": 0.0019,
2321
+ "rewards/accuracies": 1.0,
2322
+ "rewards/generated": -34.476234436035156,
2323
+ "rewards/margins": 25.89908218383789,
2324
+ "rewards/real": -8.577150344848633,
2325
+ "step": 1130
2326
+ },
2327
+ {
2328
+ "epoch": 1.88,
2329
+ "learning_rate": 6.776556776556776e-09,
2330
+ "logits/generated": -1.6247609853744507,
2331
+ "logits/real": -1.8308923244476318,
2332
+ "logps/generated": -974.4166870117188,
2333
+ "logps/real": -467.469970703125,
2334
+ "loss": 0.0065,
2335
+ "rewards/accuracies": 0.987500011920929,
2336
+ "rewards/generated": -33.500511169433594,
2337
+ "rewards/margins": 25.762847900390625,
2338
+ "rewards/real": -7.737664699554443,
2339
+ "step": 1140
2340
+ },
2341
+ {
2342
+ "epoch": 1.89,
2343
+ "learning_rate": 5.86080586080586e-09,
2344
+ "logits/generated": -1.5782761573791504,
2345
+ "logits/real": -1.8114595413208008,
2346
+ "logps/generated": -984.72265625,
2347
+ "logps/real": -521.5853881835938,
2348
+ "loss": 0.0037,
2349
+ "rewards/accuracies": 0.9937499761581421,
2350
+ "rewards/generated": -32.02996826171875,
2351
+ "rewards/margins": 24.080408096313477,
2352
+ "rewards/real": -7.949559211730957,
2353
+ "step": 1150
2354
+ },
2355
+ {
2356
+ "epoch": 1.89,
2357
+ "eval_logits/generated": -1.573925256729126,
2358
+ "eval_logits/real": -1.7831730842590332,
2359
+ "eval_logps/generated": -955.689453125,
2360
+ "eval_logps/real": -525.0056762695312,
2361
+ "eval_loss": 0.016293587163090706,
2362
+ "eval_rewards/accuracies": 0.9916666746139526,
2363
+ "eval_rewards/generated": -31.32093048095703,
2364
+ "eval_rewards/margins": 23.164613723754883,
2365
+ "eval_rewards/real": -8.156318664550781,
2366
+ "eval_runtime": 1805.9186,
2367
+ "eval_samples_per_second": 2.391,
2368
+ "eval_steps_per_second": 0.075,
2369
+ "step": 1150
2370
+ },
2371
+ {
2372
+ "epoch": 1.91,
2373
+ "learning_rate": 4.945054945054945e-09,
2374
+ "logits/generated": -1.5925065279006958,
2375
+ "logits/real": -1.8153518438339233,
2376
+ "logps/generated": -977.4358520507812,
2377
+ "logps/real": -493.5396423339844,
2378
+ "loss": 0.0027,
2379
+ "rewards/accuracies": 1.0,
2380
+ "rewards/generated": -32.501895904541016,
2381
+ "rewards/margins": 24.7786865234375,
2382
+ "rewards/real": -7.723211765289307,
2383
+ "step": 1160
2384
+ },
2385
+ {
2386
+ "epoch": 1.93,
2387
+ "learning_rate": 4.02930402930403e-09,
2388
+ "logits/generated": -1.5815564393997192,
2389
+ "logits/real": -1.7986618280410767,
2390
+ "logps/generated": -984.4176025390625,
2391
+ "logps/real": -496.143310546875,
2392
+ "loss": 0.0026,
2393
+ "rewards/accuracies": 1.0,
2394
+ "rewards/generated": -30.3635311126709,
2395
+ "rewards/margins": 22.687597274780273,
2396
+ "rewards/real": -7.675933837890625,
2397
+ "step": 1170
2398
+ },
2399
+ {
2400
+ "epoch": 1.93,
2401
+ "eval_logits/generated": -1.5708197355270386,
2402
+ "eval_logits/real": -1.7807316780090332,
2403
+ "eval_logps/generated": -957.48876953125,
2404
+ "eval_logps/real": -525.5498046875,
2405
+ "eval_loss": 0.016291461884975433,
2406
+ "eval_rewards/accuracies": 0.9916666746139526,
2407
+ "eval_rewards/generated": -31.50086784362793,
2408
+ "eval_rewards/margins": 23.290132522583008,
2409
+ "eval_rewards/real": -8.210736274719238,
2410
+ "eval_runtime": 1806.1638,
2411
+ "eval_samples_per_second": 2.391,
2412
+ "eval_steps_per_second": 0.075,
2413
+ "step": 1175
2414
+ },
2415
+ {
2416
+ "epoch": 1.94,
2417
+ "learning_rate": 3.1135531135531137e-09,
2418
+ "logits/generated": -1.6078064441680908,
2419
+ "logits/real": -1.8417888879776,
2420
+ "logps/generated": -980.16552734375,
2421
+ "logps/real": -481.13250732421875,
2422
+ "loss": 0.0044,
2423
+ "rewards/accuracies": 1.0,
2424
+ "rewards/generated": -33.39889144897461,
2425
+ "rewards/margins": 25.546215057373047,
2426
+ "rewards/real": -7.852681636810303,
2427
+ "step": 1180
2428
+ },
2429
+ {
2430
+ "epoch": 1.96,
2431
+ "learning_rate": 2.197802197802198e-09,
2432
+ "logits/generated": -1.5867105722427368,
2433
+ "logits/real": -1.8165124654769897,
2434
+ "logps/generated": -991.1232299804688,
2435
+ "logps/real": -548.1749267578125,
2436
+ "loss": 0.0044,
2437
+ "rewards/accuracies": 0.9937499761581421,
2438
+ "rewards/generated": -30.872753143310547,
2439
+ "rewards/margins": 22.4478759765625,
2440
+ "rewards/real": -8.424878120422363,
2441
+ "step": 1190
2442
+ },
2443
+ {
2444
+ "epoch": 1.98,
2445
+ "learning_rate": 1.282051282051282e-09,
2446
+ "logits/generated": -1.5666377544403076,
2447
+ "logits/real": -1.8013808727264404,
2448
+ "logps/generated": -938.0455932617188,
2449
+ "logps/real": -504.17022705078125,
2450
+ "loss": 0.0058,
2451
+ "rewards/accuracies": 0.981249988079071,
2452
+ "rewards/generated": -30.902517318725586,
2453
+ "rewards/margins": 23.061681747436523,
2454
+ "rewards/real": -7.840832710266113,
2455
+ "step": 1200
2456
+ },
2457
+ {
2458
+ "epoch": 1.98,
2459
+ "eval_logits/generated": -1.5719400644302368,
2460
+ "eval_logits/real": -1.7812800407409668,
2461
+ "eval_logps/generated": -956.3063354492188,
2462
+ "eval_logps/real": -525.1734619140625,
2463
+ "eval_loss": 0.01621842570602894,
2464
+ "eval_rewards/accuracies": 0.9916666746139526,
2465
+ "eval_rewards/generated": -31.38262367248535,
2466
+ "eval_rewards/margins": 23.20952606201172,
2467
+ "eval_rewards/real": -8.173093795776367,
2468
+ "eval_runtime": 1803.2665,
2469
+ "eval_samples_per_second": 2.395,
2470
+ "eval_steps_per_second": 0.075,
2471
+ "step": 1200
2472
+ },
2473
+ {
2474
+ "epoch": 1.99,
2475
+ "learning_rate": 3.6630036630036627e-10,
2476
+ "logits/generated": -1.5853986740112305,
2477
+ "logits/real": -1.8437074422836304,
2478
+ "logps/generated": -930.419921875,
2479
+ "logps/real": -524.40087890625,
2480
+ "loss": 0.0047,
2481
+ "rewards/accuracies": 0.9937499761581421,
2482
+ "rewards/generated": -30.547359466552734,
2483
+ "rewards/margins": 22.564682006835938,
2484
+ "rewards/real": -7.982677459716797,
2485
+ "step": 1210
2486
+ },
2487
+ {
2488
+ "epoch": 2.0,
2489
+ "step": 1214,
2490
+ "total_flos": 0.0,
2491
+ "train_loss": 0.03410133493748145,
2492
+ "train_runtime": 146707.6169,
2493
+ "train_samples_per_second": 0.53,
2494
+ "train_steps_per_second": 0.008
2495
+ }
2496
+ ],
2497
+ "logging_steps": 10,
2498
+ "max_steps": 1214,
2499
+ "num_input_tokens_seen": 0,
2500
+ "num_train_epochs": 2,
2501
+ "save_steps": 500,
2502
+ "total_flos": 0.0,
2503
+ "train_batch_size": 8,
2504
+ "trial_name": null,
2505
+ "trial_params": null
2506
+ }