WeightsnWizardry
committed on
Commit
•
e60fdf7
1
Parent(s):
fb8bfcb
Update README.md
Browse files
README.md
CHANGED
@@ -118,19 +118,15 @@ Samples from each of the datasets have been programmatically formatted to chat,
|
|
118 |
| Num Rollouts | 1024 |
|
119 |
| PPO Epochs | 1 |
|
120 |
| Value Epochs | 1 |
|
121 |
-
|
|
122 |
-
| Init KL Coef | 0.01 |
|
123 |
-
| Target KL | 6.0 |
|
124 |
-
| K Beta | 0.1 |
|
125 |
| Gamma | 1.0 |
|
126 |
| GAE Lambda | 0.95 |
|
127 |
| Clip Range | 0.2 |
|
128 |
| Clip Range Value | 0.2 |
|
129 |
| Whiten Advantages | `true` |
|
130 |
| Whiten Rewards | `false` |
|
131 |
-
| Loss on
|
132 |
| Max Steps | 200 |
|
133 |
-
| microbatch_size | 1 |
|
134 |
| PPO steps/epoch | 1 |
|
135 |
| Value steps/epoch | 8 |
|
136 |
|
@@ -141,8 +137,6 @@ Samples from each of the datasets have been programmatically formatted to chat,
|
|
141 |
| Continuation Min Len | 0 |
|
142 |
| Top P | 1.0 |
|
143 |
| Temperature | 1.0 |
|
144 |
-
| # Cached Batches | 128 |
|
145 |
-
| Microbatch size | 1 |
|
146 |
|
147 |
|
148 |
## Evaluation
|
|
|
118 |
| Num Rollouts | 1024 |
|
119 |
| PPO Epochs | 1 |
|
120 |
| Value Epochs | 1 |
|
121 |
+
| KL Coef | 0.01 |
|
|
|
|
|
|
|
122 |
| Gamma | 1.0 |
|
123 |
| GAE Lambda | 0.95 |
|
124 |
| Clip Range | 0.2 |
|
125 |
| Clip Range Value | 0.2 |
|
126 |
| Whiten Advantages | `true` |
|
127 |
| Whiten Rewards | `false` |
|
128 |
+
| Loss on EOD | `true` |
|
129 |
| Max Steps | 200 |
|
|
|
130 |
| PPO steps/epoch | 1 |
|
131 |
| Value steps/epoch | 8 |
|
132 |
|
|
|
137 |
| Continuation Min Len | 0 |
|
138 |
| Top P | 1.0 |
|
139 |
| Temperature | 1.0 |
|
|
|
|
|
140 |
|
141 |
|
142 |
## Evaluation
|