Update README.md
Browse files
README.md
CHANGED
@@ -165,6 +165,7 @@ v2:
|
|
165 |
* Framework: https://github.com/tatsu-lab/alpaca_eval
|
166 |
* Evaluator: alpaca_eval_cot_gpt4_turbo_fn
|
167 |
|
|
|
168 |
| model | length_controlled_winrate | win_rate | standard_error | avg_length |
|
169 |
|-----|-----|-----|-----|-----|
|
170 |
|chatgpt_4_turbo | 76.04 | 90.00 | 1.46 | 1270 |
|
@@ -177,4 +178,12 @@ v2:
|
|
177 |
|saiga_llama3_8b, v4 | 43.64 | 65.90 | 2.31 | 1200 |
|
178 |
|saiga_llama3_8b, v3 | 36.97 | 61.08 | 2.38 | 1162 |
|
179 |
|saiga_llama3_8b, v2 | 33.07 | 48.19 | 2.45 | 1166 |
|
180 |
-
|saiga_mistral_7b | 23.38 | 35.99 | 2.34 | 949 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
165 |
* Framework: https://github.com/tatsu-lab/alpaca_eval
|
166 |
* Evaluator: alpaca_eval_cot_gpt4_turbo_fn
|
167 |
|
168 |
+
Pivot: chatgpt_3_5_turbo
|
169 |
| model | length_controlled_winrate | win_rate | standard_error | avg_length |
|
170 |
|-----|-----|-----|-----|-----|
|
171 |
|chatgpt_4_turbo | 76.04 | 90.00 | 1.46 | 1270 |
|
|
|
178 |
|saiga_llama3_8b, v4 | 43.64 | 65.90 | 2.31 | 1200 |
|
179 |
|saiga_llama3_8b, v3 | 36.97 | 61.08 | 2.38 | 1162 |
|
180 |
|saiga_llama3_8b, v2 | 33.07 | 48.19 | 2.45 | 1166 |
|
181 |
+
|saiga_mistral_7b | 23.38 | 35.99 | 2.34 | 949 |
|
182 |
+
|
183 |
+
Pivot: sfr
|
184 |
+
| model | length_controlled_winrate | win_rate | standard_error | avg_length |
|
185 |
+
|-----|-----|-----|-----|-----|
|
186 |
+
| sfr | 50.00 | 50.00 | 0.00 | 1215 |
|
187 |
+
| saiga_llama3_8b, v7 | 48.95 | 49.16 | 2.46 | 1266 |
|
188 |
+
| saiga_llama3_8b, v6 | 46.91 | 47.23 | 2.45 | 1262 |
|
189 |
+
| suzume_8b | 43.69 | 48.19 | 2.46 | 1325 |
|