leaderboard-pr-bot
committed on
Commit
•
e78137c
1
Parent(s):
bfb99ff
Adding Evaluation Results
Browse files
This is an automated PR created with https://huggingface.co/spaces/Weyaxi/open-llm-leaderboard-results-pr
The purpose of this PR is to add evaluation results from the Open LLM Leaderboard to your model card.
If you encounter any issues, please report them to https://huggingface.co/spaces/Weyaxi/open-llm-leaderboard-results-pr/discussions
README.md
CHANGED
@@ -1,18 +1,18 @@
|
|
1 |
---
|
2 |
-
license: other
|
3 |
-
datasets:
|
4 |
-
- OpenAssistant/oasst2
|
5 |
-
- nvidia/HelpSteer
|
6 |
language:
|
7 |
- ja
|
8 |
- en
|
|
|
9 |
library_name: transformers
|
10 |
-
base_model: karakuri-ai/karakuri-lm-70b-v0.1
|
11 |
-
pipeline_tag: conversational
|
12 |
tags:
|
13 |
- llama
|
14 |
- llama-2
|
15 |
- steerlm
|
|
|
|
|
|
|
|
|
|
|
16 |
model-index:
|
17 |
- name: karakuri-ai/karakuri-lm-70b-chat-v0.1
|
18 |
results:
|
@@ -24,22 +24,113 @@ model-index:
|
|
24 |
type: unknown
|
25 |
metrics:
|
26 |
- type: unknown
|
27 |
-
name: score
|
28 |
value: 6.609375
|
|
|
|
|
|
|
|
|
29 |
source:
|
30 |
url: https://huggingface.co/spaces/lmsys/mt-bench
|
31 |
- task:
|
32 |
type: text-generation
|
33 |
name: Text Generation
|
34 |
dataset:
|
35 |
-
name:
|
36 |
-
type:
|
|
|
|
|
|
|
|
|
37 |
metrics:
|
38 |
-
- type:
|
39 |
-
|
40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
source:
|
42 |
-
url: https://
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
---
|
44 |
|
45 |
# KARAKURI LM
|
@@ -169,3 +260,17 @@ Subject to the license above, and except for commercial purposes, you are free t
|
|
169 |
If you plan to use KARAKURI LM for commercial purposes, please contact us beforehand. You are not authorized to use KARAKURI LM for commercial purposes unless we expressly grant you such rights.
|
170 |
|
171 |
If you have any questions regarding the interpretation of above terms, please also feel free to contact us.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
---
|
|
|
|
|
|
|
|
|
2 |
language:
|
3 |
- ja
|
4 |
- en
|
5 |
+
license: other
|
6 |
library_name: transformers
|
|
|
|
|
7 |
tags:
|
8 |
- llama
|
9 |
- llama-2
|
10 |
- steerlm
|
11 |
+
datasets:
|
12 |
+
- OpenAssistant/oasst2
|
13 |
+
- nvidia/HelpSteer
|
14 |
+
base_model: karakuri-ai/karakuri-lm-70b-v0.1
|
15 |
+
pipeline_tag: conversational
|
16 |
model-index:
|
17 |
- name: karakuri-ai/karakuri-lm-70b-chat-v0.1
|
18 |
results:
|
|
|
24 |
type: unknown
|
25 |
metrics:
|
26 |
- type: unknown
|
|
|
27 |
value: 6.609375
|
28 |
+
name: score
|
29 |
+
- type: unknown
|
30 |
+
value: 6.43125
|
31 |
+
name: score
|
32 |
source:
|
33 |
url: https://huggingface.co/spaces/lmsys/mt-bench
|
34 |
- task:
|
35 |
type: text-generation
|
36 |
name: Text Generation
|
37 |
dataset:
|
38 |
+
name: AI2 Reasoning Challenge (25-Shot)
|
39 |
+
type: ai2_arc
|
40 |
+
config: ARC-Challenge
|
41 |
+
split: test
|
42 |
+
args:
|
43 |
+
num_few_shot: 25
|
44 |
metrics:
|
45 |
+
- type: acc_norm
|
46 |
+
value: 61.52
|
47 |
+
name: normalized accuracy
|
48 |
+
source:
|
49 |
+
url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=karakuri-ai/karakuri-lm-70b-chat-v0.1
|
50 |
+
name: Open LLM Leaderboard
|
51 |
+
- task:
|
52 |
+
type: text-generation
|
53 |
+
name: Text Generation
|
54 |
+
dataset:
|
55 |
+
name: HellaSwag (10-Shot)
|
56 |
+
type: hellaswag
|
57 |
+
split: validation
|
58 |
+
args:
|
59 |
+
num_few_shot: 10
|
60 |
+
metrics:
|
61 |
+
- type: acc_norm
|
62 |
+
value: 83.13
|
63 |
+
name: normalized accuracy
|
64 |
source:
|
65 |
+
url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=karakuri-ai/karakuri-lm-70b-chat-v0.1
|
66 |
+
name: Open LLM Leaderboard
|
67 |
+
- task:
|
68 |
+
type: text-generation
|
69 |
+
name: Text Generation
|
70 |
+
dataset:
|
71 |
+
name: MMLU (5-Shot)
|
72 |
+
type: cais/mmlu
|
73 |
+
config: all
|
74 |
+
split: test
|
75 |
+
args:
|
76 |
+
num_few_shot: 5
|
77 |
+
metrics:
|
78 |
+
- type: acc
|
79 |
+
value: 59.35
|
80 |
+
name: accuracy
|
81 |
+
source:
|
82 |
+
url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=karakuri-ai/karakuri-lm-70b-chat-v0.1
|
83 |
+
name: Open LLM Leaderboard
|
84 |
+
- task:
|
85 |
+
type: text-generation
|
86 |
+
name: Text Generation
|
87 |
+
dataset:
|
88 |
+
name: TruthfulQA (0-shot)
|
89 |
+
type: truthful_qa
|
90 |
+
config: multiple_choice
|
91 |
+
split: validation
|
92 |
+
args:
|
93 |
+
num_few_shot: 0
|
94 |
+
metrics:
|
95 |
+
- type: mc2
|
96 |
+
value: 51.39
|
97 |
+
source:
|
98 |
+
url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=karakuri-ai/karakuri-lm-70b-chat-v0.1
|
99 |
+
name: Open LLM Leaderboard
|
100 |
+
- task:
|
101 |
+
type: text-generation
|
102 |
+
name: Text Generation
|
103 |
+
dataset:
|
104 |
+
name: Winogrande (5-shot)
|
105 |
+
type: winogrande
|
106 |
+
config: winogrande_xl
|
107 |
+
split: validation
|
108 |
+
args:
|
109 |
+
num_few_shot: 5
|
110 |
+
metrics:
|
111 |
+
- type: acc
|
112 |
+
value: 78.37
|
113 |
+
name: accuracy
|
114 |
+
source:
|
115 |
+
url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=karakuri-ai/karakuri-lm-70b-chat-v0.1
|
116 |
+
name: Open LLM Leaderboard
|
117 |
+
- task:
|
118 |
+
type: text-generation
|
119 |
+
name: Text Generation
|
120 |
+
dataset:
|
121 |
+
name: GSM8k (5-shot)
|
122 |
+
type: gsm8k
|
123 |
+
config: main
|
124 |
+
split: test
|
125 |
+
args:
|
126 |
+
num_few_shot: 5
|
127 |
+
metrics:
|
128 |
+
- type: acc
|
129 |
+
value: 40.41
|
130 |
+
name: accuracy
|
131 |
+
source:
|
132 |
+
url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=karakuri-ai/karakuri-lm-70b-chat-v0.1
|
133 |
+
name: Open LLM Leaderboard
|
134 |
---
|
135 |
|
136 |
# KARAKURI LM
|
|
|
260 |
If you plan to use KARAKURI LM for commercial purposes, please contact us beforehand. You are not authorized to use KARAKURI LM for commercial purposes unless we expressly grant you such rights.
|
261 |
|
262 |
If you have any questions regarding the interpretation of above terms, please also feel free to contact us.
|
263 |
+
|
264 |
+
# [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard)
|
265 |
+
Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_karakuri-ai__karakuri-lm-70b-chat-v0.1)
|
266 |
+
|
267 |
+
| Metric |Value|
|
268 |
+
|---------------------------------|----:|
|
269 |
+
|Avg. |62.36|
|
270 |
+
|AI2 Reasoning Challenge (25-Shot)|61.52|
|
271 |
+
|HellaSwag (10-Shot) |83.13|
|
272 |
+
|MMLU (5-Shot) |59.35|
|
273 |
+
|TruthfulQA (0-shot) |51.39|
|
274 |
+
|Winogrande (5-shot) |78.37|
|
275 |
+
|GSM8k (5-shot) |40.41|
|
276 |
+
|