Update README.md
Browse files
README.md
CHANGED
@@ -55,121 +55,34 @@ print(tokenizer.decode(outputs[0], skip_special_tokens=True))
|
|
55 |
```
|
56 |
I encourage you to provide feedback on the model's performance. If you'd like to create your own quantizations, feel free to do so and let me know how it works for you!
|
57 |
|
58 |
-
|
59 |
-
- name:
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
type: prompt_level_strict_acc
|
89 |
-
value: 28.06
|
90 |
-
source:
|
91 |
-
name: Open LLM Leaderboard
|
92 |
-
url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=P0x0/Astra-v1-12B
|
93 |
-
|
94 |
-
- task:
|
95 |
-
type: text-generation
|
96 |
-
name: Text Generation
|
97 |
-
dataset:
|
98 |
-
name: BBH
|
99 |
-
type: BBH
|
100 |
-
args:
|
101 |
-
num_few_shot: 3
|
102 |
-
metrics:
|
103 |
-
- name: Normalized Accuracy
|
104 |
-
type: acc_norm
|
105 |
-
value: 31.81
|
106 |
-
source:
|
107 |
-
name: Open LLM Leaderboard
|
108 |
-
url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=P0x0/Astra-v1-12B
|
109 |
-
|
110 |
-
- task:
|
111 |
-
type: text-generation
|
112 |
-
name: Text Generation
|
113 |
-
dataset:
|
114 |
-
name: MATH Lvl 5
|
115 |
-
type: hendrycks/competition_math
|
116 |
-
args:
|
117 |
-
num_few_shot: 4
|
118 |
-
metrics:
|
119 |
-
- name: Exact Match
|
120 |
-
type: exact_match
|
121 |
-
value: 9.67
|
122 |
-
source:
|
123 |
-
name: Open LLM Leaderboard
|
124 |
-
url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=P0x0/Astra-v1-12B
|
125 |
-
|
126 |
-
- task:
|
127 |
-
type: text-generation
|
128 |
-
name: Text Generation
|
129 |
-
dataset:
|
130 |
-
name: GPQA
|
131 |
-
type: Idavidrein/gpqa
|
132 |
-
args:
|
133 |
-
num_few_shot: 0
|
134 |
-
metrics:
|
135 |
-
- name: Normalized Accuracy
|
136 |
-
type: acc_norm
|
137 |
-
value: 8.5
|
138 |
-
source:
|
139 |
-
name: Open LLM Leaderboard
|
140 |
-
url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=P0x0/Astra-v1-12B
|
141 |
-
|
142 |
-
- task:
|
143 |
-
type: text-generation
|
144 |
-
name: Text Generation
|
145 |
-
dataset:
|
146 |
-
name: MuSR
|
147 |
-
type: TAUR-Lab/MuSR
|
148 |
-
args:
|
149 |
-
num_few_shot: 0
|
150 |
-
metrics:
|
151 |
-
- name: Normalized Accuracy
|
152 |
-
type: acc_norm
|
153 |
-
value: 11.38
|
154 |
-
source:
|
155 |
-
name: Open LLM Leaderboard
|
156 |
-
url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=P0x0/Astra-v1-12B
|
157 |
-
|
158 |
-
- task:
|
159 |
-
type: text-generation
|
160 |
-
name: Text Generation
|
161 |
-
dataset:
|
162 |
-
name: MMLU-PRO
|
163 |
-
type: TIGER-Lab/MMLU-Pro
|
164 |
-
config: main
|
165 |
-
split: test
|
166 |
-
args:
|
167 |
-
num_few_shot: 5
|
168 |
-
metrics:
|
169 |
-
- name: Accuracy
|
170 |
-
type: acc
|
171 |
-
value: 27.34
|
172 |
-
source:
|
173 |
-
name: Open LLM Leaderboard
|
174 |
-
url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=P0x0/Astra-v1-12B
|
175 |
|
|
|
55 |
```
|
56 |
I encourage you to provide feedback on the model's performance. If you'd like to create your own quantizations, feel free to do so and let me know how it works for you!
|
57 |
|
58 |
+
metrics:
|
59 |
+
- name: "Average ⬆️"
|
60 |
+
dataset_name: "Average"
|
61 |
+
metric_type: "average_score"
|
62 |
+
value: 19.46
|
63 |
+
- name: "IFEval"
|
64 |
+
dataset_name: "IFEval"
|
65 |
+
metric_type: "inst_level_strict_acc, prompt_level_strict_acc"
|
66 |
+
value: 28.06
|
67 |
+
- name: "BBH"
|
68 |
+
dataset_name: "BBH"
|
69 |
+
metric_type: "acc_norm"
|
70 |
+
value: 31.81
|
71 |
+
- name: "MATH Lvl 5"
|
72 |
+
dataset_name: "MATH Lvl 5"
|
73 |
+
metric_type: "exact_match"
|
74 |
+
value: 9.67
|
75 |
+
- name: "GPQA"
|
76 |
+
dataset_name: "GPQA"
|
77 |
+
metric_type: "acc_norm"
|
78 |
+
value: 8.5
|
79 |
+
- name: "MuSR"
|
80 |
+
dataset_name: "MuSR"
|
81 |
+
metric_type: "acc_norm"
|
82 |
+
value: 11.38
|
83 |
+
- name: "MMLU-PRO"
|
84 |
+
dataset_name: "MMLU-PRO"
|
85 |
+
metric_type: "acc"
|
86 |
+
value: 27.34
|
87 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
88 |
|