P0x0 committed
Commit d528238
1 Parent(s): 11f1eab

Update README.md

Files changed (1)
  1. README.md +109 -8
README.md CHANGED
@@ -60,18 +60,119 @@ model-index:
   results:
   - task:
       type: text-generation
+      name: Text Generation
+    dataset:
+      name: Average
+      type: Aggregated Metrics
     metrics:
-    - name: Average
+    - type: average_score
       value: 19.46
-    - name: IFEval
+      name: Average Score
+    source:
+      name: Open LLM Leaderboard
+      url: >-
+        https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=P0x0/Astra-v1-12B
+
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: IFEval
+      type: HuggingFaceH4/ifeval
+      args:
+        num_few_shot: 0
+    metrics:
+    - type: strict_accuracy
       value: 28.06
-    - name: BBH
+      name: Strict Accuracy
+    source:
+      name: Open LLM Leaderboard
+      url: >-
+        https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=P0x0/Astra-v1-12B
+
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: BBH
+      type: BBH
+      args:
+        num_few_shot: 3
+    metrics:
+    - type: normalized_accuracy
       value: 31.81
-    - name: MATH Lvl 5
+      name: Normalized Accuracy
+    source:
+      name: Open LLM Leaderboard
+      url: >-
+        https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=P0x0/Astra-v1-12B
+
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: MATH Lvl 5
+      type: hendrycks/competition_math
+      args:
+        num_few_shot: 4
+    metrics:
+    - type: exact_match
       value: 9.67
-    - name: GPQA
+      name: Exact Match
+    source:
+      name: Open LLM Leaderboard
+      url: >-
+        https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=P0x0/Astra-v1-12B
+
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: GPQA
+      type: Idavidrein/gpqa
+      args:
+        num_few_shot: 0
+    metrics:
+    - type: normalized_accuracy
       value: 8.5
-    - name: MUSR
+      name: Normalized Accuracy
+    source:
+      name: Open LLM Leaderboard
+      url: >-
+        https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=P0x0/Astra-v1-12B
+
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: MuSR
+      type: TAUR-Lab/MuSR
+      args:
+        num_few_shot: 0
+    metrics:
+    - type: normalized_accuracy
       value: 11.38
-    - name: MMLU-PRO
-      value: 27.34
+      name: Normalized Accuracy
+    source:
+      name: Open LLM Leaderboard
+      url: >-
+        https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=P0x0/Astra-v1-12B
+
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: MMLU-PRO
+      type: TIGER-Lab/MMLU-Pro
+      config: main
+      split: test
+      args:
+        num_few_shot: 5
+    metrics:
+    - type: accuracy
+      value: 27.34
+      name: Accuracy
+    source:
+      name: Open LLM Leaderboard
+      url: >-
+        https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=P0x0/Astra-v1-12B
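The model-index block added here is the structured metadata the Hub parses to display evaluation results on the model page. A minimal sketch of reading these results back programmatically with huggingface_hub (assuming the library is installed; the repo id P0x0/Astra-v1-12B is taken from the leaderboard query URLs in the diff above):

```python
# Minimal sketch: load the model card and print the eval results
# declared in its model-index front matter (assumes huggingface_hub
# is installed and the repo is publicly readable).
from huggingface_hub import ModelCard

# Repo id taken from the leaderboard URLs in the diff above.
card = ModelCard.load("P0x0/Astra-v1-12B")

# card.data.eval_results holds one EvalResult per task/dataset/metric entry.
for r in card.data.eval_results or []:
    print(f"{r.dataset_name}: {r.metric_name} = {r.metric_value}")
```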