amezasor committed on
Commit
b70a18e
1 Parent(s): 102261f

base model

Browse files
Files changed (1) hide show
  1. README.md +56 -54
README.md CHANGED
@@ -12,8 +12,8 @@ model-index:
12
  - task:
13
  type: text-generation
14
  dataset:
15
- type: instruction-following
16
- name: IFEval
17
  metrics:
18
  - name: pass@1
19
  type: pass@1
@@ -22,8 +22,8 @@ model-index:
22
  - task:
23
  type: text-generation
24
  dataset:
25
- type: instruction-following
26
- name: MT-Bench
27
  metrics:
28
  - name: pass@1
29
  type: pass@1
@@ -32,8 +32,8 @@ model-index:
32
  - task:
33
  type: text-generation
34
  dataset:
35
- type: human-exams
36
- name: AGI-Eval
37
  metrics:
38
  - name: pass@1
39
  type: pass@1
@@ -42,8 +42,8 @@ model-index:
42
  - task:
43
  type: text-generation
44
  dataset:
45
- type: human-exams
46
- name: MMLU
47
  metrics:
48
  - name: pass@1
49
  type: pass@1
@@ -52,8 +52,8 @@ model-index:
52
  - task:
53
  type: text-generation
54
  dataset:
55
- type: human-exams
56
- name: MMLU-Pro
57
  metrics:
58
  - name: pass@1
59
  type: pass@1
@@ -62,18 +62,18 @@ model-index:
62
  - task:
63
  type: text-generation
64
  dataset:
65
- type: commonsense
66
- name: OBQA
67
  metrics:
68
  - name: pass@1
69
  type: pass@1
70
- value: 46.60
71
  veriefied: false
72
  - task:
73
  type: text-generation
74
  dataset:
75
- type: commonsense
76
- name: SIQA
77
  metrics:
78
  - name: pass@1
79
  type: pass@1
@@ -82,8 +82,8 @@ model-index:
82
  - task:
83
  type: text-generation
84
  dataset:
85
- type: commonsense
86
- name: Hellaswag
87
  metrics:
88
  - name: pass@1
89
  type: pass@1
@@ -92,8 +92,8 @@ model-index:
92
  - task:
93
  type: text-generation
94
  dataset:
95
- type: commonsense
96
- name: WinoGrande
97
  metrics:
98
  - name: pass@1
99
  type: pass@1
@@ -102,8 +102,8 @@ model-index:
102
  - task:
103
  type: text-generation
104
  dataset:
105
- type: commonsense
106
- name: TruthfulQA
107
  metrics:
108
  - name: pass@1
109
  type: pass@1
@@ -112,8 +112,8 @@ model-index:
112
  - task:
113
  type: text-generation
114
  dataset:
115
- type: reading-comprehension
116
- name: BoolQ
117
  metrics:
118
  - name: pass@1
119
  type: pass@1
@@ -122,8 +122,8 @@ model-index:
122
  - task:
123
  type: text-generation
124
  dataset:
125
- type: reading-comprehension
126
- name: SQuAD 2.0
127
  metrics:
128
  - name: pass@1
129
  type: pass@1
@@ -132,8 +132,8 @@ model-index:
132
  - task:
133
  type: text-generation
134
  dataset:
135
- type: reasoning
136
- name: ARC-C
137
  metrics:
138
  - name: pass@1
139
  type: pass@1
@@ -142,8 +142,8 @@ model-index:
142
  - task:
143
  type: text-generation
144
  dataset:
145
- type: reasoning
146
- name: GPQA
147
  metrics:
148
  - name: pass@1
149
  type: pass@1
@@ -152,8 +152,8 @@ model-index:
152
  - task:
153
  type: text-generation
154
  dataset:
155
- type: reasoning
156
- name: BBH
157
  metrics:
158
  - name: pass@1
159
  type: pass@1
@@ -162,8 +162,8 @@ model-index:
162
  - task:
163
  type: text-generation
164
  dataset:
165
- type: code
166
- name: HumanEvalSynthesis
167
  metrics:
168
  - name: pass@1
169
  type: pass@1
@@ -172,8 +172,8 @@ model-index:
172
  - task:
173
  type: text-generation
174
  dataset:
175
- type: code
176
- name: HumanEvalExplain
177
  metrics:
178
  - name: pass@1
179
  type: pass@1
@@ -182,8 +182,8 @@ model-index:
182
  - task:
183
  type: text-generation
184
  dataset:
185
- type: code
186
- name: HumanEvalFix
187
  metrics:
188
  - name: pass@1
189
  type: pass@1
@@ -192,53 +192,55 @@ model-index:
192
  - task:
193
  type: text-generation
194
  dataset:
195
- type: code
196
- name: MBPP
197
  metrics:
198
  - name: pass@1
199
  type: pass@1
200
- value: 49.60
201
- veriefied: false
202
  - task:
203
  type: text-generation
204
  dataset:
205
- type: math
206
- name: GSM8K
207
  metrics:
208
  - name: pass@1
209
  type: pass@1
210
  value: 68.99
211
- veriefied: false
212
  - task:
213
  type: text-generation
214
  dataset:
215
- type: math
216
- name: MATH
217
  metrics:
218
  - name: pass@1
219
  type: pass@1
220
  value: 30.94
221
- veriefied: false
222
  - task:
223
  type: text-generation
224
  dataset:
225
- type: multilingual
226
- name: PAWS-X (7 langs)
227
  metrics:
228
  - name: pass@1
229
  type: pass@1
230
  value: 64.94
231
- veriefied: false
232
  - task:
233
  type: text-generation
234
  dataset:
235
- type: multilingual
236
- name: MGSM (6 langs)
237
  metrics:
238
  - name: pass@1
239
  type: pass@1
240
- value: 48.20
241
- veriefied: false
 
 
242
  ---
243
 
244
  <!-- ![image/png](https://cdn-uploads.huggingface.co/production/uploads/62cd5057674cdb524450093d/1hzxoPwqkBJXshKVVe6_9.png) -->
 
12
  - task:
13
  type: text-generation
14
  dataset:
15
+ type: instruction-following
16
+ name: IFEval
17
  metrics:
18
  - name: pass@1
19
  type: pass@1
 
22
  - task:
23
  type: text-generation
24
  dataset:
25
+ type: instruction-following
26
+ name: MT-Bench
27
  metrics:
28
  - name: pass@1
29
  type: pass@1
 
32
  - task:
33
  type: text-generation
34
  dataset:
35
+ type: human-exams
36
+ name: AGI-Eval
37
  metrics:
38
  - name: pass@1
39
  type: pass@1
 
42
  - task:
43
  type: text-generation
44
  dataset:
45
+ type: human-exams
46
+ name: MMLU
47
  metrics:
48
  - name: pass@1
49
  type: pass@1
 
52
  - task:
53
  type: text-generation
54
  dataset:
55
+ type: human-exams
56
+ name: MMLU-Pro
57
  metrics:
58
  - name: pass@1
59
  type: pass@1
 
62
  - task:
63
  type: text-generation
64
  dataset:
65
+ type: commonsense
66
+ name: OBQA
67
  metrics:
68
  - name: pass@1
69
  type: pass@1
70
+ value: 46.6
71
  verified: false
72
  - task:
73
  type: text-generation
74
  dataset:
75
+ type: commonsense
76
+ name: SIQA
77
  metrics:
78
  - name: pass@1
79
  type: pass@1
 
82
  - task:
83
  type: text-generation
84
  dataset:
85
+ type: commonsense
86
+ name: Hellaswag
87
  metrics:
88
  - name: pass@1
89
  type: pass@1
 
92
  - task:
93
  type: text-generation
94
  dataset:
95
+ type: commonsense
96
+ name: WinoGrande
97
  metrics:
98
  - name: pass@1
99
  type: pass@1
 
102
  - task:
103
  type: text-generation
104
  dataset:
105
+ type: commonsense
106
+ name: TruthfulQA
107
  metrics:
108
  - name: pass@1
109
  type: pass@1
 
112
  - task:
113
  type: text-generation
114
  dataset:
115
+ type: reading-comprehension
116
+ name: BoolQ
117
  metrics:
118
  - name: pass@1
119
  type: pass@1
 
122
  - task:
123
  type: text-generation
124
  dataset:
125
+ type: reading-comprehension
126
+ name: SQuAD 2.0
127
  metrics:
128
  - name: pass@1
129
  type: pass@1
 
132
  - task:
133
  type: text-generation
134
  dataset:
135
+ type: reasoning
136
+ name: ARC-C
137
  metrics:
138
  - name: pass@1
139
  type: pass@1
 
142
  - task:
143
  type: text-generation
144
  dataset:
145
+ type: reasoning
146
+ name: GPQA
147
  metrics:
148
  - name: pass@1
149
  type: pass@1
 
152
  - task:
153
  type: text-generation
154
  dataset:
155
+ type: reasoning
156
+ name: BBH
157
  metrics:
158
  - name: pass@1
159
  type: pass@1
 
162
  - task:
163
  type: text-generation
164
  dataset:
165
+ type: code
166
+ name: HumanEvalSynthesis
167
  metrics:
168
  - name: pass@1
169
  type: pass@1
 
172
  - task:
173
  type: text-generation
174
  dataset:
175
+ type: code
176
+ name: HumanEvalExplain
177
  metrics:
178
  - name: pass@1
179
  type: pass@1
 
182
  - task:
183
  type: text-generation
184
  dataset:
185
+ type: code
186
+ name: HumanEvalFix
187
  metrics:
188
  - name: pass@1
189
  type: pass@1
 
192
  - task:
193
  type: text-generation
194
  dataset:
195
+ type: code
196
+ name: MBPP
197
  metrics:
198
  - name: pass@1
199
  type: pass@1
200
+ value: 49.6
201
+ verified: false
202
  - task:
203
  type: text-generation
204
  dataset:
205
+ type: math
206
+ name: GSM8K
207
  metrics:
208
  - name: pass@1
209
  type: pass@1
210
  value: 68.99
211
+ verified: false
212
  - task:
213
  type: text-generation
214
  dataset:
215
+ type: math
216
+ name: MATH
217
  metrics:
218
  - name: pass@1
219
  type: pass@1
220
  value: 30.94
221
+ verified: false
222
  - task:
223
  type: text-generation
224
  dataset:
225
+ type: multilingual
226
+ name: PAWS-X (7 langs)
227
  metrics:
228
  - name: pass@1
229
  type: pass@1
230
  value: 64.94
231
+ verified: false
232
  - task:
233
  type: text-generation
234
  dataset:
235
+ type: multilingual
236
+ name: MGSM (6 langs)
237
  metrics:
238
  - name: pass@1
239
  type: pass@1
240
+ value: 48.2
241
+ verified: false
242
+ base_model:
243
+ - ibm-granite/granite-3.0-8b-base
244
  ---
245
 
246
  <!-- ![image/png](https://cdn-uploads.huggingface.co/production/uploads/62cd5057674cdb524450093d/1hzxoPwqkBJXshKVVe6_9.png) -->