illian01 committed on
Commit
6e61d1a
1 Parent(s): c3d1633

Add weights and config

Browse files
mlc-chat-config.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "0.1.0",
3
+ "model_type": "llama",
4
+ "quantization": "q4f16_1",
5
+ "model_config": {
6
+ "hidden_size": 4096,
7
+ "intermediate_size": 11008,
8
+ "num_attention_heads": 32,
9
+ "num_hidden_layers": 12,
10
+ "rms_norm_eps": 1e-06,
11
+ "vocab_size": 32000,
12
+ "position_embedding_base": 10000,
13
+ "context_window_size": 2048,
14
+ "prefill_chunk_size": 2048,
15
+ "num_key_value_heads": 32,
16
+ "head_dim": 128,
17
+ "tensor_parallel_shards": 1,
18
+ "max_batch_size": 80
19
+ },
20
+ "vocab_size": 32000,
21
+ "context_window_size": 2048,
22
+ "sliding_window_size": -1,
23
+ "prefill_chunk_size": 2048,
24
+ "attention_sink_size": -1,
25
+ "tensor_parallel_shards": 1,
26
+ "mean_gen_len": 128,
27
+ "max_gen_len": 512,
28
+ "shift_fill_factor": 0.3,
29
+ "temperature": 0.6,
30
+ "presence_penalty": 0.0,
31
+ "frequency_penalty": 0.0,
32
+ "repetition_penalty": 1.0,
33
+ "top_p": 0.9,
34
+ "tokenizer_files": [
35
+ "tokenizer.model",
36
+ "tokenizer_config.json",
37
+ "tokenizer.json"
38
+ ],
39
+ "tokenizer_info": {
40
+ "token_postproc_method": "byte_fallback",
41
+ "prepend_space_in_encode": true,
42
+ "strip_space_in_decode": true
43
+ },
44
+ "conv_template": {
45
+ "name": "st-llm",
46
+ "system_template": "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\n{system_message}</s>",
47
+ "system_message": "You are a helpful, respectful and honest assistant.",
48
+ "system_prefix_token_ids": null,
49
+ "add_role_after_system_message": true,
50
+ "roles": {
51
+ "user": "### Input:",
52
+ "assistant": "### Response:"
53
+ },
54
+ "role_templates": {
55
+ "user": "{user_message}",
56
+ "assistant": "{assistant_message}",
57
+ "tool": "{tool_message}"
58
+ },
59
+ "messages": [],
60
+ "seps": [
61
+ "</s>"
62
+ ],
63
+ "role_content_sep": "\n",
64
+ "role_empty_sep": "\n",
65
+ "stop_str": [
66
+ "</s>"
67
+ ],
68
+ "stop_token_ids": [
69
+ 2
70
+ ],
71
+ "function_string": "",
72
+ "use_function_calling": false
73
+ },
74
+ "pad_token_id": 0,
75
+ "bos_token_id": 1,
76
+ "eos_token_id": 2
77
+ }
ndarray-cache.json ADDED
@@ -0,0 +1,1719 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "ParamSize": 125,
4
+ "ParamBytes": 1513693184.0,
5
+ "BitsPerParam": 4.500437647753687
6
+ },
7
+ "records": [
8
+ {
9
+ "dataPath": "params_shard_0.bin",
10
+ "format": "raw-shard",
11
+ "nbytes": 65536000,
12
+ "records": [
13
+ {
14
+ "name": "model.embed_tokens.q_weight",
15
+ "shape": [
16
+ 32000,
17
+ 512
18
+ ],
19
+ "dtype": "uint32",
20
+ "format": "f32-to-bf16",
21
+ "nbytes": 65536000,
22
+ "byteOffset": 0
23
+ }
24
+ ],
25
+ "md5sum": "a5721d9560034ebdaf9d54846920d333"
26
+ },
27
+ {
28
+ "dataPath": "params_shard_1.bin",
29
+ "format": "raw-shard",
30
+ "nbytes": 33357824,
31
+ "records": [
32
+ {
33
+ "name": "model.embed_tokens.q_scale",
34
+ "shape": [
35
+ 32000,
36
+ 128
37
+ ],
38
+ "dtype": "float16",
39
+ "format": "f32-to-bf16",
40
+ "nbytes": 8192000,
41
+ "byteOffset": 0
42
+ },
43
+ {
44
+ "name": "model.layers.0.self_attn.qkv_proj.q_weight",
45
+ "shape": [
46
+ 12288,
47
+ 512
48
+ ],
49
+ "dtype": "uint32",
50
+ "format": "f32-to-bf16",
51
+ "nbytes": 25165824,
52
+ "byteOffset": 8192000
53
+ }
54
+ ],
55
+ "md5sum": "4788df7f40558fb462c59533402e1f9b"
56
+ },
57
+ {
58
+ "dataPath": "params_shard_2.bin",
59
+ "format": "raw-shard",
60
+ "nbytes": 45088768,
61
+ "records": [
62
+ {
63
+ "name": "model.layers.0.mlp.gate_up_proj.q_weight",
64
+ "shape": [
65
+ 22016,
66
+ 512
67
+ ],
68
+ "dtype": "uint32",
69
+ "format": "f32-to-bf16",
70
+ "nbytes": 45088768,
71
+ "byteOffset": 0
72
+ }
73
+ ],
74
+ "md5sum": "2ee5d6d6432333ead4e3a36fa98d6a85"
75
+ },
76
+ {
77
+ "dataPath": "params_shard_3.bin",
78
+ "format": "raw-shard",
79
+ "nbytes": 22544384,
80
+ "records": [
81
+ {
82
+ "name": "model.layers.0.mlp.down_proj.q_weight",
83
+ "shape": [
84
+ 4096,
85
+ 1376
86
+ ],
87
+ "dtype": "uint32",
88
+ "format": "f32-to-bf16",
89
+ "nbytes": 22544384,
90
+ "byteOffset": 0
91
+ }
92
+ ],
93
+ "md5sum": "024f445f0a8b013e4cc028d6d6ea48e1"
94
+ },
95
+ {
96
+ "dataPath": "params_shard_4.bin",
97
+ "format": "raw-shard",
98
+ "nbytes": 25165824,
99
+ "records": [
100
+ {
101
+ "name": "model.layers.1.self_attn.qkv_proj.q_weight",
102
+ "shape": [
103
+ 12288,
104
+ 512
105
+ ],
106
+ "dtype": "uint32",
107
+ "format": "f32-to-bf16",
108
+ "nbytes": 25165824,
109
+ "byteOffset": 0
110
+ }
111
+ ],
112
+ "md5sum": "abc2656fa505113f94dec9670057a565"
113
+ },
114
+ {
115
+ "dataPath": "params_shard_5.bin",
116
+ "format": "raw-shard",
117
+ "nbytes": 32587776,
118
+ "records": [
119
+ {
120
+ "name": "model.layers.0.self_attn.qkv_proj.q_scale",
121
+ "shape": [
122
+ 12288,
123
+ 128
124
+ ],
125
+ "dtype": "float16",
126
+ "format": "f32-to-bf16",
127
+ "nbytes": 3145728,
128
+ "byteOffset": 0
129
+ },
130
+ {
131
+ "name": "model.layers.0.self_attn.o_proj.q_weight",
132
+ "shape": [
133
+ 4096,
134
+ 512
135
+ ],
136
+ "dtype": "uint32",
137
+ "format": "f32-to-bf16",
138
+ "nbytes": 8388608,
139
+ "byteOffset": 3145728
140
+ },
141
+ {
142
+ "name": "model.layers.0.self_attn.o_proj.q_scale",
143
+ "shape": [
144
+ 4096,
145
+ 128
146
+ ],
147
+ "dtype": "float16",
148
+ "format": "f32-to-bf16",
149
+ "nbytes": 1048576,
150
+ "byteOffset": 11534336
151
+ },
152
+ {
153
+ "name": "model.layers.0.mlp.gate_up_proj.q_scale",
154
+ "shape": [
155
+ 22016,
156
+ 128
157
+ ],
158
+ "dtype": "float16",
159
+ "format": "f32-to-bf16",
160
+ "nbytes": 5636096,
161
+ "byteOffset": 12582912
162
+ },
163
+ {
164
+ "name": "model.layers.0.mlp.down_proj.q_scale",
165
+ "shape": [
166
+ 4096,
167
+ 344
168
+ ],
169
+ "dtype": "float16",
170
+ "format": "f32-to-bf16",
171
+ "nbytes": 2818048,
172
+ "byteOffset": 18219008
173
+ },
174
+ {
175
+ "name": "model.layers.0.input_layernorm.weight",
176
+ "shape": [
177
+ 4096
178
+ ],
179
+ "dtype": "float16",
180
+ "format": "f32-to-bf16",
181
+ "nbytes": 8192,
182
+ "byteOffset": 21037056
183
+ },
184
+ {
185
+ "name": "model.layers.0.post_attention_layernorm.weight",
186
+ "shape": [
187
+ 4096
188
+ ],
189
+ "dtype": "float16",
190
+ "format": "f32-to-bf16",
191
+ "nbytes": 8192,
192
+ "byteOffset": 21045248
193
+ },
194
+ {
195
+ "name": "model.layers.1.self_attn.qkv_proj.q_scale",
196
+ "shape": [
197
+ 12288,
198
+ 128
199
+ ],
200
+ "dtype": "float16",
201
+ "format": "f32-to-bf16",
202
+ "nbytes": 3145728,
203
+ "byteOffset": 21053440
204
+ },
205
+ {
206
+ "name": "model.layers.1.self_attn.o_proj.q_weight",
207
+ "shape": [
208
+ 4096,
209
+ 512
210
+ ],
211
+ "dtype": "uint32",
212
+ "format": "f32-to-bf16",
213
+ "nbytes": 8388608,
214
+ "byteOffset": 24199168
215
+ }
216
+ ],
217
+ "md5sum": "db7fda278e7407e2beb655a5b6ca1aaa"
218
+ },
219
+ {
220
+ "dataPath": "params_shard_6.bin",
221
+ "format": "raw-shard",
222
+ "nbytes": 45088768,
223
+ "records": [
224
+ {
225
+ "name": "model.layers.1.mlp.gate_up_proj.q_weight",
226
+ "shape": [
227
+ 22016,
228
+ 512
229
+ ],
230
+ "dtype": "uint32",
231
+ "format": "f32-to-bf16",
232
+ "nbytes": 45088768,
233
+ "byteOffset": 0
234
+ }
235
+ ],
236
+ "md5sum": "5defe621e6391b2bf8a2cb6b9ad8c229"
237
+ },
238
+ {
239
+ "dataPath": "params_shard_7.bin",
240
+ "format": "raw-shard",
241
+ "nbytes": 25165824,
242
+ "records": [
243
+ {
244
+ "name": "model.layers.2.self_attn.qkv_proj.q_weight",
245
+ "shape": [
246
+ 12288,
247
+ 512
248
+ ],
249
+ "dtype": "uint32",
250
+ "format": "f32-to-bf16",
251
+ "nbytes": 25165824,
252
+ "byteOffset": 0
253
+ }
254
+ ],
255
+ "md5sum": "cd65b83d88867c3cbd36d31ef21e5168"
256
+ },
257
+ {
258
+ "dataPath": "params_shard_8.bin",
259
+ "format": "raw-shard",
260
+ "nbytes": 32063488,
261
+ "records": [
262
+ {
263
+ "name": "model.layers.1.self_attn.o_proj.q_scale",
264
+ "shape": [
265
+ 4096,
266
+ 128
267
+ ],
268
+ "dtype": "float16",
269
+ "format": "f32-to-bf16",
270
+ "nbytes": 1048576,
271
+ "byteOffset": 0
272
+ },
273
+ {
274
+ "name": "model.layers.1.mlp.gate_up_proj.q_scale",
275
+ "shape": [
276
+ 22016,
277
+ 128
278
+ ],
279
+ "dtype": "float16",
280
+ "format": "f32-to-bf16",
281
+ "nbytes": 5636096,
282
+ "byteOffset": 1048576
283
+ },
284
+ {
285
+ "name": "model.layers.1.mlp.down_proj.q_weight",
286
+ "shape": [
287
+ 4096,
288
+ 1376
289
+ ],
290
+ "dtype": "uint32",
291
+ "format": "f32-to-bf16",
292
+ "nbytes": 22544384,
293
+ "byteOffset": 6684672
294
+ },
295
+ {
296
+ "name": "model.layers.1.mlp.down_proj.q_scale",
297
+ "shape": [
298
+ 4096,
299
+ 344
300
+ ],
301
+ "dtype": "float16",
302
+ "format": "f32-to-bf16",
303
+ "nbytes": 2818048,
304
+ "byteOffset": 29229056
305
+ },
306
+ {
307
+ "name": "model.layers.1.input_layernorm.weight",
308
+ "shape": [
309
+ 4096
310
+ ],
311
+ "dtype": "float16",
312
+ "format": "f32-to-bf16",
313
+ "nbytes": 8192,
314
+ "byteOffset": 32047104
315
+ },
316
+ {
317
+ "name": "model.layers.1.post_attention_layernorm.weight",
318
+ "shape": [
319
+ 4096
320
+ ],
321
+ "dtype": "float16",
322
+ "format": "f32-to-bf16",
323
+ "nbytes": 8192,
324
+ "byteOffset": 32055296
325
+ }
326
+ ],
327
+ "md5sum": "a0cbc5ffbdaae0258e697e4383c61b3e"
328
+ },
329
+ {
330
+ "dataPath": "params_shard_9.bin",
331
+ "format": "raw-shard",
332
+ "nbytes": 45088768,
333
+ "records": [
334
+ {
335
+ "name": "model.layers.2.mlp.gate_up_proj.q_weight",
336
+ "shape": [
337
+ 22016,
338
+ 512
339
+ ],
340
+ "dtype": "uint32",
341
+ "format": "f32-to-bf16",
342
+ "nbytes": 45088768,
343
+ "byteOffset": 0
344
+ }
345
+ ],
346
+ "md5sum": "efd9b9bc047efd11a144113030043ad2"
347
+ },
348
+ {
349
+ "dataPath": "params_shard_10.bin",
350
+ "format": "raw-shard",
351
+ "nbytes": 22544384,
352
+ "records": [
353
+ {
354
+ "name": "model.layers.2.mlp.down_proj.q_weight",
355
+ "shape": [
356
+ 4096,
357
+ 1376
358
+ ],
359
+ "dtype": "uint32",
360
+ "format": "f32-to-bf16",
361
+ "nbytes": 22544384,
362
+ "byteOffset": 0
363
+ }
364
+ ],
365
+ "md5sum": "e26e435afc03547462926a2469f532e7"
366
+ },
367
+ {
368
+ "dataPath": "params_shard_11.bin",
369
+ "format": "raw-shard",
370
+ "nbytes": 25165824,
371
+ "records": [
372
+ {
373
+ "name": "model.layers.3.self_attn.qkv_proj.q_weight",
374
+ "shape": [
375
+ 12288,
376
+ 512
377
+ ],
378
+ "dtype": "uint32",
379
+ "format": "f32-to-bf16",
380
+ "nbytes": 25165824,
381
+ "byteOffset": 0
382
+ }
383
+ ],
384
+ "md5sum": "d05d0eb0cd034c59d5e4b867277a45fc"
385
+ },
386
+ {
387
+ "dataPath": "params_shard_12.bin",
388
+ "format": "raw-shard",
389
+ "nbytes": 32587776,
390
+ "records": [
391
+ {
392
+ "name": "model.layers.2.self_attn.qkv_proj.q_scale",
393
+ "shape": [
394
+ 12288,
395
+ 128
396
+ ],
397
+ "dtype": "float16",
398
+ "format": "f32-to-bf16",
399
+ "nbytes": 3145728,
400
+ "byteOffset": 0
401
+ },
402
+ {
403
+ "name": "model.layers.2.self_attn.o_proj.q_weight",
404
+ "shape": [
405
+ 4096,
406
+ 512
407
+ ],
408
+ "dtype": "uint32",
409
+ "format": "f32-to-bf16",
410
+ "nbytes": 8388608,
411
+ "byteOffset": 3145728
412
+ },
413
+ {
414
+ "name": "model.layers.2.self_attn.o_proj.q_scale",
415
+ "shape": [
416
+ 4096,
417
+ 128
418
+ ],
419
+ "dtype": "float16",
420
+ "format": "f32-to-bf16",
421
+ "nbytes": 1048576,
422
+ "byteOffset": 11534336
423
+ },
424
+ {
425
+ "name": "model.layers.2.mlp.gate_up_proj.q_scale",
426
+ "shape": [
427
+ 22016,
428
+ 128
429
+ ],
430
+ "dtype": "float16",
431
+ "format": "f32-to-bf16",
432
+ "nbytes": 5636096,
433
+ "byteOffset": 12582912
434
+ },
435
+ {
436
+ "name": "model.layers.2.mlp.down_proj.q_scale",
437
+ "shape": [
438
+ 4096,
439
+ 344
440
+ ],
441
+ "dtype": "float16",
442
+ "format": "f32-to-bf16",
443
+ "nbytes": 2818048,
444
+ "byteOffset": 18219008
445
+ },
446
+ {
447
+ "name": "model.layers.2.input_layernorm.weight",
448
+ "shape": [
449
+ 4096
450
+ ],
451
+ "dtype": "float16",
452
+ "format": "f32-to-bf16",
453
+ "nbytes": 8192,
454
+ "byteOffset": 21037056
455
+ },
456
+ {
457
+ "name": "model.layers.2.post_attention_layernorm.weight",
458
+ "shape": [
459
+ 4096
460
+ ],
461
+ "dtype": "float16",
462
+ "format": "f32-to-bf16",
463
+ "nbytes": 8192,
464
+ "byteOffset": 21045248
465
+ },
466
+ {
467
+ "name": "model.layers.3.self_attn.qkv_proj.q_scale",
468
+ "shape": [
469
+ 12288,
470
+ 128
471
+ ],
472
+ "dtype": "float16",
473
+ "format": "f32-to-bf16",
474
+ "nbytes": 3145728,
475
+ "byteOffset": 21053440
476
+ },
477
+ {
478
+ "name": "model.layers.3.self_attn.o_proj.q_weight",
479
+ "shape": [
480
+ 4096,
481
+ 512
482
+ ],
483
+ "dtype": "uint32",
484
+ "format": "f32-to-bf16",
485
+ "nbytes": 8388608,
486
+ "byteOffset": 24199168
487
+ }
488
+ ],
489
+ "md5sum": "9d30528090df6120bffa3656aa1ff76d"
490
+ },
491
+ {
492
+ "dataPath": "params_shard_13.bin",
493
+ "format": "raw-shard",
494
+ "nbytes": 45088768,
495
+ "records": [
496
+ {
497
+ "name": "model.layers.3.mlp.gate_up_proj.q_weight",
498
+ "shape": [
499
+ 22016,
500
+ 512
501
+ ],
502
+ "dtype": "uint32",
503
+ "format": "f32-to-bf16",
504
+ "nbytes": 45088768,
505
+ "byteOffset": 0
506
+ }
507
+ ],
508
+ "md5sum": "b4353a56eac3878c19fea8496f287c51"
509
+ },
510
+ {
511
+ "dataPath": "params_shard_14.bin",
512
+ "format": "raw-shard",
513
+ "nbytes": 25165824,
514
+ "records": [
515
+ {
516
+ "name": "model.layers.4.self_attn.qkv_proj.q_weight",
517
+ "shape": [
518
+ 12288,
519
+ 512
520
+ ],
521
+ "dtype": "uint32",
522
+ "format": "f32-to-bf16",
523
+ "nbytes": 25165824,
524
+ "byteOffset": 0
525
+ }
526
+ ],
527
+ "md5sum": "aefdf4e14a0739ea1c5cb97efc61c574"
528
+ },
529
+ {
530
+ "dataPath": "params_shard_15.bin",
531
+ "format": "raw-shard",
532
+ "nbytes": 32063488,
533
+ "records": [
534
+ {
535
+ "name": "model.layers.3.self_attn.o_proj.q_scale",
536
+ "shape": [
537
+ 4096,
538
+ 128
539
+ ],
540
+ "dtype": "float16",
541
+ "format": "f32-to-bf16",
542
+ "nbytes": 1048576,
543
+ "byteOffset": 0
544
+ },
545
+ {
546
+ "name": "model.layers.3.mlp.gate_up_proj.q_scale",
547
+ "shape": [
548
+ 22016,
549
+ 128
550
+ ],
551
+ "dtype": "float16",
552
+ "format": "f32-to-bf16",
553
+ "nbytes": 5636096,
554
+ "byteOffset": 1048576
555
+ },
556
+ {
557
+ "name": "model.layers.3.mlp.down_proj.q_weight",
558
+ "shape": [
559
+ 4096,
560
+ 1376
561
+ ],
562
+ "dtype": "uint32",
563
+ "format": "f32-to-bf16",
564
+ "nbytes": 22544384,
565
+ "byteOffset": 6684672
566
+ },
567
+ {
568
+ "name": "model.layers.3.mlp.down_proj.q_scale",
569
+ "shape": [
570
+ 4096,
571
+ 344
572
+ ],
573
+ "dtype": "float16",
574
+ "format": "f32-to-bf16",
575
+ "nbytes": 2818048,
576
+ "byteOffset": 29229056
577
+ },
578
+ {
579
+ "name": "model.layers.3.input_layernorm.weight",
580
+ "shape": [
581
+ 4096
582
+ ],
583
+ "dtype": "float16",
584
+ "format": "f32-to-bf16",
585
+ "nbytes": 8192,
586
+ "byteOffset": 32047104
587
+ },
588
+ {
589
+ "name": "model.layers.3.post_attention_layernorm.weight",
590
+ "shape": [
591
+ 4096
592
+ ],
593
+ "dtype": "float16",
594
+ "format": "f32-to-bf16",
595
+ "nbytes": 8192,
596
+ "byteOffset": 32055296
597
+ }
598
+ ],
599
+ "md5sum": "8be31773c96501e9bcb27847528abfcd"
600
+ },
601
+ {
602
+ "dataPath": "params_shard_16.bin",
603
+ "format": "raw-shard",
604
+ "nbytes": 45088768,
605
+ "records": [
606
+ {
607
+ "name": "model.layers.4.mlp.gate_up_proj.q_weight",
608
+ "shape": [
609
+ 22016,
610
+ 512
611
+ ],
612
+ "dtype": "uint32",
613
+ "format": "f32-to-bf16",
614
+ "nbytes": 45088768,
615
+ "byteOffset": 0
616
+ }
617
+ ],
618
+ "md5sum": "eb85b485181e45c636f8288e1c9884ab"
619
+ },
620
+ {
621
+ "dataPath": "params_shard_17.bin",
622
+ "format": "raw-shard",
623
+ "nbytes": 22544384,
624
+ "records": [
625
+ {
626
+ "name": "model.layers.4.mlp.down_proj.q_weight",
627
+ "shape": [
628
+ 4096,
629
+ 1376
630
+ ],
631
+ "dtype": "uint32",
632
+ "format": "f32-to-bf16",
633
+ "nbytes": 22544384,
634
+ "byteOffset": 0
635
+ }
636
+ ],
637
+ "md5sum": "1d79bed3a83d48161cb6b7e128052fdf"
638
+ },
639
+ {
640
+ "dataPath": "params_shard_18.bin",
641
+ "format": "raw-shard",
642
+ "nbytes": 25165824,
643
+ "records": [
644
+ {
645
+ "name": "model.layers.5.self_attn.qkv_proj.q_weight",
646
+ "shape": [
647
+ 12288,
648
+ 512
649
+ ],
650
+ "dtype": "uint32",
651
+ "format": "f32-to-bf16",
652
+ "nbytes": 25165824,
653
+ "byteOffset": 0
654
+ }
655
+ ],
656
+ "md5sum": "35d342713b13693fc476b04f7fdc9bd4"
657
+ },
658
+ {
659
+ "dataPath": "params_shard_19.bin",
660
+ "format": "raw-shard",
661
+ "nbytes": 32587776,
662
+ "records": [
663
+ {
664
+ "name": "model.layers.4.self_attn.qkv_proj.q_scale",
665
+ "shape": [
666
+ 12288,
667
+ 128
668
+ ],
669
+ "dtype": "float16",
670
+ "format": "f32-to-bf16",
671
+ "nbytes": 3145728,
672
+ "byteOffset": 0
673
+ },
674
+ {
675
+ "name": "model.layers.4.self_attn.o_proj.q_weight",
676
+ "shape": [
677
+ 4096,
678
+ 512
679
+ ],
680
+ "dtype": "uint32",
681
+ "format": "f32-to-bf16",
682
+ "nbytes": 8388608,
683
+ "byteOffset": 3145728
684
+ },
685
+ {
686
+ "name": "model.layers.4.self_attn.o_proj.q_scale",
687
+ "shape": [
688
+ 4096,
689
+ 128
690
+ ],
691
+ "dtype": "float16",
692
+ "format": "f32-to-bf16",
693
+ "nbytes": 1048576,
694
+ "byteOffset": 11534336
695
+ },
696
+ {
697
+ "name": "model.layers.4.mlp.gate_up_proj.q_scale",
698
+ "shape": [
699
+ 22016,
700
+ 128
701
+ ],
702
+ "dtype": "float16",
703
+ "format": "f32-to-bf16",
704
+ "nbytes": 5636096,
705
+ "byteOffset": 12582912
706
+ },
707
+ {
708
+ "name": "model.layers.4.mlp.down_proj.q_scale",
709
+ "shape": [
710
+ 4096,
711
+ 344
712
+ ],
713
+ "dtype": "float16",
714
+ "format": "f32-to-bf16",
715
+ "nbytes": 2818048,
716
+ "byteOffset": 18219008
717
+ },
718
+ {
719
+ "name": "model.layers.4.input_layernorm.weight",
720
+ "shape": [
721
+ 4096
722
+ ],
723
+ "dtype": "float16",
724
+ "format": "f32-to-bf16",
725
+ "nbytes": 8192,
726
+ "byteOffset": 21037056
727
+ },
728
+ {
729
+ "name": "model.layers.4.post_attention_layernorm.weight",
730
+ "shape": [
731
+ 4096
732
+ ],
733
+ "dtype": "float16",
734
+ "format": "f32-to-bf16",
735
+ "nbytes": 8192,
736
+ "byteOffset": 21045248
737
+ },
738
+ {
739
+ "name": "model.layers.5.self_attn.qkv_proj.q_scale",
740
+ "shape": [
741
+ 12288,
742
+ 128
743
+ ],
744
+ "dtype": "float16",
745
+ "format": "f32-to-bf16",
746
+ "nbytes": 3145728,
747
+ "byteOffset": 21053440
748
+ },
749
+ {
750
+ "name": "model.layers.5.self_attn.o_proj.q_weight",
751
+ "shape": [
752
+ 4096,
753
+ 512
754
+ ],
755
+ "dtype": "uint32",
756
+ "format": "f32-to-bf16",
757
+ "nbytes": 8388608,
758
+ "byteOffset": 24199168
759
+ }
760
+ ],
761
+ "md5sum": "302b058c01fcc629b7a82786c92ba9b4"
762
+ },
763
+ {
764
+ "dataPath": "params_shard_20.bin",
765
+ "format": "raw-shard",
766
+ "nbytes": 45088768,
767
+ "records": [
768
+ {
769
+ "name": "model.layers.5.mlp.gate_up_proj.q_weight",
770
+ "shape": [
771
+ 22016,
772
+ 512
773
+ ],
774
+ "dtype": "uint32",
775
+ "format": "f32-to-bf16",
776
+ "nbytes": 45088768,
777
+ "byteOffset": 0
778
+ }
779
+ ],
780
+ "md5sum": "5b45bf192a4d39eba6fc70626fb9eded"
781
+ },
782
+ {
783
+ "dataPath": "params_shard_21.bin",
784
+ "format": "raw-shard",
785
+ "nbytes": 25165824,
786
+ "records": [
787
+ {
788
+ "name": "model.layers.6.self_attn.qkv_proj.q_weight",
789
+ "shape": [
790
+ 12288,
791
+ 512
792
+ ],
793
+ "dtype": "uint32",
794
+ "format": "f32-to-bf16",
795
+ "nbytes": 25165824,
796
+ "byteOffset": 0
797
+ }
798
+ ],
799
+ "md5sum": "5d1705622213b2998c1707f2e5ff1eeb"
800
+ },
801
+ {
802
+ "dataPath": "params_shard_22.bin",
803
+ "format": "raw-shard",
804
+ "nbytes": 32063488,
805
+ "records": [
806
+ {
807
+ "name": "model.layers.5.self_attn.o_proj.q_scale",
808
+ "shape": [
809
+ 4096,
810
+ 128
811
+ ],
812
+ "dtype": "float16",
813
+ "format": "f32-to-bf16",
814
+ "nbytes": 1048576,
815
+ "byteOffset": 0
816
+ },
817
+ {
818
+ "name": "model.layers.5.mlp.gate_up_proj.q_scale",
819
+ "shape": [
820
+ 22016,
821
+ 128
822
+ ],
823
+ "dtype": "float16",
824
+ "format": "f32-to-bf16",
825
+ "nbytes": 5636096,
826
+ "byteOffset": 1048576
827
+ },
828
+ {
829
+ "name": "model.layers.5.mlp.down_proj.q_weight",
830
+ "shape": [
831
+ 4096,
832
+ 1376
833
+ ],
834
+ "dtype": "uint32",
835
+ "format": "f32-to-bf16",
836
+ "nbytes": 22544384,
837
+ "byteOffset": 6684672
838
+ },
839
+ {
840
+ "name": "model.layers.5.mlp.down_proj.q_scale",
841
+ "shape": [
842
+ 4096,
843
+ 344
844
+ ],
845
+ "dtype": "float16",
846
+ "format": "f32-to-bf16",
847
+ "nbytes": 2818048,
848
+ "byteOffset": 29229056
849
+ },
850
+ {
851
+ "name": "model.layers.5.input_layernorm.weight",
852
+ "shape": [
853
+ 4096
854
+ ],
855
+ "dtype": "float16",
856
+ "format": "f32-to-bf16",
857
+ "nbytes": 8192,
858
+ "byteOffset": 32047104
859
+ },
860
+ {
861
+ "name": "model.layers.5.post_attention_layernorm.weight",
862
+ "shape": [
863
+ 4096
864
+ ],
865
+ "dtype": "float16",
866
+ "format": "f32-to-bf16",
867
+ "nbytes": 8192,
868
+ "byteOffset": 32055296
869
+ }
870
+ ],
871
+ "md5sum": "02763566ec8de53492fb6836a773b006"
872
+ },
873
+ {
874
+ "dataPath": "params_shard_23.bin",
875
+ "format": "raw-shard",
876
+ "nbytes": 45088768,
877
+ "records": [
878
+ {
879
+ "name": "model.layers.6.mlp.gate_up_proj.q_weight",
880
+ "shape": [
881
+ 22016,
882
+ 512
883
+ ],
884
+ "dtype": "uint32",
885
+ "format": "f32-to-bf16",
886
+ "nbytes": 45088768,
887
+ "byteOffset": 0
888
+ }
889
+ ],
890
+ "md5sum": "81d53bbce20ca72efb5014f17df470a7"
891
+ },
892
+ {
893
+ "dataPath": "params_shard_24.bin",
894
+ "format": "raw-shard",
895
+ "nbytes": 22544384,
896
+ "records": [
897
+ {
898
+ "name": "model.layers.6.mlp.down_proj.q_weight",
899
+ "shape": [
900
+ 4096,
901
+ 1376
902
+ ],
903
+ "dtype": "uint32",
904
+ "format": "f32-to-bf16",
905
+ "nbytes": 22544384,
906
+ "byteOffset": 0
907
+ }
908
+ ],
909
+ "md5sum": "fecefeccf11179fc9285ee87b1865040"
910
+ },
911
+ {
912
+ "dataPath": "params_shard_25.bin",
913
+ "format": "raw-shard",
914
+ "nbytes": 25165824,
915
+ "records": [
916
+ {
917
+ "name": "model.layers.7.self_attn.qkv_proj.q_weight",
918
+ "shape": [
919
+ 12288,
920
+ 512
921
+ ],
922
+ "dtype": "uint32",
923
+ "format": "f32-to-bf16",
924
+ "nbytes": 25165824,
925
+ "byteOffset": 0
926
+ }
927
+ ],
928
+ "md5sum": "0edd1443fe377c6fc3e37ab0797114db"
929
+ },
930
+ {
931
+ "dataPath": "params_shard_26.bin",
932
+ "format": "raw-shard",
933
+ "nbytes": 32587776,
934
+ "records": [
935
+ {
936
+ "name": "model.layers.6.self_attn.qkv_proj.q_scale",
937
+ "shape": [
938
+ 12288,
939
+ 128
940
+ ],
941
+ "dtype": "float16",
942
+ "format": "f32-to-bf16",
943
+ "nbytes": 3145728,
944
+ "byteOffset": 0
945
+ },
946
+ {
947
+ "name": "model.layers.6.self_attn.o_proj.q_weight",
948
+ "shape": [
949
+ 4096,
950
+ 512
951
+ ],
952
+ "dtype": "uint32",
953
+ "format": "f32-to-bf16",
954
+ "nbytes": 8388608,
955
+ "byteOffset": 3145728
956
+ },
957
+ {
958
+ "name": "model.layers.6.self_attn.o_proj.q_scale",
959
+ "shape": [
960
+ 4096,
961
+ 128
962
+ ],
963
+ "dtype": "float16",
964
+ "format": "f32-to-bf16",
965
+ "nbytes": 1048576,
966
+ "byteOffset": 11534336
967
+ },
968
+ {
969
+ "name": "model.layers.6.mlp.gate_up_proj.q_scale",
970
+ "shape": [
971
+ 22016,
972
+ 128
973
+ ],
974
+ "dtype": "float16",
975
+ "format": "f32-to-bf16",
976
+ "nbytes": 5636096,
977
+ "byteOffset": 12582912
978
+ },
979
+ {
980
+ "name": "model.layers.6.mlp.down_proj.q_scale",
981
+ "shape": [
982
+ 4096,
983
+ 344
984
+ ],
985
+ "dtype": "float16",
986
+ "format": "f32-to-bf16",
987
+ "nbytes": 2818048,
988
+ "byteOffset": 18219008
989
+ },
990
+ {
991
+ "name": "model.layers.6.input_layernorm.weight",
992
+ "shape": [
993
+ 4096
994
+ ],
995
+ "dtype": "float16",
996
+ "format": "f32-to-bf16",
997
+ "nbytes": 8192,
998
+ "byteOffset": 21037056
999
+ },
1000
+ {
1001
+ "name": "model.layers.6.post_attention_layernorm.weight",
1002
+ "shape": [
1003
+ 4096
1004
+ ],
1005
+ "dtype": "float16",
1006
+ "format": "f32-to-bf16",
1007
+ "nbytes": 8192,
1008
+ "byteOffset": 21045248
1009
+ },
1010
+ {
1011
+ "name": "model.layers.7.self_attn.qkv_proj.q_scale",
1012
+ "shape": [
1013
+ 12288,
1014
+ 128
1015
+ ],
1016
+ "dtype": "float16",
1017
+ "format": "f32-to-bf16",
1018
+ "nbytes": 3145728,
1019
+ "byteOffset": 21053440
1020
+ },
1021
+ {
1022
+ "name": "model.layers.7.self_attn.o_proj.q_weight",
1023
+ "shape": [
1024
+ 4096,
1025
+ 512
1026
+ ],
1027
+ "dtype": "uint32",
1028
+ "format": "f32-to-bf16",
1029
+ "nbytes": 8388608,
1030
+ "byteOffset": 24199168
1031
+ }
1032
+ ],
1033
+ "md5sum": "40d9cec29a9789826f66ec48442d19c8"
1034
+ },
1035
+ {
1036
+ "dataPath": "params_shard_27.bin",
1037
+ "format": "raw-shard",
1038
+ "nbytes": 45088768,
1039
+ "records": [
1040
+ {
1041
+ "name": "model.layers.7.mlp.gate_up_proj.q_weight",
1042
+ "shape": [
1043
+ 22016,
1044
+ 512
1045
+ ],
1046
+ "dtype": "uint32",
1047
+ "format": "f32-to-bf16",
1048
+ "nbytes": 45088768,
1049
+ "byteOffset": 0
1050
+ }
1051
+ ],
1052
+ "md5sum": "a59eb6abecb6c89e1b747471759a02bc"
1053
+ },
1054
+ {
1055
+ "dataPath": "params_shard_28.bin",
1056
+ "format": "raw-shard",
1057
+ "nbytes": 25165824,
1058
+ "records": [
1059
+ {
1060
+ "name": "model.layers.8.self_attn.qkv_proj.q_weight",
1061
+ "shape": [
1062
+ 12288,
1063
+ 512
1064
+ ],
1065
+ "dtype": "uint32",
1066
+ "format": "f32-to-bf16",
1067
+ "nbytes": 25165824,
1068
+ "byteOffset": 0
1069
+ }
1070
+ ],
1071
+ "md5sum": "8224cd41035258f18a50ac3965fd92a4"
1072
+ },
1073
+ {
1074
+ "dataPath": "params_shard_29.bin",
1075
+ "format": "raw-shard",
1076
+ "nbytes": 32063488,
1077
+ "records": [
1078
+ {
1079
+ "name": "model.layers.7.self_attn.o_proj.q_scale",
1080
+ "shape": [
1081
+ 4096,
1082
+ 128
1083
+ ],
1084
+ "dtype": "float16",
1085
+ "format": "f32-to-bf16",
1086
+ "nbytes": 1048576,
1087
+ "byteOffset": 0
1088
+ },
1089
+ {
1090
+ "name": "model.layers.7.mlp.gate_up_proj.q_scale",
1091
+ "shape": [
1092
+ 22016,
1093
+ 128
1094
+ ],
1095
+ "dtype": "float16",
1096
+ "format": "f32-to-bf16",
1097
+ "nbytes": 5636096,
1098
+ "byteOffset": 1048576
1099
+ },
1100
+ {
1101
+ "name": "model.layers.7.mlp.down_proj.q_weight",
1102
+ "shape": [
1103
+ 4096,
1104
+ 1376
1105
+ ],
1106
+ "dtype": "uint32",
1107
+ "format": "f32-to-bf16",
1108
+ "nbytes": 22544384,
1109
+ "byteOffset": 6684672
1110
+ },
1111
+ {
1112
+ "name": "model.layers.7.mlp.down_proj.q_scale",
1113
+ "shape": [
1114
+ 4096,
1115
+ 344
1116
+ ],
1117
+ "dtype": "float16",
1118
+ "format": "f32-to-bf16",
1119
+ "nbytes": 2818048,
1120
+ "byteOffset": 29229056
1121
+ },
1122
+ {
1123
+ "name": "model.layers.7.input_layernorm.weight",
1124
+ "shape": [
1125
+ 4096
1126
+ ],
1127
+ "dtype": "float16",
1128
+ "format": "f32-to-bf16",
1129
+ "nbytes": 8192,
1130
+ "byteOffset": 32047104
1131
+ },
1132
+ {
1133
+ "name": "model.layers.7.post_attention_layernorm.weight",
1134
+ "shape": [
1135
+ 4096
1136
+ ],
1137
+ "dtype": "float16",
1138
+ "format": "f32-to-bf16",
1139
+ "nbytes": 8192,
1140
+ "byteOffset": 32055296
1141
+ }
1142
+ ],
1143
+ "md5sum": "568e68fb348c065bfd75123c018019ba"
1144
+ },
1145
+ {
1146
+ "dataPath": "params_shard_30.bin",
1147
+ "format": "raw-shard",
1148
+ "nbytes": 45088768,
1149
+ "records": [
1150
+ {
1151
+ "name": "model.layers.8.mlp.gate_up_proj.q_weight",
1152
+ "shape": [
1153
+ 22016,
1154
+ 512
1155
+ ],
1156
+ "dtype": "uint32",
1157
+ "format": "f32-to-bf16",
1158
+ "nbytes": 45088768,
1159
+ "byteOffset": 0
1160
+ }
1161
+ ],
1162
+ "md5sum": "8ee1da3b049eebbb296bcc007620728d"
1163
+ },
1164
+ {
1165
+ "dataPath": "params_shard_31.bin",
1166
+ "format": "raw-shard",
1167
+ "nbytes": 22544384,
1168
+ "records": [
1169
+ {
1170
+ "name": "model.layers.8.mlp.down_proj.q_weight",
1171
+ "shape": [
1172
+ 4096,
1173
+ 1376
1174
+ ],
1175
+ "dtype": "uint32",
1176
+ "format": "f32-to-bf16",
1177
+ "nbytes": 22544384,
1178
+ "byteOffset": 0
1179
+ }
1180
+ ],
1181
+ "md5sum": "59bcc03d2f36cedcead845880380a00e"
1182
+ },
1183
+ {
1184
+ "dataPath": "params_shard_32.bin",
1185
+ "format": "raw-shard",
1186
+ "nbytes": 25165824,
1187
+ "records": [
1188
+ {
1189
+ "name": "model.layers.9.self_attn.qkv_proj.q_weight",
1190
+ "shape": [
1191
+ 12288,
1192
+ 512
1193
+ ],
1194
+ "dtype": "uint32",
1195
+ "format": "f32-to-bf16",
1196
+ "nbytes": 25165824,
1197
+ "byteOffset": 0
1198
+ }
1199
+ ],
1200
+ "md5sum": "ca78b3a407cc95dff476884ec67bf7ca"
1201
+ },
1202
+ {
1203
+ "dataPath": "params_shard_33.bin",
1204
+ "format": "raw-shard",
1205
+ "nbytes": 32587776,
1206
+ "records": [
1207
+ {
1208
+ "name": "model.layers.8.self_attn.qkv_proj.q_scale",
1209
+ "shape": [
1210
+ 12288,
1211
+ 128
1212
+ ],
1213
+ "dtype": "float16",
1214
+ "format": "f32-to-bf16",
1215
+ "nbytes": 3145728,
1216
+ "byteOffset": 0
1217
+ },
1218
+ {
1219
+ "name": "model.layers.8.self_attn.o_proj.q_weight",
1220
+ "shape": [
1221
+ 4096,
1222
+ 512
1223
+ ],
1224
+ "dtype": "uint32",
1225
+ "format": "f32-to-bf16",
1226
+ "nbytes": 8388608,
1227
+ "byteOffset": 3145728
1228
+ },
1229
+ {
1230
+ "name": "model.layers.8.self_attn.o_proj.q_scale",
1231
+ "shape": [
1232
+ 4096,
1233
+ 128
1234
+ ],
1235
+ "dtype": "float16",
1236
+ "format": "f32-to-bf16",
1237
+ "nbytes": 1048576,
1238
+ "byteOffset": 11534336
1239
+ },
1240
+ {
1241
+ "name": "model.layers.8.mlp.gate_up_proj.q_scale",
1242
+ "shape": [
1243
+ 22016,
1244
+ 128
1245
+ ],
1246
+ "dtype": "float16",
1247
+ "format": "f32-to-bf16",
1248
+ "nbytes": 5636096,
1249
+ "byteOffset": 12582912
1250
+ },
1251
+ {
1252
+ "name": "model.layers.8.mlp.down_proj.q_scale",
1253
+ "shape": [
1254
+ 4096,
1255
+ 344
1256
+ ],
1257
+ "dtype": "float16",
1258
+ "format": "f32-to-bf16",
1259
+ "nbytes": 2818048,
1260
+ "byteOffset": 18219008
1261
+ },
1262
+ {
1263
+ "name": "model.layers.8.input_layernorm.weight",
1264
+ "shape": [
1265
+ 4096
1266
+ ],
1267
+ "dtype": "float16",
1268
+ "format": "f32-to-bf16",
1269
+ "nbytes": 8192,
1270
+ "byteOffset": 21037056
1271
+ },
1272
+ {
1273
+ "name": "model.layers.8.post_attention_layernorm.weight",
1274
+ "shape": [
1275
+ 4096
1276
+ ],
1277
+ "dtype": "float16",
1278
+ "format": "f32-to-bf16",
1279
+ "nbytes": 8192,
1280
+ "byteOffset": 21045248
1281
+ },
1282
+ {
1283
+ "name": "model.layers.9.self_attn.qkv_proj.q_scale",
1284
+ "shape": [
1285
+ 12288,
1286
+ 128
1287
+ ],
1288
+ "dtype": "float16",
1289
+ "format": "f32-to-bf16",
1290
+ "nbytes": 3145728,
1291
+ "byteOffset": 21053440
1292
+ },
1293
+ {
1294
+ "name": "model.layers.9.self_attn.o_proj.q_weight",
1295
+ "shape": [
1296
+ 4096,
1297
+ 512
1298
+ ],
1299
+ "dtype": "uint32",
1300
+ "format": "f32-to-bf16",
1301
+ "nbytes": 8388608,
1302
+ "byteOffset": 24199168
1303
+ }
1304
+ ],
1305
+ "md5sum": "07e9c23cb4eeea29574b48448a1d8e7b"
1306
+ },
1307
+ {
1308
+ "dataPath": "params_shard_34.bin",
1309
+ "format": "raw-shard",
1310
+ "nbytes": 45088768,
1311
+ "records": [
1312
+ {
1313
+ "name": "model.layers.9.mlp.gate_up_proj.q_weight",
1314
+ "shape": [
1315
+ 22016,
1316
+ 512
1317
+ ],
1318
+ "dtype": "uint32",
1319
+ "format": "f32-to-bf16",
1320
+ "nbytes": 45088768,
1321
+ "byteOffset": 0
1322
+ }
1323
+ ],
1324
+ "md5sum": "a45160911ca3adf23043ec9e7e82d2bb"
1325
+ },
1326
+ {
1327
+ "dataPath": "params_shard_35.bin",
1328
+ "format": "raw-shard",
1329
+ "nbytes": 25165824,
1330
+ "records": [
1331
+ {
1332
+ "name": "model.layers.10.self_attn.qkv_proj.q_weight",
1333
+ "shape": [
1334
+ 12288,
1335
+ 512
1336
+ ],
1337
+ "dtype": "uint32",
1338
+ "format": "f32-to-bf16",
1339
+ "nbytes": 25165824,
1340
+ "byteOffset": 0
1341
+ }
1342
+ ],
1343
+ "md5sum": "687a041639a0dbc9bca1c33e01d74138"
1344
+ },
1345
+ {
1346
+ "dataPath": "params_shard_36.bin",
1347
+ "format": "raw-shard",
1348
+ "nbytes": 32063488,
1349
+ "records": [
1350
+ {
1351
+ "name": "model.layers.9.self_attn.o_proj.q_scale",
1352
+ "shape": [
1353
+ 4096,
1354
+ 128
1355
+ ],
1356
+ "dtype": "float16",
1357
+ "format": "f32-to-bf16",
1358
+ "nbytes": 1048576,
1359
+ "byteOffset": 0
1360
+ },
1361
+ {
1362
+ "name": "model.layers.9.mlp.gate_up_proj.q_scale",
1363
+ "shape": [
1364
+ 22016,
1365
+ 128
1366
+ ],
1367
+ "dtype": "float16",
1368
+ "format": "f32-to-bf16",
1369
+ "nbytes": 5636096,
1370
+ "byteOffset": 1048576
1371
+ },
1372
+ {
1373
+ "name": "model.layers.9.mlp.down_proj.q_weight",
1374
+ "shape": [
1375
+ 4096,
1376
+ 1376
1377
+ ],
1378
+ "dtype": "uint32",
1379
+ "format": "f32-to-bf16",
1380
+ "nbytes": 22544384,
1381
+ "byteOffset": 6684672
1382
+ },
1383
+ {
1384
+ "name": "model.layers.9.mlp.down_proj.q_scale",
1385
+ "shape": [
1386
+ 4096,
1387
+ 344
1388
+ ],
1389
+ "dtype": "float16",
1390
+ "format": "f32-to-bf16",
1391
+ "nbytes": 2818048,
1392
+ "byteOffset": 29229056
1393
+ },
1394
+ {
1395
+ "name": "model.layers.9.input_layernorm.weight",
1396
+ "shape": [
1397
+ 4096
1398
+ ],
1399
+ "dtype": "float16",
1400
+ "format": "f32-to-bf16",
1401
+ "nbytes": 8192,
1402
+ "byteOffset": 32047104
1403
+ },
1404
+ {
1405
+ "name": "model.layers.9.post_attention_layernorm.weight",
1406
+ "shape": [
1407
+ 4096
1408
+ ],
1409
+ "dtype": "float16",
1410
+ "format": "f32-to-bf16",
1411
+ "nbytes": 8192,
1412
+ "byteOffset": 32055296
1413
+ }
1414
+ ],
1415
+ "md5sum": "ba6589d2dc5c0f724d98905e2847b5ef"
1416
+ },
1417
+ {
1418
+ "dataPath": "params_shard_37.bin",
1419
+ "format": "raw-shard",
1420
+ "nbytes": 45088768,
1421
+ "records": [
1422
+ {
1423
+ "name": "model.layers.10.mlp.gate_up_proj.q_weight",
1424
+ "shape": [
1425
+ 22016,
1426
+ 512
1427
+ ],
1428
+ "dtype": "uint32",
1429
+ "format": "f32-to-bf16",
1430
+ "nbytes": 45088768,
1431
+ "byteOffset": 0
1432
+ }
1433
+ ],
1434
+ "md5sum": "e519600032dfce8612bbb0a02b74d1f1"
1435
+ },
1436
+ {
1437
+ "dataPath": "params_shard_38.bin",
1438
+ "format": "raw-shard",
1439
+ "nbytes": 22544384,
1440
+ "records": [
1441
+ {
1442
+ "name": "model.layers.10.mlp.down_proj.q_weight",
1443
+ "shape": [
1444
+ 4096,
1445
+ 1376
1446
+ ],
1447
+ "dtype": "uint32",
1448
+ "format": "f32-to-bf16",
1449
+ "nbytes": 22544384,
1450
+ "byteOffset": 0
1451
+ }
1452
+ ],
1453
+ "md5sum": "0ccc0ff5ad387b3cae2733c0946b1f94"
1454
+ },
1455
+ {
1456
+ "dataPath": "params_shard_39.bin",
1457
+ "format": "raw-shard",
1458
+ "nbytes": 25165824,
1459
+ "records": [
1460
+ {
1461
+ "name": "model.layers.11.self_attn.qkv_proj.q_weight",
1462
+ "shape": [
1463
+ 12288,
1464
+ 512
1465
+ ],
1466
+ "dtype": "uint32",
1467
+ "format": "f32-to-bf16",
1468
+ "nbytes": 25165824,
1469
+ "byteOffset": 0
1470
+ }
1471
+ ],
1472
+ "md5sum": "d1555a71ee9331b0d235eba4d8d0d413"
1473
+ },
1474
+ {
1475
+ "dataPath": "params_shard_40.bin",
1476
+ "format": "raw-shard",
1477
+ "nbytes": 32587776,
1478
+ "records": [
1479
+ {
1480
+ "name": "model.layers.10.self_attn.qkv_proj.q_scale",
1481
+ "shape": [
1482
+ 12288,
1483
+ 128
1484
+ ],
1485
+ "dtype": "float16",
1486
+ "format": "f32-to-bf16",
1487
+ "nbytes": 3145728,
1488
+ "byteOffset": 0
1489
+ },
1490
+ {
1491
+ "name": "model.layers.10.self_attn.o_proj.q_weight",
1492
+ "shape": [
1493
+ 4096,
1494
+ 512
1495
+ ],
1496
+ "dtype": "uint32",
1497
+ "format": "f32-to-bf16",
1498
+ "nbytes": 8388608,
1499
+ "byteOffset": 3145728
1500
+ },
1501
+ {
1502
+ "name": "model.layers.10.self_attn.o_proj.q_scale",
1503
+ "shape": [
1504
+ 4096,
1505
+ 128
1506
+ ],
1507
+ "dtype": "float16",
1508
+ "format": "f32-to-bf16",
1509
+ "nbytes": 1048576,
1510
+ "byteOffset": 11534336
1511
+ },
1512
+ {
1513
+ "name": "model.layers.10.mlp.gate_up_proj.q_scale",
1514
+ "shape": [
1515
+ 22016,
1516
+ 128
1517
+ ],
1518
+ "dtype": "float16",
1519
+ "format": "f32-to-bf16",
1520
+ "nbytes": 5636096,
1521
+ "byteOffset": 12582912
1522
+ },
1523
+ {
1524
+ "name": "model.layers.10.mlp.down_proj.q_scale",
1525
+ "shape": [
1526
+ 4096,
1527
+ 344
1528
+ ],
1529
+ "dtype": "float16",
1530
+ "format": "f32-to-bf16",
1531
+ "nbytes": 2818048,
1532
+ "byteOffset": 18219008
1533
+ },
1534
+ {
1535
+ "name": "model.layers.10.input_layernorm.weight",
1536
+ "shape": [
1537
+ 4096
1538
+ ],
1539
+ "dtype": "float16",
1540
+ "format": "f32-to-bf16",
1541
+ "nbytes": 8192,
1542
+ "byteOffset": 21037056
1543
+ },
1544
+ {
1545
+ "name": "model.layers.10.post_attention_layernorm.weight",
1546
+ "shape": [
1547
+ 4096
1548
+ ],
1549
+ "dtype": "float16",
1550
+ "format": "f32-to-bf16",
1551
+ "nbytes": 8192,
1552
+ "byteOffset": 21045248
1553
+ },
1554
+ {
1555
+ "name": "model.layers.11.self_attn.qkv_proj.q_scale",
1556
+ "shape": [
1557
+ 12288,
1558
+ 128
1559
+ ],
1560
+ "dtype": "float16",
1561
+ "format": "f32-to-bf16",
1562
+ "nbytes": 3145728,
1563
+ "byteOffset": 21053440
1564
+ },
1565
+ {
1566
+ "name": "model.layers.11.self_attn.o_proj.q_weight",
1567
+ "shape": [
1568
+ 4096,
1569
+ 512
1570
+ ],
1571
+ "dtype": "uint32",
1572
+ "format": "f32-to-bf16",
1573
+ "nbytes": 8388608,
1574
+ "byteOffset": 24199168
1575
+ }
1576
+ ],
1577
+ "md5sum": "c2a383c1ddd738eef41e28f67a7333cd"
1578
+ },
1579
+ {
1580
+ "dataPath": "params_shard_41.bin",
1581
+ "format": "raw-shard",
1582
+ "nbytes": 45088768,
1583
+ "records": [
1584
+ {
1585
+ "name": "model.layers.11.mlp.gate_up_proj.q_weight",
1586
+ "shape": [
1587
+ 22016,
1588
+ 512
1589
+ ],
1590
+ "dtype": "uint32",
1591
+ "format": "f32-to-bf16",
1592
+ "nbytes": 45088768,
1593
+ "byteOffset": 0
1594
+ }
1595
+ ],
1596
+ "md5sum": "e80ac8d70f6a7f8356f787c6dfbf2264"
1597
+ },
1598
+ {
1599
+ "dataPath": "params_shard_42.bin",
1600
+ "format": "raw-shard",
1601
+ "nbytes": 65536000,
1602
+ "records": [
1603
+ {
1604
+ "name": "lm_head.q_weight",
1605
+ "shape": [
1606
+ 32000,
1607
+ 512
1608
+ ],
1609
+ "dtype": "uint32",
1610
+ "format": "f32-to-bf16",
1611
+ "nbytes": 65536000,
1612
+ "byteOffset": 0
1613
+ }
1614
+ ],
1615
+ "md5sum": "f61d0370bcdee50e894cad5a43ac525e"
1616
+ },
1617
+ {
1618
+ "dataPath": "params_shard_43.bin",
1619
+ "format": "raw-shard",
1620
+ "nbytes": 32071680,
1621
+ "records": [
1622
+ {
1623
+ "name": "model.layers.11.self_attn.o_proj.q_scale",
1624
+ "shape": [
1625
+ 4096,
1626
+ 128
1627
+ ],
1628
+ "dtype": "float16",
1629
+ "format": "f32-to-bf16",
1630
+ "nbytes": 1048576,
1631
+ "byteOffset": 0
1632
+ },
1633
+ {
1634
+ "name": "model.layers.11.mlp.gate_up_proj.q_scale",
1635
+ "shape": [
1636
+ 22016,
1637
+ 128
1638
+ ],
1639
+ "dtype": "float16",
1640
+ "format": "f32-to-bf16",
1641
+ "nbytes": 5636096,
1642
+ "byteOffset": 1048576
1643
+ },
1644
+ {
1645
+ "name": "model.layers.11.mlp.down_proj.q_weight",
1646
+ "shape": [
1647
+ 4096,
1648
+ 1376
1649
+ ],
1650
+ "dtype": "uint32",
1651
+ "format": "f32-to-bf16",
1652
+ "nbytes": 22544384,
1653
+ "byteOffset": 6684672
1654
+ },
1655
+ {
1656
+ "name": "model.layers.11.mlp.down_proj.q_scale",
1657
+ "shape": [
1658
+ 4096,
1659
+ 344
1660
+ ],
1661
+ "dtype": "float16",
1662
+ "format": "f32-to-bf16",
1663
+ "nbytes": 2818048,
1664
+ "byteOffset": 29229056
1665
+ },
1666
+ {
1667
+ "name": "model.layers.11.input_layernorm.weight",
1668
+ "shape": [
1669
+ 4096
1670
+ ],
1671
+ "dtype": "float16",
1672
+ "format": "f32-to-bf16",
1673
+ "nbytes": 8192,
1674
+ "byteOffset": 32047104
1675
+ },
1676
+ {
1677
+ "name": "model.layers.11.post_attention_layernorm.weight",
1678
+ "shape": [
1679
+ 4096
1680
+ ],
1681
+ "dtype": "float16",
1682
+ "format": "f32-to-bf16",
1683
+ "nbytes": 8192,
1684
+ "byteOffset": 32055296
1685
+ },
1686
+ {
1687
+ "name": "model.norm.weight",
1688
+ "shape": [
1689
+ 4096
1690
+ ],
1691
+ "dtype": "float16",
1692
+ "format": "f32-to-bf16",
1693
+ "nbytes": 8192,
1694
+ "byteOffset": 32063488
1695
+ }
1696
+ ],
1697
+ "md5sum": "d894019b076493b52db73e02e695b3ff"
1698
+ },
1699
+ {
1700
+ "dataPath": "params_shard_44.bin",
1701
+ "format": "raw-shard",
1702
+ "nbytes": 8192000,
1703
+ "records": [
1704
+ {
1705
+ "name": "lm_head.q_scale",
1706
+ "shape": [
1707
+ 32000,
1708
+ 128
1709
+ ],
1710
+ "dtype": "float16",
1711
+ "format": "f32-to-bf16",
1712
+ "nbytes": 8192000,
1713
+ "byteOffset": 0
1714
+ }
1715
+ ],
1716
+ "md5sum": "473aa64cebc480ac355f7920eb93472a"
1717
+ }
1718
+ ]
1719
+ }
params_shard_0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba2015d1236070535adc75517a2e64b5d0ce66ecc0c66579c6b3ab68ad83dd22
3
+ size 65536000
params_shard_1.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa2dcc693d2c8afacb3ff3e877aeed1dd93f7dea54633ee54486f3d974090750
3
+ size 33357824
params_shard_10.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c378cd13f074be66f162d1352c3427d2b64cafd54e88993d5e2c536a067b84f7
3
+ size 22544384
params_shard_11.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b08f8f895a5cf9e2dfc28bc95479e36b3f33a7a5c6859ed6d329c7f9c3381f4
3
+ size 25165824
params_shard_12.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ac0c42ee1cbab98ef6ea5e342460a9c452ba547b3940970a1d279616537e517
3
+ size 32587776
params_shard_13.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a24ef93fa4ec1f3d652847c184448e8a639fefa0fcfd39910e983fb0da6d2f45
3
+ size 45088768
params_shard_14.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9a87305a7cca5292a029052f11515b054a1d292b5a29c70bf98337122dfee78
3
+ size 25165824
params_shard_15.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:760dee0ab46d6faeb8998526c89196e01660e89a9cac6a1eddb53910d6bbd808
3
+ size 32063488
params_shard_16.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a943f0c65095cb5f45ff379ccd9d4357123697fb45f9b21d6b4e37003ae64f85
3
+ size 45088768
params_shard_17.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1f788f1102bace1e54213072745ef910fea533809f45d6a2de0f9877a02dfde
3
+ size 22544384
params_shard_18.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0241603aa9527210704cef31c274b9cf626b711d2e9ca198a3ca9ab30ddb5af4
3
+ size 25165824
params_shard_19.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa1a22a01e705e526e2fac91ab8a025bc75ec2191f1c360df6c7bd06386efa1a
3
+ size 32587776
params_shard_2.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a8f2e557015bbd9222c427e9d474f177c192b8d386e162eec119415dc9d3999
3
+ size 45088768
params_shard_20.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0fb81c74ab9e6380548eaf87b6a588981275c5e943135a752b68cef3aa4c3d7a
3
+ size 45088768
params_shard_21.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ad80e426af77e371b00f31ae61fe2131275219d6a805e189efe3924ead1e89a
3
+ size 25165824
params_shard_22.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c88a2749881f73435ebd67c514e54b6f90e67aaf0a4aeaa390a8d7b39ac23a7
3
+ size 32063488
params_shard_23.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:019ddf2a78b70d2ec3640eb128dc7983bb6eb2fbaf35220a153e55d4bf3f6fda
3
+ size 45088768
params_shard_24.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c84232b1c63e181d06a69677add3b8e11888f8e52e2d2f7f1a6c60bb9aeff7e6
3
+ size 22544384
params_shard_25.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d754344c88fbf75a8644930d8407370d1be85b2081027eb05a2f9f3162c4dd2
3
+ size 25165824
params_shard_26.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3701703eea518f560fc4d19207912c0a8afc1df36adcf150fc28aff044199e27
3
+ size 32587776
params_shard_27.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:288becdf558046aec22d675b6e236ffbd4c16b7b95d34e1bc62a97839944d2b9
3
+ size 45088768
params_shard_28.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c28b761e3af8b1d619eeffc93ef1af4b9739081c117556e025d1b3271805db5b
3
+ size 25165824
params_shard_29.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46ba0be8cbd96e678f45152c5f4786a7064dbeb144b8d2cea3f23e40ead57c6c
3
+ size 32063488
params_shard_3.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dfa0d6984de454ae97e69b326dc47b69ed1da3cd58546689bd87940ef44d4909
3
+ size 22544384
params_shard_30.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb920919455722619f912a51964acf7c664da0b33d6af4210752c740550fd611
3
+ size 45088768
params_shard_31.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ee84d920d782f3a615524cc4826ae0b6a7a98ecdb45e0c05354b8bd2489960e
3
+ size 22544384
params_shard_32.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1dfa53ec682b116ec803111775d2a7be35c98d4945ceb8915abdae5a48f96e7
3
+ size 25165824
params_shard_33.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a33cea5fe2d5d47271d084423519542324bfb2d76dcb656b40e0b70678121ce
3
+ size 32587776
params_shard_34.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d055ca4443fae984dbb808147e3d8deed77c98dd719bc66241c321631be81ded
3
+ size 45088768
params_shard_35.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0d1e5615fced441749c7a112e437a2f29d043b6ed689c9f407ca918f6cded14
3
+ size 25165824
params_shard_36.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aec7b45ed7e8d5771aefccd64f304ba0b5397d75f6af3d52b914ed8e383a6c25
3
+ size 32063488
params_shard_37.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2dd7f69f45c5f0cddbc8f2fbadd89ef28e318caac8e9d5ec193a6861d030ab72
3
+ size 45088768
params_shard_38.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7776a04cf1817adf9e15aa92a57d61b67ecba68a10f98a878edff9c5eafd77d8
3
+ size 22544384
params_shard_39.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa02671ea3bbc85b76ca93976ac711f1f7f0f7fe0742ddaacd0e4d95245704b6
3
+ size 25165824
params_shard_4.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c11f14b0826d95e6bab8870c9f77c2e2b857c101a23f1b0a998c8dcc44a7ef42
3
+ size 25165824
params_shard_40.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:201188df6f9a6f0f8bc3b56133cdfbeb2d7297e75c2958c9ab472609776d14ca
3
+ size 32587776
params_shard_41.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1b4cc8deece1618522bae395804e08a0e56905445b850fd3dc11263a278d472
3
+ size 45088768
params_shard_42.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8676c2f5766b9dd2253001ac51e0d50336a5a8daafd98cf2df526339f7329c8a
3
+ size 65536000
params_shard_43.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5ce0265ed7b1356efcfbb90ab7633f58b003a84db00d03d2282680081fdea79
3
+ size 32071680
params_shard_44.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54d30a6b341cea15b268e50d14e0adb1f86c094610bf70a76d4f8b8a029e8637
3
+ size 8192000
params_shard_5.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:433f5a16c80b10d6e6d6fbe03b20178363078c7b1e66774b1a600ba3eab18547
3
+ size 32587776
params_shard_6.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f37abb8ebb33cff557356cfee4913367464055dc1669c707c492921f4a9368bb
3
+ size 45088768
params_shard_7.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f0f4a59275bc23b0969c140b7f1c6f306bdd6b6830ccface4ea36b570d2979b
3
+ size 25165824
params_shard_8.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45ef89d64b6e8bf7e7930efd28876ee6e22dc87a5972e8e6b550ecab25694958
3
+ size 32063488
params_shard_9.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f32d611a3276847660038967ef1e7a82ae996d4286f857ab44f19689351c806f
3
+ size 45088768
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
+ size 499723
tokenizer_config.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "bos_token": {
5
+ "__type": "AddedToken",
6
+ "content": "<s>",
7
+ "lstrip": false,
8
+ "normalized": true,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "clean_up_tokenization_spaces": false,
13
+ "eos_token": {
14
+ "__type": "AddedToken",
15
+ "content": "</s>",
16
+ "lstrip": false,
17
+ "normalized": true,
18
+ "rstrip": false,
19
+ "single_word": false
20
+ },
21
+ "legacy": true,
22
+ "model_max_length": 2048,
23
+ "pad_token": null,
24
+ "padding_side": "right",
25
+ "sp_model_kwargs": {},
26
+ "tokenizer_class": "LlamaTokenizer",
27
+ "unk_token": {
28
+ "__type": "AddedToken",
29
+ "content": "<unk>",
30
+ "lstrip": false,
31
+ "normalized": true,
32
+ "rstrip": false,
33
+ "single_word": false
34
+ }
35
+ }