Upload folder using huggingface_hub
Browse files- config.json +6 -24
- mergekit_moe_config.yml +5 -1
- model-00001-of-00002.safetensors +2 -2
- model-00002-of-00002.safetensors +2 -2
- model.safetensors.index.json +1 -1
config.json
CHANGED
@@ -1,50 +1,32 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "
|
3 |
"activation_function": "gelu_new",
|
4 |
"architectures": [
|
5 |
-
"
|
6 |
],
|
7 |
-
"attention_dropout": 0.0,
|
8 |
"attn_pdrop": 0.0,
|
9 |
"auto_map": {
|
10 |
-
"AutoConfig": "
|
11 |
-
"AutoModelForCausalLM": "
|
12 |
},
|
13 |
-
"bos_token_id": null,
|
14 |
"embd_pdrop": 0.0,
|
15 |
-
"eos_token_id": null,
|
16 |
"flash_attn": false,
|
17 |
"flash_rotary": false,
|
18 |
"fused_dense": false,
|
19 |
-
"hidden_act": "silu",
|
20 |
-
"hidden_size": 4096,
|
21 |
"img_processor": null,
|
22 |
"initializer_range": 0.02,
|
23 |
-
"intermediate_size": 14336,
|
24 |
"layer_norm_epsilon": 1e-05,
|
25 |
-
"
|
26 |
-
"model_type": "mixtral",
|
27 |
"n_embd": 2560,
|
28 |
"n_head": 32,
|
29 |
"n_head_kv": null,
|
30 |
"n_inner": null,
|
31 |
"n_layer": 32,
|
32 |
"n_positions": 2048,
|
33 |
-
"num_attention_heads": 32,
|
34 |
-
"num_experts_per_tok": 2,
|
35 |
-
"num_hidden_layers": 32,
|
36 |
-
"num_key_value_heads": 8,
|
37 |
-
"num_local_experts": 2,
|
38 |
-
"output_router_logits": false,
|
39 |
"resid_pdrop": 0.1,
|
40 |
-
"rms_norm_eps": 1e-06,
|
41 |
-
"rope_theta": 10000.0,
|
42 |
"rotary_dim": 32,
|
43 |
-
"router_aux_loss_coef": 0.001,
|
44 |
-
"sliding_window": null,
|
45 |
"tie_word_embeddings": false,
|
46 |
"torch_dtype": "float16",
|
47 |
-
"transformers_version": "4.
|
48 |
-
"use_cache": false,
|
49 |
"vocab_size": 51200
|
50 |
}
|
|
|
1 |
{
|
2 |
+
"_name_or_path": "microsoft/phi-2",
|
3 |
"activation_function": "gelu_new",
|
4 |
"architectures": [
|
5 |
+
"PhiForCausalLM"
|
6 |
],
|
|
|
7 |
"attn_pdrop": 0.0,
|
8 |
"auto_map": {
|
9 |
+
"AutoConfig": "configuration_phi.PhiConfig",
|
10 |
+
"AutoModelForCausalLM": "modeling_phi.PhiForCausalLM"
|
11 |
},
|
|
|
12 |
"embd_pdrop": 0.0,
|
|
|
13 |
"flash_attn": false,
|
14 |
"flash_rotary": false,
|
15 |
"fused_dense": false,
|
|
|
|
|
16 |
"img_processor": null,
|
17 |
"initializer_range": 0.02,
|
|
|
18 |
"layer_norm_epsilon": 1e-05,
|
19 |
+
"model_type": "phi-msft",
|
|
|
20 |
"n_embd": 2560,
|
21 |
"n_head": 32,
|
22 |
"n_head_kv": null,
|
23 |
"n_inner": null,
|
24 |
"n_layer": 32,
|
25 |
"n_positions": 2048,
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
"resid_pdrop": 0.1,
|
|
|
|
|
27 |
"rotary_dim": 32,
|
|
|
|
|
28 |
"tie_word_embeddings": false,
|
29 |
"torch_dtype": "float16",
|
30 |
+
"transformers_version": "4.35.2",
|
|
|
31 |
"vocab_size": 51200
|
32 |
}
|
mergekit_moe_config.yml
CHANGED
@@ -4,4 +4,8 @@ experts:
|
|
4 |
- source_model: cognitivecomputations/dolphin-2_6-phi-2
|
5 |
positive_prompts: [""]
|
6 |
- source_model: lxuechen/phi-2-dpo
|
7 |
-
positive_prompts: [""]
|
|
|
|
|
|
|
|
|
|
4 |
- source_model: cognitivecomputations/dolphin-2_6-phi-2
|
5 |
positive_prompts: [""]
|
6 |
- source_model: lxuechen/phi-2-dpo
|
7 |
+
positive_prompts: [""]
|
8 |
+
- source_model: Yhyu13/phi-2-sft-dpo-gpt4_en-ep1
|
9 |
+
positive_prompts: [""]
|
10 |
+
- source_model: mrm8488/phi-2-coder
|
11 |
+
positive_prompts: [""]
|
model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2a6234dfb7cf0a8d9b6680e32fdadd35d64be11d54bf110d69d8e06a12c3012f
|
3 |
+
size 9965910088
|
model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bb9ce42cacc4264bed468062b76ca893aca9e6c7a9310e6fb67dddcb443855e7
|
3 |
+
size 5662981640
|
model.safetensors.index.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"metadata": {"mergekit_version": "0.0.3.2"}, "weight_map": {"transformer.embd.wte.weight": "model-00001-of-00001.safetensors", "lm_head.linear.bias": "model-00001-of-00001.safetensors", "lm_head.linear.weight": "model-00001-of-00001.safetensors", "lm_head.ln.bias": "model-00001-of-00001.safetensors", "lm_head.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.0.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.1.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.2.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.3.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.4.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.5.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.6.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.7.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.8.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.9.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.10.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.11.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.12.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.13.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.14.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.15.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.16.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.17.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.18.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.19.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.20.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.21.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.22.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.23.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.24.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.25.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.26.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.27.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.28.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.29.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.30.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.31.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.0.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.1.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.2.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.3.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.4.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.5.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.6.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.7.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.8.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.9.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.10.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.11.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.12.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.13.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.14.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.15.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.16.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.17.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.18.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.19.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.20.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.21.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.22.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.23.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.24.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.25.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.26.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.27.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.28.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.29.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.30.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.31.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.0.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.1.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.2.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.3.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.4.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.5.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.6.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.7.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.8.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.9.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.10.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.11.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.12.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.13.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.14.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.15.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.16.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.17.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.18.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.19.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.20.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.21.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.22.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.23.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.24.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.25.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.26.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.27.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.28.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.29.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.30.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.31.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.0.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.1.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.2.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.3.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.4.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.5.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.6.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.7.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.8.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.9.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.10.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.11.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.12.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.13.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.14.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.15.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.16.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.17.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.18.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.19.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.20.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.21.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.22.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.23.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.24.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.25.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.26.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.27.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.28.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.29.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.30.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.31.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.0.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.1.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.2.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.3.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.4.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.5.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.6.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.7.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.8.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.9.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.10.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.11.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.12.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.13.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.14.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.15.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.16.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.17.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.18.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.19.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.20.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.21.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.22.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.23.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.24.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.25.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.26.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.27.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.28.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.29.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.30.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.31.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.0.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.1.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.2.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.3.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.4.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.5.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.6.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.7.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.8.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.9.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.10.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.11.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.12.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.13.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.14.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.15.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.16.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.17.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.18.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.19.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.20.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.21.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.22.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.23.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.24.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.25.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.26.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.27.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.28.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.29.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.30.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.31.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.0.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.0.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.1.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.1.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.2.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.2.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.3.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.3.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.4.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.4.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.5.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.5.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.6.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.6.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.7.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.7.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.8.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.8.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.9.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.9.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.10.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.10.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.11.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.11.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.12.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.12.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.13.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.13.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.14.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.14.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.15.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.15.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.16.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.16.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.17.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.17.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.18.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.18.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.19.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.19.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.20.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.20.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.21.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.21.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.22.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.22.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.23.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.23.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.24.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.24.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.25.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.25.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.26.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.26.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.27.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.27.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.28.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.28.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.29.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.29.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.30.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.30.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.31.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.31.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.0.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.0.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.1.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.1.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.2.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.2.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.3.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.3.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.4.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.4.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.5.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.5.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.6.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.6.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.7.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.7.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.8.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.8.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.9.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.9.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.10.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.10.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.11.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.11.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.12.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.12.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.13.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.13.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.14.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.14.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.15.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.15.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.16.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.16.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.17.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.17.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.18.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.18.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.19.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.19.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.20.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.20.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.21.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.21.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.22.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.22.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.23.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.23.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.24.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.24.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.25.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.25.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.26.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.26.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.27.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.27.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.28.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.28.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.29.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.29.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.30.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.30.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.31.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.31.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.0.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.0.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.1.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.1.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.2.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.2.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.3.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.3.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.4.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.4.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.5.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.5.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.6.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.6.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.7.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.7.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.8.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.8.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.9.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.9.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.10.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.10.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.11.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.11.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.12.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.12.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.13.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.13.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.14.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.14.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.15.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.15.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.16.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.16.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.17.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.17.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.18.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.18.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.19.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.19.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.20.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.20.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.21.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.21.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.22.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.22.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.23.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.23.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.24.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.24.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.25.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.25.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.26.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.26.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.27.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.27.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.28.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.28.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.29.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.29.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.30.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.30.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.31.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.31.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.0.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.0.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.1.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.1.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.2.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.2.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.3.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.3.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.4.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.4.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.5.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.5.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.6.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.6.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.7.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.7.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.8.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.8.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.9.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.9.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.10.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.10.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.11.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.11.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.12.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.12.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.13.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.13.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.14.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.14.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.15.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.15.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.16.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.16.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.17.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.17.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.18.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.18.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.19.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.19.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.20.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.20.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.21.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.21.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.22.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.22.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.23.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.23.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.24.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.24.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.25.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.25.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.26.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.26.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.27.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.27.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.28.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.28.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.29.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.29.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.30.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.30.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.31.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.31.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.0.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "transformer.h.1.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "transformer.h.2.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "transformer.h.3.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "transformer.h.4.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "transformer.h.5.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "transformer.h.6.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "transformer.h.7.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "transformer.h.8.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "transformer.h.9.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "transformer.h.10.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "transformer.h.11.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "transformer.h.12.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "transformer.h.13.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "transformer.h.14.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "transformer.h.15.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "transformer.h.16.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "transformer.h.17.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "transformer.h.18.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "transformer.h.19.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "transformer.h.20.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "transformer.h.21.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "transformer.h.22.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "transformer.h.23.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "transformer.h.24.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "transformer.h.25.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "transformer.h.26.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "transformer.h.27.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "transformer.h.28.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "transformer.h.29.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "transformer.h.30.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "transformer.h.31.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors"}}
|
|
|
1 |
+
{"metadata": {"mergekit_version": "0.0.3.2"}, "weight_map": {"transformer.embd.wte.weight": "model-00001-of-00002.safetensors", "lm_head.linear.bias": "model-00001-of-00002.safetensors", "lm_head.linear.weight": "model-00001-of-00002.safetensors", "lm_head.ln.bias": "model-00001-of-00002.safetensors", "lm_head.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.0.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.1.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.2.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.3.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.4.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.5.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.6.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.7.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.8.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.9.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.10.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.11.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.12.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.13.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.14.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.15.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.16.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.17.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.18.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.19.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.20.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.21.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.22.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.23.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.24.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.25.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.26.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.27.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.28.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.29.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.30.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.31.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.0.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.1.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.2.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.3.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.4.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.5.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.6.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.7.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.8.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.9.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.10.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.11.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.12.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.13.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.14.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.15.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.16.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.17.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.18.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.19.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.20.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.21.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.22.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.23.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.24.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.25.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.26.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.27.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.28.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.29.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.30.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.31.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.0.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.1.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.2.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.3.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.4.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.5.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.6.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.7.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.8.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.9.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.10.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.11.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.12.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.13.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.14.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.15.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.16.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.17.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.18.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.19.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.20.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.21.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.22.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.23.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.24.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.25.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.26.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.27.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.28.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.29.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.30.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.31.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.0.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.1.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.2.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.3.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.4.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.5.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.6.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.7.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.8.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.9.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.10.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.11.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.12.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.13.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.14.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.15.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.16.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.17.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.18.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.19.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.20.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.21.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.22.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.23.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.24.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.25.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.26.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.27.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.28.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.29.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.30.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.31.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.0.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.1.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.2.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.3.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.4.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.5.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.6.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.7.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.8.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.9.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.10.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.11.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.12.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.13.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.14.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.15.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.16.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.17.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.18.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.19.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.20.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.21.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.22.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.23.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.24.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.25.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.26.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.27.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.28.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.29.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.30.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.31.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.0.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.1.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.2.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.3.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.4.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.5.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.6.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.7.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.8.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.9.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.10.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.11.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.12.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.13.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.14.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.15.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.16.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.17.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.18.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.19.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.20.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.21.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.22.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.23.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.24.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.25.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.26.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.27.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.28.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.29.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.30.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.31.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.0.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.0.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.0.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.0.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.1.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.1.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.1.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.1.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.2.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.2.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.2.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.2.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.3.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.3.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.3.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.3.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.4.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.4.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.4.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.4.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.5.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.5.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.5.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.5.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.6.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.6.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.6.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.6.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.7.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.7.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.7.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.7.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.8.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.8.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.8.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.8.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.9.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.9.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.9.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.9.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.10.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.10.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.10.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.10.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.11.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.11.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.11.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.11.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.12.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.12.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.12.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.12.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.13.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.13.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.13.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.13.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.14.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.14.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.14.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.14.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.15.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.15.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.15.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.15.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.16.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.16.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.16.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.16.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.17.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.17.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.17.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.17.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.18.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.18.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.18.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.18.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.19.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.19.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.19.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.19.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.20.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.20.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.20.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.20.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.21.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.21.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.21.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.21.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.22.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.22.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.22.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.22.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.23.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.23.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.23.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.23.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.24.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.24.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.24.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.24.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.25.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.25.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.25.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.25.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.26.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.26.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.26.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.26.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.27.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.27.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.27.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.27.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.28.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.28.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.28.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.28.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.29.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.29.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.29.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.29.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.30.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.30.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.30.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.30.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.31.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.31.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.31.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.31.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.0.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.0.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.0.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.0.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.1.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.1.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.1.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.1.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.2.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.2.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.2.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.2.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.3.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.3.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.3.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.3.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.4.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.4.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.4.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.4.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.5.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.5.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.5.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.5.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.6.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.6.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.6.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.6.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.7.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.7.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.7.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.7.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.8.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.8.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.8.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.8.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.9.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.9.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.9.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.9.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.10.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.10.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.10.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.10.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.11.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.11.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.11.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.11.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.12.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.12.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.12.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.12.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.13.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.13.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.13.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.13.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.14.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.14.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.14.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.14.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.15.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.15.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.15.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.15.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.16.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.16.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.16.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.16.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.17.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.17.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.17.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.17.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.18.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.18.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.18.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.18.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.19.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.19.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.19.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.19.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.20.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.20.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.20.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.20.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.21.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.21.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.21.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.21.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.22.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.22.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.22.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.22.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.23.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.23.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.23.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.23.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.24.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.24.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.24.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.24.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.25.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.25.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.25.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.25.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.26.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.26.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.26.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.26.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.27.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.27.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.27.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.27.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.28.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.28.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.28.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.28.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.29.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.29.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.29.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.29.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.30.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.30.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.30.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.30.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.31.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.31.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.31.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.31.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.0.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.0.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.0.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.0.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.1.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.1.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.1.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.1.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.2.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.2.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.2.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.2.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.3.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.3.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.3.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.3.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.4.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.4.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.4.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.4.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.5.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.5.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.5.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.5.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.6.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.6.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.6.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.6.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.7.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.7.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.7.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.7.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.8.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.8.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.8.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.8.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.9.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.9.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.9.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.9.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.10.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.10.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.10.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.10.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.11.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.11.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.11.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.11.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.12.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.12.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.12.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.12.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.13.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.13.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.13.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.13.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.14.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.14.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.14.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.14.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.15.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.15.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.15.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.15.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.16.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.16.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.16.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.16.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.17.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.17.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.17.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.17.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.18.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.18.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.18.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.18.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.19.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.19.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.19.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.19.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.20.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.20.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.20.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.20.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.21.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.21.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.21.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.21.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.22.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.22.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.22.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.22.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.23.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.23.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.23.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.23.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.24.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.24.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.24.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.24.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.25.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.25.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.25.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.25.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.26.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.26.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.26.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.26.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.27.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.27.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.27.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.27.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.28.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.28.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.28.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.28.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.29.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.29.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.29.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.29.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.30.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.30.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.30.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.30.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.31.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.31.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.31.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.31.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.0.moe.mlp.0.fc2.weight": "model-00001-of-00002.safetensors", "transformer.h.0.moe.mlp.1.fc2.weight": "model-00001-of-00002.safetensors", "transformer.h.0.moe.mlp.2.fc2.weight": "model-00001-of-00002.safetensors", "transformer.h.0.moe.mlp.3.fc2.weight": "model-00001-of-00002.safetensors", "transformer.h.1.moe.mlp.0.fc2.weight": "model-00001-of-00002.safetensors", "transformer.h.1.moe.mlp.1.fc2.weight": "model-00001-of-00002.safetensors", "transformer.h.1.moe.mlp.2.fc2.weight": "model-00001-of-00002.safetensors", "transformer.h.1.moe.mlp.3.fc2.weight": "model-00001-of-00002.safetensors", "transformer.h.2.moe.mlp.0.fc2.weight": "model-00001-of-00002.safetensors", "transformer.h.2.moe.mlp.1.fc2.weight": "model-00001-of-00002.safetensors", "transformer.h.2.moe.mlp.2.fc2.weight": "model-00001-of-00002.safetensors", "transformer.h.2.moe.mlp.3.fc2.weight": "model-00001-of-00002.safetensors", "transformer.h.3.moe.mlp.0.fc2.weight": "model-00001-of-00002.safetensors", "transformer.h.3.moe.mlp.1.fc2.weight": "model-00001-of-00002.safetensors", "transformer.h.3.moe.mlp.2.fc2.weight": "model-00001-of-00002.safetensors", "transformer.h.3.moe.mlp.3.fc2.weight": "model-00001-of-00002.safetensors", "transformer.h.4.moe.mlp.0.fc2.weight": "model-00001-of-00002.safetensors", "transformer.h.4.moe.mlp.1.fc2.weight": "model-00001-of-00002.safetensors", "transformer.h.4.moe.mlp.2.fc2.weight": "model-00001-of-00002.safetensors", "transformer.h.4.moe.mlp.3.fc2.weight": "model-00001-of-00002.safetensors", "transformer.h.5.moe.mlp.0.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.5.moe.mlp.1.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.5.moe.mlp.2.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.5.moe.mlp.3.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.6.moe.mlp.0.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.6.moe.mlp.1.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.6.moe.mlp.2.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.6.moe.mlp.3.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.7.moe.mlp.0.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.7.moe.mlp.1.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.7.moe.mlp.2.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.7.moe.mlp.3.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.8.moe.mlp.0.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.8.moe.mlp.1.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.8.moe.mlp.2.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.8.moe.mlp.3.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.9.moe.mlp.0.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.9.moe.mlp.1.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.9.moe.mlp.2.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.9.moe.mlp.3.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.10.moe.mlp.0.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.10.moe.mlp.1.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.10.moe.mlp.2.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.10.moe.mlp.3.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.11.moe.mlp.0.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.11.moe.mlp.1.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.11.moe.mlp.2.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.11.moe.mlp.3.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.12.moe.mlp.0.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.12.moe.mlp.1.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.12.moe.mlp.2.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.12.moe.mlp.3.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.13.moe.mlp.0.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.13.moe.mlp.1.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.13.moe.mlp.2.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.13.moe.mlp.3.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.14.moe.mlp.0.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.14.moe.mlp.1.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.14.moe.mlp.2.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.14.moe.mlp.3.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.15.moe.mlp.0.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.15.moe.mlp.1.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.15.moe.mlp.2.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.15.moe.mlp.3.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.16.moe.mlp.0.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.16.moe.mlp.1.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.16.moe.mlp.2.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.16.moe.mlp.3.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.17.moe.mlp.0.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.17.moe.mlp.1.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.17.moe.mlp.2.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.17.moe.mlp.3.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.18.moe.mlp.0.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.18.moe.mlp.1.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.18.moe.mlp.2.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.18.moe.mlp.3.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.19.moe.mlp.0.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.19.moe.mlp.1.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.19.moe.mlp.2.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.19.moe.mlp.3.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.20.moe.mlp.0.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.20.moe.mlp.1.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.20.moe.mlp.2.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.20.moe.mlp.3.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.21.moe.mlp.0.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.21.moe.mlp.1.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.21.moe.mlp.2.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.21.moe.mlp.3.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.22.moe.mlp.0.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.22.moe.mlp.1.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.22.moe.mlp.2.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.22.moe.mlp.3.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.23.moe.mlp.0.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.23.moe.mlp.1.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.23.moe.mlp.2.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.23.moe.mlp.3.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.24.moe.mlp.0.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.24.moe.mlp.1.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.24.moe.mlp.2.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.24.moe.mlp.3.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.25.moe.mlp.0.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.25.moe.mlp.1.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.25.moe.mlp.2.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.25.moe.mlp.3.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.26.moe.mlp.0.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.26.moe.mlp.1.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.26.moe.mlp.2.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.26.moe.mlp.3.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.27.moe.mlp.0.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.27.moe.mlp.1.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.27.moe.mlp.2.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.27.moe.mlp.3.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.28.moe.mlp.0.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.28.moe.mlp.1.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.28.moe.mlp.2.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.28.moe.mlp.3.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.29.moe.mlp.0.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.29.moe.mlp.1.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.29.moe.mlp.2.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.29.moe.mlp.3.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.30.moe.mlp.0.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.30.moe.mlp.1.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.30.moe.mlp.2.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.30.moe.mlp.3.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.31.moe.mlp.0.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.31.moe.mlp.1.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.31.moe.mlp.2.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.31.moe.mlp.3.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.0.moe.gate.weight": "model-00002-of-00002.safetensors", "transformer.h.1.moe.gate.weight": "model-00002-of-00002.safetensors", "transformer.h.2.moe.gate.weight": "model-00002-of-00002.safetensors", "transformer.h.3.moe.gate.weight": "model-00002-of-00002.safetensors", "transformer.h.4.moe.gate.weight": "model-00002-of-00002.safetensors", "transformer.h.5.moe.gate.weight": "model-00002-of-00002.safetensors", "transformer.h.6.moe.gate.weight": "model-00002-of-00002.safetensors", "transformer.h.7.moe.gate.weight": "model-00002-of-00002.safetensors", "transformer.h.8.moe.gate.weight": "model-00002-of-00002.safetensors", "transformer.h.9.moe.gate.weight": "model-00002-of-00002.safetensors", "transformer.h.10.moe.gate.weight": "model-00002-of-00002.safetensors", "transformer.h.11.moe.gate.weight": "model-00002-of-00002.safetensors", "transformer.h.12.moe.gate.weight": "model-00002-of-00002.safetensors", "transformer.h.13.moe.gate.weight": "model-00002-of-00002.safetensors", "transformer.h.14.moe.gate.weight": "model-00002-of-00002.safetensors", "transformer.h.15.moe.gate.weight": "model-00002-of-00002.safetensors", "transformer.h.16.moe.gate.weight": "model-00002-of-00002.safetensors", "transformer.h.17.moe.gate.weight": "model-00002-of-00002.safetensors", "transformer.h.18.moe.gate.weight": "model-00002-of-00002.safetensors", "transformer.h.19.moe.gate.weight": "model-00002-of-00002.safetensors", "transformer.h.20.moe.gate.weight": "model-00002-of-00002.safetensors", "transformer.h.21.moe.gate.weight": "model-00002-of-00002.safetensors", "transformer.h.22.moe.gate.weight": "model-00002-of-00002.safetensors", "transformer.h.23.moe.gate.weight": "model-00002-of-00002.safetensors", "transformer.h.24.moe.gate.weight": "model-00002-of-00002.safetensors", "transformer.h.25.moe.gate.weight": "model-00002-of-00002.safetensors", "transformer.h.26.moe.gate.weight": "model-00002-of-00002.safetensors", "transformer.h.27.moe.gate.weight": "model-00002-of-00002.safetensors", "transformer.h.28.moe.gate.weight": "model-00002-of-00002.safetensors", "transformer.h.29.moe.gate.weight": "model-00002-of-00002.safetensors", "transformer.h.30.moe.gate.weight": "model-00002-of-00002.safetensors", "transformer.h.31.moe.gate.weight": "model-00002-of-00002.safetensors"}}
|