mlabonne committed on
Commit
bc8a3bc
1 Parent(s): 36c1f1f

Upload folder using huggingface_hub

Browse files
config.json CHANGED
@@ -1,50 +1,32 @@
1
  {
2
- "_name_or_path": "cognitivecomputations/dolphin-2_6-phi-2",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
- "MixtralForCausalLM"
6
  ],
7
- "attention_dropout": 0.0,
8
  "attn_pdrop": 0.0,
9
  "auto_map": {
10
- "AutoConfig": "cognitivecomputations/dolphin-2_6-phi-2--configuration_phi.PhiConfig",
11
- "AutoModelForCausalLM": "cognitivecomputations/dolphin-2_6-phi-2--modeling_phi.PhiForCausalLM"
12
  },
13
- "bos_token_id": null,
14
  "embd_pdrop": 0.0,
15
- "eos_token_id": null,
16
  "flash_attn": false,
17
  "flash_rotary": false,
18
  "fused_dense": false,
19
- "hidden_act": "silu",
20
- "hidden_size": 4096,
21
  "img_processor": null,
22
  "initializer_range": 0.02,
23
- "intermediate_size": 14336,
24
  "layer_norm_epsilon": 1e-05,
25
- "max_position_embeddings": 2048,
26
- "model_type": "mixtral",
27
  "n_embd": 2560,
28
  "n_head": 32,
29
  "n_head_kv": null,
30
  "n_inner": null,
31
  "n_layer": 32,
32
  "n_positions": 2048,
33
- "num_attention_heads": 32,
34
- "num_experts_per_tok": 2,
35
- "num_hidden_layers": 32,
36
- "num_key_value_heads": 8,
37
- "num_local_experts": 2,
38
- "output_router_logits": false,
39
  "resid_pdrop": 0.1,
40
- "rms_norm_eps": 1e-06,
41
- "rope_theta": 10000.0,
42
  "rotary_dim": 32,
43
- "router_aux_loss_coef": 0.001,
44
- "sliding_window": null,
45
  "tie_word_embeddings": false,
46
  "torch_dtype": "float16",
47
- "transformers_version": "4.36.2",
48
- "use_cache": false,
49
  "vocab_size": 51200
50
  }
 
1
  {
2
+ "_name_or_path": "microsoft/phi-2",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
+ "PhiForCausalLM"
6
  ],
 
7
  "attn_pdrop": 0.0,
8
  "auto_map": {
9
+ "AutoConfig": "configuration_phi.PhiConfig",
10
+ "AutoModelForCausalLM": "modeling_phi.PhiForCausalLM"
11
  },
 
12
  "embd_pdrop": 0.0,
 
13
  "flash_attn": false,
14
  "flash_rotary": false,
15
  "fused_dense": false,
 
 
16
  "img_processor": null,
17
  "initializer_range": 0.02,
 
18
  "layer_norm_epsilon": 1e-05,
19
+ "model_type": "phi-msft",
 
20
  "n_embd": 2560,
21
  "n_head": 32,
22
  "n_head_kv": null,
23
  "n_inner": null,
24
  "n_layer": 32,
25
  "n_positions": 2048,
 
 
 
 
 
 
26
  "resid_pdrop": 0.1,
 
 
27
  "rotary_dim": 32,
 
 
28
  "tie_word_embeddings": false,
29
  "torch_dtype": "float16",
30
+ "transformers_version": "4.35.2",
 
31
  "vocab_size": 51200
32
  }
mergekit_moe_config.yml CHANGED
@@ -4,4 +4,8 @@ experts:
4
  - source_model: cognitivecomputations/dolphin-2_6-phi-2
5
  positive_prompts: [""]
6
  - source_model: lxuechen/phi-2-dpo
7
- positive_prompts: [""]
 
 
 
 
 
4
  - source_model: cognitivecomputations/dolphin-2_6-phi-2
5
  positive_prompts: [""]
6
  - source_model: lxuechen/phi-2-dpo
7
+ positive_prompts: [""]
8
+ - source_model: Yhyu13/phi-2-sft-dpo-gpt4_en-ep1
9
+ positive_prompts: [""]
10
+ - source_model: mrm8488/phi-2-coder
11
+ positive_prompts: [""]
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:89f2a8822555a57254b2dd9aa5f41b6d4eb4f5cd421e93423cccf0153ebcf109
3
- size 9965916552
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a6234dfb7cf0a8d9b6680e32fdadd35d64be11d54bf110d69d8e06a12c3012f
3
+ size 9965910088
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8f0022776c348793df7b4ec4fd72450496967ef5841f900a60f9a32e133b017e
3
- size 5662983792
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb9ce42cacc4264bed468062b76ca893aca9e6c7a9310e6fb67dddcb443855e7
3
+ size 5662981640
model.safetensors.index.json CHANGED
@@ -1 +1 @@
1
- {"metadata": {"mergekit_version": "0.0.3.2"}, "weight_map": {"transformer.embd.wte.weight": "model-00001-of-00001.safetensors", "lm_head.linear.bias": "model-00001-of-00001.safetensors", "lm_head.linear.weight": "model-00001-of-00001.safetensors", "lm_head.ln.bias": "model-00001-of-00001.safetensors", "lm_head.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.0.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.1.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.2.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.3.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.4.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.5.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.6.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.7.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.8.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.9.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.10.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.11.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.12.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.13.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.14.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.15.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.16.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.17.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.18.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.19.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.20.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.21.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.22.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.23.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.24.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.25.ln.bias": 
"model-00001-of-00001.safetensors", "transformer.h.26.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.27.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.28.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.29.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.30.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.31.ln.bias": "model-00001-of-00001.safetensors", "transformer.h.0.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.1.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.2.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.3.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.4.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.5.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.6.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.7.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.8.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.9.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.10.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.11.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.12.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.13.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.14.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.15.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.16.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.17.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.18.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.19.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.20.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.21.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.22.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.23.ln.weight": "model-00001-of-00001.safetensors", 
"transformer.h.24.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.25.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.26.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.27.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.28.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.29.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.30.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.31.ln.weight": "model-00001-of-00001.safetensors", "transformer.h.0.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.1.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.2.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.3.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.4.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.5.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.6.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.7.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.8.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.9.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.10.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.11.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.12.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.13.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.14.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.15.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.16.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.17.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.18.mixer.out_proj.bias": "model-00001-of-00001.safetensors", 
"transformer.h.19.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.20.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.21.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.22.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.23.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.24.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.25.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.26.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.27.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.28.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.29.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.30.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.31.mixer.out_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.0.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.1.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.2.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.3.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.4.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.5.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.6.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.7.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.8.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.9.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.10.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.11.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.12.mixer.out_proj.weight": "model-00001-of-00001.safetensors", 
"transformer.h.13.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.14.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.15.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.16.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.17.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.18.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.19.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.20.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.21.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.22.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.23.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.24.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.25.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.26.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.27.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.28.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.29.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.30.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.31.mixer.out_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.0.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.1.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.2.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.3.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.4.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.5.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.6.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", 
"transformer.h.7.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.8.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.9.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.10.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.11.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.12.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.13.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.14.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.15.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.16.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.17.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.18.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.19.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.20.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.21.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.22.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.23.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.24.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.25.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.26.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.27.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.28.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.29.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.30.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.31.mixer.Wqkv.bias": "model-00001-of-00001.safetensors", "transformer.h.0.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.1.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.2.mixer.Wqkv.weight": 
"model-00001-of-00001.safetensors", "transformer.h.3.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.4.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.5.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.6.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.7.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.8.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.9.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.10.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.11.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.12.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.13.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.14.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.15.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.16.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.17.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.18.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.19.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.20.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.21.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.22.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.23.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.24.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.25.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.26.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.27.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.28.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.29.mixer.Wqkv.weight": 
"model-00001-of-00001.safetensors", "transformer.h.30.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.31.mixer.Wqkv.weight": "model-00001-of-00001.safetensors", "transformer.h.0.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.0.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.1.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.1.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.2.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.2.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.3.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.3.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.4.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.4.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.5.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.5.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.6.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.6.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.7.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.7.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.8.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.8.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.9.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.9.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", 
"transformer.h.10.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.10.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.11.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.11.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.12.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.12.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.13.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.13.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.14.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.14.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.15.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.15.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.16.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.16.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.17.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.17.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.18.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.18.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.19.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.19.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.20.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.20.block_sparse_moe.experts.1.w1.bias": 
"model-00001-of-00001.safetensors", "transformer.h.21.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.21.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.22.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.22.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.23.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.23.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.24.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.24.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.25.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.25.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.26.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.26.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.27.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.27.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.28.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.28.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.29.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.29.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.30.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.30.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.31.block_sparse_moe.experts.0.w1.bias": "model-00001-of-00001.safetensors", 
"transformer.h.31.block_sparse_moe.experts.1.w1.bias": "model-00001-of-00001.safetensors", "transformer.h.0.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.0.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.1.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.1.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.2.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.2.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.3.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.3.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.4.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.4.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.5.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.5.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.6.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.6.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.7.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.7.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.8.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.8.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.9.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.9.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.10.block_sparse_moe.experts.0.w1.weight": 
"model-00001-of-00001.safetensors", "transformer.h.10.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.11.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.11.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.12.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.12.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.13.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.13.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.14.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.14.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.15.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.15.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.16.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.16.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.17.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.17.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.18.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.18.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.19.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.19.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.20.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.20.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", 
"transformer.h.21.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.21.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.22.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.22.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.23.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.23.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.24.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.24.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.25.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.25.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.26.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.26.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.27.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.27.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.28.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.28.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.29.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.29.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.30.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.30.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.31.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", 
"transformer.h.31.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.0.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.0.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.1.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.1.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.2.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.2.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.3.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.3.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.4.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.4.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.5.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.5.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.6.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.6.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.7.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.7.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.8.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.8.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.9.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.9.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.10.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", 
"transformer.h.10.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.11.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.11.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.12.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.12.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.13.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.13.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.14.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.14.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.15.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.15.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.16.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.16.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.17.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.17.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.18.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.18.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.19.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.19.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.20.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.20.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.21.block_sparse_moe.experts.0.w2.bias": 
"model-00001-of-00001.safetensors", "transformer.h.21.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.22.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.22.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.23.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.23.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.24.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.24.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.25.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.25.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.26.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.26.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.27.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.27.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.28.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.28.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.29.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.29.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.30.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.30.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.31.block_sparse_moe.experts.0.w2.bias": "model-00001-of-00001.safetensors", "transformer.h.31.block_sparse_moe.experts.1.w2.bias": "model-00001-of-00001.safetensors", 
"transformer.h.0.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.0.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.1.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.1.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.2.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.2.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.3.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.3.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.4.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.4.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.5.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.5.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.6.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.6.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.7.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.7.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.8.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.8.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.9.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.9.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.10.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.10.block_sparse_moe.experts.1.w2.weight": 
"model-00001-of-00001.safetensors", "transformer.h.11.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.11.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.12.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.12.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.13.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.13.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.14.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.14.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.15.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.15.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.16.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.16.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.17.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.17.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.18.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.18.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.19.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.19.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.20.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.20.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.21.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", 
"transformer.h.21.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.22.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.22.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.23.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.23.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.24.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.24.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.25.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.25.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.26.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.26.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.27.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.27.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.28.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.28.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.29.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.29.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.30.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.30.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.31.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.31.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", 
"transformer.h.0.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "transformer.h.1.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "transformer.h.2.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "transformer.h.3.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "transformer.h.4.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "transformer.h.5.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "transformer.h.6.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "transformer.h.7.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "transformer.h.8.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "transformer.h.9.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "transformer.h.10.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "transformer.h.11.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "transformer.h.12.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "transformer.h.13.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "transformer.h.14.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "transformer.h.15.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "transformer.h.16.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "transformer.h.17.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "transformer.h.18.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "transformer.h.19.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "transformer.h.20.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "transformer.h.21.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "transformer.h.22.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "transformer.h.23.block_sparse_moe.gate.weight": 
"model-00001-of-00001.safetensors", "transformer.h.24.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "transformer.h.25.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "transformer.h.26.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "transformer.h.27.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "transformer.h.28.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "transformer.h.29.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "transformer.h.30.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "transformer.h.31.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors"}}
 
1
+ {"metadata": {"mergekit_version": "0.0.3.2"}, "weight_map": {"transformer.embd.wte.weight": "model-00001-of-00002.safetensors", "lm_head.linear.bias": "model-00001-of-00002.safetensors", "lm_head.linear.weight": "model-00001-of-00002.safetensors", "lm_head.ln.bias": "model-00001-of-00002.safetensors", "lm_head.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.0.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.1.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.2.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.3.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.4.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.5.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.6.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.7.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.8.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.9.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.10.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.11.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.12.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.13.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.14.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.15.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.16.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.17.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.18.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.19.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.20.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.21.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.22.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.23.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.24.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.25.ln.bias": 
"model-00001-of-00002.safetensors", "transformer.h.26.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.27.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.28.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.29.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.30.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.31.ln.bias": "model-00001-of-00002.safetensors", "transformer.h.0.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.1.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.2.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.3.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.4.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.5.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.6.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.7.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.8.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.9.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.10.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.11.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.12.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.13.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.14.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.15.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.16.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.17.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.18.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.19.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.20.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.21.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.22.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.23.ln.weight": "model-00001-of-00002.safetensors", 
"transformer.h.24.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.25.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.26.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.27.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.28.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.29.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.30.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.31.ln.weight": "model-00001-of-00002.safetensors", "transformer.h.0.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.1.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.2.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.3.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.4.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.5.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.6.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.7.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.8.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.9.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.10.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.11.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.12.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.13.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.14.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.15.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.16.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.17.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.18.mixer.out_proj.bias": "model-00001-of-00002.safetensors", 
"transformer.h.19.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.20.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.21.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.22.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.23.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.24.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.25.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.26.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.27.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.28.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.29.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.30.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.31.mixer.out_proj.bias": "model-00001-of-00002.safetensors", "transformer.h.0.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.1.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.2.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.3.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.4.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.5.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.6.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.7.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.8.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.9.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.10.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.11.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.12.mixer.out_proj.weight": "model-00001-of-00002.safetensors", 
"transformer.h.13.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.14.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.15.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.16.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.17.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.18.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.19.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.20.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.21.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.22.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.23.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.24.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.25.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.26.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.27.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.28.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.29.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.30.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.31.mixer.out_proj.weight": "model-00001-of-00002.safetensors", "transformer.h.0.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.1.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.2.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.3.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.4.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.5.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.6.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", 
"transformer.h.7.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.8.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.9.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.10.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.11.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.12.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.13.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.14.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.15.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.16.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.17.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.18.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.19.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.20.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.21.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.22.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.23.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.24.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.25.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.26.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.27.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.28.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.29.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.30.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.31.mixer.Wqkv.bias": "model-00001-of-00002.safetensors", "transformer.h.0.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.1.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.2.mixer.Wqkv.weight": 
"model-00001-of-00002.safetensors", "transformer.h.3.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.4.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.5.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.6.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.7.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.8.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.9.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.10.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.11.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.12.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.13.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.14.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.15.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.16.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.17.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.18.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.19.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.20.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.21.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.22.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.23.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.24.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.25.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.26.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.27.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.28.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.29.mixer.Wqkv.weight": 
"model-00001-of-00002.safetensors", "transformer.h.30.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.31.mixer.Wqkv.weight": "model-00001-of-00002.safetensors", "transformer.h.0.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.0.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.0.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.0.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.1.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.1.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.1.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.1.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.2.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.2.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.2.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.2.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.3.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.3.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.3.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.3.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.4.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.4.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.4.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.4.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.5.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.5.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.5.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.5.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.6.moe.mlp.0.fc1.bias": 
"model-00001-of-00002.safetensors", "transformer.h.6.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.6.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.6.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.7.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.7.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.7.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.7.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.8.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.8.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.8.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.8.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.9.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.9.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.9.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.9.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.10.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.10.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.10.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.10.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.11.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.11.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.11.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.11.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.12.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.12.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.12.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", 
"transformer.h.12.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.13.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.13.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.13.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.13.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.14.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.14.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.14.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.14.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.15.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.15.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.15.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.15.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.16.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.16.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.16.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.16.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.17.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.17.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.17.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.17.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.18.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.18.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.18.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.18.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.19.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.19.moe.mlp.1.fc1.bias": 
"model-00001-of-00002.safetensors", "transformer.h.19.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.19.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.20.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.20.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.20.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.20.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.21.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.21.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.21.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.21.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.22.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.22.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.22.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.22.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.23.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.23.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.23.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.23.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.24.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.24.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.24.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.24.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.25.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.25.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.25.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.25.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", 
"transformer.h.26.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.26.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.26.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.26.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.27.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.27.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.27.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.27.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.28.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.28.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.28.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.28.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.29.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.29.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.29.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.29.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.30.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.30.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.30.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.30.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.31.moe.mlp.0.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.31.moe.mlp.1.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.31.moe.mlp.2.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.31.moe.mlp.3.fc1.bias": "model-00001-of-00002.safetensors", "transformer.h.0.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.0.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.0.moe.mlp.2.fc1.weight": 
"model-00001-of-00002.safetensors", "transformer.h.0.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.1.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.1.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.1.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.1.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.2.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.2.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.2.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.2.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.3.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.3.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.3.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.3.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.4.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.4.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.4.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.4.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.5.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.5.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.5.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.5.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.6.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.6.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.6.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.6.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.7.moe.mlp.0.fc1.weight": 
"model-00001-of-00002.safetensors", "transformer.h.7.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.7.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.7.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.8.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.8.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.8.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.8.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.9.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.9.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.9.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.9.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.10.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.10.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.10.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.10.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.11.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.11.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.11.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.11.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.12.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.12.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.12.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.12.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.13.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.13.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.13.moe.mlp.2.fc1.weight": 
"model-00001-of-00002.safetensors", "transformer.h.13.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.14.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.14.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.14.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.14.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.15.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.15.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.15.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.15.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.16.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.16.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.16.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.16.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.17.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.17.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.17.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.17.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.18.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.18.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.18.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.18.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.19.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.19.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.19.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.19.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", 
"transformer.h.20.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.20.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.20.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.20.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.21.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.21.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.21.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.21.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.22.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.22.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.22.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.22.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.23.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.23.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.23.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.23.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.24.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.24.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.24.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.24.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.25.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.25.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.25.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.25.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.26.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.26.moe.mlp.1.fc1.weight": 
"model-00001-of-00002.safetensors", "transformer.h.26.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.26.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.27.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.27.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.27.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.27.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.28.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.28.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.28.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.28.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.29.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.29.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.29.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.29.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.30.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.30.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.30.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.30.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.31.moe.mlp.0.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.31.moe.mlp.1.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.31.moe.mlp.2.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.31.moe.mlp.3.fc1.weight": "model-00001-of-00002.safetensors", "transformer.h.0.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.0.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.0.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.0.moe.mlp.3.fc2.bias": 
"model-00001-of-00002.safetensors", "transformer.h.1.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.1.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.1.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.1.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.2.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.2.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.2.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.2.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.3.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.3.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.3.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.3.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.4.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.4.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.4.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.4.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.5.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.5.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.5.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.5.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.6.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.6.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.6.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.6.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.7.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.7.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.7.moe.mlp.2.fc2.bias": 
"model-00001-of-00002.safetensors", "transformer.h.7.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.8.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.8.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.8.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.8.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.9.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.9.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.9.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.9.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.10.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.10.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.10.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.10.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.11.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.11.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.11.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.11.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.12.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.12.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.12.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.12.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.13.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.13.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.13.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.13.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.14.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", 
"transformer.h.14.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.14.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.14.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.15.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.15.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.15.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.15.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.16.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.16.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.16.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.16.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.17.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.17.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.17.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.17.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.18.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.18.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.18.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.18.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.19.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.19.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.19.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.19.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.20.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.20.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.20.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.20.moe.mlp.3.fc2.bias": 
"model-00001-of-00002.safetensors", "transformer.h.21.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.21.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.21.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.21.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.22.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.22.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.22.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.22.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.23.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.23.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.23.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.23.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.24.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.24.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.24.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.24.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.25.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.25.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.25.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.25.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.26.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.26.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.26.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.26.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.27.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.27.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", 
"transformer.h.27.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.27.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.28.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.28.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.28.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.28.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.29.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.29.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.29.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.29.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.30.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.30.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.30.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.30.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.31.moe.mlp.0.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.31.moe.mlp.1.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.31.moe.mlp.2.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.31.moe.mlp.3.fc2.bias": "model-00001-of-00002.safetensors", "transformer.h.0.moe.mlp.0.fc2.weight": "model-00001-of-00002.safetensors", "transformer.h.0.moe.mlp.1.fc2.weight": "model-00001-of-00002.safetensors", "transformer.h.0.moe.mlp.2.fc2.weight": "model-00001-of-00002.safetensors", "transformer.h.0.moe.mlp.3.fc2.weight": "model-00001-of-00002.safetensors", "transformer.h.1.moe.mlp.0.fc2.weight": "model-00001-of-00002.safetensors", "transformer.h.1.moe.mlp.1.fc2.weight": "model-00001-of-00002.safetensors", "transformer.h.1.moe.mlp.2.fc2.weight": "model-00001-of-00002.safetensors", "transformer.h.1.moe.mlp.3.fc2.weight": "model-00001-of-00002.safetensors", "transformer.h.2.moe.mlp.0.fc2.weight": 
"model-00001-of-00002.safetensors", "transformer.h.2.moe.mlp.1.fc2.weight": "model-00001-of-00002.safetensors", "transformer.h.2.moe.mlp.2.fc2.weight": "model-00001-of-00002.safetensors", "transformer.h.2.moe.mlp.3.fc2.weight": "model-00001-of-00002.safetensors", "transformer.h.3.moe.mlp.0.fc2.weight": "model-00001-of-00002.safetensors", "transformer.h.3.moe.mlp.1.fc2.weight": "model-00001-of-00002.safetensors", "transformer.h.3.moe.mlp.2.fc2.weight": "model-00001-of-00002.safetensors", "transformer.h.3.moe.mlp.3.fc2.weight": "model-00001-of-00002.safetensors", "transformer.h.4.moe.mlp.0.fc2.weight": "model-00001-of-00002.safetensors", "transformer.h.4.moe.mlp.1.fc2.weight": "model-00001-of-00002.safetensors", "transformer.h.4.moe.mlp.2.fc2.weight": "model-00001-of-00002.safetensors", "transformer.h.4.moe.mlp.3.fc2.weight": "model-00001-of-00002.safetensors", "transformer.h.5.moe.mlp.0.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.5.moe.mlp.1.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.5.moe.mlp.2.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.5.moe.mlp.3.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.6.moe.mlp.0.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.6.moe.mlp.1.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.6.moe.mlp.2.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.6.moe.mlp.3.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.7.moe.mlp.0.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.7.moe.mlp.1.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.7.moe.mlp.2.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.7.moe.mlp.3.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.8.moe.mlp.0.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.8.moe.mlp.1.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.8.moe.mlp.2.fc2.weight": 
"model-00002-of-00002.safetensors", "transformer.h.8.moe.mlp.3.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.9.moe.mlp.0.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.9.moe.mlp.1.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.9.moe.mlp.2.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.9.moe.mlp.3.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.10.moe.mlp.0.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.10.moe.mlp.1.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.10.moe.mlp.2.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.10.moe.mlp.3.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.11.moe.mlp.0.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.11.moe.mlp.1.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.11.moe.mlp.2.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.11.moe.mlp.3.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.12.moe.mlp.0.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.12.moe.mlp.1.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.12.moe.mlp.2.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.12.moe.mlp.3.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.13.moe.mlp.0.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.13.moe.mlp.1.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.13.moe.mlp.2.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.13.moe.mlp.3.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.14.moe.mlp.0.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.14.moe.mlp.1.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.14.moe.mlp.2.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.14.moe.mlp.3.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.15.moe.mlp.0.fc2.weight": 
"model-00002-of-00002.safetensors", "transformer.h.15.moe.mlp.1.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.15.moe.mlp.2.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.15.moe.mlp.3.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.16.moe.mlp.0.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.16.moe.mlp.1.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.16.moe.mlp.2.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.16.moe.mlp.3.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.17.moe.mlp.0.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.17.moe.mlp.1.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.17.moe.mlp.2.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.17.moe.mlp.3.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.18.moe.mlp.0.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.18.moe.mlp.1.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.18.moe.mlp.2.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.18.moe.mlp.3.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.19.moe.mlp.0.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.19.moe.mlp.1.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.19.moe.mlp.2.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.19.moe.mlp.3.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.20.moe.mlp.0.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.20.moe.mlp.1.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.20.moe.mlp.2.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.20.moe.mlp.3.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.21.moe.mlp.0.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.21.moe.mlp.1.fc2.weight": "model-00002-of-00002.safetensors", 
"transformer.h.21.moe.mlp.2.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.21.moe.mlp.3.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.22.moe.mlp.0.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.22.moe.mlp.1.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.22.moe.mlp.2.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.22.moe.mlp.3.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.23.moe.mlp.0.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.23.moe.mlp.1.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.23.moe.mlp.2.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.23.moe.mlp.3.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.24.moe.mlp.0.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.24.moe.mlp.1.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.24.moe.mlp.2.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.24.moe.mlp.3.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.25.moe.mlp.0.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.25.moe.mlp.1.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.25.moe.mlp.2.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.25.moe.mlp.3.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.26.moe.mlp.0.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.26.moe.mlp.1.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.26.moe.mlp.2.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.26.moe.mlp.3.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.27.moe.mlp.0.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.27.moe.mlp.1.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.27.moe.mlp.2.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.27.moe.mlp.3.fc2.weight": 
"model-00002-of-00002.safetensors", "transformer.h.28.moe.mlp.0.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.28.moe.mlp.1.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.28.moe.mlp.2.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.28.moe.mlp.3.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.29.moe.mlp.0.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.29.moe.mlp.1.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.29.moe.mlp.2.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.29.moe.mlp.3.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.30.moe.mlp.0.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.30.moe.mlp.1.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.30.moe.mlp.2.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.30.moe.mlp.3.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.31.moe.mlp.0.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.31.moe.mlp.1.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.31.moe.mlp.2.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.31.moe.mlp.3.fc2.weight": "model-00002-of-00002.safetensors", "transformer.h.0.moe.gate.weight": "model-00002-of-00002.safetensors", "transformer.h.1.moe.gate.weight": "model-00002-of-00002.safetensors", "transformer.h.2.moe.gate.weight": "model-00002-of-00002.safetensors", "transformer.h.3.moe.gate.weight": "model-00002-of-00002.safetensors", "transformer.h.4.moe.gate.weight": "model-00002-of-00002.safetensors", "transformer.h.5.moe.gate.weight": "model-00002-of-00002.safetensors", "transformer.h.6.moe.gate.weight": "model-00002-of-00002.safetensors", "transformer.h.7.moe.gate.weight": "model-00002-of-00002.safetensors", "transformer.h.8.moe.gate.weight": "model-00002-of-00002.safetensors", "transformer.h.9.moe.gate.weight": "model-00002-of-00002.safetensors", 
"transformer.h.10.moe.gate.weight": "model-00002-of-00002.safetensors", "transformer.h.11.moe.gate.weight": "model-00002-of-00002.safetensors", "transformer.h.12.moe.gate.weight": "model-00002-of-00002.safetensors", "transformer.h.13.moe.gate.weight": "model-00002-of-00002.safetensors", "transformer.h.14.moe.gate.weight": "model-00002-of-00002.safetensors", "transformer.h.15.moe.gate.weight": "model-00002-of-00002.safetensors", "transformer.h.16.moe.gate.weight": "model-00002-of-00002.safetensors", "transformer.h.17.moe.gate.weight": "model-00002-of-00002.safetensors", "transformer.h.18.moe.gate.weight": "model-00002-of-00002.safetensors", "transformer.h.19.moe.gate.weight": "model-00002-of-00002.safetensors", "transformer.h.20.moe.gate.weight": "model-00002-of-00002.safetensors", "transformer.h.21.moe.gate.weight": "model-00002-of-00002.safetensors", "transformer.h.22.moe.gate.weight": "model-00002-of-00002.safetensors", "transformer.h.23.moe.gate.weight": "model-00002-of-00002.safetensors", "transformer.h.24.moe.gate.weight": "model-00002-of-00002.safetensors", "transformer.h.25.moe.gate.weight": "model-00002-of-00002.safetensors", "transformer.h.26.moe.gate.weight": "model-00002-of-00002.safetensors", "transformer.h.27.moe.gate.weight": "model-00002-of-00002.safetensors", "transformer.h.28.moe.gate.weight": "model-00002-of-00002.safetensors", "transformer.h.29.moe.gate.weight": "model-00002-of-00002.safetensors", "transformer.h.30.moe.gate.weight": "model-00002-of-00002.safetensors", "transformer.h.31.moe.gate.weight": "model-00002-of-00002.safetensors"}}