Pearush committed on
Commit
559767e
1 Parent(s): 7c472cb

Upload PhiMoEForCausalLM

Browse files
config.json CHANGED
@@ -16,9 +16,9 @@
16
  "hidden_size": 4096,
17
  "initializer_range": 0.02,
18
  "input_jitter_noise": 0.01,
19
- "intermediate_size": 1920,
20
  "lm_head_bias": true,
21
- "max_position_embeddings": 131072,
22
  "model_type": "phimoe",
23
  "num_attention_heads": 32,
24
  "num_experts_per_tok": 2,
@@ -28,148 +28,11 @@
28
  "original_max_position_embeddings": 4096,
29
  "output_router_logits": false,
30
  "rms_norm_eps": 1e-05,
31
- "rope_scaling": {
32
- "long_factor": [
33
- 1.0199999809265137,
34
- 1.0299999713897705,
35
- 1.0399999618530273,
36
- 1.0499999523162842,
37
- 1.0499999523162842,
38
- 1.0499999523162842,
39
- 1.059999942779541,
40
- 1.059999942779541,
41
- 1.059999942779541,
42
- 1.059999942779541,
43
- 1.059999942779541,
44
- 1.059999942779541,
45
- 1.0999999046325684,
46
- 1.1799999475479126,
47
- 1.1799999475479126,
48
- 1.3700000047683716,
49
- 1.4899998903274536,
50
- 2.109999895095825,
51
- 2.8899998664855957,
52
- 3.9499998092651367,
53
- 4.299999713897705,
54
- 6.429999828338623,
55
- 8.09000015258789,
56
- 10.690000534057617,
57
- 12.050000190734863,
58
- 18.229999542236328,
59
- 18.84000015258789,
60
- 19.899999618530273,
61
- 21.420000076293945,
62
- 26.200000762939453,
63
- 34.28000259399414,
64
- 34.590003967285156,
65
- 38.730003356933594,
66
- 40.22000503540039,
67
- 42.54000473022461,
68
- 44.000003814697266,
69
- 47.590003967285156,
70
- 54.750003814697266,
71
- 56.19000244140625,
72
- 57.44000244140625,
73
- 57.4900016784668,
74
- 61.20000076293945,
75
- 61.540000915527344,
76
- 61.75,
77
- 61.779998779296875,
78
- 62.06999969482422,
79
- 63.11000061035156,
80
- 63.43000030517578,
81
- 63.560001373291016,
82
- 63.71000289916992,
83
- 63.92000198364258,
84
- 63.94000244140625,
85
- 63.94000244140625,
86
- 63.96000289916992,
87
- 63.980003356933594,
88
- 64.0300064086914,
89
- 64.0300064086914,
90
- 64.0300064086914,
91
- 64.04000854492188,
92
- 64.10000610351562,
93
- 64.19000244140625,
94
- 64.20999908447266,
95
- 64.75,
96
- 64.95999908447266
97
- ],
98
- "long_mscale": 1.243163121016122,
99
- "original_max_position_embeddings": 4096,
100
- "short_factor": [
101
- 1.0,
102
- 1.0399999618530273,
103
- 1.0399999618530273,
104
- 1.0399999618530273,
105
- 1.0499999523162842,
106
- 1.0499999523162842,
107
- 1.0499999523162842,
108
- 1.0499999523162842,
109
- 1.0499999523162842,
110
- 1.0499999523162842,
111
- 1.0499999523162842,
112
- 1.0499999523162842,
113
- 1.0499999523162842,
114
- 1.0499999523162842,
115
- 1.059999942779541,
116
- 1.059999942779541,
117
- 1.0699999332427979,
118
- 1.0699999332427979,
119
- 1.0699999332427979,
120
- 1.0699999332427979,
121
- 1.1399999856948853,
122
- 1.159999966621399,
123
- 1.159999966621399,
124
- 1.159999966621399,
125
- 1.159999966621399,
126
- 1.1799999475479126,
127
- 1.1999999284744263,
128
- 1.3199999332427979,
129
- 1.3399999141693115,
130
- 1.3499999046325684,
131
- 1.3999998569488525,
132
- 1.4799998998641968,
133
- 1.4999998807907104,
134
- 1.589999794960022,
135
- 1.6499998569488525,
136
- 1.71999990940094,
137
- 1.8999998569488525,
138
- 1.9099998474121094,
139
- 1.9099998474121094,
140
- 1.9899998903274536,
141
- 1.9999998807907104,
142
- 1.9999998807907104,
143
- 2.009999990463257,
144
- 2.009999990463257,
145
- 2.009999990463257,
146
- 2.009999990463257,
147
- 2.009999990463257,
148
- 2.009999990463257,
149
- 2.009999990463257,
150
- 2.009999990463257,
151
- 2.009999990463257,
152
- 2.009999990463257,
153
- 2.009999990463257,
154
- 2.009999990463257,
155
- 2.009999990463257,
156
- 2.009999990463257,
157
- 2.009999990463257,
158
- 2.009999990463257,
159
- 2.009999990463257,
160
- 2.0999999046325684,
161
- 2.319999933242798,
162
- 2.419999837875366,
163
- 2.5899999141693115,
164
- 2.7899999618530273
165
- ],
166
- "short_mscale": 1.243163121016122,
167
- "type": "longrope"
168
- },
169
  "rope_theta": 10000.0,
170
  "router_aux_loss_coef": 0.0,
171
  "router_jitter_noise": 0.01,
172
- "sliding_window": 131072,
173
  "tie_word_embeddings": false,
174
  "torch_dtype": "bfloat16",
175
  "transformers_version": "4.41.2",
 
16
  "hidden_size": 4096,
17
  "initializer_range": 0.02,
18
  "input_jitter_noise": 0.01,
19
+ "intermediate_size": 2240,
20
  "lm_head_bias": true,
21
+ "max_position_embeddings": 4096,
22
  "model_type": "phimoe",
23
  "num_attention_heads": 32,
24
  "num_experts_per_tok": 2,
 
28
  "original_max_position_embeddings": 4096,
29
  "output_router_logits": false,
30
  "rms_norm_eps": 1e-05,
31
+ "rope_scaling": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  "rope_theta": 10000.0,
33
  "router_aux_loss_coef": 0.0,
34
  "router_jitter_noise": 0.01,
35
+ "sliding_window": 4096,
36
  "tie_word_embeddings": false,
37
  "torch_dtype": "bfloat16",
38
  "transformers_version": "4.41.2",
model-00001-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19a5e82359d09fb36682c2baa8fd46e49187f6a950be5f71571e68aa1313b23a
3
+ size 4995293984
model-00002-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd71dfcf44f57af8eb1b27603eac3222e7d27a9c07698620f7570ba94dc1096d
3
+ size 4966538712
model-00003-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02c665177d645bb00746a5927c7e2fcdb6fe0294fcf77d520e939cc6937679ee
3
+ size 4986555656
model-00004-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:015356e9dfda7305917b83c1c5af356939068c45654ab3b1ecc6cefcb1be13e1
3
+ size 4989562032
model-00005-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77c81b9a69e3fb3ffffa462f29c5b86cea854dd47e16f7af54c72ad076ab2f8e
3
+ size 4989562032
model-00006-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a444a00c6e27d043c5929c01fb171eecb430e341a0d226b3975bd74e52d28b1
3
+ size 4989562040
model-00007-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3bb6372e707900abcf3df8d12413826b31b2be49f2f1ee7c580f3a2dd0c257f3
3
+ size 1484567336
model.safetensors.index.json CHANGED
The diff for this file is too large to render. See raw diff