mlabonne committed on
Commit
a99a26a
1 Parent(s): 1418d13

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. README.md +47 -0
  2. config.json +39 -0
  3. mergekit_config.yml +18 -0
  4. model-00001-of-00322.safetensors +3 -0
  5. model-00002-of-00322.safetensors +3 -0
  6. model-00003-of-00322.safetensors +3 -0
  7. model-00004-of-00322.safetensors +3 -0
  8. model-00005-of-00322.safetensors +3 -0
  9. model-00006-of-00322.safetensors +3 -0
  10. model-00007-of-00322.safetensors +3 -0
  11. model-00008-of-00322.safetensors +3 -0
  12. model-00009-of-00322.safetensors +3 -0
  13. model-00010-of-00322.safetensors +3 -0
  14. model-00011-of-00322.safetensors +3 -0
  15. model-00012-of-00322.safetensors +3 -0
  16. model-00013-of-00322.safetensors +3 -0
  17. model-00014-of-00322.safetensors +3 -0
  18. model-00015-of-00322.safetensors +3 -0
  19. model-00016-of-00322.safetensors +3 -0
  20. model-00017-of-00322.safetensors +3 -0
  21. model-00018-of-00322.safetensors +3 -0
  22. model-00019-of-00322.safetensors +3 -0
  23. model-00020-of-00322.safetensors +3 -0
  24. model-00021-of-00322.safetensors +3 -0
  25. model-00022-of-00322.safetensors +3 -0
  26. model-00023-of-00322.safetensors +3 -0
  27. model-00024-of-00322.safetensors +3 -0
  28. model-00025-of-00322.safetensors +3 -0
  29. model-00026-of-00322.safetensors +3 -0
  30. model-00027-of-00322.safetensors +3 -0
  31. model-00028-of-00322.safetensors +3 -0
  32. model-00029-of-00322.safetensors +3 -0
  33. model-00030-of-00322.safetensors +3 -0
  34. model-00031-of-00322.safetensors +3 -0
  35. model-00032-of-00322.safetensors +3 -0
  36. model-00033-of-00322.safetensors +3 -0
  37. model-00034-of-00322.safetensors +3 -0
  38. model-00035-of-00322.safetensors +3 -0
  39. model-00036-of-00322.safetensors +3 -0
  40. model-00037-of-00322.safetensors +3 -0
  41. model-00038-of-00322.safetensors +3 -0
  42. model-00039-of-00322.safetensors +3 -0
  43. model-00040-of-00322.safetensors +3 -0
  44. model-00041-of-00322.safetensors +3 -0
  45. model-00042-of-00322.safetensors +3 -0
  46. model-00043-of-00322.safetensors +3 -0
  47. model-00044-of-00322.safetensors +3 -0
  48. model-00045-of-00322.safetensors +3 -0
  49. model-00046-of-00322.safetensors +3 -0
  50. model-00047-of-00322.safetensors +3 -0
README.md ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model:
3
+ - meta-llama/Meta-Llama-3.1-405B-Instruct
4
+ library_name: transformers
5
+ tags:
6
+ - mergekit
7
+ - merge
8
+
9
+ ---
10
+ # merge
11
+
12
+ This is a merge of pre-trained language models created using [mergekit](https://github.com/cg123/mergekit).
13
+
14
+ ## Merge Details
15
+ ### Merge Method
16
+
17
+ This model was merged using the passthrough merge method.
18
+
19
+ ### Models Merged
20
+
21
+ The following models were included in the merge:
22
+ * [meta-llama/Meta-Llama-3.1-405B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3.1-405B-Instruct)
23
+
24
+ ### Configuration
25
+
26
+ The following YAML configuration was used to produce this model:
27
+
28
+ ```yaml
29
+ slices:
30
+ - sources:
31
+ - layer_range: [0, 42]
32
+ model: meta-llama/Meta-Llama-3.1-405B-Instruct
33
+ - sources:
34
+ - layer_range: [21, 63]
35
+ model: meta-llama/Meta-Llama-3.1-405B-Instruct
36
+ - sources:
37
+ - layer_range: [42, 84]
38
+ model: meta-llama/Meta-Llama-3.1-405B-Instruct
39
+ - sources:
40
+ - layer_range: [63, 105]
41
+ model: meta-llama/Meta-Llama-3.1-405B-Instruct
42
+ - sources:
43
+ - layer_range: [84, 126]
44
+ model: meta-llama/Meta-Llama-3.1-405B-Instruct
45
+ merge_method: passthrough
46
+ dtype: bfloat16
47
+ ```
config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "meta-llama/Meta-Llama-3.1-405B-Instruct",
3
+ "architectures": [
4
+ "LlamaForCausalLM"
5
+ ],
6
+ "attention_bias": false,
7
+ "attention_dropout": 0.0,
8
+ "bos_token_id": 128000,
9
+ "eos_token_id": [
10
+ 128001,
11
+ 128008,
12
+ 128009
13
+ ],
14
+ "hidden_act": "silu",
15
+ "hidden_size": 16384,
16
+ "initializer_range": 0.02,
17
+ "intermediate_size": 53248,
18
+ "max_position_embeddings": 131072,
19
+ "mlp_bias": false,
20
+ "model_type": "llama",
21
+ "num_attention_heads": 128,
22
+ "num_hidden_layers": 210,
23
+ "num_key_value_heads": 16,
24
+ "pretraining_tp": 1,
25
+ "rms_norm_eps": 1e-05,
26
+ "rope_scaling": {
27
+ "factor": 8.0,
28
+ "high_freq_factor": 4.0,
29
+ "low_freq_factor": 1.0,
30
+ "original_max_position_embeddings": 8192,
31
+ "rope_type": "llama3"
32
+ },
33
+ "rope_theta": 500000.0,
34
+ "tie_word_embeddings": false,
35
+ "torch_dtype": "bfloat16",
36
+ "transformers_version": "4.43.3",
37
+ "use_cache": true,
38
+ "vocab_size": 128256
39
+ }
mergekit_config.yml ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ slices:
2
+ - sources:
3
+ - layer_range: [0, 42]
4
+ model: meta-llama/Meta-Llama-3.1-405B-Instruct
5
+ - sources:
6
+ - layer_range: [21, 63]
7
+ model: meta-llama/Meta-Llama-3.1-405B-Instruct
8
+ - sources:
9
+ - layer_range: [42, 84]
10
+ model: meta-llama/Meta-Llama-3.1-405B-Instruct
11
+ - sources:
12
+ - layer_range: [63, 105]
13
+ model: meta-llama/Meta-Llama-3.1-405B-Instruct
14
+ - sources:
15
+ - layer_range: [84, 126]
16
+ model: meta-llama/Meta-Llama-3.1-405B-Instruct
17
+ merge_method: passthrough
18
+ dtype: bfloat16
model-00001-of-00322.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71b079a02069ebe5a5f4f8135815f136afb1d6fc4423620dcb2a14fa399cdf12
3
+ size 4202692736
model-00002-of-00322.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bbb90d23098d70c7ce46a8a10ef3c0279d83831022e8f8da53f67874f1a84a66
3
+ size 4202725632
model-00003-of-00322.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c07820a3c436a61f69fe4e2b8abdffc7bce4a11c44f8105297d5b03a5dae092d
3
+ size 3489661192
model-00004-of-00322.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3bcdac7531550e841a2add9bbb94626f85a215f00b177f9998d7e59f518a9ab7
3
+ size 4697686984
model-00005-of-00322.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:441a03403e60fb88c2dac49102a8bf1a983e01a56175c5563344a8469b8b95a2
3
+ size 4697686992
model-00006-of-00322.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:053a525027d39c68ba7714be3ee8115f5034d181f07834c5e4dfed07623f15d7
3
+ size 3489661200
model-00007-of-00322.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79a7d76d43dc155e79c7c40db6c21b7212944e049d550ff7cc0e3ee9b163074c
3
+ size 4697719880
model-00008-of-00322.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa8ffa22fd4412938538ab3858f57919cdb5fce223e8a6440d48a5d9c0713c24
3
+ size 3489661200
model-00009-of-00322.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4225f30cd80f39aa01ec806518fabb5f080e2a387197f7f322138e80865fcbcb
3
+ size 3489661200
model-00010-of-00322.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7245ae6f38983decef6fd7bdc9d321aeeede2c9dc6ac8b82648a9cb9396a570
3
+ size 4160882232
model-00011-of-00322.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49b8d84571c4ebbe0892bfd4d902e42726abe489b6d744f3ce2d23db90aa1b2f
3
+ size 3489661200
model-00012-of-00322.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0aeb943e6fc3be931f6409c0d0a8e6aa184ea249edb99d0865056b45d6eecfa3
3
+ size 3489661200
model-00013-of-00322.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:879c3db5da5203a85f91fd4261d3327f253db14145e490ff83ada897358ac0f2
3
+ size 4697687008
model-00014-of-00322.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0fadfa642941d9c53b3e7835253e3b82a6dd20e62c1bf9b956d56ba7f80901cf
3
+ size 4697686992
model-00015-of-00322.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ed5ed04ee4ec55e3e11abbb87b935b57973dfafa8b78ab665a3b8be0eb11bf6
3
+ size 3489661200
model-00016-of-00322.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c54c61efc3406374ff62828baa3970892f92d253ac61ae61139b2336f213f42
3
+ size 4697687008
model-00017-of-00322.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a54a080b53515419c5196a38b645025a64770b2c3fe8018ec5b3daeb40071000
3
+ size 4697686992
model-00018-of-00322.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c19d99f10b57568d6a73f787dbbf6b0135424e2de191ab6dbda86e64edfd4252
3
+ size 3489661200
model-00019-of-00322.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0cfecbe21f0e6a31e38dd54fabfdcd077a334793be6392e768d9fa3464f3834
3
+ size 4697687008
model-00020-of-00322.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37ed38d1890092ab8e16d34c5248152002c4fbcd9e280a8390a732613b20ee85
3
+ size 4697686992
model-00021-of-00322.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df06009851058669e63e899952e0ad8c28e2ace209cf7448da51a22338abec41
3
+ size 3489661200
model-00022-of-00322.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f3b6a4c72ce57ebc440039e49cb9cff495df101282357143907b580b7ee8c0a
3
+ size 4697687008
model-00023-of-00322.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63068005f137cd394d03f7155d13192b089a28adac606ac0bfb94d33c67f19f9
3
+ size 4697654112
model-00024-of-00322.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2c0e690772d8e8055a6ed9134a7262841f6bc123fbb421279470b13709ca99c
3
+ size 4697687000
model-00025-of-00322.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d30cf8654f5f97ffe1cc73a50a7b4654aa4ac943232b5451d5becc8e22c7046
3
+ size 4697687000
model-00026-of-00322.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd46e3493dab8e6d4fd5640a466d504ec668d5b0d16e1f6fd2cd44a7fe43b3ce
3
+ size 3489661200
model-00027-of-00322.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a22cc7248010c9670d8f3dede79d98311b9c7cbf17fa4d82a61bda1d6a29ef2
3
+ size 4697687000
model-00028-of-00322.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:521a71614de433f205a7d4a3053e8dee25d779a3ee08e447cba76065bae5b4e7
3
+ size 4697687000
model-00029-of-00322.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a4665f8d7377c214bdd7c35fa1bd842db43c4fb57c604f13df3addd5c2a3c85
3
+ size 3489661200
model-00030-of-00322.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:299365053a11a92bf77e89d393322301d9a8ca7b232d438402a2824e37f1c411
3
+ size 4697686992
model-00031-of-00322.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d0994512abf9d69ad5e44d7742de7e53e1ff0db3fc465362cf70e9a8500cabf
3
+ size 4697687000
model-00032-of-00322.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:425a6260e99641eade8c125ad279c39c0fded393db882488a6384651193256e7
3
+ size 3489661200
model-00033-of-00322.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05d7f58eb101fd78ab33e39589ca691751bcb9f108f14c318a2f534fab235c7c
3
+ size 4697687000
model-00034-of-00322.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5068c2f29993b8e7d7044b37c389585d7fba95017cc9315eed86f8e5bea64cc9
3
+ size 4697687000
model-00035-of-00322.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dde861adca3f0be20f4a597f1f5910a27765fa749a718866eaebef3e7af1625e
3
+ size 3489661200
model-00036-of-00322.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec7e95a74611439aae971fea5c3a8db546fde3c2fdb99721a3a3d59d9a868f1f
3
+ size 4697687000
model-00037-of-00322.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:062e44ee5dce99be0613cd40d1115749d175c005eacce0136fa84f5d0aabdd4f
3
+ size 4697687000
model-00038-of-00322.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:910a1ada9203baf4503ba9673b676a624040cfe408b0362849380e80ee046203
3
+ size 3489661200
model-00039-of-00322.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea80121dadb2a57852817c9e9c1b79e43263ba4bcbca46c0e895878b5873a0be
3
+ size 4697687000
model-00040-of-00322.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3594322e42fedb84e4c06e131b4c0fe2598d35f48910d4127f525679bc69b31
3
+ size 4697687000
model-00041-of-00322.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e649c6621c3e013f571732aa4eede969be760550ea5e91fad80e0d2b107e78b2
3
+ size 3489661200
model-00042-of-00322.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07e0dbadf96e556d1f5c0374c090048ab11c5db933a7b57063063d77e7f57688
3
+ size 4697687000
model-00043-of-00322.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e81e61cd08e78e5add0ff42259cf48c422401f90188a003091d361660663cae
3
+ size 4697687000
model-00044-of-00322.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ffdb2fa0f65fba7c6a6dc1cdfca3335862a4ae62ade8efef88d864bf3ad08b2
3
+ size 3489661200
model-00045-of-00322.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db7e4f50d9348abab70afb8f31f0cbd1b03c42fd540db48f3b65471a49722ff1
3
+ size 4697687000
model-00046-of-00322.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50c18964dce1f9b39e3880074d491a36ccc00733e2a12b304a124c226a3fd836
3
+ size 4697686992
model-00047-of-00322.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a4e5febe06fd9aa06cbc2ee8c0df238f668bcf41b6634b480e24622dc2e4330
3
+ size 3489661200