{ "metadata": { "ParamSize": 199, "ParamBytes": 15231233024.0, "BitsPerParam": 16.0 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 1089994752, "records": [ { "name": "lm_head.weight", "shape": [ 152064, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1089994752, "byteOffset": 0 } ], "md5sum": "cdca534f95b0f8cade80b96e842e55c8" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.22.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "6d2ffdda94b495a654f93b3a04df06f0" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.23.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "5c1acaba71654507dbc123c99c947404" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "9056169ec2bb0c842277d7c6ed17a4d2" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.23.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "b77610894f554d3e7ea41c0e7705e739" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.24.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "0ae032cc345cdda5ce13f74f4cfba526" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "15b47939b585856133a0bb40f1d5eb59" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.24.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "9b9f3153d6eaf8ea668f3830ab7f8790" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.24.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "aa7bea7e9997f5fe96725be8b98a2fbf" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.25.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "2f395418bdb0190381f2d7ffd4d9fb88" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "2f5f7550fd2375ba00bdefd5ed39de53" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.25.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "148b1e6e19a9cb04878a323114cce229" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.25.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "d89aae53b00c82f5452b7aea97048ed2" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.26.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "e7a58fc1091af799e8ec6eaa8a2a69d8" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "ab6bccc40154db925f1f8df29b454a96" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.26.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "c8fb1b5b262e2ad72b4bb31bcf8f9e15" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.26.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "ca53275030995352c1c60a7fcb775d1a" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.27.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "8b54babd972f153608c5211065c193ed" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "ea63930e7207f32a8a4c77e9a0cc2a45" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.27.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "0bd6fca94b56bdc6eb4db96c161218e6" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.27.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "f115f1db5cd60babf240706590c4175a" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 1089994752, "records": [ { "name": "model.embed_tokens.weight", "shape": [ 152064, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1089994752, "byteOffset": 0 } ], "md5sum": "24daca6774c540da30a49e926dd5f0eb" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.0.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "634d9deb2dfa1b4aecb159bf4d66c427" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "a95f91ee8b213d02bbe3876bb4caaabd" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.0.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "9a76126f34c09ec6f3914c7f563e9453" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.0.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "1d60b267b13cc7b2ff258c7f5e89cd29" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.1.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "27415e635043d1f9e86ada121b3d5250" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "8f09e1e4777caa919d183cf8dda95252" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.1.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "ad1b3499a169a1df3740c12d4116f40d" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.1.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "7a3797c0423ea80154f771f226d11470" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.2.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "8ef169df019f6750290b642d75b4babf" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "99549e01261746d416ceee43ee16a535" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.2.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "c2bcd1081f32dc1836bdc84242eb7621" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.2.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "71cca3651f3ffdd9894976a10b762b63" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.3.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "458406c8ce8e5f642e40c27d2a517be3" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "32ca49690d14f4f4cb41a56131cb0ea9" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.3.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "ea8c51f9880d1edd706fd9fa7681c830" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.3.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "f0e25085ff295eb8120ec67d4ce2d5bc" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.4.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "130a141311bfb8a11a8414fba5271522" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "5c80ead475dc17f82753192a048f6a80" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.4.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "b56c6d7a1ef84967fd9d81ac54e97a28" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.4.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "00c8f7668baad7bb670bf3bcda7fd04b" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.5.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "3c076aad831d68f3ba736f082df892f2" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "37cfcd8722caafcf081a1bfc4a79ede5" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.5.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "6643007e6dda68b7f9e1684c68726ba2" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.5.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "4b334c87ddc56bd8d9e1b864b9a700cd" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.6.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "f9ca9318c5680031e5dc99a39b9e206f" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.6.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "dbc9563e8d536145609439599af9b180" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.10.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "ccaaa5924181d2868874dab5d017ad59" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "b7ed561904f64f0c6f54a86cbccbeaa3" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.10.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "71fd7bc08fae28bf75a63079ea6ca140" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.10.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "840e84a3b85c455e31c8587c00f0141d" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.11.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "2837c008ae7e11f3df62cb0df109cc1e" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "1f4722d3341171b3b9349fa66001fff6" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.11.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "343a1d7282bd2dbc6347a3ef70d880be" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.11.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "fadfe66b4bef29bdfac9c84f6c331905" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.12.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "9ea1daf3983963dbfd00fdb06ef9e84f" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "4f737911fa360182e95bcfefad4b1909" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.12.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "8ec296f7f1cfc7e98f6d0c374536b62a" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.12.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "0b2f7d561a26d7563401ccbf155b4930" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.13.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "b83862070a4b0bc7ab7a561bed14e6aa" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "8069795ed8f9cd3b066f2290f49738fb" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.13.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "79df1f193e277c72a8aa948b74b610b4" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.13.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "dcd2cd27cbf82d7117ce191b07d0e12e" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "f374213c51b324c0535bde8db5a59131" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.14.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "0af6642f9485a6f1b2022d23cd653656" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.14.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "c7fdc26d87ef8bed8422d97cea549062" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.6.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "0ba359172b430cd0290da37cdf1bb4bb" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "93cda2d2c78173573f727fa4347441b3" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.7.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "bf8dbcce7b1793d9adfca8393936cfbd" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "9d3ae672daef6ef7c6a89ef3d6903b69" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.7.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "55bc17a1ead1856a727805ee107d8e02" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.7.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "c08ac284bed45de0e6ec9865d84e6bb5" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.8.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "3d23fdae957c1d11ccd652090fa8a2fc" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "126d568dad1627b91fe2f26962a1b117" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.8.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "09cd2b7eb8cac4dd9685b20622171858" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.8.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "173163b95581c2f8834aaabe50f36029" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.9.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "54951bcad8a60fac7cae1bf5605b6b5d" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "c585fef52caeb9dec06b149547670ff8" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.9.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "891773728b4a74aaf57ddf33f0edee9f" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.9.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "ad5a1c85de04975069fec35ba61f8b29" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.14.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "3ac68d2d670bcb6b3c7017a903a3381e" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.15.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "4a293bffd7072f077c26e503deb5fb0b" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "f326da2bf1dc6a25b2bde1e8ce3fa1a0" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.15.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "a91825bf8452b481424c5658d22be82f" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.15.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "39e70c16b45bb29a56d790f50ef0ff8a" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.16.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "48093844abd11d3944f2616f1a5cdccf" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "83d12e76a741a24b0d568b845db83630" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 33546240, "records": [ { "name": "model.layers.23.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 0 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 7168 }, { "name": "model.layers.23.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 14336 }, { "name": "model.layers.23.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 23552 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33053696 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33060864 }, { "name": "model.layers.24.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33068032 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33077248 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33084416 }, { "name": "model.layers.25.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33091584 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33100800 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33107968 }, { "name": "model.layers.26.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33115136 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33124352 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33131520 }, { "name": "model.layers.27.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33138688 }, { "name": "model.norm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33147904 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33155072 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33162240 }, { "name": "model.layers.0.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33169408 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33178624 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33185792 }, { "name": "model.layers.1.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33192960 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33202176 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33209344 }, { "name": "model.layers.2.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33216512 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33225728 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33232896 }, { "name": "model.layers.3.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33240064 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33249280 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33256448 }, { "name": "model.layers.4.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33263616 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33272832 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33280000 }, { "name": "model.layers.5.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33287168 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33296384 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33303552 }, { "name": "model.layers.6.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33310720 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33319936 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33327104 }, { "name": "model.layers.10.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33334272 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33343488 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33350656 }, { "name": "model.layers.11.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33357824 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33367040 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33374208 }, { "name": "model.layers.12.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33381376 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33390592 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33397760 }, { "name": "model.layers.13.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33404928 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33414144 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33421312 }, { "name": "model.layers.14.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33428480 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33437696 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33444864 }, { "name": "model.layers.7.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33452032 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33461248 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33468416 }, { "name": "model.layers.8.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33475584 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33484800 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33491968 }, { "name": "model.layers.9.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33499136 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33508352 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33515520 }, { "name": "model.layers.15.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33522688 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33531904 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33539072 } ], "md5sum": "cf59ecb174188d26f8c41e7092ccc0ca" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.16.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "20bcdab82238e9f2411190f3ef306732" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.17.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "f18a191950aec889bd7dcd69e0f10179" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "cae677313687816876269f1ec6638219" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.17.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "a7b5979272c4ce649561833f25a6a5bd" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.17.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "4cf4ee73185f999e92bacb0202d067e0" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.18.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "f22a4d9867954ebe421238733c7fab3c" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "5fc7527d1e798c988048402eee7453b8" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.18.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "1df7d9af89d9d92a1602e82e20634a47" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.18.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "611fee9523365daa7bd74448f460f030" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.19.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "f37f9bae05d4d5f388baa70020f85606" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "8620c986cafec41c6ef8f856839da7cf" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.19.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "d94d6b2cecfd309a3489f2f28bb967a0" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.19.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "75a849d263d279b21150479aa8b61256" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.20.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "19ca5eeb35bc26bf0d39de76f439bd87" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "cb9659ddcb42461c1348da4776d9b57e" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.20.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "c943a084592ee2d19ac1f5a4e9d59038" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.20.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "e7e7dd75bea85fab11235674603f3a3a" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.21.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "aef4d0b4d7a364ac51fc755dd05ae04e" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "d35a8a381c8517ef72a42849602c6519" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.21.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "de71f9e8cb58dbc1f6970e0228232513" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.21.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "7e248b601d632decf76fd3f076e3888a" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "ef385194f21f1e40b54926c676b4573f" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.22.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "7baf47f3e75445d87299f742e98ce3ff" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.22.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "0ae90b849f43202289e6e039c4e8690a" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 33180672, "records": [ { "name": "model.layers.16.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 9216 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33039360 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33046528 }, { "name": "model.layers.17.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33053696 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33062912 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33070080 }, { "name": "model.layers.18.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33077248 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33086464 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33093632 }, { "name": "model.layers.19.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33100800 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33110016 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33117184 }, { "name": "model.layers.20.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33124352 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33133568 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33140736 }, { "name": "model.layers.21.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33147904 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33157120 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33164288 }, { "name": "model.layers.22.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33171456 } ], "md5sum": "b4b876946fc566f4217db081e3a96448" } ] }