{ "metadata": { "ParamSize": 533, "ParamBytes": 9234108416.0, "BitsPerParam": 5.001536828453907 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 389283840, "records": [ { "name": "lm_head.q_weight", "shape": [ 152064, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 389283840, "byteOffset": 0 } ], "md5sum": "718b17ab89995acae054282c19765133" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 48660480, "records": [ { "name": "lm_head.q_scale", "shape": [ 152064, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 48660480, "byteOffset": 0 } ], "md5sum": "323bf7391cc68a49f27fdd5d9eef22d0" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.47.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "a94d103570dab2f9b057fd15eb82bf10" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 389283840, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 152064, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 389283840, "byteOffset": 0 } ], "md5sum": "9dba084c48a55f21ea9615c7a46761e0" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 48660480, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 152064, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 48660480, "byteOffset": 0 } ], "md5sum": "75bca7695c154208d8571ec9d60f7d3b" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "a45a8a94fbd4a1ba16fd1dd4f870460f" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "0842458e253cdb7eec380ecb49cca379" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.0.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "57ff5d2fc7f9c2405659fa9bf4ff5cc3" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 33140736, "records": [ { "name": "model.layers.47.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 0 }, { "name": "model.norm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 4423680 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 4433920 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 4444160 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 8867840 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 17715200 }, { "name": "model.layers.0.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 17725440 }, { "name": "model.layers.0.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 17739776 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 20033536 } ], "md5sum": "069cd5b16478f27168c9ed5dcd574454" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "9869dc1f08de28c8c1e55e0489cebc8b" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "6b43522337478a31c77a76f17a7d79db" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 33294336, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 0 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1638400 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 1648640 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 6072320 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14919680 }, { "name": "model.layers.1.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 14929920 }, { "name": "model.layers.1.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 14944256 } ], "md5sum": "18f439382b1ee118f2ac6e8fc86c2e80" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "aaeb24b0ef10379da938e1d3f8581fd2" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "ed855b9b006ebf3e82b080346e2bb2af" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.2.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "915052f8cae808457e60e06ecf45534b" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 32638976, "records": [ { "name": "model.layers.1.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 2293760 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 15400960 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 17039360 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 17049600 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 21473280 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 30320640 }, { "name": "model.layers.2.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 30330880 }, { "name": "model.layers.2.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 30345216 } ], "md5sum": "7baaa43095a3c4d2413f8feb0f015b04" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "71d48dc956bb1ec7165150a6f32914fd" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "5f7725a73dc024b4637287eed3772899" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.3.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "00ae263634b7eef6d6346f5ce362e9c3" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14755840 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 19179520 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.3.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.3.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "c21b7f72c8db473c82a717b57fbf0aa2" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 33130496, "records": [ { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14755840 }, { "name": "model.layers.4.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 14766080 }, { "name": "model.layers.4.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 14780416 } ], "md5sum": "460746b1c9ddda5d8b2c51992dc3c385" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "35f82d6221b93d44b6b24743b2c225ff" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "369164c1ce1e156ac82b2b5ea3e6361b" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.10.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "52c111b04aa07a0217070d64bd318290" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 32638976, "records": [ { "name": "model.layers.4.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 2293760 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 15400960 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 17039360 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 17049600 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 21473280 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 30320640 }, { "name": "model.layers.10.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 30330880 }, { "name": "model.layers.10.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 30345216 } ], "md5sum": "391159aa84bfe9f85c895fa97fece8e2" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "075ca11fda2ace9a9c299e5e30f7fabb" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.11.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "8a077479e77a01c5033390404e833c6a" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.11.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.11.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "abc55fa83de4ff5ec10ba32e22e4dd9f" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "acead9c7abd89198186f00af06227726" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "f6754d29ee782fcd299a887ffc081180" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "2120635450b796fbeeda269572f8983c" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "c7cfbe372304ec6e8920eefb971c12cf" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 32450560, "records": [ { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14745600 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 19169280 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28016640 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 28026880 } ], "md5sum": "ace530886709db34627cdb5ae73ba99d" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 29515776, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.5.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8857600 }, { "name": "model.layers.5.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8871936 }, { "name": "model.layers.5.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27222016 } ], "md5sum": "5894f32993f061365c858e46564363ff" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "d503f595d6a606de4e414d2dfa297196" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "0cb12c672d9eacfa3baeaed177b71174" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.6.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "313b077f332ccd948ae84a8e30705eef" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14755840 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 19179520 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.6.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.6.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "4172591c4c70ef65b290e20d503eb609" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "a1269ea050e1f3af760508017958e2f5" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "39fb6be79ec8efb22a916a5f4b8b5430" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.7.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "38c548fcaa9609c6a2c5dbea9aab8911" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14755840 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 19179520 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.7.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.7.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "e15b1c2ddf20f3b9356db0dddef1b9e8" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "c2a0f127fb642bb3738c5db66d2e7b6d" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "103ee49177cb76817ca4f5c27479f985" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.8.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "1ae377a334d3b6338a351c41d107a5d7" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14755840 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 19179520 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.8.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.8.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "cfcfb44b6a38d3988958230cbe919b80" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "55c410146ec8e595220ecde24dc88328" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "915b6b0cf93970ed063b2ffba068d5b1" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.9.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "88cd11a5ebcea37e4cbf97d0ce41ef87" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14755840 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 19179520 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.9.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.9.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "20065c36afa76f524d4b2387ce65e6ae" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "2ebdc7b7e80cb32481672bcdc8628922" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "4fb6c79d14eaf574fa31bb09587614dc" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "f62cb7101c6fae216e9753d4053b781c" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.12.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "335b4495f15e4e407d0ccb7a3908e7cc" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 32475136, "records": [ { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14745600 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 19169280 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 19179520 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 23603200 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32450560 }, { "name": "model.layers.12.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 32460800 } ], "md5sum": "51a57d4c2c4b21c66e170093297d8c17" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "efdd56cb83117ff2d88af5b716ac3a01" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "476e1bd4d476a84cbee1049283bc6afa" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.13.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "dab34a31811c0bf3e256b238404ede30" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 32638976, "records": [ { "name": "model.layers.12.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 2293760 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 15400960 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 17039360 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 17049600 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 21473280 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 30320640 }, { "name": "model.layers.13.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 30330880 }, { "name": "model.layers.13.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 30345216 } ], "md5sum": "1fa851d7e376ca4a37c6bc18bfd26ebf" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "4edad89860deebfdfd6f6efb38d1d704" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "049a8c23b2c65086cc2a1ace5d95b26a" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.14.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "5adf43c3c3873b38047a1b354f7b3e9b" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14755840 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 19179520 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.14.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.14.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "a6ad79debf390825ee1a9b99d9b7901c" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "1369a2fc9ce6dc76af0954a80cf27150" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "3f2b651fa088465db78e58e01ac336a5" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.15.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "f4bacf27fa2dacc0829547bf37f3d441" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14755840 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 19179520 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.15.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.15.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "817d5283cddcb962ca2da640ad0c99a6" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "39b55910a11873519c8161c7471cf9da" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "6a6b6d5aa198eac338c506f1c8db7cde" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.16.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "b4be2cd67a702e2ee05f76a54dbb4a9b" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14755840 }, { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 19179520 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.16.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.16.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "8e0fe4c8db60f134597eed0e48f313d6" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "8934a8cdf0d56372d874b58e4b211953" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "b9f954044613f10262e8272977dcfb93" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.17.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "27831f8192df5efff9bcdfabe44c7986" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14755840 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 19179520 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.17.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.17.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "4e73146c0ae5ad7db266cfd450e1263b" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "03915fd37e7c559eb8713ba543516261" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.18.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "d1090d682cdf81e42f644f146317bd06" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.18.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.18.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "37b9184311164c8e669d22508113fa76" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "b5746b1637def17a737abcd63b352c73" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "77865080587584144a5f91e2c1171ccf" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "e4e9a00eab9b30be06fc043546bcc8de" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.19.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "fff39629763fa2ba88692f40a0dbfab8" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 32475136, "records": [ { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14745600 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 19169280 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 19179520 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 23603200 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32450560 }, { "name": "model.layers.19.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 32460800 } ], "md5sum": "93197bc1af6cd4236b784fe5210caff7" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "416396660331e9abef66507d3410dbb4" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "79dbe7522db44af6682fc8c491bfbf39" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.20.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "81cfff7579b9e7e1c10456076cfeec35" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 32638976, "records": [ { "name": "model.layers.19.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 0 }, { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 2293760 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 15400960 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 17039360 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 17049600 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 21473280 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 30320640 }, { "name": "model.layers.20.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 30330880 }, { "name": "model.layers.20.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 30345216 } ], "md5sum": "9314570b4550f2b9ae901e1076c2957f" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "16ecab1003706a43fd065e7203e25659" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "ef099bacf09e7430ead7ee53c29d695c" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.21.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "184a7caaf30554b0bdd43e4e45eaa218" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14755840 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 19179520 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.21.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.21.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "f61fcb8f19ea80987c53e07bc8321226" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "293769f22e954de1c06e3f29fd5d9c85" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "2c6a7564bc89e79610b25a5e6581178d" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.22.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "c6428193295c50aa837b50552c052e0a" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14755840 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 19179520 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.22.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.22.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "6408e88d3872169041c69688d89cc221" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "b697d887312ca39cf83be12e89884804" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "bf60b46fa8e3cab8fe1071ff477b1b7a" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.23.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "99c683f4b54b91bf566bf54477c52e01" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14755840 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 19179520 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.23.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.23.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "b6a8f1da42c8c17015417c9875e98095" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "562479934acf784ff6ae7c778e2a5b0f" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "3ca7311f7e6c0112ba4e63b0fdc7a57f" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.24.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "9ab020f7a4ffdd08cab27052d6b9030d" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14755840 }, { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 19179520 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.24.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.24.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "fd8cc6f8719734584317368e8a822bf8" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "f37141b92ee14e780e7e653325094499" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "523a09a6b57d426d77853dcc8d0d604d" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.25.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "ae9af29d14c15f2f74bb36c75edf7ad4" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14755840 }, { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 19179520 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.25.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.25.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "a03012d87f7266865dc5e6e8daed09e4" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "9881372b9c73d4e9d41dcca72fce4d35" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "eda357db40f8c4fb392bd22b756d46b6" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.26.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "a5a9da3d5240f8b22eddedc67823abab" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14755840 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 19179520 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.26.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.26.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "c99dc2e95b6d14acdc12883eae1b7079" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "2c523ed56b84313826ac17d6890d8e5d" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "987b092df789070eb1d95b5dbe21dc58" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.27.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "bff51da716ffe156fbe6e32b81ea8781" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14755840 }, { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 19179520 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.27.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.27.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "8e4a60798a3fe8b2a0cf74cb1769de5d" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "66ddbb0830abb192d19538910f26f617" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "4aece74c1f78cab4f9f9553b1cd3576e" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.28.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "ff67cfbe32eea7ae77942499d2d2c9b0" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14755840 }, { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 19179520 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.28.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.28.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "a740893c94622680bf11a9b32114eadf" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "c891de351c9782b2e88598efd2340f37" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "a9bc0c53385f083bc82d5a8b572fd65f" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.29.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "3d494b092eadefc7a5361c14f64d9dcb" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14755840 }, { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 19179520 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.29.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.29.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "b80fa4ef0c2763ea81b50f36162f9dbb" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "919e483a9add85d180b7d5adac38e810" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "8038c98a14289430ac329f1836dc61dd" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.30.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "d35d55651ae252b2adcbb0276c5884d1" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14755840 }, { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 19179520 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.30.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.30.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "8c0cbef8b286b6eafabcca28d39d8713" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "f8c202b417645fd220f7d1497fcf05dd" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "0a80c3f33c078bf03772f55da2ee4624" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.31.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "9f2d9385ec14236dcd423137e7d72c82" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14755840 }, { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 19179520 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.31.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.31.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "cca9fc9054d556afe7f51d76f85f907b" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.32.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "84c4bede34fd35db551f7cfffd501ca7" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "39366777f5968abcf673f7a4018ed007" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.32.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "009bf020f5840bc4eca7c9ab6de57f9d" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.32.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14755840 }, { "name": "model.layers.32.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 19179520 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.32.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.32.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "cf4f6515d5ca15cf4b15854dd25e2d92" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 33130496, "records": [ { "name": "model.layers.32.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.32.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14755840 }, { "name": "model.layers.33.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 14766080 }, { "name": "model.layers.33.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 14780416 } ], "md5sum": "1d448e5044447721761df1ffbe3c4643" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.33.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "6a3d1c4855d6ec08132960b56dedbae9" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "949d047600921cb3d3c094f171701e20" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.34.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "59b36026d71b31b597529272caccad61" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 30320640, "records": [ { "name": "model.layers.33.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 0 }, { "name": "model.layers.33.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 2293760 }, { "name": "model.layers.33.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 15400960 }, { "name": "model.layers.33.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 17039360 }, { "name": "model.layers.33.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 21463040 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 30310400 } ], "md5sum": "01d0f7a949de924b9790cbfaf353d021" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "7b1a6db8f1d89b664066d91226abdad7" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 31645696, "records": [ { "name": "model.layers.34.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 0 }, { "name": "model.layers.34.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 4423680 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13271040 }, { "name": "model.layers.34.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 13281280 }, { "name": "model.layers.34.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 13295616 } ], "md5sum": "588b8f242eb276652edad1b58dce981d" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.35.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "75ecd0b48e816bf33295da455a52f00d" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "47eba4c57c1c084eeb045a33744d339f" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.35.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "84a8a4345b826e32d9ae8a83e7514f2a" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 32638976, "records": [ { "name": "model.layers.34.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 0 }, { "name": "model.layers.34.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 2293760 }, { "name": "model.layers.34.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 15400960 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 17039360 }, { "name": "model.layers.35.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 17049600 }, { "name": "model.layers.35.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 21473280 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 30320640 }, { "name": "model.layers.35.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 30330880 }, { "name": "model.layers.35.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 30345216 } ], "md5sum": "205df9426a65d7fdeb6f85fa45776cd1" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.36.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "82ded4f3ccb4e6d5f36f5c083da750d2" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "2b1029285e309dfc2c7ca378578b164f" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.36.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "3f7bec756c4f9ac1b3bfeb07db7d8fe6" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.35.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.35.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.36.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14755840 }, { "name": "model.layers.36.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 19179520 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.36.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.36.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "5431428b9cb110dc2b51a68aa425c63a" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.37.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "3dda26ab36cdc119dfc84d2d2ba77bba" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "f2bcf86a33fa862e99c660571e0933d0" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.37.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "6fa3a05f347ced336495cbba974e0544" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.36.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.36.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.37.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14755840 }, { "name": "model.layers.37.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 19179520 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.37.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.37.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "46108ef48121b80ac9d9f3c3a8ffa9d7" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.38.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "4f3e27ecd33ff18b1e9817a5ae0f9a14" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "2ae79c6d31239af078356dd7c24262e5" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.38.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "bf49f282db3da04e29ceda0f8cac6074" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.37.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.37.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.38.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14755840 }, { "name": "model.layers.38.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 19179520 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.38.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.38.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "1a9966501ec25e8f4e021952fbe70a65" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.39.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "30c7c125fe1b97bfc2f80cbeeb2a0036" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "c396bc5cd6a80a8b8fbcb20ee5b656af" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.39.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "ebcacd7f3bc3358e58302bb6d183858c" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.38.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.38.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.39.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14755840 }, { "name": "model.layers.39.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 19179520 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.39.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.39.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "968dfc1e04160049650718d9e28a84fd" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "433f95ab0fc67f32308a76c61878e9f7" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.40.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "a015e12d0e4c688eac76abe2600bf619" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.39.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.39.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.40.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.40.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.40.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.40.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.40.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "9213b05276df8bd335244e1bf0d3bf60" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.40.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "c83f676856094b1cfbc1e3912e214c33" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.41.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "22c529812c40fe2210068cc992ff2888" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "e14410965108c90e843cc058f8fb2a22" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.41.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "995cb2ec795235139229d9c1ac1f07f7" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 32475136, "records": [ { "name": "model.layers.40.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.40.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.40.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14745600 }, { "name": "model.layers.41.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 19169280 }, { "name": "model.layers.41.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 19179520 }, { "name": "model.layers.41.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 23603200 }, { "name": "model.layers.41.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32450560 }, { "name": "model.layers.41.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 32460800 } ], "md5sum": "35c309e5830ac7d81b8607f7218883a1" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.42.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "689449f01880f6c9f1f85396cdea2c2e" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "5f28ba09beb57d0727bfb55b1031df9e" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.42.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "b8ec0e49c3afdea77fd05b7d42aa2615" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 32638976, "records": [ { "name": "model.layers.41.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 0 }, { "name": "model.layers.41.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 2293760 }, { "name": "model.layers.41.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 15400960 }, { "name": "model.layers.42.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 17039360 }, { "name": "model.layers.42.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 17049600 }, { "name": "model.layers.42.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 21473280 }, { "name": "model.layers.42.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 30320640 }, { "name": "model.layers.42.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 30330880 }, { "name": "model.layers.42.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 30345216 } ], "md5sum": "c2d9e5daa2c4b71b723f0fda0b93bbc3" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.43.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "6728a8818bfb5085a9031689fae0b2be" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "2cb0ca634092d1d8bece5df5ec1f5d02" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.43.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "65d140a89d070c3e7b6697ff6e81322b" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.42.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.42.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.43.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.43.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14755840 }, { "name": "model.layers.43.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 19179520 }, { "name": "model.layers.43.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.43.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.43.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "ce5a6c8160691cbe07484f900c101e55" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.44.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "557a24c541bb1c75e6dcfa50108b5ac6" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "da90c84c017b4d9598405207d1a42d68" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.44.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "5a9cf503069976ead783e019ffa50ad5" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.43.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.43.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.44.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.44.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14755840 }, { "name": "model.layers.44.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 19179520 }, { "name": "model.layers.44.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.44.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.44.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "1b10bf5599a3af4c9af6fec68c2289f6" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.45.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "916bc70ae2205034897e4eb6994da501" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "e3951d8866a11028e03bb866791da0c1" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.45.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "3ca62695a28461dc085c00a75b6f5b9f" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.44.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.44.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.45.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.45.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14755840 }, { "name": "model.layers.45.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 19179520 }, { "name": "model.layers.45.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.45.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.45.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "850ad1ef088f64a54e6919dc918bdb4d" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.46.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "1c1c5faf61e32b8e0b7fb1ca671b9031" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "feedb50cc03aafe7301fd1e513ab0a49" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.46.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "3d048bba3d6b60d68719887b6838728f" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.45.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.45.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.46.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.46.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14755840 }, { "name": "model.layers.46.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 19179520 }, { "name": "model.layers.46.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.46.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.46.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "ec11d9552e584d6bea3739a1ce613f9d" }, { "dataPath": "params_shard_190.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "011b4a54e197caa8bd5a8b91b19677ca" }, { "dataPath": "params_shard_191.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.47.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "d90618aa2eea18a36a28caabc6b57ae1" }, { "dataPath": "params_shard_192.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.46.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.46.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.47.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.47.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.47.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.47.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.47.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "5ae3d9baf9368a8111674342830b3aab" }, { "dataPath": "params_shard_193.bin", "format": "raw-shard", "nbytes": 14745600, "records": [ { "name": "model.layers.47.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.47.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 } ], "md5sum": "adee9458c64fcc1b4fdc1398f0c30055" } ] }