{ "metadata": { "ParamSize": 243, "ParamBytes": 4193099136.0, "BitsPerParam": 16.0 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 424258560, "records": [ { "name": "model.embed_tokens.weight", "shape": [ 122760, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 424258560, "byteOffset": 0 } ], "md5sum": "fa76ad8440a74eb26c2d0ea5da598383" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 39813120, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.weight", "shape": [ 11520, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39813120, "byteOffset": 0 } ], "md5sum": "bb0667d8534e13a041c68963d4b4567c" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 19906560, "records": [ { "name": "model.layers.0.mlp.down_proj.weight", "shape": [ 1728, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19906560, "byteOffset": 0 } ], "md5sum": "102b75a4ebad2d43d384c6278b08aad1" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 17915904, "records": [ { "name": "model.layers.1.self_attn.wqkv_pack.weight", "shape": [ 5184, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17915904, "byteOffset": 0 } ], "md5sum": "abaf6c9f72c91411f566fa8512f45813" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 39813120, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.weight", "shape": [ 11520, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39813120, "byteOffset": 0 } ], "md5sum": "8ee187f3e226b8cf100310794c9f59de" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 19906560, "records": [ { "name": "model.layers.1.mlp.down_proj.weight", "shape": [ 1728, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19906560, "byteOffset": 0 } ], "md5sum": "83a36896af493e2280f3ddcf18bcf71e" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 17915904, "records": [ { "name": "model.layers.2.self_attn.wqkv_pack.weight", "shape": [ 5184, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17915904, "byteOffset": 0 } ], "md5sum": "2695b8269c83ad88afd49ced489d6241" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 29873664, "records": [ { "name": "model.layers.0.self_attn.wqkv_pack.weight", "shape": [ 5184, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17915904, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.o_proj.weight", "shape": [ 1728, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5971968, "byteOffset": 17915904 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 23887872 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 23891328 }, { "name": "model.layers.1.self_attn.o_proj.weight", "shape": [ 1728, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5971968, "byteOffset": 23894784 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 29866752 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 29870208 } ], "md5sum": "5ada174e13f16de3bf73d4a3a9bf911d" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 39813120, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.weight", "shape": [ 11520, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39813120, "byteOffset": 0 } ], "md5sum": "67d94938606ea8382a27c46271409956" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 17915904, "records": [ { "name": "model.layers.3.self_attn.wqkv_pack.weight", "shape": [ 5184, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17915904, "byteOffset": 0 } ], "md5sum": "2079592f36083a3132bdbd66d25361aa" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 39813120, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.weight", "shape": [ 11520, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39813120, "byteOffset": 0 } ], "md5sum": "d6cb1be11d488fe5afdfcca5b3376667" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 19906560, "records": [ { "name": "model.layers.3.mlp.down_proj.weight", "shape": [ 1728, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19906560, "byteOffset": 0 } ], "md5sum": "ca1488b2be86850fc8f2f3bb03aa3431" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 17915904, "records": [ { "name": "model.layers.4.self_attn.wqkv_pack.weight", "shape": [ 5184, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17915904, "byteOffset": 0 } ], "md5sum": "b5e369256ea1233d6e6d9ed54f53f2ce" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 31864320, "records": [ { "name": "model.layers.2.self_attn.o_proj.weight", "shape": [ 1728, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5971968, "byteOffset": 0 }, { "name": "model.layers.2.mlp.down_proj.weight", "shape": [ 1728, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19906560, "byteOffset": 5971968 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 25878528 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 25881984 }, { "name": "model.layers.3.self_attn.o_proj.weight", "shape": [ 1728, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5971968, "byteOffset": 25885440 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 31857408 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 31860864 } ], "md5sum": "3571de6e8131278ec5c181e98e77cc35" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 39813120, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.weight", "shape": [ 11520, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39813120, "byteOffset": 0 } ], "md5sum": "f9710075818be18950c134f56fba7b2d" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 17915904, "records": [ { "name": "model.layers.5.self_attn.wqkv_pack.weight", "shape": [ 5184, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17915904, "byteOffset": 0 } ], "md5sum": "e0bf893852817e9a4162d009163d1d95" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 39813120, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.weight", "shape": [ 11520, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39813120, "byteOffset": 0 } ], "md5sum": "81ffece1fa41ea725d7109b7f3687052" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 19906560, "records": [ { "name": "model.layers.5.mlp.down_proj.weight", "shape": [ 1728, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19906560, "byteOffset": 0 } ], "md5sum": "042617c2d4452025d7cf7237798c722a" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 17915904, "records": [ { "name": "model.layers.6.self_attn.wqkv_pack.weight", "shape": [ 5184, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17915904, "byteOffset": 0 } ], "md5sum": "41dc6b1f383188cc6248ad3165568fef" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 31864320, "records": [ { "name": "model.layers.4.self_attn.o_proj.weight", "shape": [ 1728, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5971968, "byteOffset": 0 }, { "name": "model.layers.4.mlp.down_proj.weight", "shape": [ 1728, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19906560, "byteOffset": 5971968 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 25878528 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 25881984 }, { "name": "model.layers.5.self_attn.o_proj.weight", "shape": [ 1728, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5971968, "byteOffset": 25885440 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 31857408 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 31860864 } ], "md5sum": "d77a7be0ea46b225840e4646c7e84166" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 39813120, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.weight", "shape": [ 11520, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39813120, "byteOffset": 0 } ], "md5sum": "2ab2fdae6393b7f5948a376fa3a4eb3b" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 17915904, "records": [ { "name": "model.layers.7.self_attn.wqkv_pack.weight", "shape": [ 5184, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17915904, "byteOffset": 0 } ], "md5sum": "c288871b3a0c8f8d202c61bee354e390" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 39813120, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.weight", "shape": [ 11520, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39813120, "byteOffset": 0 } ], "md5sum": "b22d0e81c3918b71237e933f220c0bf6" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 19906560, "records": [ { "name": "model.layers.7.mlp.down_proj.weight", "shape": [ 1728, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19906560, "byteOffset": 0 } ], "md5sum": "dada182b2e7296abfacbe42201477880" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 17915904, "records": [ { "name": "model.layers.8.self_attn.wqkv_pack.weight", "shape": [ 5184, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17915904, "byteOffset": 0 } ], "md5sum": "dcab8c8e83a6a3eef4d9442131b8bda6" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 31864320, "records": [ { "name": "model.layers.6.self_attn.o_proj.weight", "shape": [ 1728, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5971968, "byteOffset": 0 }, { "name": "model.layers.6.mlp.down_proj.weight", "shape": [ 1728, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19906560, "byteOffset": 5971968 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 25878528 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 25881984 }, { "name": "model.layers.7.self_attn.o_proj.weight", "shape": [ 1728, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5971968, "byteOffset": 25885440 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 31857408 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 31860864 } ], "md5sum": "874c2cfd33dcef829a245a31fe6747dc" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 39813120, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.weight", "shape": [ 11520, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39813120, "byteOffset": 0 } ], "md5sum": "b7f86624ee7ef3c863d9c069331e0dab" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 17915904, "records": [ { "name": "model.layers.9.self_attn.wqkv_pack.weight", "shape": [ 5184, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17915904, "byteOffset": 0 } ], "md5sum": "1e5a49f6d31a763d5815d8d137691ee4" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 39813120, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.weight", "shape": [ 11520, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39813120, "byteOffset": 0 } ], "md5sum": "c3279a145c1e05d88855354c0097bd04" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 19906560, "records": [ { "name": "model.layers.9.mlp.down_proj.weight", "shape": [ 1728, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19906560, "byteOffset": 0 } ], "md5sum": "59c834221e7245d1f173b3cbd26c424c" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 17915904, "records": [ { "name": "model.layers.10.self_attn.wqkv_pack.weight", "shape": [ 5184, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17915904, "byteOffset": 0 } ], "md5sum": "0c591d60417454ffc561688d09eda250" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 31864320, "records": [ { "name": "model.layers.8.self_attn.o_proj.weight", "shape": [ 1728, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5971968, "byteOffset": 0 }, { "name": "model.layers.8.mlp.down_proj.weight", "shape": [ 1728, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19906560, "byteOffset": 5971968 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 25878528 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 25881984 }, { "name": "model.layers.9.self_attn.o_proj.weight", "shape": [ 1728, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5971968, "byteOffset": 25885440 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 31857408 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 31860864 } ], "md5sum": "1ffb9aeb6d590db860f7ae57794e5a2f" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 39813120, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.weight", "shape": [ 11520, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39813120, "byteOffset": 0 } ], "md5sum": "495d807e19845fcbdd79701a46121bfe" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 17915904, "records": [ { "name": "model.layers.11.self_attn.wqkv_pack.weight", "shape": [ 5184, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17915904, "byteOffset": 0 } ], "md5sum": "98ef02eba2259187b07dca0c007e5660" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 39813120, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.weight", "shape": [ 11520, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39813120, "byteOffset": 0 } ], "md5sum": "686d3cd52c19e9db417839185be32cc3" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 19906560, "records": [ { "name": "model.layers.11.mlp.down_proj.weight", "shape": [ 1728, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19906560, "byteOffset": 0 } ], "md5sum": "d09540d447622fb7df82a19036f48730" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 17915904, "records": [ { "name": "model.layers.12.self_attn.wqkv_pack.weight", "shape": [ 5184, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17915904, "byteOffset": 0 } ], "md5sum": "83b5eb1e636573aec649813ca02821d8" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 31864320, "records": [ { "name": "model.layers.10.self_attn.o_proj.weight", "shape": [ 1728, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5971968, "byteOffset": 0 }, { "name": "model.layers.10.mlp.down_proj.weight", "shape": [ 1728, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19906560, "byteOffset": 5971968 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 25878528 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 25881984 }, { "name": "model.layers.11.self_attn.o_proj.weight", "shape": [ 1728, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5971968, "byteOffset": 25885440 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 31857408 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 31860864 } ], "md5sum": "74775672bbc540ea10c5369cff75f3d0" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 39813120, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.weight", "shape": [ 11520, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39813120, "byteOffset": 0 } ], "md5sum": "501af48842625adb8bdeadda192252b5" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 17915904, "records": [ { "name": "model.layers.13.self_attn.wqkv_pack.weight", "shape": [ 5184, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17915904, "byteOffset": 0 } ], "md5sum": "e3ae86f5a43160a499dfc113d700af8c" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 39813120, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.weight", "shape": [ 11520, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39813120, "byteOffset": 0 } ], "md5sum": "7a2d12d6fc9dc688688edeba677cc88e" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 19906560, "records": [ { "name": "model.layers.13.mlp.down_proj.weight", "shape": [ 1728, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19906560, "byteOffset": 0 } ], "md5sum": "34d57654c2521c36188617984560c1c1" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 17915904, "records": [ { "name": "model.layers.14.self_attn.wqkv_pack.weight", "shape": [ 5184, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17915904, "byteOffset": 0 } ], "md5sum": "66013c0310650967b724084d46998a44" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 31864320, "records": [ { "name": "model.layers.12.self_attn.o_proj.weight", "shape": [ 1728, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5971968, "byteOffset": 0 }, { "name": "model.layers.12.mlp.down_proj.weight", "shape": [ 1728, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19906560, "byteOffset": 5971968 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 25878528 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 25881984 }, { "name": "model.layers.13.self_attn.o_proj.weight", "shape": [ 1728, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5971968, "byteOffset": 25885440 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 31857408 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 31860864 } ], "md5sum": "89d0107d60ed7b45a95857dc19a83c5e" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 39813120, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.weight", "shape": [ 11520, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39813120, "byteOffset": 0 } ], "md5sum": "c0720cfb85eec22607fc430b0891882a" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 17915904, "records": [ { "name": "model.layers.15.self_attn.wqkv_pack.weight", "shape": [ 5184, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17915904, "byteOffset": 0 } ], "md5sum": "f59760a9ebb6719986a5d2f1cf9bf6db" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 39813120, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.weight", "shape": [ 11520, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39813120, "byteOffset": 0 } ], "md5sum": "f9fb8d7a93e1d5510c74acdc47026317" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 19906560, "records": [ { "name": "model.layers.15.mlp.down_proj.weight", "shape": [ 1728, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19906560, "byteOffset": 0 } ], "md5sum": "28e414c7c23b1aeef916c4d4feea2204" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 17915904, "records": [ { "name": "model.layers.16.self_attn.wqkv_pack.weight", "shape": [ 5184, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17915904, "byteOffset": 0 } ], "md5sum": "426fd5079c847944b254fc107723ac7f" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 31864320, "records": [ { "name": "model.layers.14.self_attn.o_proj.weight", "shape": [ 1728, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5971968, "byteOffset": 0 }, { "name": "model.layers.14.mlp.down_proj.weight", "shape": [ 1728, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19906560, "byteOffset": 5971968 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 25878528 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 25881984 }, { "name": "model.layers.15.self_attn.o_proj.weight", "shape": [ 1728, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5971968, "byteOffset": 25885440 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 31857408 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 31860864 } ], "md5sum": "93edd468042f4d8e7ecbdfddf4233d59" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 39813120, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.weight", "shape": [ 11520, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39813120, "byteOffset": 0 } ], "md5sum": "c0439f75ddca25edccfe481f09cffc50" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 17915904, "records": [ { "name": "model.layers.17.self_attn.wqkv_pack.weight", "shape": [ 5184, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17915904, "byteOffset": 0 } ], "md5sum": "645a9c2a219538aa7d49c30cc902ed06" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 39813120, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.weight", "shape": [ 11520, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39813120, "byteOffset": 0 } ], "md5sum": "95a916ff71eabd6ad88e201d58f7c6f3" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 19906560, "records": [ { "name": "model.layers.17.mlp.down_proj.weight", "shape": [ 1728, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19906560, "byteOffset": 0 } ], "md5sum": "b46ee8bda0dbfaad43bc0b6b0731842b" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 17915904, "records": [ { "name": "model.layers.18.self_attn.wqkv_pack.weight", "shape": [ 5184, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17915904, "byteOffset": 0 } ], "md5sum": "57109785e13228faba7c7b3252d8bc92" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 31864320, "records": [ { "name": "model.layers.16.self_attn.o_proj.weight", "shape": [ 1728, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5971968, "byteOffset": 0 }, { "name": "model.layers.16.mlp.down_proj.weight", "shape": [ 1728, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19906560, "byteOffset": 5971968 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 25878528 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 25881984 }, { "name": "model.layers.17.self_attn.o_proj.weight", "shape": [ 1728, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5971968, "byteOffset": 25885440 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 31857408 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 31860864 } ], "md5sum": "7540e950a073851405e7b5055587daf8" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 39813120, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.weight", "shape": [ 11520, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39813120, "byteOffset": 0 } ], "md5sum": "af75a98e9be86be8fa120ebd01cd9771" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 17915904, "records": [ { "name": "model.layers.19.self_attn.wqkv_pack.weight", "shape": [ 5184, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17915904, "byteOffset": 0 } ], "md5sum": "ea70168c8210e38fac41c31b30daebda" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 39813120, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.weight", "shape": [ 11520, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39813120, "byteOffset": 0 } ], "md5sum": "3598822ac84ef760bf6f900f82c0f2f1" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 19906560, "records": [ { "name": "model.layers.19.mlp.down_proj.weight", "shape": [ 1728, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19906560, "byteOffset": 0 } ], "md5sum": "787c9c88d64b6727f183e0f12ba29388" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 17915904, "records": [ { "name": "model.layers.20.self_attn.wqkv_pack.weight", "shape": [ 5184, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17915904, "byteOffset": 0 } ], "md5sum": "7c79f1ba4ee517b90f04fb62e03ee841" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 31864320, "records": [ { "name": "model.layers.18.self_attn.o_proj.weight", "shape": [ 1728, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5971968, "byteOffset": 0 }, { "name": "model.layers.18.mlp.down_proj.weight", "shape": [ 1728, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19906560, "byteOffset": 5971968 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 25878528 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 25881984 }, { "name": "model.layers.19.self_attn.o_proj.weight", "shape": [ 1728, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5971968, "byteOffset": 25885440 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 31857408 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 31860864 } ], "md5sum": "751212d26ac6f1401403772c779e6e7d" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 39813120, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.weight", "shape": [ 11520, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39813120, "byteOffset": 0 } ], "md5sum": "49c01705cdc85f6f9571edba8f062c53" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 17915904, "records": [ { "name": "model.layers.21.self_attn.wqkv_pack.weight", "shape": [ 5184, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17915904, "byteOffset": 0 } ], "md5sum": "e6c7e22fcc4a6a75faf3b859657694b5" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 39813120, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.weight", "shape": [ 11520, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39813120, "byteOffset": 0 } ], "md5sum": "2e59790bd60f8d4c0d11804801f470a3" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 19906560, "records": [ { "name": "model.layers.21.mlp.down_proj.weight", "shape": [ 1728, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19906560, "byteOffset": 0 } ], "md5sum": "684af2911095e6b1482f28c486888d56" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 17915904, "records": [ { "name": "model.layers.22.self_attn.wqkv_pack.weight", "shape": [ 5184, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17915904, "byteOffset": 0 } ], "md5sum": "ed9f19163665d8b81be2b7173ff41a42" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 31864320, "records": [ { "name": "model.layers.20.self_attn.o_proj.weight", "shape": [ 1728, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5971968, "byteOffset": 0 }, { "name": "model.layers.20.mlp.down_proj.weight", "shape": [ 1728, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19906560, "byteOffset": 5971968 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 25878528 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 25881984 }, { "name": "model.layers.21.self_attn.o_proj.weight", "shape": [ 1728, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5971968, "byteOffset": 25885440 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 31857408 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 31860864 } ], "md5sum": "41846c5365149e187bee85082ec7d8eb" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 39813120, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.weight", "shape": [ 11520, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39813120, "byteOffset": 0 } ], "md5sum": "b6e24d1413a11736e536be23ae6c36a9" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 17915904, "records": [ { "name": "model.layers.23.self_attn.wqkv_pack.weight", "shape": [ 5184, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17915904, "byteOffset": 0 } ], "md5sum": "ce4a385b9e2557173bc4a11126712bd3" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 39813120, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.weight", "shape": [ 11520, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39813120, "byteOffset": 0 } ], "md5sum": "44db1fa95fc6d45a77160b42ca88bdb6" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 19906560, "records": [ { "name": "model.layers.23.mlp.down_proj.weight", "shape": [ 1728, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19906560, "byteOffset": 0 } ], "md5sum": "e1f2dc182a390d3b7593fbb937332119" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 17915904, "records": [ { "name": "model.layers.24.self_attn.wqkv_pack.weight", "shape": [ 5184, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17915904, "byteOffset": 0 } ], "md5sum": "920b8278754364eefd922aad2420922d" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 31864320, "records": [ { "name": "model.layers.22.self_attn.o_proj.weight", "shape": [ 1728, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5971968, "byteOffset": 0 }, { "name": "model.layers.22.mlp.down_proj.weight", "shape": [ 1728, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19906560, "byteOffset": 5971968 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 25878528 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 25881984 }, { "name": "model.layers.23.self_attn.o_proj.weight", "shape": [ 1728, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5971968, "byteOffset": 25885440 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 31857408 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 31860864 } ], "md5sum": "338cca63137cf293104366443d175545" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 39813120, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.weight", "shape": [ 11520, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39813120, "byteOffset": 0 } ], "md5sum": "80f37cc7ec33b847694c7298aabb68b6" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 17915904, "records": [ { "name": "model.layers.25.self_attn.wqkv_pack.weight", "shape": [ 5184, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17915904, "byteOffset": 0 } ], "md5sum": "913c3009bb3e270a7a1ddadfab558efb" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 39813120, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.weight", "shape": [ 11520, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39813120, "byteOffset": 0 } ], "md5sum": "2fbcd101d98ece0cd21ca6a7e1a89855" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 19906560, "records": [ { "name": "model.layers.25.mlp.down_proj.weight", "shape": [ 1728, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19906560, "byteOffset": 0 } ], "md5sum": "8f81832a2246abda91bfe853d70d6a83" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 17915904, "records": [ { "name": "model.layers.26.self_attn.wqkv_pack.weight", "shape": [ 5184, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17915904, "byteOffset": 0 } ], "md5sum": "c413dec726f41f15e2ca7c3fb9a34dde" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 31864320, "records": [ { "name": "model.layers.24.self_attn.o_proj.weight", "shape": [ 1728, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5971968, "byteOffset": 0 }, { "name": "model.layers.24.mlp.down_proj.weight", "shape": [ 1728, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19906560, "byteOffset": 5971968 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 25878528 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 25881984 }, { "name": "model.layers.25.self_attn.o_proj.weight", "shape": [ 1728, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5971968, "byteOffset": 25885440 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 31857408 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 31860864 } ], "md5sum": "80775e0cfcb25f292b324dfa3f06853f" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 39813120, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.weight", "shape": [ 11520, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39813120, "byteOffset": 0 } ], "md5sum": "a558fcac0c50190b0a4f82609924909e" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 17915904, "records": [ { "name": "model.layers.27.self_attn.wqkv_pack.weight", "shape": [ 5184, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17915904, "byteOffset": 0 } ], "md5sum": "bfc8b30f6d8c8dd1573052116b399d9e" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 39813120, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.weight", "shape": [ 11520, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39813120, "byteOffset": 0 } ], "md5sum": "bb3ad6806fafd3906db7e0b3502b4d1f" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 19906560, "records": [ { "name": "model.layers.27.mlp.down_proj.weight", "shape": [ 1728, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19906560, "byteOffset": 0 } ], "md5sum": "a42fcf54f879dc0ce8e301a18b617ec7" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 17915904, "records": [ { "name": "model.layers.28.self_attn.wqkv_pack.weight", "shape": [ 5184, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17915904, "byteOffset": 0 } ], "md5sum": "712ae1a43c6d6d437ad7e612b6e01286" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 31864320, "records": [ { "name": "model.layers.26.self_attn.o_proj.weight", "shape": [ 1728, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5971968, "byteOffset": 0 }, { "name": "model.layers.26.mlp.down_proj.weight", "shape": [ 1728, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19906560, "byteOffset": 5971968 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 25878528 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 25881984 }, { "name": "model.layers.27.self_attn.o_proj.weight", "shape": [ 1728, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5971968, "byteOffset": 25885440 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 31857408 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 31860864 } ], "md5sum": "c2c267447e94f4fdae66b7e7a101417c" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 39813120, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.weight", "shape": [ 11520, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39813120, "byteOffset": 0 } ], "md5sum": "51b974115d963e41bd5c11af7b8c7773" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 17915904, "records": [ { "name": "model.layers.29.self_attn.wqkv_pack.weight", "shape": [ 5184, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17915904, "byteOffset": 0 } ], "md5sum": "fb98e5b24aa861d5210854ac666b656c" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 39813120, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.weight", "shape": [ 11520, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39813120, "byteOffset": 0 } ], "md5sum": "d24ef1245c9da6a58e074a48686d5362" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 19906560, "records": [ { "name": "model.layers.29.mlp.down_proj.weight", "shape": [ 1728, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19906560, "byteOffset": 0 } ], "md5sum": "343714a64afaa6fc41634e9fcbb594e2" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 17915904, "records": [ { "name": "model.layers.30.self_attn.wqkv_pack.weight", "shape": [ 5184, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17915904, "byteOffset": 0 } ], "md5sum": "3e8334c1979c2f644ffafa99c9801c9a" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 31864320, "records": [ { "name": "model.layers.28.self_attn.o_proj.weight", "shape": [ 1728, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5971968, "byteOffset": 0 }, { "name": "model.layers.28.mlp.down_proj.weight", "shape": [ 1728, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19906560, "byteOffset": 5971968 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 25878528 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 25881984 }, { "name": "model.layers.29.self_attn.o_proj.weight", "shape": [ 1728, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5971968, "byteOffset": 25885440 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 31857408 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 31860864 } ], "md5sum": "d30d033c8bb6631cbdc16de6c7933bbf" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 39813120, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.weight", "shape": [ 11520, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39813120, "byteOffset": 0 } ], "md5sum": "9b0ad7a3b53a5858c176d8f504c8f1ea" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 17915904, "records": [ { "name": "model.layers.31.self_attn.wqkv_pack.weight", "shape": [ 5184, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17915904, "byteOffset": 0 } ], "md5sum": "f0504ec15e8a8aa97c73ff4ff0b22103" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 39813120, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.weight", "shape": [ 11520, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39813120, "byteOffset": 0 } ], "md5sum": "8a83cf168681e76f13f0bfdfa5ec64ab" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 19906560, "records": [ { "name": "model.layers.31.mlp.down_proj.weight", "shape": [ 1728, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19906560, "byteOffset": 0 } ], "md5sum": "c301ff8d93bd2d7fb469026f0055a9e8" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 17915904, "records": [ { "name": "model.layers.32.self_attn.wqkv_pack.weight", "shape": [ 5184, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17915904, "byteOffset": 0 } ], "md5sum": "f3a4b2d30d8cc7368dd46bcd9b6b3c48" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 31864320, "records": [ { "name": "model.layers.30.self_attn.o_proj.weight", "shape": [ 1728, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5971968, "byteOffset": 0 }, { "name": "model.layers.30.mlp.down_proj.weight", "shape": [ 1728, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19906560, "byteOffset": 5971968 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 25878528 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 25881984 }, { "name": "model.layers.31.self_attn.o_proj.weight", "shape": [ 1728, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5971968, "byteOffset": 25885440 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 31857408 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 31860864 } ], "md5sum": "173885a064341a3b5ab1fe49550186ab" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 39813120, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.weight", "shape": [ 11520, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39813120, "byteOffset": 0 } ], "md5sum": "062b61c6197ef32a399821904d87833d" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 17915904, "records": [ { "name": "model.layers.33.self_attn.wqkv_pack.weight", "shape": [ 5184, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17915904, "byteOffset": 0 } ], "md5sum": "c9fd5ec5a21cdd3ca7b7be38924439f6" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 39813120, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.weight", "shape": [ 11520, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39813120, "byteOffset": 0 } ], "md5sum": "741be5f823be2711989935adaa821b6b" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 19906560, "records": [ { "name": "model.layers.33.mlp.down_proj.weight", "shape": [ 1728, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19906560, "byteOffset": 0 } ], "md5sum": "4319d4a825b6b27ee344d11c1b2f25d8" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 17915904, "records": [ { "name": "model.layers.34.self_attn.wqkv_pack.weight", "shape": [ 5184, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17915904, "byteOffset": 0 } ], "md5sum": "04b0482b02c90e60787bb3ddc0e1287d" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 31864320, "records": [ { "name": "model.layers.32.self_attn.o_proj.weight", "shape": [ 1728, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5971968, "byteOffset": 0 }, { "name": "model.layers.32.mlp.down_proj.weight", "shape": [ 1728, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19906560, "byteOffset": 5971968 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 25878528 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 25881984 }, { "name": "model.layers.33.self_attn.o_proj.weight", "shape": [ 1728, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5971968, "byteOffset": 25885440 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 31857408 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 31860864 } ], "md5sum": "9e68938ee3112161286e8e0591eaff96" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 39813120, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.weight", "shape": [ 11520, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39813120, "byteOffset": 0 } ], "md5sum": "db62c51e8d279ff5bd8b156039654d8e" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 17915904, "records": [ { "name": "model.layers.35.self_attn.wqkv_pack.weight", "shape": [ 5184, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17915904, "byteOffset": 0 } ], "md5sum": "9fbdf62ad20b0483c03ead95d70fe6ef" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 39813120, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.weight", "shape": [ 11520, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39813120, "byteOffset": 0 } ], "md5sum": "ae6ea96f62f736f3f6615727f636845d" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 19906560, "records": [ { "name": "model.layers.35.mlp.down_proj.weight", "shape": [ 1728, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19906560, "byteOffset": 0 } ], "md5sum": "87d6cf23993490382d3cf5d29124d73d" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 17915904, "records": [ { "name": "model.layers.36.self_attn.wqkv_pack.weight", "shape": [ 5184, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17915904, "byteOffset": 0 } ], "md5sum": "ef1cb792d2766c304e8b9969beae5165" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 31864320, "records": [ { "name": "model.layers.34.self_attn.o_proj.weight", "shape": [ 1728, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5971968, "byteOffset": 0 }, { "name": "model.layers.34.mlp.down_proj.weight", "shape": [ 1728, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19906560, "byteOffset": 5971968 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 25878528 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 25881984 }, { "name": "model.layers.35.self_attn.o_proj.weight", "shape": [ 1728, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5971968, "byteOffset": 25885440 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 31857408 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 31860864 } ], "md5sum": "d125807a24da4ecb18693c884af0ed7f" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 39813120, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.weight", "shape": [ 11520, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39813120, "byteOffset": 0 } ], "md5sum": "4ebeaeda2de50d577cf21676a245667f" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 17915904, "records": [ { "name": "model.layers.37.self_attn.wqkv_pack.weight", "shape": [ 5184, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17915904, "byteOffset": 0 } ], "md5sum": "7e14b05aaa64942c1832a215fc01e721" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 39813120, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.weight", "shape": [ 11520, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39813120, "byteOffset": 0 } ], "md5sum": "00d50949f88c4985cf91d88af579439b" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 19906560, "records": [ { "name": "model.layers.37.mlp.down_proj.weight", "shape": [ 1728, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19906560, "byteOffset": 0 } ], "md5sum": "9f10aaf234616846a3d04df6c10f66db" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 17915904, "records": [ { "name": "model.layers.38.self_attn.wqkv_pack.weight", "shape": [ 5184, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17915904, "byteOffset": 0 } ], "md5sum": "f90dc68088ba0caf169e999d446a74a4" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 31864320, "records": [ { "name": "model.layers.36.self_attn.o_proj.weight", "shape": [ 1728, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5971968, "byteOffset": 0 }, { "name": "model.layers.36.mlp.down_proj.weight", "shape": [ 1728, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19906560, "byteOffset": 5971968 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 25878528 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 25881984 }, { "name": "model.layers.37.self_attn.o_proj.weight", "shape": [ 1728, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5971968, "byteOffset": 25885440 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 31857408 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 31860864 } ], "md5sum": "0e099c8e3425b210e8e42128f690092e" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 39813120, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.weight", "shape": [ 11520, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39813120, "byteOffset": 0 } ], "md5sum": "bcc97c373be40722d5a5976a68cad516" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 17915904, "records": [ { "name": "model.layers.39.self_attn.wqkv_pack.weight", "shape": [ 5184, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17915904, "byteOffset": 0 } ], "md5sum": "b1f74100999cb0eea178f69181ec4318" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 39813120, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.weight", "shape": [ 11520, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39813120, "byteOffset": 0 } ], "md5sum": "d2ebb28e6e6dfa44650b114ce51f3021" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 19906560, "records": [ { "name": "model.layers.39.mlp.down_proj.weight", "shape": [ 1728, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19906560, "byteOffset": 0 } ], "md5sum": "51e71b5ae20c8449b7d19644d3408031" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 424258560, "records": [ { "name": "lm_head.weight", "shape": [ 122760, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 424258560, "byteOffset": 0 } ], "md5sum": "23346816a9b0c0bced3914b8cce3575b" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 31867776, "records": [ { "name": "model.layers.38.self_attn.o_proj.weight", "shape": [ 1728, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5971968, "byteOffset": 0 }, { "name": "model.layers.38.mlp.down_proj.weight", "shape": [ 1728, 5760 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19906560, "byteOffset": 5971968 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 25878528 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 25881984 }, { "name": "model.layers.39.self_attn.o_proj.weight", "shape": [ 1728, 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5971968, "byteOffset": 25885440 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 31857408 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 31860864 }, { "name": "model.norm.weight", "shape": [ 1728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3456, "byteOffset": 31864320 } ], "md5sum": "3e2e1b9228694e7de15f3f4a76c5ed6c" } ] }