{ "metadata": { "ParamSize": 205, "ParamBytes": 500957200.0, "BitsPerParam": 32.0 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 77207040, "records": [ { "name": "roberta.embeddings.word_embeddings.weight", "shape": [ 50265, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 77207040, "byteOffset": 0 } ], "md5sum": "822cb1e659d209d0c4ab502aa588a4a9" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 32699912, "records": [ { "name": "latency_classifier.dense.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 0 }, { "name": "latency_classifier.dense.weight", "shape": [ 768, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 1536 }, { "name": "latency_classifier.out_proj.bias", "shape": [ 2 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4, "byteOffset": 1181184 }, { "name": "latency_classifier.out_proj.weight", "shape": [ 2, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3072, "byteOffset": 1181188 }, { "name": "quality_classifier.dense.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 1184260 }, { "name": "quality_classifier.dense.weight", "shape": [ 768, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 1185796 }, { "name": "quality_classifier.out_proj.bias", "shape": [ 2 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4, "byteOffset": 2365444 }, { "name": "quality_classifier.out_proj.weight", "shape": [ 2, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3072, "byteOffset": 2365448 }, { "name": "roberta.embeddings.LayerNorm.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 2368520 }, { "name": "roberta.embeddings.LayerNorm.weight", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 2370056 }, { "name": "roberta.embeddings.position_embeddings.weight", "shape": [ 514, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 789504, "byteOffset": 2371592 }, { "name": "roberta.embeddings.token_type_embeddings.weight", "shape": [ 1, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 3161096 }, { "name": "roberta.encoder.layer.0.attention.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 3162632 }, { "name": "roberta.encoder.layer.0.attention.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 3164168 }, { "name": "roberta.encoder.layer.0.attention.output.dense.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 3165704 }, { "name": "roberta.encoder.layer.0.attention.output.dense.weight", "shape": [ 768, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 3167240 }, { "name": "roberta.encoder.layer.0.attention.self.key.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 4346888 }, { "name": "roberta.encoder.layer.0.attention.self.key.weight", "shape": [ 768, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 4348424 }, { "name": "roberta.encoder.layer.0.attention.self.query.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 5528072 }, { "name": "roberta.encoder.layer.0.attention.self.query.weight", "shape": [ 768, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 5529608 }, { "name": "roberta.encoder.layer.0.attention.self.value.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 6709256 }, { "name": "roberta.encoder.layer.0.attention.self.value.weight", "shape": [ 768, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 6710792 }, { "name": "roberta.encoder.layer.0.intermediate.dense.bias", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 7890440 }, { "name": "roberta.encoder.layer.0.intermediate.dense.weight", "shape": [ 3072, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4718592, "byteOffset": 7896584 }, { "name": "roberta.encoder.layer.0.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 12615176 }, { "name": "roberta.encoder.layer.0.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 12616712 }, { "name": "roberta.encoder.layer.0.output.dense.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 12618248 }, { "name": "roberta.encoder.layer.0.output.dense.weight", "shape": [ 768, 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4718592, "byteOffset": 12619784 }, { "name": "roberta.encoder.layer.1.attention.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 17338376 }, { "name": "roberta.encoder.layer.1.attention.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 17339912 }, { "name": "roberta.encoder.layer.1.attention.output.dense.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 17341448 }, { "name": "roberta.encoder.layer.1.attention.output.dense.weight", "shape": [ 768, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 17342984 }, { "name": "roberta.encoder.layer.1.attention.self.key.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 18522632 }, { "name": "roberta.encoder.layer.1.attention.self.key.weight", "shape": [ 768, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 18524168 }, { "name": "roberta.encoder.layer.1.attention.self.query.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 19703816 }, { "name": "roberta.encoder.layer.1.attention.self.query.weight", "shape": [ 768, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 19705352 }, { "name": "roberta.encoder.layer.1.attention.self.value.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 20885000 }, { "name": "roberta.encoder.layer.1.attention.self.value.weight", "shape": [ 768, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 20886536 }, { "name": "roberta.encoder.layer.1.intermediate.dense.bias", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 22066184 }, { "name": "roberta.encoder.layer.1.intermediate.dense.weight", "shape": [ 3072, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4718592, "byteOffset": 22072328 }, { "name": "roberta.encoder.layer.1.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 26790920 }, { "name": "roberta.encoder.layer.1.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 26792456 }, { "name": "roberta.encoder.layer.1.output.dense.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 26793992 }, { "name": "roberta.encoder.layer.1.output.dense.weight", "shape": [ 768, 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4718592, "byteOffset": 26795528 }, { "name": "roberta.encoder.layer.10.attention.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 31514120 }, { "name": "roberta.encoder.layer.10.attention.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 31515656 }, { "name": "roberta.encoder.layer.10.attention.output.dense.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 31517192 }, { "name": "roberta.encoder.layer.10.attention.output.dense.weight", "shape": [ 768, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 31518728 }, { "name": "roberta.encoder.layer.10.attention.self.key.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 32698376 } ], "md5sum": "c773b3066463e8b7ca2c687935fdd38d" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 31899648, "records": [ { "name": "roberta.encoder.layer.10.attention.self.key.weight", "shape": [ 768, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 0 }, { "name": "roberta.encoder.layer.10.attention.self.query.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 1179648 }, { "name": "roberta.encoder.layer.10.attention.self.query.weight", "shape": [ 768, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 1181184 }, { "name": "roberta.encoder.layer.10.attention.self.value.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 2360832 }, { "name": "roberta.encoder.layer.10.attention.self.value.weight", "shape": [ 768, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 2362368 }, { "name": "roberta.encoder.layer.10.intermediate.dense.bias", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 3542016 }, { "name": "roberta.encoder.layer.10.intermediate.dense.weight", "shape": [ 3072, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4718592, "byteOffset": 3548160 }, { "name": "roberta.encoder.layer.10.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 8266752 }, { "name": "roberta.encoder.layer.10.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 8268288 }, { "name": "roberta.encoder.layer.10.output.dense.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 8269824 }, { "name": "roberta.encoder.layer.10.output.dense.weight", "shape": [ 768, 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4718592, "byteOffset": 8271360 }, { "name": "roberta.encoder.layer.11.attention.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 12989952 }, { "name": "roberta.encoder.layer.11.attention.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 12991488 }, { "name": "roberta.encoder.layer.11.attention.output.dense.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 12993024 }, { "name": "roberta.encoder.layer.11.attention.output.dense.weight", "shape": [ 768, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 12994560 }, { "name": "roberta.encoder.layer.11.attention.self.key.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 14174208 }, { "name": "roberta.encoder.layer.11.attention.self.key.weight", "shape": [ 768, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 14175744 }, { "name": "roberta.encoder.layer.11.attention.self.query.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 15355392 }, { "name": "roberta.encoder.layer.11.attention.self.query.weight", "shape": [ 768, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 15356928 }, { "name": "roberta.encoder.layer.11.attention.self.value.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 16536576 }, { "name": "roberta.encoder.layer.11.attention.self.value.weight", "shape": [ 768, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 16538112 }, { "name": "roberta.encoder.layer.11.intermediate.dense.bias", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 17717760 }, { "name": "roberta.encoder.layer.11.intermediate.dense.weight", "shape": [ 3072, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4718592, "byteOffset": 17723904 }, { "name": "roberta.encoder.layer.11.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 22442496 }, { "name": "roberta.encoder.layer.11.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 22444032 }, { "name": "roberta.encoder.layer.11.output.dense.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 22445568 }, { "name": "roberta.encoder.layer.11.output.dense.weight", "shape": [ 768, 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4718592, "byteOffset": 22447104 }, { "name": "roberta.encoder.layer.2.attention.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 27165696 }, { "name": "roberta.encoder.layer.2.attention.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 27167232 }, { "name": "roberta.encoder.layer.2.attention.output.dense.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 27168768 }, { "name": "roberta.encoder.layer.2.attention.output.dense.weight", "shape": [ 768, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 27170304 }, { "name": "roberta.encoder.layer.2.attention.self.key.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 28349952 }, { "name": "roberta.encoder.layer.2.attention.self.key.weight", "shape": [ 768, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 28351488 }, { "name": "roberta.encoder.layer.2.attention.self.query.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 29531136 }, { "name": "roberta.encoder.layer.2.attention.self.query.weight", "shape": [ 768, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 29532672 }, { "name": "roberta.encoder.layer.2.attention.self.value.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 30712320 }, { "name": "roberta.encoder.layer.2.attention.self.value.weight", "shape": [ 768, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 30713856 }, { "name": "roberta.encoder.layer.2.intermediate.dense.bias", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 31893504 } ], "md5sum": "cd967b7aa914de66a898f89fcac8f9dc" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 33074688, "records": [ { "name": "roberta.encoder.layer.2.intermediate.dense.weight", "shape": [ 3072, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4718592, "byteOffset": 0 }, { "name": "roberta.encoder.layer.2.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 4718592 }, { "name": "roberta.encoder.layer.2.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 4720128 }, { "name": "roberta.encoder.layer.2.output.dense.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 4721664 }, { "name": "roberta.encoder.layer.2.output.dense.weight", "shape": [ 768, 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4718592, "byteOffset": 4723200 }, { "name": "roberta.encoder.layer.3.attention.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 9441792 }, { "name": "roberta.encoder.layer.3.attention.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 9443328 }, { "name": "roberta.encoder.layer.3.attention.output.dense.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 9444864 }, { "name": "roberta.encoder.layer.3.attention.output.dense.weight", "shape": [ 768, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 9446400 }, { "name": "roberta.encoder.layer.3.attention.self.key.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 10626048 }, { "name": "roberta.encoder.layer.3.attention.self.key.weight", "shape": [ 768, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 10627584 }, { "name": "roberta.encoder.layer.3.attention.self.query.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 11807232 }, { "name": "roberta.encoder.layer.3.attention.self.query.weight", "shape": [ 768, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 11808768 }, { "name": "roberta.encoder.layer.3.attention.self.value.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 12988416 }, { "name": "roberta.encoder.layer.3.attention.self.value.weight", "shape": [ 768, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 12989952 }, { "name": "roberta.encoder.layer.3.intermediate.dense.bias", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 14169600 }, { "name": "roberta.encoder.layer.3.intermediate.dense.weight", "shape": [ 3072, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4718592, "byteOffset": 14175744 }, { "name": "roberta.encoder.layer.3.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 18894336 }, { "name": "roberta.encoder.layer.3.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 18895872 }, { "name": "roberta.encoder.layer.3.output.dense.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 18897408 }, { "name": "roberta.encoder.layer.3.output.dense.weight", "shape": [ 768, 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4718592, "byteOffset": 18898944 }, { "name": "roberta.encoder.layer.4.attention.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 23617536 }, { "name": "roberta.encoder.layer.4.attention.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 23619072 }, { "name": "roberta.encoder.layer.4.attention.output.dense.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 23620608 }, { "name": "roberta.encoder.layer.4.attention.output.dense.weight", "shape": [ 768, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 23622144 }, { "name": "roberta.encoder.layer.4.attention.self.key.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 24801792 }, { "name": "roberta.encoder.layer.4.attention.self.key.weight", "shape": [ 768, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 24803328 }, { "name": "roberta.encoder.layer.4.attention.self.query.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 25982976 }, { "name": "roberta.encoder.layer.4.attention.self.query.weight", "shape": [ 768, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 25984512 }, { "name": "roberta.encoder.layer.4.attention.self.value.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 27164160 }, { "name": "roberta.encoder.layer.4.attention.self.value.weight", "shape": [ 768, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 27165696 }, { "name": "roberta.encoder.layer.4.intermediate.dense.bias", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 28345344 }, { "name": "roberta.encoder.layer.4.intermediate.dense.weight", "shape": [ 3072, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4718592, "byteOffset": 28351488 }, { "name": "roberta.encoder.layer.4.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 33070080 }, { "name": "roberta.encoder.layer.4.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 33071616 }, { "name": "roberta.encoder.layer.4.output.dense.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 33073152 } ], "md5sum": "f1598d0a023d40021c0400ee9f49844d" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 33074688, "records": [ { "name": "roberta.encoder.layer.4.output.dense.weight", "shape": [ 768, 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4718592, "byteOffset": 0 }, { "name": "roberta.encoder.layer.5.attention.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 4718592 }, { "name": "roberta.encoder.layer.5.attention.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 4720128 }, { "name": "roberta.encoder.layer.5.attention.output.dense.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 4721664 }, { "name": "roberta.encoder.layer.5.attention.output.dense.weight", "shape": [ 768, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 4723200 }, { "name": "roberta.encoder.layer.5.attention.self.key.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 5902848 }, { "name": "roberta.encoder.layer.5.attention.self.key.weight", "shape": [ 768, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 5904384 }, { "name": "roberta.encoder.layer.5.attention.self.query.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 7084032 }, { "name": "roberta.encoder.layer.5.attention.self.query.weight", "shape": [ 768, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 7085568 }, { "name": "roberta.encoder.layer.5.attention.self.value.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 8265216 }, { "name": "roberta.encoder.layer.5.attention.self.value.weight", "shape": [ 768, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 8266752 }, { "name": "roberta.encoder.layer.5.intermediate.dense.bias", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 9446400 }, { "name": "roberta.encoder.layer.5.intermediate.dense.weight", "shape": [ 3072, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4718592, "byteOffset": 9452544 }, { "name": "roberta.encoder.layer.5.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 14171136 }, { "name": "roberta.encoder.layer.5.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 14172672 }, { "name": "roberta.encoder.layer.5.output.dense.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 14174208 }, { "name": "roberta.encoder.layer.5.output.dense.weight", "shape": [ 768, 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4718592, "byteOffset": 14175744 }, { "name": "roberta.encoder.layer.6.attention.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 18894336 }, { "name": "roberta.encoder.layer.6.attention.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 18895872 }, { "name": "roberta.encoder.layer.6.attention.output.dense.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 18897408 }, { "name": "roberta.encoder.layer.6.attention.output.dense.weight", "shape": [ 768, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 18898944 }, { "name": "roberta.encoder.layer.6.attention.self.key.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 20078592 }, { "name": "roberta.encoder.layer.6.attention.self.key.weight", "shape": [ 768, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 20080128 }, { "name": "roberta.encoder.layer.6.attention.self.query.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 21259776 }, { "name": "roberta.encoder.layer.6.attention.self.query.weight", "shape": [ 768, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 21261312 }, { "name": "roberta.encoder.layer.6.attention.self.value.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 22440960 }, { "name": "roberta.encoder.layer.6.attention.self.value.weight", "shape": [ 768, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 22442496 }, { "name": "roberta.encoder.layer.6.intermediate.dense.bias", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 23622144 }, { "name": "roberta.encoder.layer.6.intermediate.dense.weight", "shape": [ 3072, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4718592, "byteOffset": 23628288 }, { "name": "roberta.encoder.layer.6.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 28346880 }, { "name": "roberta.encoder.layer.6.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 28348416 }, { "name": "roberta.encoder.layer.6.output.dense.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 28349952 }, { "name": "roberta.encoder.layer.6.output.dense.weight", "shape": [ 768, 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4718592, "byteOffset": 28351488 }, { "name": "roberta.encoder.layer.7.attention.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 33070080 }, { "name": "roberta.encoder.layer.7.attention.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 33071616 }, { "name": "roberta.encoder.layer.7.attention.output.dense.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 33073152 } ], "md5sum": "d7db2ff4e87fb3b13f5a0f5e9c00d5e9" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 33080832, "records": [ { "name": "roberta.encoder.layer.7.attention.output.dense.weight", "shape": [ 768, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 0 }, { "name": "roberta.encoder.layer.7.attention.self.key.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 1179648 }, { "name": "roberta.encoder.layer.7.attention.self.key.weight", "shape": [ 768, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 1181184 }, { "name": "roberta.encoder.layer.7.attention.self.query.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 2360832 }, { "name": "roberta.encoder.layer.7.attention.self.query.weight", "shape": [ 768, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 2362368 }, { "name": "roberta.encoder.layer.7.attention.self.value.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 3542016 }, { "name": "roberta.encoder.layer.7.attention.self.value.weight", "shape": [ 768, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 3543552 }, { "name": "roberta.encoder.layer.7.intermediate.dense.bias", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 4723200 }, { "name": "roberta.encoder.layer.7.intermediate.dense.weight", "shape": [ 3072, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4718592, "byteOffset": 4729344 }, { "name": "roberta.encoder.layer.7.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 9447936 }, { "name": "roberta.encoder.layer.7.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 9449472 }, { "name": "roberta.encoder.layer.7.output.dense.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 9451008 }, { "name": "roberta.encoder.layer.7.output.dense.weight", "shape": [ 768, 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4718592, "byteOffset": 9452544 }, { "name": "roberta.encoder.layer.8.attention.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 14171136 }, { "name": "roberta.encoder.layer.8.attention.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 14172672 }, { "name": "roberta.encoder.layer.8.attention.output.dense.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 14174208 }, { "name": "roberta.encoder.layer.8.attention.output.dense.weight", "shape": [ 768, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 14175744 }, { "name": "roberta.encoder.layer.8.attention.self.key.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 15355392 }, { "name": "roberta.encoder.layer.8.attention.self.key.weight", "shape": [ 768, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 15356928 }, { "name": "roberta.encoder.layer.8.attention.self.query.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 16536576 }, { "name": "roberta.encoder.layer.8.attention.self.query.weight", "shape": [ 768, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 16538112 }, { "name": "roberta.encoder.layer.8.attention.self.value.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 17717760 }, { "name": "roberta.encoder.layer.8.attention.self.value.weight", "shape": [ 768, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 17719296 }, { "name": "roberta.encoder.layer.8.intermediate.dense.bias", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 18898944 }, { "name": "roberta.encoder.layer.8.intermediate.dense.weight", "shape": [ 3072, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4718592, "byteOffset": 18905088 }, { "name": "roberta.encoder.layer.8.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 23623680 }, { "name": "roberta.encoder.layer.8.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 23625216 }, { "name": "roberta.encoder.layer.8.output.dense.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 23626752 }, { "name": "roberta.encoder.layer.8.output.dense.weight", "shape": [ 768, 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4718592, "byteOffset": 23628288 }, { "name": "roberta.encoder.layer.9.attention.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 28346880 }, { "name": "roberta.encoder.layer.9.attention.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 28348416 }, { "name": "roberta.encoder.layer.9.attention.output.dense.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 28349952 }, { "name": "roberta.encoder.layer.9.attention.output.dense.weight", "shape": [ 768, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 28351488 }, { "name": "roberta.encoder.layer.9.attention.self.key.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 29531136 }, { "name": "roberta.encoder.layer.9.attention.self.key.weight", "shape": [ 768, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 29532672 }, { "name": "roberta.encoder.layer.9.attention.self.query.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 30712320 }, { "name": "roberta.encoder.layer.9.attention.self.query.weight", "shape": [ 768, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 30713856 }, { "name": "roberta.encoder.layer.9.attention.self.value.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 31893504 }, { "name": "roberta.encoder.layer.9.attention.self.value.weight", "shape": [ 768, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 31895040 }, { "name": "roberta.encoder.layer.9.intermediate.dense.bias", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 33074688 } ], "md5sum": "16e5fe6cb9c4b8dfe68170b6b4a350be" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 9441792, "records": [ { "name": "roberta.encoder.layer.9.intermediate.dense.weight", "shape": [ 3072, 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4718592, "byteOffset": 0 }, { "name": "roberta.encoder.layer.9.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 4718592 }, { "name": "roberta.encoder.layer.9.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 4720128 }, { "name": "roberta.encoder.layer.9.output.dense.bias", "shape": [ 768 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 4721664 }, { "name": "roberta.encoder.layer.9.output.dense.weight", "shape": [ 768, 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4718592, "byteOffset": 4723200 } ], "md5sum": "5882c18b7fb291d522cc147ec14e7014" } ] }