|
{ |
|
"metadata": { |
|
"ParamSize": 205, |
|
"ParamBytes": 500957200.0, |
|
"BitsPerParam": 32.0 |
|
}, |
|
"records": [ |
|
{ |
|
"dataPath": "params_shard_0.bin", |
|
"format": "raw-shard", |
|
"nbytes": 77207040, |
|
"records": [ |
|
{ |
|
"name": "roberta.embeddings.word_embeddings.weight", |
|
"shape": [ |
|
50265, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 77207040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "822cb1e659d209d0c4ab502aa588a4a9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_1.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32699912, |
|
"records": [ |
|
{ |
|
"name": "latency_classifier.dense.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "latency_classifier.dense.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 1536 |
|
}, |
|
{ |
|
"name": "latency_classifier.out_proj.bias", |
|
"shape": [ |
|
2 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4, |
|
"byteOffset": 1181184 |
|
}, |
|
{ |
|
"name": "latency_classifier.out_proj.weight", |
|
"shape": [ |
|
2, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3072, |
|
"byteOffset": 1181188 |
|
}, |
|
{ |
|
"name": "quality_classifier.dense.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 1184260 |
|
}, |
|
{ |
|
"name": "quality_classifier.dense.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 1185796 |
|
}, |
|
{ |
|
"name": "quality_classifier.out_proj.bias", |
|
"shape": [ |
|
2 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4, |
|
"byteOffset": 2365444 |
|
}, |
|
{ |
|
"name": "quality_classifier.out_proj.weight", |
|
"shape": [ |
|
2, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3072, |
|
"byteOffset": 2365448 |
|
}, |
|
{ |
|
"name": "roberta.embeddings.LayerNorm.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 2368520 |
|
}, |
|
{ |
|
"name": "roberta.embeddings.LayerNorm.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 2370056 |
|
}, |
|
{ |
|
"name": "roberta.embeddings.position_embeddings.weight", |
|
"shape": [ |
|
514, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 789504, |
|
"byteOffset": 2371592 |
|
}, |
|
{ |
|
"name": "roberta.embeddings.token_type_embeddings.weight", |
|
"shape": [ |
|
1, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 3161096 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.0.attention.output.LayerNorm.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 3162632 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.0.attention.output.LayerNorm.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 3164168 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.0.attention.output.dense.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 3165704 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.0.attention.output.dense.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 3167240 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.0.attention.self.key.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 4346888 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.0.attention.self.key.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 4348424 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.0.attention.self.query.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 5528072 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.0.attention.self.query.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 5529608 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.0.attention.self.value.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 6709256 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.0.attention.self.value.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 6710792 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.0.intermediate.dense.bias", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 7890440 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.0.intermediate.dense.weight", |
|
"shape": [ |
|
3072, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 7896584 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.0.output.LayerNorm.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 12615176 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.0.output.LayerNorm.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 12616712 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.0.output.dense.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 12618248 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.0.output.dense.weight", |
|
"shape": [ |
|
768, |
|
3072 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 12619784 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.1.attention.output.LayerNorm.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 17338376 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.1.attention.output.LayerNorm.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 17339912 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.1.attention.output.dense.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 17341448 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.1.attention.output.dense.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 17342984 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.1.attention.self.key.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 18522632 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.1.attention.self.key.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 18524168 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.1.attention.self.query.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 19703816 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.1.attention.self.query.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 19705352 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.1.attention.self.value.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 20885000 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.1.attention.self.value.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 20886536 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.1.intermediate.dense.bias", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 22066184 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.1.intermediate.dense.weight", |
|
"shape": [ |
|
3072, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 22072328 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.1.output.LayerNorm.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 26790920 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.1.output.LayerNorm.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 26792456 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.1.output.dense.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 26793992 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.1.output.dense.weight", |
|
"shape": [ |
|
768, |
|
3072 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 26795528 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.10.attention.output.LayerNorm.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 31514120 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.10.attention.output.LayerNorm.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 31515656 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.10.attention.output.dense.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 31517192 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.10.attention.output.dense.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 31518728 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.10.attention.self.key.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 32698376 |
|
} |
|
], |
|
"md5sum": "c773b3066463e8b7ca2c687935fdd38d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_2.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31899648, |
|
"records": [ |
|
{ |
|
"name": "roberta.encoder.layer.10.attention.self.key.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.10.attention.self.query.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 1179648 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.10.attention.self.query.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 1181184 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.10.attention.self.value.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 2360832 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.10.attention.self.value.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 2362368 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.10.intermediate.dense.bias", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 3542016 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.10.intermediate.dense.weight", |
|
"shape": [ |
|
3072, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 3548160 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.10.output.LayerNorm.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 8266752 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.10.output.LayerNorm.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 8268288 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.10.output.dense.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 8269824 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.10.output.dense.weight", |
|
"shape": [ |
|
768, |
|
3072 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 8271360 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.11.attention.output.LayerNorm.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 12989952 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.11.attention.output.LayerNorm.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 12991488 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.11.attention.output.dense.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 12993024 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.11.attention.output.dense.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 12994560 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.11.attention.self.key.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 14174208 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.11.attention.self.key.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 14175744 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.11.attention.self.query.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 15355392 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.11.attention.self.query.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 15356928 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.11.attention.self.value.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 16536576 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.11.attention.self.value.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 16538112 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.11.intermediate.dense.bias", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 17717760 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.11.intermediate.dense.weight", |
|
"shape": [ |
|
3072, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 17723904 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.11.output.LayerNorm.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 22442496 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.11.output.LayerNorm.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 22444032 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.11.output.dense.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 22445568 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.11.output.dense.weight", |
|
"shape": [ |
|
768, |
|
3072 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 22447104 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.2.attention.output.LayerNorm.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 27165696 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.2.attention.output.LayerNorm.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 27167232 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.2.attention.output.dense.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 27168768 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.2.attention.output.dense.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 27170304 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.2.attention.self.key.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 28349952 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.2.attention.self.key.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 28351488 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.2.attention.self.query.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 29531136 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.2.attention.self.query.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 29532672 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.2.attention.self.value.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 30712320 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.2.attention.self.value.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 30713856 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.2.intermediate.dense.bias", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 31893504 |
|
} |
|
], |
|
"md5sum": "cd967b7aa914de66a898f89fcac8f9dc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_3.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33074688, |
|
"records": [ |
|
{ |
|
"name": "roberta.encoder.layer.2.intermediate.dense.weight", |
|
"shape": [ |
|
3072, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.2.output.LayerNorm.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 4718592 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.2.output.LayerNorm.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 4720128 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.2.output.dense.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 4721664 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.2.output.dense.weight", |
|
"shape": [ |
|
768, |
|
3072 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 4723200 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.3.attention.output.LayerNorm.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 9441792 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.3.attention.output.LayerNorm.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 9443328 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.3.attention.output.dense.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 9444864 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.3.attention.output.dense.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 9446400 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.3.attention.self.key.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 10626048 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.3.attention.self.key.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 10627584 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.3.attention.self.query.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 11807232 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.3.attention.self.query.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 11808768 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.3.attention.self.value.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 12988416 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.3.attention.self.value.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 12989952 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.3.intermediate.dense.bias", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 14169600 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.3.intermediate.dense.weight", |
|
"shape": [ |
|
3072, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 14175744 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.3.output.LayerNorm.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 18894336 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.3.output.LayerNorm.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 18895872 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.3.output.dense.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 18897408 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.3.output.dense.weight", |
|
"shape": [ |
|
768, |
|
3072 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 18898944 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.4.attention.output.LayerNorm.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 23617536 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.4.attention.output.LayerNorm.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 23619072 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.4.attention.output.dense.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 23620608 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.4.attention.output.dense.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 23622144 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.4.attention.self.key.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 24801792 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.4.attention.self.key.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 24803328 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.4.attention.self.query.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 25982976 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.4.attention.self.query.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 25984512 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.4.attention.self.value.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 27164160 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.4.attention.self.value.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 27165696 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.4.intermediate.dense.bias", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 28345344 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.4.intermediate.dense.weight", |
|
"shape": [ |
|
3072, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 28351488 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.4.output.LayerNorm.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 33070080 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.4.output.LayerNorm.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 33071616 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.4.output.dense.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 33073152 |
|
} |
|
], |
|
"md5sum": "f1598d0a023d40021c0400ee9f49844d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_4.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33074688, |
|
"records": [ |
|
{ |
|
"name": "roberta.encoder.layer.4.output.dense.weight", |
|
"shape": [ |
|
768, |
|
3072 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.5.attention.output.LayerNorm.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 4718592 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.5.attention.output.LayerNorm.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 4720128 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.5.attention.output.dense.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 4721664 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.5.attention.output.dense.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 4723200 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.5.attention.self.key.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 5902848 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.5.attention.self.key.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 5904384 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.5.attention.self.query.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 7084032 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.5.attention.self.query.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 7085568 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.5.attention.self.value.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 8265216 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.5.attention.self.value.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 8266752 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.5.intermediate.dense.bias", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 9446400 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.5.intermediate.dense.weight", |
|
"shape": [ |
|
3072, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 9452544 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.5.output.LayerNorm.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 14171136 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.5.output.LayerNorm.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 14172672 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.5.output.dense.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 14174208 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.5.output.dense.weight", |
|
"shape": [ |
|
768, |
|
3072 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 14175744 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.6.attention.output.LayerNorm.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 18894336 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.6.attention.output.LayerNorm.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 18895872 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.6.attention.output.dense.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 18897408 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.6.attention.output.dense.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 18898944 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.6.attention.self.key.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 20078592 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.6.attention.self.key.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 20080128 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.6.attention.self.query.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 21259776 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.6.attention.self.query.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 21261312 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.6.attention.self.value.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 22440960 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.6.attention.self.value.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 22442496 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.6.intermediate.dense.bias", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 23622144 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.6.intermediate.dense.weight", |
|
"shape": [ |
|
3072, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 23628288 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.6.output.LayerNorm.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 28346880 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.6.output.LayerNorm.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 28348416 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.6.output.dense.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 28349952 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.6.output.dense.weight", |
|
"shape": [ |
|
768, |
|
3072 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 28351488 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.7.attention.output.LayerNorm.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 33070080 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.7.attention.output.LayerNorm.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 33071616 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.7.attention.output.dense.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 33073152 |
|
} |
|
], |
|
"md5sum": "d7db2ff4e87fb3b13f5a0f5e9c00d5e9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_5.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33080832, |
|
"records": [ |
|
{ |
|
"name": "roberta.encoder.layer.7.attention.output.dense.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.7.attention.self.key.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 1179648 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.7.attention.self.key.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 1181184 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.7.attention.self.query.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 2360832 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.7.attention.self.query.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 2362368 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.7.attention.self.value.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 3542016 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.7.attention.self.value.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 3543552 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.7.intermediate.dense.bias", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 4723200 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.7.intermediate.dense.weight", |
|
"shape": [ |
|
3072, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 4729344 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.7.output.LayerNorm.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 9447936 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.7.output.LayerNorm.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 9449472 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.7.output.dense.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 9451008 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.7.output.dense.weight", |
|
"shape": [ |
|
768, |
|
3072 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 9452544 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.8.attention.output.LayerNorm.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 14171136 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.8.attention.output.LayerNorm.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 14172672 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.8.attention.output.dense.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 14174208 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.8.attention.output.dense.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 14175744 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.8.attention.self.key.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 15355392 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.8.attention.self.key.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 15356928 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.8.attention.self.query.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 16536576 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.8.attention.self.query.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 16538112 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.8.attention.self.value.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 17717760 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.8.attention.self.value.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 17719296 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.8.intermediate.dense.bias", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 18898944 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.8.intermediate.dense.weight", |
|
"shape": [ |
|
3072, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 18905088 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.8.output.LayerNorm.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 23623680 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.8.output.LayerNorm.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 23625216 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.8.output.dense.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 23626752 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.8.output.dense.weight", |
|
"shape": [ |
|
768, |
|
3072 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 23628288 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.9.attention.output.LayerNorm.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 28346880 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.9.attention.output.LayerNorm.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 28348416 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.9.attention.output.dense.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 28349952 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.9.attention.output.dense.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 28351488 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.9.attention.self.key.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 29531136 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.9.attention.self.key.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 29532672 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.9.attention.self.query.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 30712320 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.9.attention.self.query.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 30713856 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.9.attention.self.value.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 31893504 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.9.attention.self.value.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 31895040 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.9.intermediate.dense.bias", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 33074688 |
|
} |
|
], |
|
"md5sum": "16e5fe6cb9c4b8dfe68170b6b4a350be" |
|
}, |
|
{ |
|
"dataPath": "params_shard_6.bin", |
|
"format": "raw-shard", |
|
"nbytes": 9441792, |
|
"records": [ |
|
{ |
|
"name": "roberta.encoder.layer.9.intermediate.dense.weight", |
|
"shape": [ |
|
3072, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.9.output.LayerNorm.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 4718592 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.9.output.LayerNorm.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 4720128 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.9.output.dense.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 4721664 |
|
}, |
|
{ |
|
"name": "roberta.encoder.layer.9.output.dense.weight", |
|
"shape": [ |
|
768, |
|
3072 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 4723200 |
|
} |
|
], |
|
"md5sum": "5882c18b7fb291d522cc147ec14e7014" |
|
} |
|
] |
|
} |