; Model hyperparameters for the model named XVERSE, kept in a single [llama] section.
; Each section and each key appears exactly once; values carry no trailing text.
; NOTE(review): the consuming loader is not visible from this file - confirm it
; accepts ';' full-line comments before shipping.
[llama]
model_name = XVERSE
; head_num * size_per_head = 40 * 128 = 5120
head_num = 40
size_per_head = 128
inter_size = 13824
num_layer = 40
; set to the same value as size_per_head
rotary_embedding = 128
layernorm_eps = 1e-06
vocab_size = 100278
; NOTE(review): presumably token ids within vocab_size - verify against the tokenizer
start_id = 2
end_id = 3
tensor_para_size = 1
weight_data_type = fp32