{
"attn_type": "bi",
"bi_data": false,
"clamp_len": -1,
"d_head": 64,
"d_inner": 4096,
"d_model": 1024,
"dropatt": 0.1,
"dropout": 0.1,
"ff_activation": "gelu",
"init": "normal",
"init_range": 0.1,
"init_std": 0.02,
"initializer_range": 0.02,
"layer_norm_eps": 1e-12,
"max_position_embeddings": 512,
"mem_len": null,
"n_head": 16,
"n_layer": 24,
"n_token": 32000,
"reuse_len": null,
"same_length": false,
"untie_r": true
}