Text Generation
Transformers
Safetensors
openelm
custom_code
OpenELM-3B-Instruct / config.json
qicao-apple's picture
add OpenELM-3B-Instruct
7f6a2a3
{
"activation_fn_name": "swish",
"architectures": [
"OpenELMForCausalLM"
],
"auto_map": {
"AutoConfig": "configuration_openelm.OpenELMConfig",
"AutoModelForCausalLM": "modeling_openelm.OpenELMForCausalLM"
},
"bos_token_id": 1,
"eos_token_id": 2,
"ffn_dim_divisor": 256,
"ffn_multipliers": [
0.5,
0.6,
0.7,
0.8,
0.9,
1.0,
1.1,
1.2,
1.3,
1.4,
1.5,
1.6,
1.7,
1.8,
1.9,
2.0,
2.1,
2.2,
2.3,
2.4,
2.5,
2.6,
2.7,
2.8,
2.9,
3.0,
3.1,
3.2,
3.3,
3.4,
3.5,
3.6,
3.7,
3.8,
3.9,
4.0
],
"ffn_with_glu": true,
"head_dim": 128,
"initializer_range": 0.02,
"max_context_length": 2048,
"model_dim": 3072,
"model_type": "openelm",
"normalization_layer_name": "rms_norm",
"normalize_qk_projections": true,
"num_gqa_groups": 4,
"num_kv_heads": [
3,
3,
3,
3,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
6,
6,
6,
6,
6,
6
],
"num_query_heads": [
12,
12,
12,
12,
16,
16,
16,
16,
16,
16,
16,
16,
16,
16,
16,
16,
16,
16,
20,
20,
20,
20,
20,
20,
20,
20,
20,
20,
20,
20,
24,
24,
24,
24,
24,
24
],
"num_transformer_layers": 36,
"qkv_multipliers": [
0.5,
1.0
],
"rope_freq_constant": 10000,
"rope_max_length": 4096,
"share_input_output_layers": true,
"torch_dtype": "bfloat16",
"transformers_version": "4.39.3",
"use_cache": true,
"vocab_size": 32000
}