{ "producer": { "name": "modelopt", "version": "0.15.1" }, "architecture": "LlamaForCausalLM", "dtype": "bfloat16", "logits_dtype": "float16", "num_hidden_layers": 32, "num_attention_heads": 32, "num_key_value_heads": 8, "hidden_size": 4096, "norm_epsilon": 1e-05, "vocab_size": 128256, "max_position_embeddings": 131072, "hidden_act": "silu", "use_parallel_embedding": true, "embedding_sharding_dim": 0, "quantization": { "quant_algo": "W4A16_AWQ", "kv_cache_quant_algo": null, "group_size": 64, "has_zero_point": false, "pre_quant_scale": true, "exclude_modules": [ "lm_head" ] }, "mapping": { "world_size": 1, "tp_size": 1, "pp_size": 1 }, "head_size": 128, "intermediate_size": 14336, "position_embedding_type": "rope_gpt_neox", "share_embedding_table": false, "residual_mlp": false, "bias": false, "rotary_pct": 1.0, "rank": 0, "decoder": "llama", "rmsnorm": true, "lm_head_bias": false, "rotary_base": 500000.0, "rotary_scaling": { "factor": 8.0, "low_freq_factor": 1.0, "high_freq_factor": 4.0, "original_max_position_embeddings": 8192, "rope_type": "llama3" } }