{ "_name_or_path": "/checkpoints/fulasuya-sandbox/sft-checkpoint/", "architectures": [ "AlexaLlamaForCausalLM" ], "attention_dropout": 0.0, "bos_token_id": 108, "embedding_dropout": 0.0, "eos_token_id": 109, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 16384, "max_position_embeddings": 32768, "model_type": "alexallm-v2", "moe_ffn_hidden_size": null, "moe_frequency": 1, "moe_router_activation": "sigmoid", "moe_shared_experts": 0, "moe_topk": 1, "num_attention_heads": 32, "num_hidden_layers": 32, "num_moe_experts": 1, "num_query_groups": 8, "original_max_position_embeddings": 2048, "pad_token_id": 0, "position_abf_factor": 50, "position_interpolation_factor": 1.0, "residual_dropout": 0.0, "rms_norm_eps": 1e-06, "rope_state": null, "tie_word_embeddings": true, "torch_dtype": "bfloat16", "transformers_version": "4.41.2", "use_cache": true, "use_flash_attention": true, "use_flash_mlp": false, "vocab_size": 129832, "yarn_scale": 1.0 }