File size: 1,172 Bytes
98473ba d06efa1 98473ba d06efa1 98473ba d06efa1 98473ba d06efa1 98473ba d06efa1 98473ba d06efa1 98473ba d06efa1 98473ba 1a6990e 6cb671b 98473ba 1b3456c 98473ba c3303b5 98473ba d06efa1 98473ba 1b3456c d06efa1 98473ba d06efa1 98473ba |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 |
{
"_name_or_path": "hf-internal-testing/tiny-random-gpt2",
"activation_function": "gelu_new",
"attention_probs_dropout_prob": 0.1,
"attn_pdrop": 0.1,
"bos_token_id": 98,
"embd_pdrop": 0.1,
"eos_token_id": 98,
"gradient_checkpointing": false,
"hidden_act": "gelu",
"hidden_dropout_prob": 0.1,
"initializer_range": 0.02,
"intermediate_size": 37,
"layer_norm_epsilon": 1e-05,
"model_type": "gpt2",
"n_ctx": 512,
"n_embd": 32,
"n_head": 4,
"n_inner": null,
"n_layer": 5,
"n_positions": 512,
"neuron": {
"auto_cast_type": "fp32",
"batch_size": 16,
"compiler_type": "neuronx-cc",
"compiler_version": "2.12.54.0+f631c2365",
"num_cores": 2,
"sequence_length": 512,
"task": "text-generation"
},
"pad_token_id": 98,
"reorder_and_upcast_attn": false,
"resid_pdrop": 0.1,
"scale_attn_by_inverse_layer_idx": false,
"scale_attn_weights": true,
"summary_activation": null,
"summary_first_dropout": 0.1,
"summary_proj_to_labels": true,
"summary_type": "cls_index",
"summary_use_proj": true,
"transformers_version": "4.35.0",
"type_vocab_size": 16,
"use_cache": true,
"vocab_size": 1000
}
|