File size: 1,172 Bytes
98473ba
d06efa1
98473ba
d06efa1
98473ba
d06efa1
98473ba
d06efa1
 
 
 
98473ba
d06efa1
98473ba
 
d06efa1
 
 
98473ba
d06efa1
 
98473ba
1a6990e
6cb671b
98473ba
1b3456c
98473ba
c3303b5
98473ba
 
d06efa1
98473ba
 
 
 
 
 
 
 
 
1b3456c
d06efa1
98473ba
d06efa1
98473ba
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
{
  "_name_or_path": "hf-internal-testing/tiny-random-gpt2",
  "activation_function": "gelu_new",
  "attention_probs_dropout_prob": 0.1,
  "attn_pdrop": 0.1,
  "bos_token_id": 98,
  "embd_pdrop": 0.1,
  "eos_token_id": 98,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "initializer_range": 0.02,
  "intermediate_size": 37,
  "layer_norm_epsilon": 1e-05,
  "model_type": "gpt2",
  "n_ctx": 512,
  "n_embd": 32,
  "n_head": 4,
  "n_inner": null,
  "n_layer": 5,
  "n_positions": 512,
  "neuron": {
    "auto_cast_type": "fp32",
    "batch_size": 16,
    "compiler_type": "neuronx-cc",
    "compiler_version": "2.12.54.0+f631c2365",
    "num_cores": 2,
    "sequence_length": 512,
    "task": "text-generation"
  },
  "pad_token_id": 98,
  "reorder_and_upcast_attn": false,
  "resid_pdrop": 0.1,
  "scale_attn_by_inverse_layer_idx": false,
  "scale_attn_weights": true,
  "summary_activation": null,
  "summary_first_dropout": 0.1,
  "summary_proj_to_labels": true,
  "summary_type": "cls_index",
  "summary_use_proj": true,
  "transformers_version": "4.35.0",
  "type_vocab_size": 16,
  "use_cache": true,
  "vocab_size": 1000
}