add missing attributes from llama
configuration_aquila.py CHANGED (+10 -0)
@@ -92,6 +92,10 @@ class AquilaConfig(PretrainedConfig):
         bos_token_id=1,
         eos_token_id=2,
         tie_word_embeddings=False,
+        num_key_value_heads=None,
+        rope_theta=10000.0,
+        rope_scaling=None,
+        pretraining_tp=1,
         **kwargs,
     ):
         self.vocab_size = vocab_size
@@ -100,10 +104,16 @@ class AquilaConfig(PretrainedConfig):
         self.intermediate_size = intermediate_size
         self.num_hidden_layers = num_hidden_layers
         self.num_attention_heads = num_attention_heads
+        if num_key_value_heads is None:
+            num_key_value_heads = num_attention_heads
+        self.num_key_value_heads = num_key_value_heads
         self.hidden_act = hidden_act
         self.initializer_range = initializer_range
         self.rms_norm_eps = rms_norm_eps
         self.use_cache = use_cache
+        self.rope_theta = rope_theta
+        self.rope_scaling = rope_scaling
+        self.pretraining_tp = pretraining_tp
         super().__init__(
             pad_token_id=pad_token_id,
             bos_token_id=bos_token_id,
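For reference, a minimal sketch of how the four new arguments could be exercised once this change lands. The argument names come from the diff above; the import path and the concrete values (head count, scaling factor) are illustrative assumptions, not defaults shipped with this model.

# Hypothetical usage sketch: names follow the diff above, values are illustrative.
from configuration_aquila import AquilaConfig  # assumed local import path

config = AquilaConfig(
    num_key_value_heads=8,   # fewer KV heads than attention heads, i.e. grouped-query attention
    rope_theta=10000.0,      # rotary position embedding base frequency
    rope_scaling={"type": "linear", "factor": 2.0},  # optional RoPE scaling dict, as in LlamaConfig
    pretraining_tp=1,        # tensor-parallelism degree used during pretraining
)

# If num_key_value_heads is omitted, the new __init__ falls back to
# num_attention_heads, i.e. plain multi-head attention.
print(config.num_key_value_heads, config.rope_theta)

Defaulting num_key_value_heads to num_attention_heads keeps existing Aquila checkpoints, which predate this change, loading with unchanged multi-head attention behaviour.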