Upload modeling_arctic.py with huggingface_hub
Browse files · modeling_arctic.py (+2 −2)
modeling_arctic.py
CHANGED
@@ -56,7 +56,7 @@ from transformers.utils import (
|
|
56 |
)
|
57 |
from transformers.utils.import_utils import is_torch_fx_available
|
58 |
from .configuration_arctic import ArcticConfig
|
59 |
-
from transformers.integrations.deepspeed import is_deepspeed_available
|
60 |
from transformers.utils.versions import require_version
|
61 |
|
62 |
if is_deepspeed_available():
|
@@ -354,7 +354,7 @@ class ArcticAttention(nn.Module):
|
|
354 |
ds_optimized_quantization_config=quantization_config,
|
355 |
ds_optimized_base_weight_sharding=True,
|
356 |
dtype=torch.bfloat16)
|
357 |
-
self.o_proj = get_arctic_linear(self.hidden_size, self.hidden_size, bias=False,
|
358 |
use_deepspeed_implementation=self.use_deepspeed_implementation,
|
359 |
ds_optimized_lora_config=deepspeed_lora_config,
|
360 |
ds_optimized_quantization_config=quantization_config,
|
|
|
56 |
)
|
57 |
from transformers.utils.import_utils import is_torch_fx_available
|
58 |
from .configuration_arctic import ArcticConfig
|
59 |
+
from transformers.integrations.deepspeed import is_deepspeed_available
|
60 |
from transformers.utils.versions import require_version
|
61 |
|
62 |
if is_deepspeed_available():
|
|
|
354 |
ds_optimized_quantization_config=quantization_config,
|
355 |
ds_optimized_base_weight_sharding=True,
|
356 |
dtype=torch.bfloat16)
|
357 |
+
self.o_proj = get_arctic_linear(self.hidden_size, self.hidden_size, bias=False,
|
358 |
use_deepspeed_implementation=self.use_deepspeed_implementation,
|
359 |
ds_optimized_lora_config=deepspeed_lora_config,
|
360 |
ds_optimized_quantization_config=quantization_config,
|