undefined symbol: ZN2at4_ops15sum_IntList_out4callERKNS_6TensorEN3c1016OptionalArrayRefIlEEbSt8optionalINS5_10ScalarTypeEERS2
I waited a week, ran the update on text-generation-webui, and got the same error. It seems to be something to do with flash-attn:
https://github.com/Dao-AILab/flash-attention/issues/784
Even with flash-attn 2.4.2, no go.
Hi. Without the stack trace, I have zero idea what that might mean.
Hi, I'm having the same issue. Here's the stack trace:

```
ImportError Traceback (most recent call last)
File /local_disk0/.ephemeral_nfs/envs/pythonEnv-76870aa0-982b-4bf0-966d-a0e6e689516d/lib/python3.10/site-packages/transformers/utils/import_utils.py:1472, in _LazyModule._get_module(self, module_name)
1471 try:
-> 1472 return importlib.import_module("." + module_name, self.__name__)
1473 except Exception as e:
File /usr/lib/python3.10/importlib/__init__.py:126, in import_module(name, package)
125 level += 1
--> 126 return _bootstrap._gcd_import(name[level:], package, level)
File <frozen importlib._bootstrap>:1050, in _gcd_import(name, package, level)
File <frozen importlib._bootstrap>:1027, in _find_and_load(name, import_)
File <frozen importlib._bootstrap>:1006, in _find_and_load_unlocked(name, import_)
File <frozen importlib._bootstrap>:688, in _load_unlocked(spec)
File <frozen importlib._bootstrap_external>:883, in exec_module(self, module)
File <frozen importlib._bootstrap>:241, in _call_with_frames_removed(f, *args, **kwds)
File /local_disk0/.ephemeral_nfs/envs/pythonEnv-76870aa0-982b-4bf0-966d-a0e6e689516d/lib/python3.10/site-packages/transformers/models/mixtral/modeling_mixtral.py:58
57 if is_flash_attn_2_available():
---> 58 from flash_attn import flash_attn_func, flash_attn_varlen_func
59 from flash_attn.bert_padding import index_first_axis, pad_input, unpad_input # noqa
File /databricks/python/lib/python3.10/site-packages/flash_attn/__init__.py:3
1 __version__ = "2.3.2"
----> 3 from flash_attn.flash_attn_interface import (
4 flash_attn_func,
5 flash_attn_kvpacked_func,
6 flash_attn_qkvpacked_func,
7 flash_attn_varlen_func,
8 flash_attn_varlen_kvpacked_func,
9 flash_attn_varlen_qkvpacked_func,
10 flash_attn_with_kvcache,
11 )
File /databricks/python/lib/python3.10/site-packages/flash_attn/flash_attn_interface.py:8
6 # isort: off
7 # We need to import the CUDA kernels after importing torch
----> 8 import flash_attn_2_cuda as flash_attn_cuda
10 # isort: on
ImportError: /databricks/python/lib/python3.10/site-packages/flash_attn_2_cuda.cpython-310-x86_64-linux-gnu.so: undefined symbol: _ZN2at4_ops9_pad_enum4callERKNS_6TensorEN3c108ArrayRefINS5_6SymIntEEElNS5_8optionalIdEE
The above exception was the direct cause of the following exception:
RuntimeError Traceback (most recent call last)
File <command-1921129686235558>, line 8
2 from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model
3 from transformers import (
4 AutoModelForCausalLM,
5 AutoTokenizer,
6 TrainingArguments,
7 )
----> 8 model = AutoModelForCausalLM.from_pretrained(
9 "ISTA-DASLab/Mixtral-8x7B-Instruct-v0_1-AQLM-2Bit-1x16-hf",
10 trust_remote_code=True, torch_dtype="auto", device_map="cuda", low_cpu_mem_usage=True
11
12 )
13 tokenizer = AutoTokenizer.from_pretrained("mistralai/Mixtral-8x7B-v0.1")
14 tokenizer.pad_token = tokenizer.eos_token
File /local_disk0/.ephemeral_nfs/envs/pythonEnv-76870aa0-982b-4bf0-966d-a0e6e689516d/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py:562, in _BaseAutoModelClass.from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
558 return model_class.from_pretrained(
559 pretrained_model_name_or_path, *model_args, config=config, **hub_kwargs, **kwargs
560 )
561 elif type(config) in cls._model_mapping.keys():
--> 562 model_class = _get_model_class(config, cls._model_mapping)
563 return model_class.from_pretrained(
564 pretrained_model_name_or_path, *model_args, config=config, **hub_kwargs, **kwargs
565 )
566 raise ValueError(
567 f"Unrecognized configuration class {config.__class__} for this kind of AutoModel: {cls.__name__}.\n"
568 f"Model type should be one of {', '.join(c.__name__ for c in cls._model_mapping.keys())}."
569 )
File /local_disk0/.ephemeral_nfs/envs/pythonEnv-76870aa0-982b-4bf0-966d-a0e6e689516d/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py:383, in _get_model_class(config, model_mapping)
382 def _get_model_class(config, model_mapping):
--> 383 supported_models = model_mapping[type(config)]
384 if not isinstance(supported_models, (list, tuple)):
385 return supported_models
File /local_disk0/.ephemeral_nfs/envs/pythonEnv-76870aa0-982b-4bf0-966d-a0e6e689516d/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py:734, in _LazyAutoMapping.__getitem__(self, key)
732 if model_type in self._model_mapping:
733 model_name = self._model_mapping[model_type]
--> 734 return self._load_attr_from_module(model_type, model_name)
736 # Maybe there was several model types associated with this config.
737 model_types = [k for k, v in self._config_mapping.items() if v == key.__name__]
File /local_disk0/.ephemeral_nfs/envs/pythonEnv-76870aa0-982b-4bf0-966d-a0e6e689516d/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py:748, in _LazyAutoMapping._load_attr_from_module(self, model_type, attr)
746 if module_name not in self._modules:
747 self._modules[module_name] = importlib.import_module(f".{module_name}", "transformers.models")
--> 748 return getattribute_from_module(self._modules[module_name], attr)
File /local_disk0/.ephemeral_nfs/envs/pythonEnv-76870aa0-982b-4bf0-966d-a0e6e689516d/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py:692, in getattribute_from_module(module, attr)
690 if isinstance(attr, tuple):
691 return tuple(getattribute_from_module(module, a) for a in attr)
--> 692 if hasattr(module, attr):
693 return getattr(module, attr)
694 # Some of the mappings have entries model_type -> object of another model type. In that case we try to grab the
695 # object at the top level.
File /local_disk0/.ephemeral_nfs/envs/pythonEnv-76870aa0-982b-4bf0-966d-a0e6e689516d/lib/python3.10/site-packages/transformers/utils/import_utils.py:1462, in _LazyModule.__getattr__(self, name)
1460 value = self._get_module(name)
1461 elif name in self._class_to_module.keys():
-> 1462 module = self._get_module(self._class_to_module[name])
1463 value = getattr(module, name)
1464 else:
File /local_disk0/.ephemeral_nfs/envs/pythonEnv-76870aa0-982b-4bf0-966d-a0e6e689516d/lib/python3.10/site-packages/transformers/utils/import_utils.py:1474, in _LazyModule._get_module(self, module_name)
1472 return importlib.import_module("." + module_name, self.__name__)
1473 except Exception as e:
-> 1474 raise RuntimeError(
1475 f"Failed to import {self.__name__}.{module_name} because of the following error (look up to see its"
1476 f" traceback):\n{e}"
1477 ) from e
RuntimeError: Failed to import transformers.models.mixtral.modeling_mixtral because of the following error (look up to see its traceback):
/databricks/python/lib/python3.10/site-packages/flash_attn_2_cuda.cpython-310-x86_64-linux-gnu.so: undefined symbol: _ZN2at4_ops9_pad_enum4callERKNS_6TensorEN3c108ArrayRefINS5_6SymIntEEElNS5_8optionalIdEE
```
@amrothemich Hi! It's obviously a flash_attn_2 import error. Looks like a CUDA version mismatch of sorts.
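A quick way to check for that kind of mismatch is to compare the torch build against the installed flash-attn wheel. Minimal sketch (it reads the wheel version from package metadata, since importing flash_attn directly would just re-raise the undefined-symbol error above; assumes the wheel was installed under its PyPI name, flash-attn):

```python
from importlib.metadata import version

import torch

# The undefined-symbol error usually means flash_attn_2_cuda was compiled
# against a different torch build than the one currently installed.
print("torch:", torch.__version__)
print("torch built with CUDA:", torch.version.cuda)
print("flash-attn wheel:", version("flash-attn"))

# If torch was upgraded after flash-attn was built, rebuilding the wheel
# against the current torch usually fixes it:
#   pip uninstall -y flash-attn
#   pip install flash-attn --no-build-isolation
```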
Thanks! What's weird is that I don't encounter it with your other one, BlackSamorez/Mixtral-8x7b-AQLM-2Bit-1x16-hf.
Okay, it looks like flash attention is not supported on V100s, which is what I'm using. Still confused as to why the other one works, though.
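For anyone else landing here, you can confirm this from the GPU's compute capability. Minimal sketch (the sm_80 floor is from the flash-attn README, which lists Ampere or newer):

```python
import torch

# V100s report compute capability sm_70; flash-attn 2.x requires sm_80
# (Ampere) or newer, so it should never be selected on this GPU at all.
major, minor = torch.cuda.get_device_capability()
print(f"compute capability: sm_{major}{minor}")   # V100 -> sm_70
print("flash-attn 2 supported:", (major, minor) >= (8, 0))
```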
It's weird that it's using flash attention at all. SDPA should be the default implementation.
I know, it looks like you've got the configs set up right. 🤷
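If it keeps selecting flash attention anyway, you can pin the backend explicitly. attn_implementation has been a from_pretrained kwarg since transformers 4.36; this sketch just reuses the call from your traceback:

```python
from transformers import AutoModelForCausalLM

# Pin the attention backend so transformers never tries to import flash-attn.
# "sdpa" uses torch.nn.functional.scaled_dot_product_attention, which runs
# fine on a V100.
model = AutoModelForCausalLM.from_pretrained(
    "ISTA-DASLab/Mixtral-8x7B-Instruct-v0_1-AQLM-2Bit-1x16-hf",
    trust_remote_code=True,
    torch_dtype="auto",
    device_map="cuda",
    low_cpu_mem_usage=True,
    attn_implementation="sdpa",  # or "eager" as a last resort
)
```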