KeyError: 'model.layers.10.mlp.down_proj.input_scale'

#4
by justinbahasa - opened

I got this error when using vLLM. It looks like there is a mismatch between the weight keys in Meta's original Llama 3.1 checkpoint and the keys in this FP8 checkpoint (the loader can't find `model.layers.10.mlp.down_proj.input_scale` in its parameter dict). How can I fix it?

Traceback (most recent call last):
  File "~/miniconda3/lib/python3.12/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "~/miniconda3/lib/python3.12/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "~/miniconda3/lib/python3.12/site-packages/vllm/engine/multiprocessing/engine.py", line 388, in run_mp_engine
    engine = MQLLMEngine.from_engine_args(engine_args=engine_args,
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "~/miniconda3/lib/python3.12/site-packages/vllm/engine/multiprocessing/engine.py", line 138, in from_engine_args
    return cls(
           ^^^^
  File "~/miniconda3/lib/python3.12/site-packages/vllm/engine/multiprocessing/engine.py", line 78, in __init__
    self.engine = LLMEngine(*args,
                  ^^^^^^^^^^^^^^^^
  File "~/miniconda3/lib/python3.12/site-packages/vllm/engine/llm_engine.py", line 325, in __init__
    self.model_executor = executor_class(
                          ^^^^^^^^^^^^^^^
  File "~/miniconda3/lib/python3.12/site-packages/vllm/executor/distributed_gpu_executor.py", line 26, in __init__
    super().__init__(*args, **kwargs)
  File "~/miniconda3/lib/python3.12/site-packages/vllm/executor/executor_base.py", line 47, in __init__
    self._init_executor()
  File "~/miniconda3/lib/python3.12/site-packages/vllm/executor/multiproc_gpu_executor.py", line 111, in _init_executor
    self._run_workers("load_model",
  File "~/miniconda3/lib/python3.12/site-packages/vllm/executor/multiproc_gpu_executor.py", line 185, in _run_workers
    driver_worker_output = driver_worker_method(*args, **kwargs)
                           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "~/miniconda3/lib/python3.12/site-packages/vllm/worker/worker.py", line 183, in load_model
    self.model_runner.load_model()
  File "~/miniconda3/lib/python3.12/site-packages/vllm/worker/model_runner.py", line 1016, in load_model
    self.model = get_model(model_config=self.model_config,
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "~/miniconda3/lib/python3.12/site-packages/vllm/model_executor/model_loader/__init__.py", line 19, in get_model
    return loader.load_model(model_config=model_config,
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "~/miniconda3/lib/python3.12/site-packages/vllm/model_executor/model_loader/loader.py", line 403, in load_model
    model.load_weights(self._get_all_weights(model_config, model))
  File "~/miniconda3/lib/python3.12/site-packages/vllm/model_executor/models/ultravox.py", line 502, in load_weights
    self.language_model.load_weights(weights_group["language_model"])
  File "~/miniconda3/lib/python3.12/site-packages/vllm/model_executor/models/llama.py", line 544, in load_weights
    param = params_dict[name]
            ~~~~~~~~~~~^^^^^^
KeyError: 'model.layers.10.mlp.down_proj.input_scale'
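For context, the traceback comes from the multiprocessing engine behind the API server, but the same `load_weights` path can be reproduced from the offline Python API. Below is a minimal sketch of the kind of load that triggers it; the checkpoint path and parallelism settings are placeholders, not my exact values:

```python
# Minimal reproduction sketch (placeholder paths/settings), loading an Ultravox
# checkpoint whose language model points at the FP8 Llama 3.1 weights.
from vllm import LLM, SamplingParams

llm = LLM(
    model="path/to/ultravox-llama-3.1-8b-fp8",  # placeholder checkpoint path
    quantization="fp8",                         # FP8 quantization; normally auto-detected from the config
    tensor_parallel_size=2,                     # multiproc GPU executor, as in the traceback above
)

# The KeyError is raised during LLM() construction, before generate() is ever reached.
outputs = llm.generate(["Hello"], SamplingParams(max_tokens=16))
print(outputs[0].outputs[0].text)
```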
