Getting an error when trying to use in vLLM

#4
by grhovo - opened

Hi there, I am getting this error when trying to use this model in vLLM:
Loading safetensors checkpoint shards: 0% Completed | 0/1 [00:00<?, ?it/s]
[rank0]: Traceback (most recent call last):
[rank0]:   File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
[rank0]:     return _run_code(code, main_globals, None,
[rank0]:   File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
[rank0]:     exec(code, run_globals)
[rank0]:   File "/usr/local/lib/python3.10/dist-packages/vllm/entrypoints/openai/api_server.py", line 370, in <module>
[rank0]:     asyncio.run(run_server(args))
[rank0]:   File "/usr/lib/python3.10/asyncio/runners.py", line 44, in run
[rank0]:     return loop.run_until_complete(main)
[rank0]:   File "/usr/lib/python3.10/asyncio/base_events.py", line 649, in run_until_complete
[rank0]:     return future.result()
[rank0]:   File "/usr/local/lib/python3.10/dist-packages/vllm/entrypoints/openai/api_server.py", line 342, in run_server
[rank0]:     async with build_async_engine_client(args) as async_engine_client:
[rank0]:   File "/usr/lib/python3.10/contextlib.py", line 199, in __aenter__
[rank0]:     return await anext(self.gen)
[rank0]:   File "/usr/local/lib/python3.10/dist-packages/vllm/entrypoints/openai/api_server.py", line 102, in build_async_engine_client
[rank0]:     async_engine_client = AsyncLLMEngine.from_engine_args(
[rank0]:   File "/usr/local/lib/python3.10/dist-packages/vllm/engine/async_llm_engine.py", line 471, in from_engine_args
[rank0]:     engine = cls(
[rank0]:   File "/usr/local/lib/python3.10/dist-packages/vllm/engine/async_llm_engine.py", line 381, in __init__
[rank0]:     self.engine = self._init_engine(*args, **kwargs)
[rank0]:   File "/usr/local/lib/python3.10/dist-packages/vllm/engine/async_llm_engine.py", line 552, in _init_engine
[rank0]:     return engine_class(*args, **kwargs)
[rank0]:   File "/usr/local/lib/python3.10/dist-packages/vllm/engine/llm_engine.py", line 249, in __init__
[rank0]:     self.model_executor = executor_class(
[rank0]:   File "/usr/local/lib/python3.10/dist-packages/vllm/executor/executor_base.py", line 47, in __init__
[rank0]:     self._init_executor()
[rank0]:   File "/usr/local/lib/python3.10/dist-packages/vllm/executor/gpu_executor.py", line 36, in _init_executor
[rank0]:     self.driver_worker.load_model()
[rank0]:   File "/usr/local/lib/python3.10/dist-packages/vllm/worker/worker.py", line 139, in load_model
[rank0]:     self.model_runner.load_model()
[rank0]:   File "/usr/local/lib/python3.10/dist-packages/vllm/worker/model_runner.py", line 722, in load_model
[rank0]:     self.model = get_model(model_config=self.model_config,
[rank0]:   File "/usr/local/lib/python3.10/dist-packages/vllm/model_executor/model_loader/__init__.py", line 21, in get_model
[rank0]:     return loader.load_model(model_config=model_config,
[rank0]:   File "/usr/local/lib/python3.10/dist-packages/vllm/model_executor/model_loader/loader.py", line 327, in load_model
[rank0]:     model.load_weights(
[rank0]:   File "/usr/local/lib/python3.10/dist-packages/vllm/model_executor/models/llama.py", line 513, in load_weights
[rank0]:     param = params_dict[name]
[rank0]: KeyError: 'model.layers.0.mlp.down_proj.weight'
Is there anything I'm missing?
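
The failing key comes from iterating over the tensors stored in the checkpoint, so a useful first step is to list those names and see whether they match what vLLM's Llama implementation expects. A minimal diagnostic sketch using the safetensors library, assuming the shard has been downloaded locally as model.safetensors (a placeholder path, not taken from this thread):

```python
# Minimal diagnostic sketch: print every tensor name stored in a safetensors shard.
# "model.safetensors" is a placeholder path; point it at your local copy.
from safetensors import safe_open

with safe_open("model.safetensors", framework="pt", device="cpu") as f:
    for name in f.keys():
        print(name)
```

If quantization-specific keys show up alongside the plain weight names (for example names containing .absmax or .quant_state), the checkpoint is quantized and the default vLLM weight loader will not line up with it.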

Unsloth AI org

You may have to ask the vLLM community for more info.
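
One thing worth checking before escalating: if this repo is a bitsandbytes 4-bit checkpoint (as many Unsloth "-bnb-4bit" uploads are), vLLM's default loader will not recognize its weights, and recent vLLM releases have to be told to use the bitsandbytes load path explicitly. A hedged sketch of the offline API, assuming a vLLM version with bitsandbytes support and the bitsandbytes package installed; the repo id below is illustrative, not confirmed from this thread:

```python
# Hedged sketch: load a bitsandbytes 4-bit checkpoint with a recent vLLM.
# Assumes the installed vLLM supports bitsandbytes; the model id is illustrative.
from vllm import LLM

llm = LLM(
    model="unsloth/llama-3-8b-bnb-4bit",  # illustrative repo id
    quantization="bitsandbytes",          # weights on disk are bnb-quantized
    load_format="bitsandbytes",           # use the bnb-aware weight loader
)
print(llm.generate("Hello!")[0].outputs[0].text)
```

Alternatively, merging the adapter into full 16-bit weights and serving that merged checkpoint sidesteps the quantized load path entirely.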
