ValueError: Non-consecutive added token '<unk>' found. Should have index 32000 but has index 0 in saved vocabulary.
#3
by
gqd
- opened
When trying to serve the model using tgi:
sudo docker run \
ghcr.io/huggingface/text-generation-inference:1.3.1 \
--model-id Open-Orca/Mistral-7B-SlimOrca
It throws:
2024-01-06T22:37:24.034017Z ERROR text_generation_launcher: Error when initializing model
Traceback (most recent call last):
File "/opt/conda/bin/text-generation-server", line 8, in <module>
sys.exit(app())
File "/opt/conda/lib/python3.10/site-packages/typer/main.py", line 311, in __call__
return get_command(self)(*args, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/click/core.py", line 1157, in __call__
return self.main(*args, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/typer/core.py", line 778, in main
return _main(
File "/opt/conda/lib/python3.10/site-packages/typer/core.py", line 216, in _main
rv = self.invoke(ctx)
File "/opt/conda/lib/python3.10/site-packages/click/core.py", line 1688, in invoke
return _process_result(sub_ctx.command.invoke(sub_ctx))
File "/opt/conda/lib/python3.10/site-packages/click/core.py", line 1434, in invoke
return ctx.invoke(self.callback, **ctx.params)
File "/opt/conda/lib/python3.10/site-packages/click/core.py", line 783, in invoke
return __callback(*args, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/typer/main.py", line 683, in wrapper
return callback(**use_params) # type: ignore
File "/opt/conda/lib/python3.10/site-packages/text_generation_server/cli.py", line 89, in serve
server.serve(
File "/opt/conda/lib/python3.10/site-packages/text_generation_server/server.py", line 215, in serve
asyncio.run(
File "/opt/conda/lib/python3.10/asyncio/runners.py", line 44, in run
return loop.run_until_complete(main)
File "/opt/conda/lib/python3.10/asyncio/base_events.py", line 636, in run_until_complete
self.run_forever()
File "/opt/conda/lib/python3.10/asyncio/base_events.py", line 603, in run_forever
self._run_once()
File "/opt/conda/lib/python3.10/asyncio/base_events.py", line 1909, in _run_once
handle._run()
File "/opt/conda/lib/python3.10/asyncio/events.py", line 80, in _run
self._context.run(self._callback, *self._args)
> File "/opt/conda/lib/python3.10/site-packages/text_generation_server/server.py", line 161, in serve_inner
model = get_model(
File "/opt/conda/lib/python3.10/site-packages/text_generation_server/models/__init__.py", line 299, in get_model
return FlashMistral(
File "/opt/conda/lib/python3.10/site-packages/text_generation_server/models/flash_mistral.py", line 424, in __init__
super(FlashMistral, self).__init__(
File "/opt/conda/lib/python3.10/site-packages/text_generation_server/models/flash_mistral.py", line 303, in __init__
tokenizer = LlamaTokenizerFast.from_pretrained(
File "/opt/conda/lib/python3.10/site-packages/transformers/tokenization_utils_base.py", line 1854, in from_pretrained
return cls._from_pretrained(
File "/opt/conda/lib/python3.10/site-packages/transformers/tokenization_utils_base.py", line 1886, in _from_pretrained
slow_tokenizer = (cls.slow_tokenizer_class)._from_pretrained(
File "/opt/conda/lib/python3.10/site-packages/transformers/tokenization_utils_base.py", line 2073, in _from_pretrained
raise ValueError(
ValueError: Non-consecutive added token '<unk>' found. Should have index 32000 but has index 0 in saved vocabulary.
Okay, got that solved by dropping all except the chat ml tokens from added_tokens.json
Now running into:
2024-01-06T22:55:40.604513Z ERROR text_generation_launcher: Error when initializing model
Traceback (most recent call last):
File "/opt/conda/bin/text-generation-server", line 8, in <module>
sys.exit(app())
File "/opt/conda/lib/python3.10/site-packages/typer/main.py", line 311, in __call__
return get_command(self)(*args, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/click/core.py", line 1157, in __call__
return self.main(*args, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/typer/core.py", line 778, in main
return _main(
File "/opt/conda/lib/python3.10/site-packages/typer/core.py", line 216, in _main
rv = self.invoke(ctx)
File "/opt/conda/lib/python3.10/site-packages/click/core.py", line 1688, in invoke
return _process_result(sub_ctx.command.invoke(sub_ctx))
File "/opt/conda/lib/python3.10/site-packages/click/core.py", line 1434, in invoke
return ctx.invoke(self.callback, **ctx.params)
File "/opt/conda/lib/python3.10/site-packages/click/core.py", line 783, in invoke
return __callback(*args, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/typer/main.py", line 683, in wrapper
return callback(**use_params) # type: ignore
File "/opt/conda/lib/python3.10/site-packages/text_generation_server/cli.py", line 89, in serve
server.serve(
File "/opt/conda/lib/python3.10/site-packages/text_generation_server/server.py", line 215, in serve
asyncio.run(
File "/opt/conda/lib/python3.10/asyncio/runners.py", line 44, in run
return loop.run_until_complete(main)
File "/opt/conda/lib/python3.10/asyncio/base_events.py", line 636, in run_until_complete
self.run_forever()
File "/opt/conda/lib/python3.10/asyncio/base_events.py", line 603, in run_forever
self._run_once()
File "/opt/conda/lib/python3.10/asyncio/base_events.py", line 1909, in _run_once
handle._run()
File "/opt/conda/lib/python3.10/asyncio/events.py", line 80, in _run
self._context.run(self._callback, *self._args)
> File "/opt/conda/lib/python3.10/site-packages/text_generation_server/server.py", line 161, in serve_inner
model = get_model(
File "/opt/conda/lib/python3.10/site-packages/text_generation_server/models/__init__.py", line 299, in get_model
return FlashMistral(
File "/opt/conda/lib/python3.10/site-packages/text_generation_server/models/flash_mistral.py", line 424, in __init__
super(FlashMistral, self).__init__(
File "/opt/conda/lib/python3.10/site-packages/text_generation_server/models/flash_mistral.py", line 303, in __init__
tokenizer = LlamaTokenizerFast.from_pretrained(
File "/opt/conda/lib/python3.10/site-packages/transformers/tokenization_utils_base.py", line 1854, in from_pretrained
return cls._from_pretrained(
File "/opt/conda/lib/python3.10/site-packages/transformers/tokenization_utils_base.py", line 2017, in _from_pretrained
tokenizer = cls(*init_inputs, **init_kwargs)
File "/opt/conda/lib/python3.10/site-packages/transformers/models/llama/tokenization_llama_fast.py", line 116, in __init__
super().__init__(
File "/opt/conda/lib/python3.10/site-packages/transformers/tokenization_utils_fast.py", line 110, in __init__
fast_tokenizer = TokenizerFast.from_file(fast_tokenizer_file)
Exception: No such file or directory (os error 2)
Which looks like it can be solved by also patching tokenizer_config.json seeing https://github.com/huggingface/transformers/pull/15319
Okay, can confirm that removing toikenizer_file
from tokenizer_config.json fixes it