ValueError: If `eos_token_id` is defined, make sure that `pad_token_id` is defined.
#5
by
Ayenem
- opened
Traceback (most recent call last):
File ".../demo.py", line 168, in docker_pipeline
llm_output = rag_chain(instruction)
^^^^^^^^^^^^^^^^^^^^^^
File ".../langchain/chains/base.py", line 306, in __call__
raise e
File ".../langchain/chains/base.py", line 300, in __call__
self._call(inputs, run_manager=run_manager)
File ".../langchain/chains/retrieval_qa/base.py", line 139, in _call
answer = self.combine_documents_chain.run(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File ".../langchain/chains/base.py", line 506, in run
return self(kwargs, callbacks=callbacks, tags=tags, metadata=metadata)[
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File ".../langchain/chains/base.py", line 306, in __call__
raise e
File ".../langchain/chains/base.py", line 300, in __call__
self._call(inputs, run_manager=run_manager)
File ".../langchain/chains/combine_documents/base.py", line 119, in _call
output, extra_return_dict = self.combine_docs(
^^^^^^^^^^^^^^^^^^
File ".../langchain/chains/combine_documents/stuff.py", line 171, in combine_docs
return self.llm_chain.predict(callbacks=callbacks, **inputs), {}
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File ".../langchain/chains/llm.py", line 257, in predict
return self(kwargs, callbacks=callbacks)[self.output_key]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File ".../langchain/chains/base.py", line 306, in __call__
raise e
File ".../langchain/chains/base.py", line 300, in __call__
self._call(inputs, run_manager=run_manager)
File ".../langchain/chains/llm.py", line 93, in _call
response = self.generate([inputs], run_manager=run_manager)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File ".../langchain/chains/llm.py", line 103, in generate
return self.llm.generate_prompt(
^^^^^^^^^^^^^^^^^^^^^^^^^
File ".../langchain/llms/base.py", line 498, in generate_prompt
return self.generate(prompt_strings, stop=stop, callbacks=callbacks, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File ".../langchain/llms/base.py", line 647, in generate
output = self._generate_helper(
^^^^^^^^^^^^^^^^^^^^^^
File ".../langchain/llms/base.py", line 535, in _generate_helper
raise e
File ".../langchain/llms/base.py", line 522, in _generate_helper
self._generate(
File ".../langchain/llms/huggingface_pipeline.py", line 183, in _generate
responses = self.pipeline(batch_prompts)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File ".../transformers/pipelines/text_generation.py", line 208, in __call__
return super().__call__(text_inputs, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File ".../transformers/pipelines/base.py", line 1121, in __call__
outputs = list(final_iterator)
^^^^^^^^^^^^^^^^^^^^
File ".../transformers/pipelines/pt_utils.py", line 124, in __next__
item = next(self.iterator)
^^^^^^^^^^^^^^^^^^^
File ".../transformers/pipelines/pt_utils.py", line 125, in __next__
processed = self.infer(item, **self.params)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File ".../transformers/pipelines/base.py", line 1046, in forward
model_outputs = self._forward(model_inputs, **forward_params)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File ".../transformers/pipelines/text_generation.py", line 271, in _forward
generated_sequence = self.model.generate(input_ids=input_ids, attention_mask=attention_mask, **generate_kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File ".../torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File ".../transformers/generation/utils.py", line 1652, in generate
return self.sample(
^^^^^^^^^^^^
File ".../transformers/generation/utils.py", line 2775, in sample
raise ValueError("If `eos_token_id` is defined, make sure that `pad_token_id` is defined.")
ValueError: If `eos_token_id` is defined, make sure that `pad_token_id` is defined.
I looked up https://huggingface.co/TheBloke/Mistral-7B-OpenOrca-GPTQ/blob/main/tokenizer_config.json#L52 and the pad token is indeed null.
Is that what it's supposed to be? And if so, how can I deal with this error?
model.generate(**input_ids, pad_token_id=tokenizer.eos_token_id, ...)
Have you tried replacing the pad token with the EOS token? This should work.