sanghol committed on
Commit
2ca8459
1 Parent(s): 4f264c1

Update preprocessing_molmo.py (#11)

Browse files

- Update preprocessing_molmo.py (8e0256f86bfdb1368afbfd343d8b09bf2fe25d93)

Files changed (1) hide show
  1. preprocessing_molmo.py +9 -6
preprocessing_molmo.py CHANGED
@@ -23,7 +23,7 @@ from transformers.processing_utils import (
23
  ProcessorMixin,
24
  )
25
 
26
- from transformers.tokenization_utils_base import TextInput
27
  from transformers.utils import logging
28
 
29
  from transformers import AutoTokenizer
@@ -116,6 +116,8 @@ class MolmoProcessor(ProcessorMixin):
116
  self,
117
  text: TextInput = None,
118
  images: ImageInput = None,
 
 
119
  **kwargs: Unpack[MolmoProcessorKwargs],
120
  ):
121
  output_kwargs = self._merge_kwargs(
@@ -124,11 +126,12 @@ class MolmoProcessor(ProcessorMixin):
124
  **kwargs,
125
  )
126
 
127
- tokens = self.get_tokens_input(
128
- text,
129
- output_kwargs["text_kwargs"]["message_format"],
130
- output_kwargs["text_kwargs"]["always_start_with_space"],
131
- )
 
132
 
133
  image_token_id = self.special_token_ids[IMAGE_PROMPT]
134
 
 
23
  ProcessorMixin,
24
  )
25
 
26
+ from transformers.tokenization_utils_base import TextInput, PreTokenizedInput
27
  from transformers.utils import logging
28
 
29
  from transformers import AutoTokenizer
 
116
  self,
117
  text: TextInput = None,
118
  images: ImageInput = None,
119
+ *,
120
+ tokens: Optional[PreTokenizedInput] = None,
121
  **kwargs: Unpack[MolmoProcessorKwargs],
122
  ):
123
  output_kwargs = self._merge_kwargs(
 
126
  **kwargs,
127
  )
128
 
129
+ if tokens is None:
130
+ tokens = self.get_tokens_input(
131
+ text,
132
+ output_kwargs["text_kwargs"]["message_format"],
133
+ output_kwargs["text_kwargs"]["always_start_with_space"],
134
+ )
135
 
136
  image_token_id = self.special_token_ids[IMAGE_PROMPT]
137