Upload folder using huggingface_hub
modeling_internvl_chat.py · CHANGED · +6 −11
@@ -26,7 +26,7 @@ logger = logging.get_logger(__name__)
 class InternVLChatModel(PreTrainedModel):
     config_class = InternVLChatConfig
     main_input_name = 'pixel_values'
-    _no_split_modules = ['InternVisionEncoderLayer', 'LlamaDecoderLayer']
+    _no_split_modules = ['InternVisionEncoderLayer', 'LlamaDecoderLayer', 'InternLM2DecoderLayer']
 
     def __init__(self, config: InternVLChatConfig, vision_model=None, language_model=None):
         super().__init__(config)
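Listing InternLM2DecoderLayer in _no_split_modules matters when the checkpoint is loaded with device_map='auto': accelerate consults this list so that no single decoder layer's weights are sharded across two devices. A minimal sketch of such a load (requires accelerate installed); the checkpoint ID is illustrative:

import torch
from transformers import AutoModel

# device_map='auto' spreads layers across available GPUs, but never splits a
# module whose class is named in _no_split_modules (e.g. one
# InternLM2DecoderLayer) across two devices.
model = AutoModel.from_pretrained(
    'OpenGVLab/InternVL-Chat-V1-5',  # illustrative checkpoint built on this class
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map='auto',
)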
@@ -237,10 +237,6 @@ class InternVLChatModel(PreTrainedModel):
             raise NotImplementedError
         img_context_token_id = tokenizer.convert_tokens_to_ids(IMG_CONTEXT_TOKEN)
         self.img_context_token_id = img_context_token_id
-        if tokenizer.convert_tokens_to_ids('<|im_end|>') != 0:
-            eos_token_id = tokenizer.convert_tokens_to_ids('<|im_end|>')  # 92542, InternLM2
-        else:
-            eos_token_id = tokenizer.eos_token_id
 
         from .conversation import get_conv_template
 
@@ -259,6 +255,7 @@ class InternVLChatModel(PreTrainedModel):
         model_inputs = tokenizer(queries, return_tensors='pt', padding=True)
         input_ids = model_inputs['input_ids'].cuda()
         attention_mask = model_inputs['attention_mask'].cuda()
+        eos_token_id = tokenizer.convert_tokens_to_ids(template.sep)
         generation_config['eos_token_id'] = eos_token_id
 
         generation_output = self.generate(
@@ -268,7 +265,7 @@ class InternVLChatModel(PreTrainedModel):
             **generation_config
         )
         responses = tokenizer.batch_decode(generation_output, skip_special_tokens=True)
-        responses = [response.split(
+        responses = [response.split(template.sep)[0].strip() for response in responses]
         return responses
 
     def chat(self, tokenizer, pixel_values, question, generation_config, history=None, return_history=False,
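The hunks above replace the hard-coded '<|im_end|>' stop token (with its id-0 fallback) by deriving the EOS id from the active conversation template's separator, so the same code serves both LLaMA- and InternLM2-based backends. A self-contained sketch of the new derivation; SimpleNamespace stands in for the object returned by this repo's get_conv_template, and only its .sep attribute is used:

from types import SimpleNamespace

from transformers import AutoTokenizer

# Illustrative tokenizer whose vocabulary contains '<|im_end|>'.
tokenizer = AutoTokenizer.from_pretrained('internlm/internlm2-chat-7b',
                                          trust_remote_code=True)

# Stand-in for get_conv_template(...): the separator that closes a turn.
template = SimpleNamespace(sep='<|im_end|>')

# New behavior: the EOS id follows whatever template is active instead of
# being pinned to InternLM2's 92542.
eos_token_id = tokenizer.convert_tokens_to_ids(template.sep)

# The decoded output is trimmed at the same separator.
decoded = 'The image shows a red panda.<|im_end|>'  # illustrative raw decode
response = decoded.split(template.sep)[0].strip()
print(eos_token_id, repr(response))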
@@ -276,10 +273,6 @@ class InternVLChatModel(PreTrainedModel):
 
         img_context_token_id = tokenizer.convert_tokens_to_ids(IMG_CONTEXT_TOKEN)
         self.img_context_token_id = img_context_token_id
-        if tokenizer.convert_tokens_to_ids('<|im_end|>') != 0:
-            eos_token_id = tokenizer.convert_tokens_to_ids('<|im_end|>')  # 92542, InternLM2
-        else:
-            eos_token_id = tokenizer.eos_token_id
 
         from .conversation import get_conv_template
 
@@ -300,7 +293,9 @@ class InternVLChatModel(PreTrainedModel):
         model_inputs = tokenizer(query, return_tensors='pt')
         input_ids = model_inputs['input_ids'].cuda()
         attention_mask = model_inputs['attention_mask'].cuda()
+        eos_token_id = tokenizer.convert_tokens_to_ids(template.sep)
         generation_config['eos_token_id'] = eos_token_id
+
         generation_output = self.generate(
             pixel_values=pixel_values,
             input_ids=input_ids,
@@ -308,7 +303,7 @@ class InternVLChatModel(PreTrainedModel):
             **generation_config
         )
         response = tokenizer.batch_decode(generation_output, skip_special_tokens=True)[0]
-        response = response.split(
+        response = response.split(template.sep)[0].strip()
         history.append((question, response))
         if return_history:
             return response, history
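With both methods patched, callers no longer set eos_token_id themselves; chat() fills it in from the template before calling generate(). A hedged usage sketch, assuming model and tokenizer were loaded as in the first sketch above and that pixel_values has already been produced by the repo's image preprocessing:

generation_config = dict(max_new_tokens=512, do_sample=False)

# pixel_values: (num_patches, 3, H, W) bfloat16 CUDA tensor, assumed prepared
# upstream by the repo's preprocessing; chat() now derives the stop token from
# its conversation template internally.
question = 'Describe the image in detail.'
response, history = model.chat(tokenizer, pixel_values, question,
                               generation_config, history=None,
                               return_history=True)
print(response)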
|