yutong-dai committed on
Commit
099c0ff
1 Parent(s): 9d0dc73

update inference code to support transformers==4.41.1

Browse files
Files changed (2) hide show
  1. README.md +8 -2
  2. vlm.py +3 -9
README.md CHANGED
@@ -52,7 +52,7 @@ The model is for research purposes, more technical details will come with a tech
52
 
53
  # How to use
54
 
55
- > We require the use of the development version (`"4.41.0.dev0"`) of the `transformers` library. To get it, as of 05/07/2024, one can use `pip uninstall -y transformers && pip install git+https://github.com/huggingface/transformers.`
56
 
57
  ```python
58
  from transformers import AutoModelForVision2Seq, AutoTokenizer, AutoImageProcessor
@@ -149,4 +149,10 @@ pip install torch==2.2.1 torchvision==0.17.1 torchaudio==2.2.1 --index-url https
149
  pip install open_clip_torch==2.24.0
150
  pip install einops
151
  pip install einops-exts
152
- ```
 
 
 
 
 
 
 
52
 
53
  # How to use
54
 
55
+ ~~> We require the use of the development version (`"4.41.0.dev0"`) of the `transformers` library. To get it, as of 05/07/2024, one can use `pip uninstall -y transformers && pip install git+https://github.com/huggingface/transformers.`~~
56
 
57
  ```python
58
  from transformers import AutoModelForVision2Seq, AutoTokenizer, AutoImageProcessor
 
149
  pip install open_clip_torch==2.24.0
150
  pip install einops
151
  pip install einops-exts
152
+ pip install transformers==4.41.1
153
+ ```
154
+
155
+ # Changelog
156
+
157
+ * 05/24/2024
158
+ * update codebase to be compatible with `transformers==4.41.1`.
vlm.py CHANGED
@@ -10,6 +10,7 @@ from transformers.modeling_outputs import CausalLMOutputWithPast
10
  from dataclasses import dataclass
11
  from transformers import CLIPVisionModel
12
  import transformers
 
13
 
14
  from .utils import num_params, getattr_recursive, stack_with_padding, get_anyres_image_grid_shape, unpad_image
15
 
@@ -1289,8 +1290,7 @@ class Kosmos(VLMWithLanguageStream):
1289
  padding_side="left",
1290
  num_beams=num_beams,
1291
  )
1292
-
1293
- if transformers.__version__ == '4.41.0.dev0':
1294
  output = self.lang_model.generate(
1295
  **new_inputs,
1296
  num_beams=num_beams,
@@ -1298,11 +1298,5 @@ class Kosmos(VLMWithLanguageStream):
1298
  eos_token_id=self.end_of_trunk_token_id,
1299
  **kwargs)
1300
  else:
1301
- output = self.lang_model.generate(
1302
- **new_inputs,
1303
- past_key_values=past_key_values,
1304
- num_beams=num_beams,
1305
- use_cache=True,
1306
- eos_token_id=self.end_of_trunk_token_id,
1307
- **kwargs)
1308
  return output
 
10
  from dataclasses import dataclass
11
  from transformers import CLIPVisionModel
12
  import transformers
13
+ from packaging.version import Version
14
 
15
  from .utils import num_params, getattr_recursive, stack_with_padding, get_anyres_image_grid_shape, unpad_image
16
 
 
1290
  padding_side="left",
1291
  num_beams=num_beams,
1292
  )
1293
+ if Version(transformers.__version__) >= Version('4.41.1'):
 
1294
  output = self.lang_model.generate(
1295
  **new_inputs,
1296
  num_beams=num_beams,
 
1298
  eos_token_id=self.end_of_trunk_token_id,
1299
  **kwargs)
1300
  else:
1301
+ raise ValueError("Please upgrade transformers to version 4.41.1 or higher.")
 
 
 
 
 
 
1302
  return output