yutong-dai
commited on
Commit
•
099c0ff
1
Parent(s):
9d0dc73
update inference code to support transformers==4.41.1
Browse files
README.md
CHANGED
@@ -52,7 +52,7 @@ The model is for research purposes, more technical details will come with a tech
|
|
52 |
|
53 |
# How to use
|
54 |
|
55 |
-
|
56 |
|
57 |
```python
|
58 |
from transformers import AutoModelForVision2Seq, AutoTokenizer, AutoImageProcessor
|
@@ -149,4 +149,10 @@ pip install torch==2.2.1 torchvision==0.17.1 torchaudio==2.2.1 --index-url https
|
|
149 |
pip install open_clip_torch==2.24.0
|
150 |
pip install einops
|
151 |
pip install einops-exts
|
152 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
|
53 |
# How to use
|
54 |
|
55 |
+
~~> We require the use of the development version (`"4.41.0.dev0"`) of the `transformers` library. To get it, as of 05/07/2024, one can use `pip uninstall -y transformers && pip install git+https://github.com/huggingface/transformers`.~~
|
56 |
|
57 |
```python
|
58 |
from transformers import AutoModelForVision2Seq, AutoTokenizer, AutoImageProcessor
|
|
|
149 |
pip install open_clip_torch==2.24.0
|
150 |
pip install einops
|
151 |
pip install einops-exts
|
152 |
+
pip install transformers==4.41.1
|
153 |
+
```
|
154 |
+
|
155 |
+
# Changelog
|
156 |
+
|
157 |
+
* 05/24/2024
|
158 |
+
* update codebase to be compatible with `transformers==4.41.1`.
|
vlm.py
CHANGED
@@ -10,6 +10,7 @@ from transformers.modeling_outputs import CausalLMOutputWithPast
|
|
10 |
from dataclasses import dataclass
|
11 |
from transformers import CLIPVisionModel
|
12 |
import transformers
|
|
|
13 |
|
14 |
from .utils import num_params, getattr_recursive, stack_with_padding, get_anyres_image_grid_shape, unpad_image
|
15 |
|
@@ -1289,8 +1290,7 @@ class Kosmos(VLMWithLanguageStream):
|
|
1289 |
padding_side="left",
|
1290 |
num_beams=num_beams,
|
1291 |
)
|
1292 |
-
|
1293 |
-
if transformers.__version__ == '4.41.0.dev0':
|
1294 |
output = self.lang_model.generate(
|
1295 |
**new_inputs,
|
1296 |
num_beams=num_beams,
|
@@ -1298,11 +1298,5 @@ class Kosmos(VLMWithLanguageStream):
|
|
1298 |
eos_token_id=self.end_of_trunk_token_id,
|
1299 |
**kwargs)
|
1300 |
else:
|
1301 |
-
|
1302 |
-
**new_inputs,
|
1303 |
-
past_key_values=past_key_values,
|
1304 |
-
num_beams=num_beams,
|
1305 |
-
use_cache=True,
|
1306 |
-
eos_token_id=self.end_of_trunk_token_id,
|
1307 |
-
**kwargs)
|
1308 |
return output
|
|
|
10 |
from dataclasses import dataclass
|
11 |
from transformers import CLIPVisionModel
|
12 |
import transformers
|
13 |
+
from packaging.version import Version
|
14 |
|
15 |
from .utils import num_params, getattr_recursive, stack_with_padding, get_anyres_image_grid_shape, unpad_image
|
16 |
|
|
|
1290 |
padding_side="left",
|
1291 |
num_beams=num_beams,
|
1292 |
)
|
1293 |
+
if Version(transformers.__version__) >= Version('4.41.1'):
|
|
|
1294 |
output = self.lang_model.generate(
|
1295 |
**new_inputs,
|
1296 |
num_beams=num_beams,
|
|
|
1298 |
eos_token_id=self.end_of_trunk_token_id,
|
1299 |
**kwargs)
|
1300 |
else:
|
1301 |
+
raise ValueError("Please upgrade transformers to version 4.41.1 or higher.")
|
|
|
|
|
|
|
|
|
|
|
|
|
1302 |
return output
|