Use model config directly
(No need to download the entire pytorch/safetensors model, since we only use the config.)
README.md CHANGED
```diff
@@ -28,24 +28,22 @@ import requests
 import onnxruntime as ort
 from PIL import Image
 from io import BytesIO
-from transformers import Qwen2VLForConditionalGeneration, AutoTokenizer
+from transformers import Qwen2VLConfig, AutoTokenizer
 
 # Command line arguments
 model_path = sys.argv[1]
 onnx_path = sys.argv[2]
 
-# Initialize model and tokenizer
-model = Qwen2VLForConditionalGeneration.from_pretrained(
-    model_path, torch_dtype=torch.float32, device_map='mps'
-)
+# Initialize model config and tokenizer
+model_config = Qwen2VLConfig.from_pretrained(model_path)
 tokenizer = AutoTokenizer.from_pretrained(model_path)
 
 # Model configuration
 max_length = 1024
-num_attention_heads = model.config.num_attention_heads
-num_key_value_heads = model.config.num_key_value_heads
-head_dim = model.config.hidden_size // num_attention_heads
-num_layers = model.config.num_hidden_layers
+num_attention_heads = model_config.num_attention_heads
+num_key_value_heads = model_config.num_key_value_heads
+head_dim = model_config.hidden_size // num_attention_heads
+num_layers = model_config.num_hidden_layers
 
 # Setup ONNX sessions
 session_options = ort.SessionOptions()
```
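For readers who want to check this behavior outside the README script, here is a minimal standalone sketch. It uses the generic `AutoConfig` loader rather than `Qwen2VLConfig` (either resolves to the same config for a Qwen2-VL checkpoint), and the repo id shown is an illustrative assumption, not something named in this commit.

```python
from transformers import AutoConfig

# Illustrative repo id (an assumption; substitute your own model path).
model_path = "Qwen/Qwen2-VL-2B-Instruct"

# from_pretrained on a config class fetches only config.json (a few KB);
# none of the pytorch/safetensors weight shards are downloaded.
config = AutoConfig.from_pretrained(model_path)

num_attention_heads = config.num_attention_heads  # query heads
num_key_value_heads = config.num_key_value_heads  # GQA: fewer KV heads than query heads
head_dim = config.hidden_size // num_attention_heads
num_layers = config.num_hidden_layers

print(num_attention_heads, num_key_value_heads, head_dim, num_layers)
```

This is also why the change fits the ONNX workflow: the exported graph already contains the weights, so loading the original PyTorch model served no purpose beyond exposing its config.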