mgoin committed
Commit: eac389f
Parent: 0333dc3

Update README.md

Files changed (1): README.md (+4 -4)
README.md CHANGED
@@ -9,16 +9,15 @@ TBD
 https://github.com/vllm-project/llm-compressor/pull/185
 
 ```python
-from transformers import AutoProcessor
+from transformers import AutoProcessor, MllamaForConditionalGeneration
 
 from llmcompressor.modifiers.quantization import QuantizationModifier
-from llmcompressor.transformers import oneshot
-from llmcompressor.transformers.sparsification import create_sparse_auto_model_class
+from llmcompressor.transformers import oneshot, wrap_hf_model_class
 
 MODEL_ID = "meta-llama/Llama-3.2-11B-Vision-Instruct"
 
 # Load model.
-model_class = create_sparse_auto_model_class("MllamaForConditionalGeneration")
+model_class = wrap_hf_model_class(MllamaForConditionalGeneration)
 model = model_class.from_pretrained(MODEL_ID, device_map="auto", torch_dtype="auto")
 processor = AutoProcessor.from_pretrained(MODEL_ID)
@@ -35,6 +34,7 @@ recipe = QuantizationModifier(
 # Apply quantization and save to disk in compressed-tensors format.
 SAVE_DIR = MODEL_ID.split("/")[1] + "-FP8-Dynamic"
 oneshot(model=model, recipe=recipe, output_dir=SAVE_DIR)
+processor.save_pretrained(SAVE_DIR)
 
 # Confirm generations of the quantized model look sane.
 print("========== SAMPLE GENERATION ==============")