lvkaokao commited on
Commit
fae7b4e
1 Parent(s): 1ca0df5

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +20 -0
README.md CHANGED
@@ -156,6 +156,26 @@ So, the sum of 100, 520, and 60 is 680.
156
  """
157
  ```
158
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
  ### INT4 Inference with Transformers and Intel Extension for Transformers
160
  ```python
161
  from transformers import AutoTokenizer, TextStreamer
 
156
  """
157
  ```
158
 
159
+ ### BF16 Inference with Intel Extension for Transformers and Intel Extension for PyTorch
160
+ ```python
161
+ from transformers import AutoTokenizer, TextStreamer
162
+ import torch
163
+ from intel_extension_for_transformers.transformers import AutoModelForCausalLM
164
+ import intel_extension_for_pytorch as ipex
165
+
166
+ model_name = "Intel/neural-chat-7b-v3-2"
167
+ prompt = "Once upon a time, there existed a little girl,"
168
+
169
+ tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
170
+ inputs = tokenizer(prompt, return_tensors="pt").input_ids
171
+ streamer = TextStreamer(tokenizer)
172
+
173
+ model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16)
174
+ model = ipex.optimize(model.eval(), dtype=torch.bfloat16, inplace=True, level="O1", auto_kernel_selection=True)
175
+
176
+ outputs = model.generate(inputs, streamer=streamer, max_new_tokens=300)
177
+ ```
178
+
179
  ### INT4 Inference with Transformers and Intel Extension for Transformers
180
  ```python
181
  from transformers import AutoTokenizer, TextStreamer