Update README.md
README.md CHANGED
@@ -181,6 +181,27 @@ So, the sum of 100, 520, and 60 is 680.
 """
 ```
 
+### BF16 Inference with Intel Extension for Transformers and Intel Extension for PyTorch
+```python
+from transformers import AutoTokenizer, TextStreamer
+import torch
+from intel_extension_for_transformers.transformers import AutoModelForCausalLM
+import intel_extension_for_pytorch as ipex
+
+model_name = "Intel/neural-chat-7b-v3"
+prompt = "Once upon a time, there existed a little girl,"
+
+tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+inputs = tokenizer(prompt, return_tensors="pt").input_ids
+streamer = TextStreamer(tokenizer)
+
+model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16)
+model = ipex.optimize(model.eval(), dtype=torch.bfloat16, inplace=True, level="O1", auto_kernel_selection=True)
+
+outputs = model.generate(inputs, streamer=streamer, max_new_tokens=300)
+```
+
+
 ### INT4 Inference with Transformers and Intel Extension for Transformers
 ```python
 from transformers import AutoTokenizer, TextStreamer
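
The hunk's trailing context stops after the first line of the existing INT4 section, so the body of that example is not visible in this diff. For orientation, here is a minimal sketch of what an INT4 weight-only-quantization run with intel_extension_for_transformers typically looks like, modeled on the BF16 example above; the `load_in_4bit=True` argument is assumed from that library's 4-bit loading path and is not taken from this commit.

```python
# Hypothetical INT4 sketch, mirroring the BF16 example above; not the
# README's actual INT4 section, which lies beyond this diff's context.
from transformers import AutoTokenizer, TextStreamer
from intel_extension_for_transformers.transformers import AutoModelForCausalLM

model_name = "Intel/neural-chat-7b-v3"
prompt = "Once upon a time, there existed a little girl,"

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
inputs = tokenizer(prompt, return_tensors="pt").input_ids
streamer = TextStreamer(tokenizer)

# load_in_4bit=True is an assumption: intel_extension_for_transformers
# quantizes the weights to INT4 at load time (weight-only quantization),
# shrinking them to roughly a quarter of their BF16 footprint.
model = AutoModelForCausalLM.from_pretrained(model_name, load_in_4bit=True)
outputs = model.generate(inputs, streamer=streamer, max_new_tokens=300)
```

Weight-only quantization packs only the weights into 4-bit while activations stay in floating point, which is why this sketch has no `ipex.optimize` step: the saving comes from the compressed weights rather than from BF16 kernel selection.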