Update README.md
Browse files
README.md
CHANGED
@@ -67,10 +67,10 @@ model.eval();
|
|
67 |
# model = torch.compile(model)
|
68 |
|
69 |
#Streaming Inference
|
70 |
-
import torch
|
71 |
from threading import Thread
|
72 |
|
73 |
-
def chat_processor(chat, max_new_tokens=100, do_sample=True):
|
74 |
tokenizer.use_default_system_prompt = False
|
75 |
streamer = transformers.TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
|
76 |
|
|
|
67 |
# model = torch.compile(model)
|
68 |
|
69 |
#Streaming Inference
|
70 |
+
import torch, transformers
|
71 |
from threading import Thread
|
72 |
|
73 |
+
def chat_processor(chat, max_new_tokens=100, do_sample=True, device='cuda'):
|
74 |
tokenizer.use_default_system_prompt = False
|
75 |
streamer = transformers.TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
|
76 |
|