mobiuslabsgmbh
/

Llama-2-7b-chat-hf_1bitgs8_hqq

Text Generation

Model card Files Files and versions Community

mobicham committed on Mar 25

Commit

f0c7a8b

•

1 Parent(s): 092838f

Update README.md

Files changed (1) hide show

README.md +2 -2

README.md CHANGED Viewed

@@ -67,10 +67,10 @@ model.eval();
 # model = torch.compile(model)
 #Streaming Inference
-import torch
 from threading import Thread
-def chat_processor(chat, max_new_tokens=100, do_sample=True):
     tokenizer.use_default_system_prompt = False
     streamer = transformers.TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)

 # model = torch.compile(model)
 #Streaming Inference
+import torch, transformers
 from threading import Thread
+def chat_processor(chat, max_new_tokens=100, do_sample=True, device='cuda'):
     tokenizer.use_default_system_prompt = False
     streamer = transformers.TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)