dh-mc committed on
Commit
734948a
1 Parent(s): 7d1a192

fixed bugs for tgi

Browse files
.env.example CHANGED
@@ -30,7 +30,7 @@ DISABLE_MODEL_PRELOADING=true
30
  CHAT_HISTORY_ENABLED=true
31
  SHOW_PARAM_SETTINGS=false
32
  SHARE_GRADIO_APP=false
33
- PDF_FILE_BASE_URL=https://ai-engd.netlify.app/pdfs/books/
34
 
35
  # if unset, default to "hkunlp/instructor-xl"
36
  HF_EMBEDDINGS_MODEL_NAME="hkunlp/instructor-large"
 
30
  CHAT_HISTORY_ENABLED=true
31
  SHOW_PARAM_SETTINGS=false
32
  SHARE_GRADIO_APP=false
33
+ PDF_FILE_BASE_URL=https://chat-with-llama-2.netlify.app/pdfs/books/
34
 
35
  # if unset, default to "hkunlp/instructor-xl"
36
  HF_EMBEDDINGS_MODEL_NAME="hkunlp/instructor-large"
app_modules/llm_inference.py CHANGED
@@ -51,7 +51,6 @@ class LLMInference(metaclass=abc.ABCMeta):
51
  streaming_handler,
52
  )
53
  if streaming_handler is not None
54
- and self.llm_loader.streamer.for_huggingface
55
  else chain(inputs)
56
  )
57
 
@@ -82,20 +81,23 @@ class LLMInference(metaclass=abc.ABCMeta):
82
  )
83
  t.start()
84
 
85
- count = (
86
- 2 if "chat_history" in inputs and len(inputs.get("chat_history")) > 0 else 1
87
- )
 
 
 
88
 
89
- while count > 0:
90
- try:
91
- for token in self.llm_loader.streamer:
92
- streaming_handler.on_llm_new_token(token)
93
 
94
- self.llm_loader.streamer.reset()
95
- count -= 1
96
- except Exception:
97
- print("nothing generated yet - retry in 0.5s")
98
- time.sleep(0.5)
99
 
100
  t.join()
101
  return que.get()
 
51
  streaming_handler,
52
  )
53
  if streaming_handler is not None
 
54
  else chain(inputs)
55
  )
56
 
 
81
  )
82
  t.start()
83
 
84
+ if self.llm_loader.streamer.for_huggingface:
85
+ count = (
86
+ 2
87
+ if "chat_history" in inputs and len(inputs.get("chat_history")) > 0
88
+ else 1
89
+ )
90
 
91
+ while count > 0:
92
+ try:
93
+ for token in self.llm_loader.streamer:
94
+ streaming_handler.on_llm_new_token(token)
95
 
96
+ self.llm_loader.streamer.reset()
97
+ count -= 1
98
+ except Exception:
99
+ print("nothing generated yet - retry in 0.5s")
100
+ time.sleep(0.5)
101
 
102
  t.join()
103
  return que.get()
app_modules/llm_loader.py CHANGED
@@ -66,9 +66,6 @@ class TextIteratorStreamer(TextStreamer, StreamingStdOutCallbackHandler):
66
  self.text_queue.put("\n", timeout=self.timeout)
67
  self.text_queue.put(self.stop_signal, timeout=self.timeout)
68
 
69
- def for_huggingface(self) -> bool:
70
- return self.tokenizer != ""
71
-
72
  def __iter__(self):
73
  return self
74
 
 
66
  self.text_queue.put("\n", timeout=self.timeout)
67
  self.text_queue.put(self.stop_signal, timeout=self.timeout)
68
 
 
 
 
69
  def __iter__(self):
70
  return self
71