AFischer1985 committed on
Commit
509798d
1 Parent(s): 9b74782

fix indentation error

Files changed (1)
  1. run.py +67 -67
run.py CHANGED
@@ -215,77 +215,77 @@ def response(
         removeHTML=True # remove HTML-components from History (to prevent bugs with Markdown)
     )

-    ## Request response from model
-    #------------------------------

-    print("AI running on prem!" if(onPrem) else "AI running HFHub!")
-    print(prompt)
-    if(onPrem==False):
-        temperature=float(0.9)
-        max_new_tokens=1000
-        top_p=0.95
-        repetition_penalty=1.0
-        if temperature < 1e-2: temperature = 1e-2
-        top_p = float(top_p)
-        generate_kwargs = dict(
-            temperature=temperature,
-            max_new_tokens=max_new_tokens,
-            top_p=top_p,
-            repetition_penalty=repetition_penalty,
-            do_sample=True,
-            seed=42,
-        )
-        stream = client.text_generation(prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
-        response = ""
-        #print("User: "+message+"\nAI: ")
-        for text in stream:
-            part=text.token.text
-            #print(part, end="", flush=True)
-            response += part
-            if removeHTML==True: response = re.sub("<(.*?)>","\n", response) # remove HTML-components in general (may cause bugs with markdown-rendering)
-            yield response
-        if(True): #len(history)==0):
-            response=response+"\n\n<br><details open><summary><strong>Sources</strong></summary><br><ul>"+ "".join(["<li>" + s + "</li>" for s in combination])+"</ul></details>"
-            yield response
-
-    if(onPrem==True):
-        # url="https://afischer1985-wizardlm-13b-v1-2-q4-0-gguf.hf.space/v1/completions"
-        url="http://0.0.0.0:2600/v1/completions"
-        body={"prompt":prompt,"max_tokens":None, "echo":"False","stream":"True"} # e.g. Mixtral-Instruct
-        if("Discolm_german_7b" in modelPath): body.update({"stop": ["<|im_end|>"]}) # fix stop-token of DiscoLM
-        if("Gemma-" in modelPath): body.update({"stop": ["<|im_end|>","</end_of_turn>"]}) # fix stop-token of Gemma
-        response="" #+"("+myType+")\n"
-        buffer=""
-        #print("URL: "+url)
-        #print("User: "+message+"\nAI: ")
-        for text in requests.post(url, json=body, stream=True): #-H 'accept: application/json' -H 'Content-Type: application/json'
-            if buffer is None: buffer=""
-            buffer=str("".join(buffer))
-            # print("*** Raw String: "+str(text)+"\n***\n")
-            text=text.decode('utf-8')
-            if((text.startswith(": ping -")==False) & (len(text.strip("\n\r"))>0)): buffer=buffer+str(text)
-            # print("\n*** Buffer: "+str(buffer)+"\n***\n")
-            buffer=buffer.split('"finish_reason": null}]}')
-            if(len(buffer)==1):
-                buffer="".join(buffer)
-                pass
-            if(len(buffer)==2):
-                part=buffer[0]+'"finish_reason": null}]}'
-                if(part.lstrip('\n\r').startswith("data: ")): part=part.lstrip('\n\r').replace("data: ", "")
-                try:
-                    part = str(json.loads(part)["choices"][0]["text"])
                     #print(part, end="", flush=True)
-                    response=response+part
-                    buffer="" # reset buffer
-                except Exception as e:
-                    print("Exception:"+str(e))
                     pass
-            if removeHTML==True: response = re.sub("<(.*?)>","\n", response) # remove HTML-components in general (may cause bugs with markdown-rendering)
             yield response
-        if(True): #len(history)==0):
-            response=response+"\n\n<br><details open><summary><strong>Sources</strong></summary><br><ul>"+ "".join(["<li>" + s + "</li>" for s in combination])+"</ul></details>"
-            yield response
-        #history.append((message, response)) # add current dialog to history
+    ## Request response from model
+    #------------------------------

+    print("AI running on prem!" if(onPrem) else "AI running HFHub!")
+    print(prompt)
+    if(onPrem==False):
+        temperature=float(0.9)
+        max_new_tokens=1000
+        top_p=0.95
+        repetition_penalty=1.0
+        if temperature < 1e-2: temperature = 1e-2
+        top_p = float(top_p)
+        generate_kwargs = dict(
+            temperature=temperature,
+            max_new_tokens=max_new_tokens,
+            top_p=top_p,
+            repetition_penalty=repetition_penalty,
+            do_sample=True,
+            seed=42,
+        )
+        stream = client.text_generation(prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
+        response = ""
+        #print("User: "+message+"\nAI: ")
+        for text in stream:
+            part=text.token.text
             #print(part, end="", flush=True)
+            response += part
+            if removeHTML==True: response = re.sub("<(.*?)>","\n", response) # remove HTML-components in general (may cause bugs with markdown-rendering)
+            yield response
+        if(True): #len(history)==0):
+            response=response+"\n\n<br><details open><summary><strong>Sources</strong></summary><br><ul>"+ "".join(["<li>" + s + "</li>" for s in combination])+"</ul></details>"
+            yield response
+
+    if(onPrem==True):
+        # url="https://afischer1985-wizardlm-13b-v1-2-q4-0-gguf.hf.space/v1/completions"
+        url="http://0.0.0.0:2600/v1/completions"
+        body={"prompt":prompt,"max_tokens":None, "echo":"False","stream":"True"} # e.g. Mixtral-Instruct
+        if("Discolm_german_7b" in modelPath): body.update({"stop": ["<|im_end|>"]}) # fix stop-token of DiscoLM
+        if("Gemma-" in modelPath): body.update({"stop": ["<|im_end|>","</end_of_turn>"]}) # fix stop-token of Gemma
+        response="" #+"("+myType+")\n"
+        buffer=""
+        #print("URL: "+url)
+        #print("User: "+message+"\nAI: ")
+        for text in requests.post(url, json=body, stream=True): #-H 'accept: application/json' -H 'Content-Type: application/json'
+            if buffer is None: buffer=""
+            buffer=str("".join(buffer))
+            # print("*** Raw String: "+str(text)+"\n***\n")
+            text=text.decode('utf-8')
+            if((text.startswith(": ping -")==False) & (len(text.strip("\n\r"))>0)): buffer=buffer+str(text)
+            # print("\n*** Buffer: "+str(buffer)+"\n***\n")
+            buffer=buffer.split('"finish_reason": null}]}')
+            if(len(buffer)==1):
+                buffer="".join(buffer)
                pass
+            if(len(buffer)==2):
+                part=buffer[0]+'"finish_reason": null}]}'
+                if(part.lstrip('\n\r').startswith("data: ")): part=part.lstrip('\n\r').replace("data: ", "")
+                try:
+                    part = str(json.loads(part)["choices"][0]["text"])
+                    #print(part, end="", flush=True)
+                    response=response+part
+                    buffer="" # reset buffer
+                except Exception as e:
+                    print("Exception:"+str(e))
+                    pass
+            if removeHTML==True: response = re.sub("<(.*?)>","\n", response) # remove HTML-components in general (may cause bugs with markdown-rendering)
+            yield response
+        if(True): #len(history)==0):
+            response=response+"\n\n<br><details open><summary><strong>Sources</strong></summary><br><ul>"+ "".join(["<li>" + s + "</li>" for s in combination])+"</ul></details>"
             yield response
+        #history.append((message, response)) # add current dialog to history

 gr.ChatInterface(response, chatbot=gr.Chatbot(value=[[None,"Herzlich willkommen! Ich bin ein KI-basiertes Assistenzsystem, das für jede Anfrage die am besten geeigneten KI-Tools empfiehlt.<br>Aktuell bin ich wenig mehr als eine Tech-Demo und kenne nur 7 KI-Modelle - also sei bitte nicht zu streng mit mir.<br>Was ist dein Anliegen?"]],render_markdown=True),title="German AI-RAG-Interface to the Hugging Face Hub").queue().launch(share=True) #False, server_name="0.0.0.0", server_port=7864)
 print("Interface up and running!")