fblgit committed
Commit 7678793
1 Parent(s): 78b965d

Update app.py

Files changed (1)
  1. app.py +69 -1
app.py CHANGED
@@ -1,3 +1,71 @@
+ import os
  import gradio as gr
+ import copy
+ import time
+ import llama_cpp
+ from llama_cpp import Llama
+ from huggingface_hub import hf_hub_download

- gr.load("models/fblgit/una-cybertron-7b-v2-bf16").launch()
+
+ # Download the GGUF weights from the Hub and load them with llama.cpp (CPU-only).
+ llm = Llama(
+     model_path=hf_hub_download(
+         repo_id=os.environ.get("REPO_ID", "TheBloke/una-cybertron-7B-v2-GGUF"),
+         filename=os.environ.get("MODEL_FILE", "una-cybertron-7b-v2-bf16.Q5_K_S.gguf"),
+     ),
+     n_ctx=2048,
+     n_gpu_layers=0,
+ )
+
+ system_prompt = 'You are a helpful assistant.'
+
+ def make_prompt(history, msg):
+     # Serialize the system prompt, prior turns, and the new user message
+     # into the chat template the model is prompted with.
+     prompt = f"""<|im_start|>system
+ {system_prompt}</s>"""
+     for m in history:
+         prompt += f"\n<|user|>\n{m[0]}</s>"
+         if m[1]:
+             prompt += f"\n<|assistant|>\n{m[1]}"
+     prompt += f"\n<|user|>\n{msg}</s>"
+     prompt += "\n<|assistant|>\n"
+     return prompt
+
+
+ def generate_text(message, history):
+     temp = ""
+     input_prompt = make_prompt(history, message)
+     print(input_prompt)
+     output = llm(
+         input_prompt,
+         temperature=0.8,
+         top_p=0.95,
+         top_k=40,
+         repeat_penalty=1.1,
+         max_tokens=2048,
+         stop=[
+             "<|prompter|>",
+             "<|endoftext|>",
+             "<|endoftext|> \n",
+             "ASSISTANT:",
+             "USER:",
+             "SYSTEM:",
+         ],
+         stream=True,
+     )
+     # Stream partial completions back to the UI: each yield is the reply so far.
+     for out in output:
+         stream = copy.deepcopy(out)
+         temp += stream["choices"][0]["text"]
+         yield temp
+
+     history.append([message, temp])
+
+
+ demo = gr.ChatInterface(
+     generate_text,
+     concurrency_limit=5,
+     title="Cybertron 7B v2 CPU",
+     retry_btn=None,
+     undo_btn="Delete Previous",
+     clear_btn="Clear",
+ )
+ demo.queue(max_size=10)
+ demo.launch()
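
For reference, make_prompt assembles a ChatML-style system header followed by <|user|> / <|assistant|> turn tags. The sketch below, which assumes make_prompt from app.py is in scope (for example, run inside the same file) and uses invented sample turns that are not part of the commit, shows roughly what gets passed to llm():

    history = [["Hi there", "Hello! How can I help?"]]  # Gradio-style [user, assistant] pairs
    print(make_prompt(history, "What does this Space run?"))
    # <|im_start|>system
    # You are a helpful assistant.</s>
    # <|user|>
    # Hi there</s>
    # <|assistant|>
    # Hello! How can I help?
    # <|user|>
    # What does this Space run?</s>
    # <|assistant|>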
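
gr.ChatInterface accepts generator functions, so each value yielded by generate_text replaces the assistant message shown in the UI and the reply appears to stream. A minimal sketch for exercising the same generator without the UI, again assuming app.py's definitions are in scope (this snippet is not part of the commit):

    final = ""
    for partial in generate_text("Summarize what this Space does.", []):
        final = partial  # each yield is the cumulative reply so far
    print(final)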