Sean-Case committed
Commit d213c15
Parent(s): d5a8385
Improved advanced model prompt, added stop generation button, context prompt improvements
Files changed:
- app.py (+7 -5)
- chatfuncs/chatfuncs.py (+93 -42)
- chatfuncs/ingest.py (+1 -1)
app.py
CHANGED
@@ -90,12 +90,14 @@ def load_model(model_type, gpu_layers, gpu_config=None, cpu_config=None, torch_d
     print(vars(cpu_config))

     try:
-        model = AutoModelForCausalLM.from_pretrained('juanjgit/orca_mini_3B-GGUF', model_type='llama', model_file='orca-mini-3b.q4_0.gguf', **vars(gpu_config)) # **asdict(CtransRunConfig_cpu())
-
+        #model = AutoModelForCausalLM.from_pretrained('juanjgit/orca_mini_3B-GGUF', model_type='llama', model_file='orca-mini-3b.q4_0.gguf', **vars(gpu_config)) # **asdict(CtransRunConfig_cpu())
+        model = AutoModelForCausalLM.from_pretrained('Aryanne/Orca-Mini-3B-gguf', model_type='llama', model_file='q5_0-orca-mini-3b.gguf', **vars(gpu_config)) # **asdict(CtransRunConfig_cpu())
+        #model = AutoModelForCausalLM.from_pretrained('Aryanne/Wizard-Orca-3B-gguf', model_type='llama', model_file='q4_1-wizard-orca-3b.gguf', **vars(gpu_config)) # **asdict(CtransRunConfig_cpu())
         #model = AutoModelForCausalLM.from_pretrained('TheBloke/TinyLlama-1.1B-1T-OpenOrca-GGUF', model_type='llama', model_file='tinyllama-1.1b-1t-openorca.Q8_0.gguf', **vars(gpu_config)) # **asdict(CtransRunConfig_cpu())
     except:
-        model = AutoModelForCausalLM.from_pretrained('juanjgit/orca_mini_3B-GGUF', model_type='llama', model_file='orca-mini-3b.q4_0.gguf', **vars(cpu_config)) #**asdict(CtransRunConfig_gpu())
-
+        #model = AutoModelForCausalLM.from_pretrained('juanjgit/orca_mini_3B-GGUF', model_type='llama', model_file='orca-mini-3b.q4_0.gguf', **vars(cpu_config)) #**asdict(CtransRunConfig_gpu())
+        model = AutoModelForCausalLM.from_pretrained('Aryanne/Orca-Mini-3B-gguf', model_type='llama', model_file='q5_0-orca-mini-3b.gguf', **vars(cpu_config)) #**asdict(CtransRunConfig_gpu())
+        #model = AutoModelForCausalLM.from_pretrained('Aryanne/Wizard-Orca-3B-gguf', model_type='llama', model_file='q4_1-wizard-orca-3b.gguf', **vars(cpu_config)) # **asdict(CtransRunConfig_cpu())
         #model = AutoModelForCausalLM.from_pretrained('TheBloke/TinyLlama-1.1B-1T-OpenOrca-GGUF', model_type='llama', model_file='tinyllama-1.1b-1t-openorca.Q8_0.gguf', **vars(cpu_config)) # **asdict(CtransRunConfig_cpu())


@@ -228,7 +230,7 @@ with block:
     with gr.Tab("Advanced features"):
         model_choice = gr.Radio(label="Choose a chat model", value="Flan Alpaca", choices = ["Flan Alpaca", "Orca Mini"])
         with gr.Row():
-            gpu_layer_choice = gr.Slider(label="Choose number of model layers to send to GPU (WARNING: please don't modify unless you have a GPU).", value=0, minimum=0, maximum=6, step = 1, visible=
+            gpu_layer_choice = gr.Slider(label="Choose number of model layers to send to GPU (WARNING: please don't modify unless you have a GPU).", value=0, minimum=0, maximum=6, step = 1, visible=False)
         change_model_button = gr.Button(value="Load model", scale=0)
         load_text = gr.Text(label="Load status")

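For readers following the app.py change: load_model now attempts the GGUF load with the GPU config and falls back to the CPU config on any exception, with the Aryanne/Orca-Mini-3B-gguf weights replacing the juanjgit build. Below is a minimal sketch of that pattern with ctransformers, assuming gpu_layers is the only setting that differs between the two configs (the app actually unpacks full gpu_config/cpu_config objects, and the helper name here is illustrative):

```python
# Sketch of the GPU-first / CPU-fallback load used in load_model().
# The helper name and gpu_layers handling are illustrative, not copied from app.py.
from ctransformers import AutoModelForCausalLM

def load_orca_mini(gpu_layers: int = 0):
    try:
        # Offload `gpu_layers` transformer layers to the GPU when one is available
        return AutoModelForCausalLM.from_pretrained(
            'Aryanne/Orca-Mini-3B-gguf',
            model_type='llama',
            model_file='q5_0-orca-mini-3b.gguf',
            gpu_layers=gpu_layers,
        )
    except Exception:
        # CPU-only fallback: same model and file, no layers offloaded
        return AutoModelForCausalLM.from_pretrained(
            'Aryanne/Orca-Mini-3B-gguf',
            model_type='llama',
            model_file='q5_0-orca-mini-3b.gguf',
            gpu_layers=0,
        )
```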
chatfuncs/chatfuncs.py
CHANGED
@@ -12,7 +12,7 @@ from threading import Thread
 from transformers import pipeline, TextIteratorStreamer

 # Alternative model sources
-from dataclasses import asdict, dataclass
+#from dataclasses import asdict, dataclass

 # Langchain functions
 from langchain.prompts import PromptTemplate

@@ -55,8 +55,8 @@ model = [] # Define empty list for model functions to run
 tokenizer = [] # Define empty list for model functions to run

 ## Highlight text constants
-hlt_chunk_size =
-hlt_strat = [" ", ".", "!", "?", ":", "\n\n", "\n", ","]
+hlt_chunk_size = 12
+hlt_strat = [" ", ". ", "! ", "? ", ": ", "\n\n", "\n", ", "]
 hlt_overlap = 4

 ## Initialise NER model ##

@@ -217,58 +217,106 @@ def base_prompt_templates(model_type = "Flan Alpaca"):
     # The main prompt:

     instruction_prompt_template_alpaca_quote = """### Instruction:
-
-QUESTION: {question}
-
+Quote directly from the SOURCE below that best answers the QUESTION. Only quote full sentences in the correct order. If you cannot find an answer, start your response with "My best guess is: ".

+CONTENT: {summaries}
+QUESTION: {question}

+Response:"""

     instruction_prompt_template_alpaca = """### Instruction:
-
-Answer:"""
+### User:
+Answer the QUESTION using information from the following CONTENT.
+CONTENT: {summaries}
+QUESTION: {question}
+
+Response:"""
+
+    instruction_prompt_template_openllama = """Answer the QUESTION using information from the following CONTENT.
+QUESTION - {question}
+CONTENT - {summaries}
+Answer:"""
+
+    instruction_prompt_template_platypus = """### Instruction:
+Answer the QUESTION using information from the following CONTENT.
+CONTENT: {summaries}
+QUESTION: {question}
+### Response:"""
+
+    instruction_prompt_template_wizard_orca_quote = """### HUMAN:
+Quote text from the CONTENT to answer the QUESTION below.
+CONTENT - {summaries}
+QUESTION - {question}
+### RESPONSE:
+"""
+
+    instruction_prompt_template_wizard_orca = """### HUMAN:
+Answer the QUESTION below based on the CONTENT. Only refer to CONTENT that directly answers the question.
+CONTENT - {summaries}
+QUESTION - {question}
+### RESPONSE:
+"""

     instruction_prompt_template_orca = """
-
+### System:
+You are an AI assistant that follows instruction extremely well. Help as much as you can.
+### User:
+Answer the QUESTION with a short response using information from the following CONTENT.
+QUESTION: {question}
+CONTENT: {summaries}
+
+### Response:"""
+
+    instruction_prompt_template_orca_quote = """
+### System:
+You are an AI assistant that follows instruction extremely well. Help as much as you can.
+### User:
+Quote text from the CONTENT to answer the QUESTION below.
+QUESTION: {question}
+CONTENT: {summaries}
+### Response:
+"""
+
+    instruction_prompt_template_orca_rev = """
+### System:
+You are an AI assistant that follows instruction extremely well. Help as much as you can.
+### User:
+Answer the QUESTION with a short response using information from the following CONTENT.
+QUESTION: {question}
+CONTENT: {summaries}
+
+### Response:"""

     instruction_prompt_mistral_orca = """<|im_start|>system\n
-
+You are an AI assistant that follows instruction extremely well. Help as much as you can.
+<|im_start|>user\n
+Answer the QUESTION using information from the following CONTENT. Respond with short answers that directly answer the question.
+CONTENT: {summaries}
+QUESTION: {question}\n
+Answer:<|im_end|>"""

     instruction_prompt_tinyllama_orca = """<|im_start|>system\n
-
+You are an AI assistant that follows instruction extremely well. Help as much as you can.
+<|im_start|>user\n
+Answer the QUESTION using information from the following CONTENT. Only quote text that directly answers the question and nothing more. If you can't find an answer to the question, respond with "Sorry, I can't find an answer to that question.".
+CONTENT: {summaries}
+QUESTION: {question}\n
+Answer:<|im_end|>"""
+
+    instruction_prompt_marx = """
+### HUMAN:
+Answer the QUESTION using information from the following CONTENT.
+CONTENT: {summaries}
+QUESTION: {question}
+
+### RESPONSE:
+"""

     if model_type == "Flan Alpaca":
         INSTRUCTION_PROMPT=PromptTemplate(template=instruction_prompt_template_alpaca, input_variables=['question', 'summaries'])
     elif model_type == "Orca Mini":
-        INSTRUCTION_PROMPT=PromptTemplate(template=
+        INSTRUCTION_PROMPT=PromptTemplate(template=instruction_prompt_template_wizard_orca, input_variables=['question', 'summaries'])

     return INSTRUCTION_PROMPT, CONTENT_PROMPT


@@ -281,7 +329,7 @@ def generate_expanded_prompt(inputs: Dict[str, str], instruction_prompt, content
     new_question_kworded = adapt_q_from_chat_history(question, chat_history, extracted_memory) # new_question_keywords,


-    docs_keep_as_doc, doc_df, docs_keep_out = hybrid_retrieval(new_question_kworded, vectorstore, embeddings, k_val =
+    docs_keep_as_doc, doc_df, docs_keep_out = hybrid_retrieval(new_question_kworded, vectorstore, embeddings, k_val = 10, out_passages = 2,
                                                                vec_score_cut_off = 1, vec_weight = 1, bm25_weight = 1, svm_weight = 1)#,
                                                                #vectorstore=globals()["vectorstore"], embeddings=globals()["embeddings"])


@@ -382,6 +430,8 @@ def produce_streaming_answer_chatbot(history, full_prompt, model_type):

     gen_config = CtransGenGenerationConfig()

+    print(vars(gen_config))
+
     # Pull the generated text from the streamer, and update the model output.
     start = time.time()
     NUM_TOKENS=0

@@ -633,7 +683,8 @@ def get_expanded_passages(vectorstore, docs, width):
     return ''.join(content), meta[0], meta[-1]

 def get_parent_content_and_meta(vstore_docs, width, target):
-    target_range = range(max(0, target - width), min(len(vstore_docs), target + width + 1))
+    #target_range = range(max(0, target - width), min(len(vstore_docs), target + width + 1))
+    target_range = range(max(0, target), min(len(vstore_docs), target + width + 1)) # Now only selects extra passages AFTER the found passage
     parent_vstore_out = [vstore_docs[i] for i in target_range]

     content_str_out, meta_first_out, meta_last_out = [], [], []

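The prompt hunk above adds several new instruction templates and switches the "Orca Mini" branch to the wizard-orca template. A minimal sketch of how such a template is wrapped and filled, using the same langchain PromptTemplate call that appears in base_prompt_templates (the sample question and summaries values are illustrative only):

```python
# Sketch: select the wizard-orca template and fill it with retrieved passages.
from langchain.prompts import PromptTemplate

instruction_prompt_template_wizard_orca = """### HUMAN:
Answer the QUESTION below based on the CONTENT. Only refer to CONTENT that directly answers the question.
CONTENT - {summaries}
QUESTION - {question}
### RESPONSE:
"""

INSTRUCTION_PROMPT = PromptTemplate(
    template=instruction_prompt_template_wizard_orca,
    input_variables=['question', 'summaries'],
)

# Fill the template with the retrieved passages and the user's question (sample values)
full_prompt = INSTRUCTION_PROMPT.format(
    summaries="Passage 1 text... Passage 2 text...",
    question="What is the main finding?",
)
print(full_prompt)
```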
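The get_parent_content_and_meta change narrows the expansion window: previously the target passage was expanded with `width` neighbours on both sides, now only the passages after it are included. A small comparison with made-up passages:

```python
# Illustration of the old vs. new windowing in get_parent_content_and_meta.
vstore_docs = ["p0", "p1", "p2", "p3", "p4", "p5"]
width, target = 1, 2

old_range = range(max(0, target - width), min(len(vstore_docs), target + width + 1))
new_range = range(max(0, target), min(len(vstore_docs), target + width + 1))

print([vstore_docs[i] for i in old_range])  # ['p1', 'p2', 'p3'] - neighbours before and after
print([vstore_docs[i] for i in new_range])  # ['p2', 'p3'] - only the found passage and the ones after it
```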
chatfuncs/ingest.py
CHANGED
@@ -38,7 +38,7 @@ from pypdf import PdfReader
 PandasDataFrame = TypeVar('pd.core.frame.DataFrame')
 # -

-split_strat = ["\n\n", "\n", ".", "!", "?"
+split_strat = ["\n\n", "\n", ". ", "! ", "? "]
 chunk_size = 500
 chunk_overlap = 0
 start_index = True
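The ingest.py change swaps the bare ".", "!", "?" separators for ". ", "! ", "? ", so splits land after complete sentences and are less likely to be triggered by decimals or abbreviations such as "3.5" or "e.g.". A minimal sketch of how these constants would drive a splitter, assuming they feed LangChain's RecursiveCharacterTextSplitter (the call site is outside this hunk, so that wiring is an assumption):

```python
# Sketch: the ingest constants applied to a recursive character splitter.
from langchain.text_splitter import RecursiveCharacterTextSplitter

split_strat = ["\n\n", "\n", ". ", "! ", "? "]  # sentence-ending separators with trailing space
chunk_size = 500
chunk_overlap = 0
start_index = True

splitter = RecursiveCharacterTextSplitter(
    separators=split_strat,
    chunk_size=chunk_size,
    chunk_overlap=chunk_overlap,
    add_start_index=start_index,
)

chunks = splitter.split_text("First sentence. Second sentence! Third sentence? Fourth.")
print(chunks)
```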