seanpedrickcase committed on
Commit 9ad3bc3 • 1 Parent(s): 1f9788f

Upgraded Gradio version. Changed large model to Phi 3 128k. Now uses Llama cpp Python

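For context on the commit message, a minimal sketch of the new loading path (llama-cpp-python plus huggingface_hub) follows. The repo id, filename and parameter values are illustrative assumptions taken from the app.py diff below, not part of the commit itself.

# Minimal sketch: download a GGUF build of Phi 3 and run one completion.
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

model_path = hf_hub_download(
    repo_id="QuantFactory/Phi-3-mini-128k-instruct-GGUF",   # assumed default, see app.py diff
    filename="Phi-3-mini-128k-instruct.Q4_K_M.gguf",
)

llm = Llama(model_path=model_path, n_ctx=32768, n_gpu_layers=0, n_threads=8)

prompt = '<|user|>\nSummarise the following text in less than 100 words: "The cat sat on the mat."\nSummary:<|end|>\n<|assistant|>'
out = llm(prompt, max_tokens=200, temperature=0.5)
print(out["choices"][0]["text"])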
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 🚀
  colorFrom: green
  colorTo: gray
  sdk: gradio
- sdk_version: 3.50.0
+ sdk_version: 4.36.0
  app_file: app.py
  pinned: false
  license: apache-2.0
app.py CHANGED
@@ -5,15 +5,17 @@ from transformers import pipeline, AutoTokenizer
5
  import os
6
  from typing import Type
7
  import gradio as gr
8
- import ctransformers
9
- # Concurrent futures is used to cancel processes that are taking too long
10
- import concurrent.futures
11
 
12
  PandasDataFrame = Type[pd.DataFrame]
13
 
14
  import chatfuncs.chatfuncs as chatf
 
15
 
16
- from chatfuncs.helper_functions import dummy_function, display_info, put_columns_in_df, put_columns_in_join_df, get_temp_folder_path, empty_folder
 
17
 
18
  # Disable cuda devices if necessary
19
  #os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
@@ -36,22 +38,7 @@ def create_hf_model(model_name):
36
 
37
  tokenizer = AutoTokenizer.from_pretrained(model_name, model_max_length = chatf.context_length)
38
 
39
- summariser = pipeline("summarization", model=model_name, tokenizer=tokenizer) # philschmid/bart-large-cnn-samsum
40
-
41
- #from transformers import AutoModelForSeq2SeqLM, AutoModelForCausalLM
42
-
43
- # if torch_device == "cuda":
44
- # if "flan" in model_name:
45
- # model = AutoModelForSeq2SeqLM.from_pretrained(model_name, device_map="auto")
46
- # else:
47
- # model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")
48
- # else:
49
- # if "flan" in model_name:
50
- # model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
51
- # else:
52
- # model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
53
-
54
-
55
 
56
  return summariser, tokenizer, model_name
57
 
@@ -66,46 +53,40 @@ def load_model(model_type, gpu_layers, gpu_config=None, cpu_config=None, torch_d
66
  if torch_device is None:
67
  torch_device = chatf.torch_device
68
 
69
- if model_type == "Mistral Nous Capybara 4k (larger, slow)":
70
- hf_checkpoint = 'NousResearch/Nous-Capybara-7B-V1.9-GGUF'
71
-
72
  if torch_device == "cuda":
73
  gpu_config.update_gpu(gpu_layers)
 
74
  else:
75
  gpu_config.update_gpu(gpu_layers)
76
  cpu_config.update_gpu(gpu_layers)
77
 
78
- print("Loading with", cpu_config.gpu_layers, "model layers sent to GPU.")
79
 
80
  print(vars(gpu_config))
81
  print(vars(cpu_config))
82
 
83
  try:
84
- #model = ctransformers.AutoModelForCausalLM.from_pretrained('Aryanne/Orca-Mini-3B-gguf', model_type='llama', model_file='q5_0-orca-mini-3b.gguf', **vars(gpu_config)) # **asdict(CtransRunConfig_cpu())
85
- #model = ctransformers.AutoModelForCausalLM.from_pretrained('Aryanne/Wizard-Orca-3B-gguf', model_type='llama', model_file='q4_1-wizard-orca-3b.gguf', **vars(gpu_config)) # **asdict(CtransRunConfig_cpu())
86
- #model = ctransformers.AutoModelForCausalLM.from_pretrained('TheBloke/Mistral-7B-OpenOrca-GGUF', model_type='mistral', model_file='mistral-7b-openorca.Q4_K_M.gguf', **vars(gpu_config), hf=True) # **asdict(CtransRunConfig_cpu())
87
- #model = ctransformers.AutoModelForCausalLM.from_pretrained('TheBloke/OpenHermes-2.5-Mistral-7B-16k-GGUF', model_type='mistral', model_file='openhermes-2.5-mistral-7b-16k.Q4_K_M.gguf', **vars(gpu_config), hf=True) # **asdict(CtransRunConfig_cpu())
88
- model = ctransformers.AutoModelForCausalLM.from_pretrained('NousResearch/Nous-Capybara-7B-V1.9-GGUF', model_type='mistral', model_file='Capybara-7B-V1.9-Q5_K_M.gguf', **vars(gpu_config), hf=True) # **asdict(CtransRunConfig_cpu())
89
-
90
-
91
- tokenizer = AutoTokenizer.from_pretrained("NousResearch/Nous-Capybara-7B-V1.9")
92
- summariser = pipeline("text-generation", model=model, tokenizer=tokenizer)
93
-
94
- except:
95
- #model = ctransformers.AutoModelForCausalLM.from_pretrained('Aryanne/Orca-Mini-3B-gguf', model_type='llama', model_file='q5_0-orca-mini-3b.gguf', **vars(cpu_config)) #**asdict(CtransRunConfig_gpu())
96
- #model = ctransformers.AutoModelForCausalLM.from_pretrained('Aryanne/Wizard-Orca-3B-gguf', model_type='llama', model_file='q4_1-wizard-orca-3b.gguf', **vars(cpu_config)) # **asdict(CtransRunConfig_cpu())
97
- #model = ctransformers.AutoModelForCausalLM.from_pretrained('TheBloke/Mistral-7B-OpenOrca-GGUF', model_type='mistral', model_file='mistral-7b-openorca.Q4_K_M.gguf', **vars(cpu_config), hf=True) # **asdict(CtransRunConfig_cpu())
98
- #model = ctransformers.AutoModelForCausalLM.from_pretrained('TheBloke/OpenHermes-2.5-Mistral-7B-16k-GGUF', model_type='mistral', model_file='openhermes-2.5-mistral-7b-16k.Q4_K_M.gguf', **vars(gpu_config), hf=True) # **asdict(CtransRunConfig_cpu())
99
- model = ctransformers.AutoModelForCausalLM.from_pretrained('NousResearch/Nous-Capybara-7B-V1.9-GGUF', model_type='mistral', model_file='Capybara-7B-V1.9-Q5_K_M.gguf', **vars(gpu_config), hf=True) # **asdict(CtransRunConfig_cpu())
100
-
101
- #tokenizer = ctransformers.AutoTokenizer.from_pretrained(model)
102
-
103
- tokenizer = AutoTokenizer.from_pretrained("NousResearch/Nous-Capybara-7B-V1.9")
104
- summariser = pipeline("text-generation", model=model, tokenizer=tokenizer) # model
105
-
106
- #model = []
107
- #tokenizer = []
108
- #summariser = []
109
 
110
  if model_type == "Flan T5 Large Stacked Samsum 1k":
111
  # Huggingface chat model
@@ -118,9 +99,9 @@ def load_model(model_type, gpu_layers, gpu_config=None, cpu_config=None, torch_d
118
  hf_checkpoint = 'pszemraj/long-t5-tglobal-base-16384-book-summary' #'philschmid/flan-t5-small-stacked-samsum'#'declare-lab/flan-alpaca-base' # # #
119
  summariser, tokenizer, model_type = create_hf_model(model_name = hf_checkpoint)
120
 
121
- chatf.model = summariser
122
- chatf.tokenizer = tokenizer
123
- chatf.model_type = model_type
124
 
125
  load_confirmation = "Finished loading model: " + model_type
126
 
@@ -128,7 +109,7 @@ def load_model(model_type, gpu_layers, gpu_config=None, cpu_config=None, torch_d
128
  return model_type, load_confirmation, model_type
129
 
130
  # Both models are loaded on app initialisation so that users don't have to wait for the models to be downloaded
131
- model_type = "Mistral Nous Capybara 4k (larger, slow)"
132
  load_model(model_type, chatf.gpu_layers, chatf.gpu_config, chatf.cpu_config, chatf.torch_device)
133
 
134
  model_type = "Flan T5 Large Stacked Samsum 1k"
@@ -140,115 +121,6 @@ load_model(model_type, chatf.gpu_layers, chatf.gpu_config, chatf.cpu_config, cha
140
  today = datetime.now().strftime("%d%m%Y")
141
  today_rev = datetime.now().strftime("%Y%m%d")
142
 
143
- def summarise_text(text, text_df, length_slider, in_colname, model_type, progress=gr.Progress()):
144
-
145
- if text_df.empty:
146
- in_colname="text"
147
- in_colname_list_first = in_colname
148
-
149
- in_text_df = pd.DataFrame({in_colname_list_first:[text]})
150
-
151
- else:
152
- in_text_df = text_df
153
- in_colname_list_first = in_colname
154
-
155
- print(model_type)
156
-
157
- texts_list = list(in_text_df[in_colname_list_first])
158
-
159
- if model_type != "Mistral Nous Capybara 4k (larger, slow)":
160
- summarised_texts = []
161
-
162
- for single_text in progress.tqdm(texts_list, desc = "Summarising texts", unit = "texts"):
163
- summarised_text = chatf.model(single_text, max_length=length_slider)
164
-
165
- #print(summarised_text)
166
-
167
- summarised_text_str = summarised_text[0]['summary_text']
168
-
169
- summarised_texts.append(summarised_text_str)
170
-
171
- print(summarised_text_str)
172
-
173
- #pd.Series(summarised_texts).to_csv("summarised_texts_out.csv")
174
-
175
- #print(summarised_texts)
176
-
177
- if model_type == "Mistral Nous Capybara 4k (larger, slow)":
178
-
179
-
180
- # Define a function that calls your model
181
- def call_model(formatted_string, max_length=10000):
182
- return chatf.model(formatted_string, max_length=max_length)
183
-
184
- # Set your timeout duration (in seconds)
185
- timeout_duration = 300 # Adjust this value as needed
186
-
187
- length = str(length_slider)
188
-
189
- from chatfuncs.prompts import nous_capybara_prompt
190
-
191
- summarised_texts = []
192
-
193
- for single_text in progress.tqdm(texts_list, desc = "Summarising texts", unit = "texts"):
194
-
195
- formatted_string = nous_capybara_prompt.format(length=length, text=single_text)
196
-
197
- # Use ThreadPoolExecutor to enforce a timeout
198
- with concurrent.futures.ThreadPoolExecutor() as executor:
199
- future = executor.submit(call_model, formatted_string, 10000)
200
- try:
201
- output = future.result(timeout=timeout_duration)
202
- # Process the output here
203
- except concurrent.futures.TimeoutError:
204
- error_text = f"Timeout (five minutes) occurred for text: {single_text}. Consider using a smaller model."
205
- print(error_text)
206
- return error_text, None
207
-
208
- print(output)
209
-
210
- output_str = output[0]['generated_text']
211
-
212
- # Find the index of 'ASSISTANT: ' to select only text after this location
213
- index = output_str.find('ASSISTANT: ')
214
-
215
- # Check if 'ASSISTANT: ' is found in the string
216
- if index != -1:
217
- # Add the length of 'ASSISTANT: ' to the index to start from the end of this substring
218
- start_index = index + len('ASSISTANT: ')
219
-
220
- # Slice the string from this point to the end
221
- assistant_text = output_str[start_index:]
222
- else:
223
- assistant_text = "ASSISTANT: not found in text"
224
-
225
- print(assistant_text)
226
-
227
- summarised_texts.append(assistant_text)
228
-
229
- #print(summarised_text)
230
-
231
- #pd.Series(summarised_texts).to_csv("summarised_texts_out.csv")
232
-
233
- if text_df.empty:
234
- #if model_type != "Mistral Nous Capybara 4k (larger, slow)":
235
- summarised_text_out = summarised_texts[0]#.values()
236
-
237
- #if model_type == "Mistral Nous Capybara 4k (larger, slow)":
238
- # summarised_text_out = summarised_texts[0]
239
-
240
- else:
241
- summarised_text_out = summarised_texts #[d['summary_text'] for d in summarised_texts] #summarised_text[0].values()
242
-
243
- output_name = "summarise_output_" + today_rev + ".csv"
244
- output_df = pd.DataFrame({"Original text":in_text_df[in_colname_list_first],
245
- "Summarised text":summarised_text_out})
246
-
247
- summarised_text_out_str = str(output_df["Summarised text"][0])#.str.replace("dict_values([","").str.replace("])",""))
248
-
249
- output_df.to_csv(output_name, index = None)
250
-
251
- return summarised_text_out_str, output_name
252
 
253
  # ## Gradio app - summarise
254
  block = gr.Blocks(theme = gr.themes.Base())
@@ -261,21 +133,21 @@ with block:
261
  gr.Markdown(
262
  """
263
  # Text summariser
264
- Enter open text below to get a summary. You can copy and paste text directly, or upload a file and specify the column that you want to summarise. The default small model will be able to summarise up to about 16,000 words, but the quality may not be great. The larger model around 900 words of better quality. Summarisation with Mistral Nous Capybara 4k works on up to around 4,000 words, and may give a higher quality summary, but will be slow, and it may not respect your desired maximum word count.
265
  """)
266
 
267
  with gr.Tab("Summariser"):
268
  current_model = gr.Textbox(label="Current model", value=model_type, scale = 3)
269
 
270
- with gr.Accordion("Paste open text", open = False):
271
- in_text = gr.Textbox(label="Copy and paste your open text here", lines = 5)
272
-
273
- with gr.Accordion("Summarise open text from a file", open = False):
274
  in_text_df = gr.File(label="Input text from file", file_count='multiple')
275
  in_colname = gr.Dropdown(label="Write the column name for the open text to summarise")
 
 
 
276
 
277
  with gr.Row():
278
- summarise_btn = gr.Button("Summarise")
279
  stop = gr.Button(value="Interrupt processing", variant="secondary", scale=0)
280
  length_slider = gr.Slider(minimum = 30, maximum = 500, value = 100, step = 10, label = "Maximum length of summary")
281
 
@@ -284,35 +156,104 @@ with block:
284
  output_file = gr.File(label="Output file")
285
 
286
  with gr.Tab("Advanced features"):
287
- #out_passages = gr.Slider(minimum=1, value = 2, maximum=10, step=1, label="Choose number of passages to retrieve from the document. Numbers greater than 2 may lead to increased hallucinations or input text being truncated.")
288
- #temp_slide = gr.Slider(minimum=0.1, value = 0.1, maximum=1, step=0.1, label="Choose temperature setting for response generation.")
289
  with gr.Row():
290
- model_choice = gr.Radio(label="Choose a summariser model", value="Long T5 Global Base 16k Book Summary", choices = ["Long T5 Global Base 16k Book Summary", "Flan T5 Large Stacked Samsum 1k", "Mistral Nous Capybara 4k (larger, slow)"])
291
  change_model_button = gr.Button(value="Load model", scale=0)
292
  with gr.Accordion("Choose number of model layers to send to GPU (WARNING: please don't modify unless you are sure you have a GPU).", open = False):
293
  gpu_layer_choice = gr.Slider(label="Choose number of model layers to send to GPU.", value=0, minimum=0, maximum=100, step = 1, visible=True)
 
 
294
 
295
  load_text = gr.Text(label="Load status")
296
 
297
-
298
  # Update dropdowns upon initial file load
299
  in_text_df.upload(put_columns_in_df, inputs=[in_text_df, in_colname], outputs=[in_colname, data_state])
300
 
301
  change_model_button.click(fn=load_model, inputs=[model_choice, gpu_layer_choice], outputs = [model_type_state, load_text, current_model])
302
 
303
  summarise_click = summarise_btn.click(fn=summarise_text, inputs=[in_text, data_state, length_slider, in_colname, model_type_state],
304
- outputs=[output_single_text, output_file], api_name="summarise_single_text")
305
- summarise_enter = summarise_btn.click(fn=summarise_text, inputs=[in_text, data_state, length_slider, in_colname, model_type_state],
306
- outputs=[output_single_text, output_file])
 
 
307
 
308
  # Stop processing if it's taking too long
309
- stop.click(fn=None, inputs=None, outputs=None, cancels=[summarise_click, summarise_enter])
310
 
311
  # Dummy function to allow dropdown modification to work correctly (strange thing needed for Gradio 3.50, will be deprecated upon upgrading Gradio version)
312
  in_colname.change(dummy_function, in_colname, None)
313
 
314
- block.queue(concurrency_count=1).launch()
315
- # -
5
  import os
6
  from typing import Type
7
  import gradio as gr
8
+
9
+ from llama_cpp import Llama
10
+ from huggingface_hub import hf_hub_download
11
 
12
  PandasDataFrame = Type[pd.DataFrame]
13
 
14
  import chatfuncs.chatfuncs as chatf
15
+ import chatfuncs.summarise_funcs as sumf
16
 
17
+ from chatfuncs.helper_functions import dummy_function, put_columns_in_df
18
+ from chatfuncs.summarise_funcs import summarise_text
19
 
20
  # Disable cuda devices if necessary
21
  #os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
 
38
 
39
  tokenizer = AutoTokenizer.from_pretrained(model_name, model_max_length = chatf.context_length)
40
 
41
+ summariser = pipeline("summarization", model=model_name, tokenizer=tokenizer) # philschmid/bart-large-cnn-samsum
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
  return summariser, tokenizer, model_name
44
 
 
53
  if torch_device is None:
54
  torch_device = chatf.torch_device
55
 
56
+ if model_type == "Phi 3 128k (larger, slow)":
 
 
57
  if torch_device == "cuda":
58
  gpu_config.update_gpu(gpu_layers)
59
+ print("Loading with", gpu_config.n_gpu_layers, "model layers sent to GPU.")
60
  else:
61
  gpu_config.update_gpu(gpu_layers)
62
  cpu_config.update_gpu(gpu_layers)
63
 
64
+ print("Loading with", cpu_config.n_gpu_layers, "model layers sent to GPU.")
65
 
66
  print(vars(gpu_config))
67
  print(vars(cpu_config))
68
 
69
  try:
70
+ summariser = Llama(
71
+ model_path=hf_hub_download(
72
+ repo_id=os.environ.get("REPO_ID", "QuantFactory/Phi-3-mini-128k-instruct-GGUF"),# "QuantFactory/Phi-3-mini-128k-instruct-GGUF"), # "QuantFactory/Meta-Llama-3-8B-Instruct-GGUF-v2"), #"microsoft/Phi-3-mini-4k-instruct-gguf"),#"TheBloke/Mistral-7B-OpenOrca-GGUF"),
73
+ filename=os.environ.get("MODEL_FILE", "Phi-3-mini-128k-instruct.Q4_K_M.gguf") #"Phi-3-mini-128k-instruct.Q4_K_M.gguf") #"Meta-Llama-3-8B-Instruct-v2.Q6_K.gguf") #"Phi-3-mini-4k-instruct-q4.gguf")#"mistral-7b-openorca.Q4_K_M.gguf"),
74
+ ),
75
+ **vars(gpu_config) # change n_gpu_layers if you have more or less VRAM
76
+ )
77
+
78
+ except Exception as e:
79
+ print("GPU load failed")
80
+ print(e)
81
+ summariser = Llama(
82
+ model_path=hf_hub_download(
83
+ repo_id=os.environ.get("REPO_ID", "QuantFactory/Phi-3-mini-128k-instruct-GGUF"), #"QuantFactory/Phi-3-mini-128k-instruct-GGUF"), #, "microsoft/Phi-3-mini-4k-instruct-gguf"),#"QuantFactory/Meta-Llama-3-8B-Instruct-GGUF-v2"), #"microsoft/Phi-3-mini-4k-instruct-gguf"),#"TheBloke/Mistral-7B-OpenOrca-GGUF"),
84
+ filename=os.environ.get("MODEL_FILE", "Phi-3-mini-128k-instruct.Q4_K_M.gguf"), # "Phi-3-mini-128k-instruct.Q4_K_M.gguf") # , #"Meta-Llama-3-8B-Instruct-v2.Q6_K.gguf") #"Phi-3-mini-4k-instruct-q4.gguf"),#"mistral-7b-openorca.Q4_K_M.gguf"),
85
+ ),
86
+ **vars(cpu_config)
87
+ )
88
+
89
+ tokenizer = []
 
 
 
 
 
90
 
91
  if model_type == "Flan T5 Large Stacked Samsum 1k":
92
  # Huggingface chat model
 
99
  hf_checkpoint = 'pszemraj/long-t5-tglobal-base-16384-book-summary' #'philschmid/flan-t5-small-stacked-samsum'#'declare-lab/flan-alpaca-base' # # #
100
  summariser, tokenizer, model_type = create_hf_model(model_name = hf_checkpoint)
101
 
102
+ sumf.model = summariser
103
+ sumf.tokenizer = tokenizer
104
+ sumf.model_type = model_type
105
 
106
  load_confirmation = "Finished loading model: " + model_type
107
 
 
109
  return model_type, load_confirmation, model_type
110
 
111
  # Both models are loaded on app initialisation so that users don't have to wait for the models to be downloaded
112
+ model_type = "Phi 3 128k (larger, slow)"
113
  load_model(model_type, chatf.gpu_layers, chatf.gpu_config, chatf.cpu_config, chatf.torch_device)
114
 
115
  model_type = "Flan T5 Large Stacked Samsum 1k"
 
121
  today = datetime.now().strftime("%d%m%Y")
122
  today_rev = datetime.now().strftime("%Y%m%d")
123
124
 
125
  # ## Gradio app - summarise
126
  block = gr.Blocks(theme = gr.themes.Base())
 
133
  gr.Markdown(
134
  """
135
  # Text summariser
136
+ Enter open text below to get a summary. You can copy and paste text directly, or upload a file and specify the column that you want to summarise. The default small model can summarise up to about 16,000 words, but the quality may not be great. The larger model handles around 900 words with better quality. Summarisation with Phi 3 128k works on up to around 4,000 words and may give a higher-quality summary, but it will be slow and may not respect your desired maximum word count.
137
  """)
138
 
139
  with gr.Tab("Summariser"):
140
  current_model = gr.Textbox(label="Current model", value=model_type, scale = 3)
141
 
142
+ with gr.Accordion("Summarise open text from a file", open = True):
 
 
 
143
  in_text_df = gr.File(label="Input text from file", file_count='multiple')
144
  in_colname = gr.Dropdown(label="Write the column name for the open text to summarise")
145
+
146
+ with gr.Accordion("Paste open text", open = False):
147
+ in_text = gr.Textbox(label="Copy and paste your open text here", lines = 5)
148
 
149
  with gr.Row():
150
+ summarise_btn = gr.Button("Summarise", variant="primary")
151
  stop = gr.Button(value="Interrupt processing", variant="secondary", scale=0)
152
  length_slider = gr.Slider(minimum = 30, maximum = 500, value = 100, step = 10, label = "Maximum length of summary")
153
 
 
156
  output_file = gr.File(label="Output file")
157
 
158
  with gr.Tab("Advanced features"):
 
 
159
  with gr.Row():
160
+ model_choice = gr.Radio(label="Choose a summariser model", value="Long T5 Global Base 16k Book Summary", choices = ["Long T5 Global Base 16k Book Summary", "Flan T5 Large Stacked Samsum 1k", "Phi 3 128k (larger, slow)"])
161
  change_model_button = gr.Button(value="Load model", scale=0)
162
  with gr.Accordion("Choose number of model layers to send to GPU (WARNING: please don't modify unless you are sure you have a GPU).", open = False):
163
  gpu_layer_choice = gr.Slider(label="Choose number of model layers to send to GPU.", value=0, minimum=0, maximum=100, step = 1, visible=True)
164
+ with gr.Accordion("LLM parameters"):
165
+ temp_slide = gr.Slider(minimum=0.1, value = 0.5, maximum=1, step=0.1, label="Choose temperature setting for response generation.")
166
 
167
  load_text = gr.Text(label="Load status")
168
 
 
169
  # Update dropdowns upon initial file load
170
  in_text_df.upload(put_columns_in_df, inputs=[in_text_df, in_colname], outputs=[in_colname, data_state])
171
 
172
  change_model_button.click(fn=load_model, inputs=[model_choice, gpu_layer_choice], outputs = [model_type_state, load_text, current_model])
173
 
174
  summarise_click = summarise_btn.click(fn=summarise_text, inputs=[in_text, data_state, length_slider, in_colname, model_type_state],
175
+ outputs=[output_single_text, output_file], api_name="summarise_single_text")
176
+ # summarise_enter = summarise_btn.submit(fn=summarise_text, inputs=[in_text, data_state, length_slider, in_colname, model_type_state],
177
+ # outputs=[output_single_text, output_file])
178
+
179
+ #summarise_click = summarise_btn.click(chatf.llama_cpp_streaming, [chatbot, instruction_prompt_out, model_type_state, temp_slide], chatbot)
180
 
181
  # Stop processing if it's taking too long
182
+ stop.click(fn=None, inputs=None, outputs=None, cancels=[summarise_click])
183
 
184
  # Dummy function to allow dropdown modification to work correctly (strange thing needed for Gradio 3.50, will be deprecated upon upgrading Gradio version)
185
  in_colname.change(dummy_function, in_colname, None)
186
 
187
+ block.queue().launch()
188
+
189
+ # def load_model(model_type, gpu_layers, gpu_config=None, cpu_config=None, torch_device=None):
190
+ # print("Loading model ", model_type)
191
+
192
+ # # Default values inside the function
193
+ # if gpu_config is None:
194
+ # gpu_config = chatf.gpu_config
195
+ # if cpu_config is None:
196
+ # cpu_config = chatf.cpu_config
197
+ # if torch_device is None:
198
+ # torch_device = chatf.torch_device
199
+
200
+ # if model_type == "Phi 3 128k (larger, slow)":
201
+ # hf_checkpoint = 'NousResearch/Nous-Capybara-7B-V1.9-GGUF'
202
+
203
+ # if torch_device == "cuda":
204
+ # gpu_config.update_gpu(gpu_layers)
205
+ # else:
206
+ # gpu_config.update_gpu(gpu_layers)
207
+ # cpu_config.update_gpu(gpu_layers)
208
+
209
+ # print("Loading with", cpu_config.gpu_layers, "model layers sent to GPU.")
210
+
211
+ # print(vars(gpu_config))
212
+ # print(vars(cpu_config))
213
+
214
+ # try:
215
+ # #model = ctransformers.AutoModelForCausalLM.from_pretrained('Aryanne/Orca-Mini-3B-gguf', model_type='llama', model_file='q5_0-orca-mini-3b.gguf', **vars(gpu_config)) # **asdict(CtransRunConfig_cpu())
216
+ # #model = ctransformers.AutoModelForCausalLM.from_pretrained('Aryanne/Wizard-Orca-3B-gguf', model_type='llama', model_file='q4_1-wizard-orca-3b.gguf', **vars(gpu_config)) # **asdict(CtransRunConfig_cpu())
217
+ # #model = ctransformers.AutoModelForCausalLM.from_pretrained('TheBloke/Mistral-7B-OpenOrca-GGUF', model_type='mistral', model_file='mistral-7b-openorca.Q4_K_M.gguf', **vars(gpu_config), hf=True) # **asdict(CtransRunConfig_cpu())
218
+ # #model = ctransformers.AutoModelForCausalLM.from_pretrained('TheBloke/OpenHermes-2.5-Mistral-7B-16k-GGUF', model_type='mistral', model_file='openhermes-2.5-mistral-7b-16k.Q4_K_M.gguf', **vars(gpu_config), hf=True) # **asdict(CtransRunConfig_cpu())
219
+ # model = ctransformers.AutoModelForCausalLM.from_pretrained('NousResearch/Nous-Capybara-7B-V1.9-GGUF', model_type='mistral', model_file='Capybara-7B-V1.9-Q5_K_M.gguf', **vars(gpu_config), hf=True) # **asdict(CtransRunConfig_cpu())
220
+
221
+
222
+ # tokenizer = AutoTokenizer.from_pretrained("NousResearch/Nous-Capybara-7B-V1.9")
223
+ # summariser = pipeline("text-generation", model=model, tokenizer=tokenizer)
224
+
225
+ # except:
226
+ # #model = ctransformers.AutoModelForCausalLM.from_pretrained('Aryanne/Orca-Mini-3B-gguf', model_type='llama', model_file='q5_0-orca-mini-3b.gguf', **vars(cpu_config)) #**asdict(CtransRunConfig_gpu())
227
+ # #model = ctransformers.AutoModelForCausalLM.from_pretrained('Aryanne/Wizard-Orca-3B-gguf', model_type='llama', model_file='q4_1-wizard-orca-3b.gguf', **vars(cpu_config)) # **asdict(CtransRunConfig_cpu())
228
+ # #model = ctransformers.AutoModelForCausalLM.from_pretrained('TheBloke/Mistral-7B-OpenOrca-GGUF', model_type='mistral', model_file='mistral-7b-openorca.Q4_K_M.gguf', **vars(cpu_config), hf=True) # **asdict(CtransRunConfig_cpu())
229
+ # #model = ctransformers.AutoModelForCausalLM.from_pretrained('TheBloke/OpenHermes-2.5-Mistral-7B-16k-GGUF', model_type='mistral', model_file='openhermes-2.5-mistral-7b-16k.Q4_K_M.gguf', **vars(gpu_config), hf=True) # **asdict(CtransRunConfig_cpu())
230
+ # model = ctransformers.AutoModelForCausalLM.from_pretrained('NousResearch/Nous-Capybara-7B-V1.9-GGUF', model_type='mistral', model_file='Capybara-7B-V1.9-Q5_K_M.gguf', **vars(gpu_config), hf=True) # **asdict(CtransRunConfig_cpu())
231
+
232
+ # #tokenizer = ctransformers.AutoTokenizer.from_pretrained(model)
233
+
234
+ # tokenizer = AutoTokenizer.from_pretrained("NousResearch/Nous-Capybara-7B-V1.9")
235
+ # summariser = pipeline("text-generation", model=model, tokenizer=tokenizer) # model
236
+
237
+ # #model = []
238
+ # #tokenizer = []
239
+ # #summariser = []
240
+
241
+ # if model_type == "Flan T5 Large Stacked Samsum 1k":
242
+ # # Huggingface chat model
243
+ # hf_checkpoint = 'stacked-summaries/flan-t5-large-stacked-samsum-1024'#'declare-lab/flan-alpaca-base' # # #
244
+
245
+ # summariser, tokenizer, model_type = create_hf_model(model_name = hf_checkpoint)
246
+
247
+ # if model_type == "Long T5 Global Base 16k Book Summary":
248
+ # # Huggingface chat model
249
+ # hf_checkpoint = 'pszemraj/long-t5-tglobal-base-16384-book-summary' #'philschmid/flan-t5-small-stacked-samsum'#'declare-lab/flan-alpaca-base' # # #
250
+ # summariser, tokenizer, model_type = create_hf_model(model_name = hf_checkpoint)
251
 
252
+ # chatf.model = summariser
253
+ # chatf.tokenizer = tokenizer
254
+ # chatf.model_type = model_type
255
 
256
+ # load_confirmation = "Finished loading model: " + model_type
257
 
258
+ # print(load_confirmation)
259
+ # return model_type, load_confirmation, model_type
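The new app.py keeps the stop button working through Gradio event cancellation (stop.click(..., cancels=[summarise_click])). A self-contained sketch of that pattern, with a hypothetical slow_job standing in for the summarisation call:

import time
import gradio as gr

def slow_job(seconds):
    # stand-in for a long-running summarisation
    time.sleep(int(seconds))
    return f"Finished after {int(seconds)} seconds"

with gr.Blocks() as demo:
    seconds = gr.Number(value=30, label="Seconds to run")
    result = gr.Textbox(label="Result")
    run_btn = gr.Button("Run")
    stop_btn = gr.Button("Interrupt processing")

    run_event = run_btn.click(fn=slow_job, inputs=seconds, outputs=result)
    # Passing the event to cancels= aborts it mid-run, as the app does for summarise_click
    stop_btn.click(fn=None, inputs=None, outputs=None, cancels=[run_event])

demo.queue().launch()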
chatfuncs/chatfuncs.py CHANGED
@@ -4,6 +4,7 @@ from typing import TypeVar
4
  # Model packages
5
  import torch.cuda
6
  from transformers import pipeline
 
7
 
8
  torch.cuda.empty_cache()
9
 
@@ -46,41 +47,29 @@ sample = True
46
 
47
 
48
  class CtransInitConfig_gpu:
49
- def __init__(self, temperature=temperature,
50
- top_k=top_k,
51
- top_p=top_p,
52
- repetition_penalty=repetition_penalty,
53
  last_n_tokens=last_n_tokens,
54
- max_new_tokens=max_new_tokens,
55
  seed=seed,
56
- reset=reset,
57
- stream=stream,
58
- threads=threads,
59
- batch_size=batch_size,
60
- context_length=context_length,
61
- gpu_layers=gpu_layers):
62
- self.temperature = temperature
63
- self.top_k = top_k
64
- self.top_p = top_p
65
- self.repetition_penalty = repetition_penalty# repetition_penalty
66
  self.last_n_tokens = last_n_tokens
67
- self.max_new_tokens = max_new_tokens
68
  self.seed = seed
69
- self.reset = reset
70
- self.stream = True #stream
71
- self.threads = threads
72
- self.batch_size = batch_size
73
- self.context_length = context_length
74
- self.gpu_layers = gpu_layers
75
  # self.stop: list[str] = field(default_factory=lambda: [stop_string])
76
 
77
  def update_gpu(self, new_value):
78
- self.gpu_layers = new_value
79
 
80
  class CtransInitConfig_cpu(CtransInitConfig_gpu):
81
  def __init__(self):
82
  super().__init__()
83
- self.gpu_layers = 0
84
 
85
  gpu_config = CtransInitConfig_gpu()
86
  cpu_config = CtransInitConfig_cpu()
@@ -90,22 +79,68 @@ class CtransGenGenerationConfig:
90
  def __init__(self, temperature=temperature,
91
  top_k=top_k,
92
  top_p=top_p,
93
- repetition_penalty=repetition_penalty,
94
- last_n_tokens=last_n_tokens,
95
  seed=seed,
96
- threads=threads,
97
- batch_size=batch_size,
98
- reset=True
99
  ):
100
  self.temperature = temperature
101
  self.top_k = top_k
102
  self.top_p = top_p
103
- self.repetition_penalty = repetition_penalty# repetition_penalty
104
- self.last_n_tokens = last_n_tokens
105
  self.seed = seed
106
- self.threads = threads
107
- self.batch_size = batch_size
108
- self.reset = reset
109
 
110
  def update_temp(self, new_value):
111
- self.temperature = new_value
4
  # Model packages
5
  import torch.cuda
6
  from transformers import pipeline
7
+ import time
8
 
9
  torch.cuda.empty_cache()
10
 
 
47
 
48
 
49
  class CtransInitConfig_gpu:
50
+ def __init__(self,
 
 
 
51
  last_n_tokens=last_n_tokens,
 
52
  seed=seed,
53
+ n_threads=threads,
54
+ n_batch=batch_size,
55
+ n_ctx=32768,
56
+ n_gpu_layers=gpu_layers):
57
+
 
 
 
 
 
58
  self.last_n_tokens = last_n_tokens
 
59
  self.seed = seed
60
+ self.n_threads = n_threads
61
+ self.n_batch = n_batch
62
+ self.n_ctx = n_ctx
63
+ self.n_gpu_layers = n_gpu_layers
 
 
64
  # self.stop: list[str] = field(default_factory=lambda: [stop_string])
65
 
66
  def update_gpu(self, new_value):
67
+ self.n_gpu_layers = new_value
68
 
69
  class CtransInitConfig_cpu(CtransInitConfig_gpu):
70
  def __init__(self):
71
  super().__init__()
72
+ self.n_gpu_layers = 0
73
 
74
  gpu_config = CtransInitConfig_gpu()
75
  cpu_config = CtransInitConfig_cpu()
 
79
  def __init__(self, temperature=temperature,
80
  top_k=top_k,
81
  top_p=top_p,
82
+ repeat_penalty=repetition_penalty,
 
83
  seed=seed,
84
+ stream=stream,
85
+ max_tokens=max_new_tokens
 
86
  ):
87
  self.temperature = temperature
88
  self.top_k = top_k
89
  self.top_p = top_p
90
+ self.repeat_penalty = repeat_penalty
 
91
  self.seed = seed
92
+ self.max_tokens=max_tokens
93
+ self.stream = stream
 
94
 
95
  def update_temp(self, new_value):
96
+ self.temperature = new_value
97
+
98
+
99
+ def llama_cpp_streaming(history, full_prompt, model_type,
100
+ temperature=temperature,
101
+ max_new_tokens=max_new_tokens,
102
+ sample=sample,
103
+ repetition_penalty=repetition_penalty,
104
+ top_p=top_p,
105
+ top_k=top_k
106
+ ):
107
+ #print("Model type is: ", model_type)
108
+
109
+ #if not full_prompt.strip():
110
+ # if history is None:
111
+ # history = []
112
+
113
+ # return history
114
+
115
+ #tokens = model.tokenize(full_prompt)
116
+
117
+ gen_config = CtransGenGenerationConfig()
118
+ gen_config.update_temp(temperature)
119
+
120
+ print(vars(gen_config))
121
+
122
+ # Pull the generated text from the streamer, and update the model output.
123
+ start = time.time()
124
+ NUM_TOKENS=0
125
+ print('-'*4+'Start Generation'+'-'*4)
126
+
127
+ output = model(
128
+ full_prompt, **vars(gen_config))
129
+
130
+ history[-1][1] = ""
131
+ for out in output:
132
+
133
+ if "choices" in out and len(out["choices"]) > 0 and "text" in out["choices"][0]:
134
+ history[-1][1] += out["choices"][0]["text"]
135
+ NUM_TOKENS+=1
136
+ yield history
137
+ else:
138
+ print(f"Unexpected output structure: {out}")
139
+
140
+ time_generate = time.time() - start
141
+ print('\n')
142
+ print('-'*4+'End Generation'+'-'*4)
143
+ print(f'Num of generated tokens: {NUM_TOKENS}')
144
+ print(f'Time for complete generation: {time_generate}s')
145
+ print(f'Tokens per second: {NUM_TOKENS/time_generate}')
146
+ print(f'Time per token: {(time_generate/NUM_TOKENS)*1000}ms')
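The point of the renamed config fields above is that they now match llama-cpp-python's keyword arguments, so they can be splatted straight into the library with vars(). A rough sketch of that mechanism, assuming the chatfuncs package is importable and using a placeholder GGUF path:

from llama_cpp import Llama
from chatfuncs.chatfuncs import CtransInitConfig_cpu, CtransGenGenerationConfig

init_config = CtransInitConfig_cpu()       # n_threads, n_batch, n_ctx, n_gpu_layers, seed...
gen_config = CtransGenGenerationConfig()   # temperature, top_k, top_p, repeat_penalty, max_tokens, stream
gen_config.update_temp(0.3)

# "model.gguf" is a placeholder path; the field names line up with Llama() keyword arguments
llm = Llama(model_path="model.gguf", **vars(init_config))

# stream=True in the generation config makes the call return an iterator of chunks,
# which is what llama_cpp_streaming above walks through
for chunk in llm("Summarise: the cat sat on the mat.", **vars(gen_config)):
    print(chunk["choices"][0]["text"], end="")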
chatfuncs/helper_functions.py CHANGED
@@ -73,11 +73,11 @@ def read_file(filename):
  print("Loading in file")
 
  if file_type == 'csv':
- file = pd.read_csv(filename, low_memory=False).reset_index().drop(["index", "Unnamed: 0"], axis=1, errors="ignore")
+ file = pd.read_csv(filename, low_memory=False).reset_index(drop=True).drop(["index", "Unnamed: 0"], axis=1, errors="ignore")
  elif file_type == 'xlsx':
- file = pd.read_excel(filename).reset_index().drop(["index", "Unnamed: 0"], axis=1, errors="ignore")
+ file = pd.read_excel(filename).reset_index(drop=True).drop(["index", "Unnamed: 0"], axis=1, errors="ignore")
  elif file_type == 'parquet':
- file = pd.read_parquet(filename).reset_index().drop(["index", "Unnamed: 0"], axis=1, errors="ignore")
+ file = pd.read_parquet(filename).reset_index(drop=True).drop(["index", "Unnamed: 0"], axis=1, errors="ignore")
  elif file_type == 'pkl.gz':
  with gzip.open(filename, 'rb') as file:
  file = pickle.load(file)
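A small illustration of why read_file switched to reset_index(drop=True) (the data frame here is made up): the old call re-inserted the index as a column that then had to be dropped again.

import pandas as pd

df = pd.DataFrame({"text": ["a", "b"]}, index=[5, 9])

# reset_index() keeps the old index as a new "index" column...
print(df.reset_index().columns.tolist())            # ['index', 'text']

# ...while drop=True discards it, so the follow-up .drop("index", ...) has nothing left to remove
print(df.reset_index(drop=True).columns.tolist())   # ['text']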
chatfuncs/prompts.py CHANGED
@@ -27,4 +27,14 @@ In 1994, when a major affiliation switch threatened to leave CBS without an affi
  ASSISTANT:"""
 
  nous_capybara_prompt = """USER:Summarise the following text in less than {length} words "{text}"
- ASSISTANT:"""
+ ASSISTANT:"""
+
+ instruction_prompt_phi3 = """<|user|>\n
+ Summarise the following text in less than {length} words: "{text}"\n
+ Summary:<|end|>\n
+ <|assistant|>"""
+
+ instruction_prompt_llama3 = """<|start_header_id|>system<|end_header_id|>\n
+ You are an AI assistant that follows instruction extremely well. Help as much as you can.<|eot_id|><|start_header_id|>user<|end_header_id|>\n
+ Summarise the following text in less than {length} words: "{text}"\n
+ Summary:<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n"""
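The new templates are plain str.format strings with two placeholders; usage looks like this (the example text is invented):

from chatfuncs.prompts import instruction_prompt_phi3

# Only {length} and {text} are substituted; the Phi 3 chat tags stay literal
formatted = instruction_prompt_phi3.format(length="100", text="The cat sat on the mat.")
print(formatted)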
chatfuncs/summarise_funcs.py ADDED
@@ -0,0 +1,268 @@
1
+ import pandas as pd
2
+ import concurrent.futures
3
+ import gradio as gr
4
+ from chatfuncs.chatfuncs import model, CtransGenGenerationConfig, temperature
5
+ from datetime import datetime
6
+
7
+ today = datetime.now().strftime("%d%m%Y")
8
+ today_rev = datetime.now().strftime("%Y%m%d")
9
+
10
+ def summarise_text(text, text_df, length_slider, in_colname, model_type, progress=gr.Progress()):
11
+
12
+ if text_df.empty:
13
+ in_colname="text"
14
+ in_colname_list_first = in_colname
15
+
16
+ in_text_df = pd.DataFrame({in_colname_list_first:[text]})
17
+
18
+ else:
19
+ in_text_df = text_df
20
+ in_colname_list_first = in_colname
21
+
22
+ print(model_type)
23
+
24
+ texts_list = list(in_text_df[in_colname_list_first])
25
+
26
+ if model_type != "Phi 3 128k (larger, slow)":
27
+ summarised_texts = []
28
+
29
+ for single_text in progress.tqdm(texts_list, desc = "Summarising texts", unit = "texts"):
30
+
31
+ summarised_text = model(single_text, max_length=length_slider)
32
+
33
+ #print(summarised_text)
34
+
35
+ summarised_text_str = summarised_text[0]['summary_text']
36
+
37
+ summarised_texts.append(summarised_text_str)
38
+
39
+ print(summarised_text_str)
40
+
41
+ #pd.Series(summarised_texts).to_csv("summarised_texts_out.csv")
42
+
43
+ #print(summarised_texts)
44
+
45
+ if model_type == "Phi 3 128k (larger, slow)":
46
+
47
+ gen_config = CtransGenGenerationConfig()
48
+ gen_config.update_temp(temperature)
49
+
50
+ print(gen_config)
51
+
52
+ # Define a function that calls your model
53
+ # def call_model(formatted_string):#, vars):
54
+ # return model(formatted_string)#, vars)
55
+
56
+ def call_model(formatted_string, gen_config):
57
+ """
58
+ Calls your generation model with parameters from the CtransGenGenerationConfig object.
59
+
60
+ Args:
61
+ formatted_string (str): The formatted input text for the model.
62
+ gen_config (CtransGenGenerationConfig): An object containing generation parameters.
63
+ """
64
+ # Extracting parameters from the gen_config object
65
+ temperature = gen_config.temperature
66
+ top_k = gen_config.top_k
67
+ top_p = gen_config.top_p
68
+ repeat_penalty = gen_config.repeat_penalty
69
+ seed = gen_config.seed
70
+ max_tokens = gen_config.max_tokens
71
+ stream = gen_config.stream
72
+
73
+ # Now you can call your model directly, passing the parameters:
74
+ output = model(
75
+ formatted_string,
76
+ temperature=temperature,
77
+ top_k=top_k,
78
+ top_p=top_p,
79
+ repeat_penalty=repeat_penalty,
80
+ seed=seed,
81
+ max_tokens=max_tokens,
82
+ stream=stream,
83
+ )
84
+
85
+ return output
86
+
87
+ # Set your timeout duration (in seconds)
88
+ timeout_duration = 300 # Adjust this value as needed
89
+
90
+ length = str(length_slider)
91
+
92
+ from chatfuncs.prompts import instruction_prompt_phi3
93
+
94
+ summarised_texts = []
95
+
96
+ for single_text in progress.tqdm(texts_list, desc = "Summarising texts", unit = "texts"):
97
+
98
+ formatted_string = instruction_prompt_phi3.format(length=length, text=single_text)
99
+
100
+ # Use ThreadPoolExecutor to enforce a timeout
101
+ with concurrent.futures.ThreadPoolExecutor() as executor:
102
+ #future = executor.submit(call_model, formatted_string)#, **vars(gen_config))
103
+ future = executor.submit(call_model, formatted_string, gen_config)
104
+ try:
105
+ output = future.result(timeout=timeout_duration)
106
+ # Process the output here
107
+ except concurrent.futures.TimeoutError:
108
+ error_text = f"Timeout (five minutes) occurred for text: {single_text}. Consider using a smaller model."
109
+ print(error_text)
110
+ return error_text, None
111
+
112
+ print(output)
113
+
114
+ output_str = output['choices'][0]['text']
115
+
116
+ # Find the index of 'ASSISTANT: ' to select only text after this location
117
+ # index = output_str.find('ASSISTANT: ')
118
+
119
+ # # Check if 'ASSISTANT: ' is found in the string
120
+ # if index != -1:
121
+ # # Add the length of 'ASSISTANT: ' to the index to start from the end of this substring
122
+ # start_index = index + len('ASSISTANT: ')
123
+
124
+ # # Slice the string from this point to the end
125
+ # assistant_text = output_str[start_index:]
126
+ # else:
127
+ # assistant_text = "ASSISTANT: not found in text"
128
+
129
+ # print(assistant_text)
130
+
131
+ #summarised_texts.append(assistant_text)
132
+
133
+ summarised_texts.append(output_str)
134
+
135
+ #print(summarised_text)
136
+
137
+ #pd.Series(summarised_texts).to_csv("summarised_texts_out.csv")
138
+
139
+ if text_df.empty:
140
+ #if model_type != "Phi 3 128k (larger, slow)":
141
+ summarised_text_out = summarised_texts[0]#.values()
142
+
143
+ #if model_type == "Phi 3 128k (larger, slow)":
144
+ # summarised_text_out = summarised_texts[0]
145
+
146
+ else:
147
+ summarised_text_out = summarised_texts #[d['summary_text'] for d in summarised_texts] #summarised_text[0].values()
148
+
149
+ output_name = "summarise_output_" + today_rev + ".csv"
150
+ output_df = pd.DataFrame({"Original text":in_text_df[in_colname_list_first],
151
+ "Summarised text":summarised_text_out})
152
+
153
+ summarised_text_out_str = str(output_df["Summarised text"][0])#.str.replace("dict_values([","").str.replace("])",""))
154
+
155
+ output_df.to_csv(output_name, index = None)
156
+
157
+ return summarised_text_out_str, output_name
158
+
159
+
160
+ # def summarise_text(text, text_df, length_slider, in_colname, model_type, progress=gr.Progress()):
161
+
162
+ # if text_df.empty:
163
+ # in_colname="text"
164
+ # in_colname_list_first = in_colname
165
+
166
+ # in_text_df = pd.DataFrame({in_colname_list_first:[text]})
167
+
168
+ # else:
169
+ # in_text_df = text_df
170
+ # in_colname_list_first = in_colname
171
+
172
+ # print(model_type)
173
+
174
+ # texts_list = list(in_text_df[in_colname_list_first])
175
+
176
+ # if model_type != "Phi 3 128k (larger, slow)":
177
+ # summarised_texts = []
178
+
179
+ # for single_text in progress.tqdm(texts_list, desc = "Summarising texts", unit = "texts"):
180
+ # summarised_text = chatf.model(single_text, max_length=length_slider)
181
+
182
+ # #print(summarised_text)
183
+
184
+ # summarised_text_str = summarised_text[0]['summary_text']
185
+
186
+ # summarised_texts.append(summarised_text_str)
187
+
188
+ # print(summarised_text_str)
189
+
190
+ # #pd.Series(summarised_texts).to_csv("summarised_texts_out.csv")
191
+
192
+ # #print(summarised_texts)
193
+
194
+ # if model_type == "Phi 3 128k (larger, slow)":
195
+
196
+
197
+ # # Define a function that calls your model
198
+ # def call_model(formatted_string, max_length=10000):
199
+ # return chatf.model(formatted_string, max_length=max_length)
200
+
201
+ # # Set your timeout duration (in seconds)
202
+ # timeout_duration = 300 # Adjust this value as needed
203
+
204
+ # length = str(length_slider)
205
+
206
+ # from chatfuncs.prompts import nous_capybara_prompt
207
+
208
+ # summarised_texts = []
209
+
210
+ # for single_text in progress.tqdm(texts_list, desc = "Summarising texts", unit = "texts"):
211
+
212
+ # formatted_string = nous_capybara_prompt.format(length=length, text=single_text)
213
+
214
+ # # Use ThreadPoolExecutor to enforce a timeout
215
+ # with concurrent.futures.ThreadPoolExecutor() as executor:
216
+ # future = executor.submit(call_model, formatted_string, 10000)
217
+ # try:
218
+ # output = future.result(timeout=timeout_duration)
219
+ # # Process the output here
220
+ # except concurrent.futures.TimeoutError:
221
+ # error_text = f"Timeout (five minutes) occurred for text: {single_text}. Consider using a smaller model."
222
+ # print(error_text)
223
+ # return error_text, None
224
+
225
+ # print(output)
226
+
227
+ # output_str = output[0]['generated_text']
228
+
229
+ # # Find the index of 'ASSISTANT: ' to select only text after this location
230
+ # index = output_str.find('ASSISTANT: ')
231
+
232
+ # # Check if 'ASSISTANT: ' is found in the string
233
+ # if index != -1:
234
+ # # Add the length of 'ASSISTANT: ' to the index to start from the end of this substring
235
+ # start_index = index + len('ASSISTANT: ')
236
+
237
+ # # Slice the string from this point to the end
238
+ # assistant_text = output_str[start_index:]
239
+ # else:
240
+ # assistant_text = "ASSISTANT: not found in text"
241
+
242
+ # print(assistant_text)
243
+
244
+ # summarised_texts.append(assistant_text)
245
+
246
+ # #print(summarised_text)
247
+
248
+ # #pd.Series(summarised_texts).to_csv("summarised_texts_out.csv")
249
+
250
+ # if text_df.empty:
251
+ # #if model_type != "Phi 3 128k (larger, slow)":
252
+ # summarised_text_out = summarised_texts[0]#.values()
253
+
254
+ # #if model_type == "Phi 3 128k (larger, slow)":
255
+ # # summarised_text_out = summarised_texts[0]
256
+
257
+ # else:
258
+ # summarised_text_out = summarised_texts #[d['summary_text'] for d in summarised_texts] #summarised_text[0].values()
259
+
260
+ # output_name = "summarise_output_" + today_rev + ".csv"
261
+ # output_df = pd.DataFrame({"Original text":in_text_df[in_colname_list_first],
262
+ # "Summarised text":summarised_text_out})
263
+
264
+ # summarised_text_out_str = str(output_df["Summarised text"][0])#.str.replace("dict_values([","").str.replace("])",""))
265
+
266
+ # output_df.to_csv(output_name, index = None)
267
+
268
+ # return summarised_text_out_str, output_name
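summarise_text guards each Phi 3 call with a five-minute future timeout. The same pattern in isolation, with a hypothetical call_model stand-in (note that the worker thread is not killed on timeout; only its result is abandoned):

import concurrent.futures
import time

def call_model(prompt):
    # stand-in for the llama-cpp-python call; sleep simulates slow generation
    time.sleep(2)
    return {"choices": [{"text": f"summary of: {prompt}"}]}

timeout_duration = 5  # summarise_funcs.py uses 300 seconds per text

with concurrent.futures.ThreadPoolExecutor() as executor:
    future = executor.submit(call_model, "some long input text")
    try:
        output = future.result(timeout=timeout_duration)
        print(output["choices"][0]["text"])
    except concurrent.futures.TimeoutError:
        print("Timeout occurred. Consider using a smaller model.")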
requirements.txt CHANGED
@@ -1,6 +1,6 @@
- gradio==3.50.0
+ gradio==4.36.0
  transformers
- torch
  pyarrow
  openpyxl
- ctransformers[cuda]
+ llama-cpp-python==0.2.77 --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu121
+ torch==2.3.1 --extra-index-url https://download.pytorch.org/whl/cu121