seanpedrickcase committed
Commit 3809dc8
1 Parent(s): 9ad3bc3

Dockerfile now downloads models to a local folder. Output can be written to a custom folder. Requirements for GPU-enabled summarisation are now in a separate file to hopefully avoid HF Spaces issues.

.dockerignore ADDED
@@ -0,0 +1,7 @@
+ *.pyc
+ *.ipynb
+ *.csv
+ *.parquet
+ tests/*
+ output/*
+ model/*
.gitignore CHANGED
@@ -1,3 +1,7 @@
  *.pyc
  *.ipynb
- *.csv
+ *.csv
+ *.parquet
+ tests/*
+ output/*
+ model/*
Dockerfile CHANGED
@@ -1,30 +1,66 @@
- FROM python:3.10

  WORKDIR /src

  COPY requirements.txt .

- RUN pip install --no-cache-dir -r requirements.txt

- # Set up a new user named "user" with user ID 1000
- RUN useradd -m -u 1000 user
- # Switch to the "user" user
  USER user
- # Set home to the user's home directory
  ENV HOME=/home/user \
-     PATH=/home/user/.local/bin:$PATH \
      PYTHONPATH=$HOME/app \
-     PYTHONUNBUFFERED=1 \
-     GRADIO_ALLOW_FLAGGING=never \
-     GRADIO_NUM_PORTS=1 \
-     GRADIO_SERVER_NAME=0.0.0.0 \
-     GRADIO_THEME=huggingface \
-     SYSTEM=spaces
-
- # Set the working directory to the user's home directory
- WORKDIR $HOME/app

  # Copy the current directory contents into the container at $HOME/app setting the owner to the user
- COPY --chown=user . $HOME/app

  CMD ["python", "app.py"]
 
+ # First stage: build dependencies
+ FROM public.ecr.aws/docker/library/python:3.11.9-slim-bookworm
+
+ # Install Lambda web adapter
+ COPY --from=public.ecr.aws/awsguru/aws-lambda-adapter:0.8.3 /lambda-adapter /opt/extensions/lambda-adapter
+
+ # Install wget, git, curl
+ RUN apt-get update && \
+     apt-get install -y wget git curl && \
+     apt-get clean && rm -rf /var/lib/apt/lists/*

  WORKDIR /src

  COPY requirements.txt .

+ # Optimized dependency installation
+ RUN pip install --no-cache-dir -r requirements.txt && \
+     pip install --no-cache-dir gradio==4.36.0

+ # Create a directory for the models and switch to user
+ RUN mkdir /model && \
+     useradd -m -u 1000 user && \
+     chown -R user:user /model
  USER user
+
+ WORKDIR /home/user
+
+ # Download the GGUF model to local model/phi directory:
+ ENV REPO_ID "QuantFactory/Phi-3-mini-128k-instruct-GGUF"
+ ENV MODEL_FILE "Phi-3-mini-128k-instruct.Q4_K_M.gguf"
+
+ RUN python -c "from huggingface_hub import hf_hub_download; \
+     hf_hub_download(repo_id='$REPO_ID', filename='$MODEL_FILE', local_dir='/model/phi')"
+
+ # Download the transformers-based models
+ RUN curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | bash && \
+     apt-get install -y git-lfs && \
+     git lfs install
+
+ RUN git clone https://huggingface.co/stacked-summaries/flan-t5-large-stacked-samsum-1024 /model/stacked_t5 && \
+     rm -rf /model/stacked_t5/.git && \
+     git clone https://huggingface.co/pszemraj/long-t5-tglobal-base-16384-book-summary /model/long_t5 && \
+     rm -rf /model/long_t5/.git
+
+
  ENV HOME=/home/user \
+     PATH=/home/user/.local/bin:$PATH \
      PYTHONPATH=$HOME/app \
+     PYTHONUNBUFFERED=1 \
+     GRADIO_ALLOW_FLAGGING=never \
+     GRADIO_NUM_PORTS=1 \
+     GRADIO_SERVER_NAME=0.0.0.0 \
+     GRADIO_SERVER_PORT=7860 \
+     GRADIO_THEME=huggingface \
+     SYSTEM=spaces
+
+ # Switch back to root to copy the app files
+ USER root
+ WORKDIR /home/user/app

  # Copy the current directory contents into the container at $HOME/app setting the owner to the user
+ COPY --chown=user . $HOME/user/app

+ # Switch back to the user to run the app
+ USER user
  CMD ["python", "app.py"]
app.py CHANGED
@@ -14,9 +14,11 @@ PandasDataFrame = Type[pd.DataFrame]
  import chatfuncs.chatfuncs as chatf
  import chatfuncs.summarise_funcs as sumf

- from chatfuncs.helper_functions import dummy_function, put_columns_in_df
  from chatfuncs.summarise_funcs import summarise_text

  # Disable cuda devices if necessary
  #os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

@@ -34,11 +36,28 @@ else:

  print("Device used is: ", torch_device)

- def create_hf_model(model_name):

-     tokenizer = AutoTokenizer.from_pretrained(model_name, model_max_length = chatf.context_length)

-     summariser = pipeline("summarization", model=model_name, tokenizer=tokenizer) # philschmid/bart-large-cnn-samsum

      return summariser, tokenizer, model_name

@@ -53,7 +72,7 @@ def load_model(model_type, gpu_layers, gpu_config=None, cpu_config=None, torch_d
      if torch_device is None:
          torch_device = chatf.torch_device

-     if model_type == "Phi 3 128k (larger, slow)":
          if torch_device == "cuda":
              gpu_config.update_gpu(gpu_layers)
              print("Loading with", gpu_config.n_gpu_layers, "model layers sent to GPU.")
@@ -66,38 +85,43 @@ def load_model(model_type, gpu_layers, gpu_config=None, cpu_config=None, torch_d
      print(vars(gpu_config))
      print(vars(cpu_config))

      try:
-         summariser = Llama(
-             model_path=hf_hub_download(
-                 repo_id=os.environ.get("REPO_ID", "QuantFactory/Phi-3-mini-128k-instruct-GGUF"),# "QuantFactory/Phi-3-mini-128k-instruct-GGUF"), # "QuantFactory/Meta-Llama-3-8B-Instruct-GGUF-v2"), #"microsoft/Phi-3-mini-4k-instruct-gguf"),#"TheBloke/Mistral-7B-OpenOrca-GGUF"),
-                 filename=os.environ.get("MODEL_FILE", "Phi-3-mini-128k-instruct.Q4_K_M.gguf") #"Phi-3-mini-128k-instruct.Q4_K_M.gguf") #"Meta-Llama-3-8B-Instruct-v2.Q6_K.gguf") #"Phi-3-mini-4k-instruct-q4.gguf")#"mistral-7b-openorca.Q4_K_M.gguf"),
-             ),
-             **vars(gpu_config) # change n_gpu_layers if you have more or less VRAM
-         )

      except Exception as e:
          print("GPU load failed")
          print(e)
-         summariser = Llama(
-             model_path=hf_hub_download(
-                 repo_id=os.environ.get("REPO_ID", "QuantFactory/Phi-3-mini-128k-instruct-GGUF"), #"QuantFactory/Phi-3-mini-128k-instruct-GGUF"), #, "microsoft/Phi-3-mini-4k-instruct-gguf"),#"QuantFactory/Meta-Llama-3-8B-Instruct-GGUF-v2"), #"microsoft/Phi-3-mini-4k-instruct-gguf"),#"TheBloke/Mistral-7B-OpenOrca-GGUF"),
-                 filename=os.environ.get("MODEL_FILE", "Phi-3-mini-128k-instruct.Q4_K_M.gguf"), # "Phi-3-mini-128k-instruct.Q4_K_M.gguf") # , #"Meta-Llama-3-8B-Instruct-v2.Q6_K.gguf") #"Phi-3-mini-4k-instruct-q4.gguf"),#"mistral-7b-openorca.Q4_K_M.gguf"),
-             ),
-             **vars(cpu_config)
-         )

      tokenizer = []

      if model_type == "Flan T5 Large Stacked Samsum 1k":
          # Huggingface chat model
-         hf_checkpoint = 'stacked-summaries/flan-t5-large-stacked-samsum-1024'#'declare-lab/flan-alpaca-base' # # #
-
-         summariser, tokenizer, model_type = create_hf_model(model_name = hf_checkpoint)

      if model_type == "Long T5 Global Base 16k Book Summary":
          # Huggingface chat model
-         hf_checkpoint = 'pszemraj/long-t5-tglobal-base-16384-book-summary' #'philschmid/flan-t5-small-stacked-samsum'#'declare-lab/flan-alpaca-base' # # #
-         summariser, tokenizer, model_type = create_hf_model(model_name = hf_checkpoint)

      sumf.model = summariser
      sumf.tokenizer = tokenizer
@@ -109,7 +133,7 @@ def load_model(model_type, gpu_layers, gpu_config=None, cpu_config=None, torch_d
      return model_type, load_confirmation, model_type

  # Both models are loaded on app initialisation so that users don't have to wait for the models to be downloaded
- model_type = "Phi 3 128k (larger, slow)"
  load_model(model_type, chatf.gpu_layers, chatf.gpu_config, chatf.cpu_config, chatf.torch_device)

  model_type = "Flan T5 Large Stacked Samsum 1k"
@@ -133,7 +157,7 @@ with block:
      gr.Markdown(
      """
      # Text summariser
-     Enter open text below to get a summary. You can copy and paste text directly, or upload a file and specify the column that you want to summarise. The default small model will be able to summarise up to about 16,000 words, but the quality may not be great. The larger model around 900 words of better quality. Summarisation with Phi 3 128k works on up to around 4,000 words, and may give a higher quality summary, but will be slow, and it may not respect your desired maximum word count.
      """)

      with gr.Tab("Summariser"):
@@ -149,7 +173,7 @@ with block:
          with gr.Row():
              summarise_btn = gr.Button("Summarise", variant="primary")
              stop = gr.Button(value="Interrupt processing", variant="secondary", scale=0)
-             length_slider = gr.Slider(minimum = 30, maximum = 500, value = 100, step = 10, label = "Maximum length of summary")

          with gr.Row():
              output_single_text = gr.Textbox(label="Output example (first example in dataset)")
@@ -157,12 +181,12 @@ with block:

      with gr.Tab("Advanced features"):
          with gr.Row():
-             model_choice = gr.Radio(label="Choose a summariser model", value="Long T5 Global Base 16k Book Summary", choices = ["Long T5 Global Base 16k Book Summary", "Flan T5 Large Stacked Samsum 1k", "Phi 3 128k (larger, slow)"])
              change_model_button = gr.Button(value="Load model", scale=0)
          with gr.Accordion("Choose number of model layers to send to GPU (WARNING: please don't modify unless you are sure you have a GPU).", open = False):
              gpu_layer_choice = gr.Slider(label="Choose number of model layers to send to GPU.", value=0, minimum=0, maximum=100, step = 1, visible=True)
          with gr.Accordion("LLM parameters"):
-             temp_slide = gr.Slider(minimum=0.1, value = 0.5, maximum=1, step=0.1, label="Choose temperature setting for response generation.")

          load_text = gr.Text(label="Load status")

@@ -172,7 +196,7 @@ with block:
      change_model_button.click(fn=load_model, inputs=[model_choice, gpu_layer_choice], outputs = [model_type_state, load_text, current_model])

      summarise_click = summarise_btn.click(fn=summarise_text, inputs=[in_text, data_state, length_slider, in_colname, model_type_state],
-                 outputs=[output_single_text, output_file], api_name="summarise_single_text")
      # summarise_enter = summarise_btn.submit(fn=summarise_text, inputs=[in_text, data_state, length_slider, in_colname, model_type_state],
      #                 outputs=[output_single_text, output_file])

@@ -184,7 +208,7 @@ with block:
      # Dummy function to allow dropdown modification to work correctly (strange thing needed for Gradio 3.50, will be deprecated upon upgrading Gradio version)
      in_colname.change(dummy_function, in_colname, None)

- block.queue().launch()

  # def load_model(model_type, gpu_layers, gpu_config=None, cpu_config=None, torch_device=None):
  #     print("Loading model ", model_type)
@@ -197,7 +221,7 @@ block.queue().launch()
  #     if torch_device is None:
  #         torch_device = chatf.torch_device

- #     if model_type == "Phi 3 128k (larger, slow)":
  #         hf_checkpoint = 'NousResearch/Nous-Capybara-7B-V1.9-GGUF'

  #     if torch_device == "cuda":
 
  import chatfuncs.chatfuncs as chatf
  import chatfuncs.summarise_funcs as sumf

+ from chatfuncs.helper_functions import dummy_function, put_columns_in_df, output_folder, ensure_output_folder_exists
  from chatfuncs.summarise_funcs import summarise_text

+ ensure_output_folder_exists(output_folder)
+
  # Disable cuda devices if necessary
  #os.environ['CUDA_VISIBLE_DEVICES'] = '-1'


  print("Device used is: ", torch_device)

+ def create_hf_model(model_name, local_model_dir="model/t5_long"):
+
+     # Construct the expected local model path
+     local_model_path = os.path.join(local_model_dir, model_name)
+
+     # Check if the model directory exists
+     if os.path.exists(local_model_path):
+         print(f"Model '{model_name}' found locally at: {local_model_path}")
+
+         # Load tokenizer and pipeline from local path
+         tokenizer = AutoTokenizer.from_pretrained(local_model_path, model_max_length=chatf.context_length)
+         summariser = pipeline("summarization", model=local_model_path, tokenizer=tokenizer)

+     else:
+         print(f"Downloading model '{model_name}' from Hugging Face Hub...")
+
+         # Download tokenizer and pipeline from Hugging Face Hub
+         tokenizer = AutoTokenizer.from_pretrained(model_name, model_max_length=chatf.context_length)
+         summariser = pipeline("summarization", model=model_name, tokenizer=tokenizer)

+         # Save the model locally (optional, but recommended for future use)
+         #summariser.save_pretrained(local_model_path)

      return summariser, tokenizer, model_name


      if torch_device is None:
          torch_device = chatf.torch_device

+     if model_type == "Phi 3 128k (24k tokens max)":
          if torch_device == "cuda":
              gpu_config.update_gpu(gpu_layers)
              print("Loading with", gpu_config.n_gpu_layers, "model layers sent to GPU.")

      print(vars(gpu_config))
      print(vars(cpu_config))

+     def get_model_path():
+         repo_id = os.environ.get("REPO_ID", "QuantFactory/Phi-3-mini-128k-instruct-GGUF")
+         filename = os.environ.get("MODEL_FILE", "Phi-3-mini-128k-instruct.Q4_K_M.gguf")
+         model_dir = "model/phi" # Assuming this is your intended directory
+
+         # Construct the expected local path
+         local_path = os.path.join(model_dir, filename)
+
+         if os.path.exists(local_path):
+             print(f"Model already exists at: {local_path}")
+             return local_path
+         else:
+             print(f"Checking default Hugging Face folder. Downloading model from Hugging Face Hub if not found")
+             return hf_hub_download(repo_id=repo_id, filename=filename)
+
+     model_path = get_model_path()
+
+
      try:
+         summariser = Llama(model_path=model_path, **vars(gpu_config))

      except Exception as e:
          print("GPU load failed")
          print(e)
+         summariser = Llama(model_path=model_path, **vars(cpu_config))

      tokenizer = []

      if model_type == "Flan T5 Large Stacked Samsum 1k":
          # Huggingface chat model
+         hf_checkpoint = 'stacked-summaries/flan-t5-large-stacked-samsum-1024'
+         summariser, tokenizer, model_type = create_hf_model(model_name = hf_checkpoint, local_model_dir="model/t5_stacked")

      if model_type == "Long T5 Global Base 16k Book Summary":
          # Huggingface chat model
+         hf_checkpoint = 'pszemraj/long-t5-tglobal-base-16384-book-summary'
+         summariser, tokenizer, model_type = create_hf_model(model_name = hf_checkpoint, local_model_dir="model/t5_long")

      sumf.model = summariser
      sumf.tokenizer = tokenizer

      return model_type, load_confirmation, model_type

  # Both models are loaded on app initialisation so that users don't have to wait for the models to be downloaded
+ model_type = "Phi 3 128k (24k tokens max)"
  load_model(model_type, chatf.gpu_layers, chatf.gpu_config, chatf.cpu_config, chatf.torch_device)

  model_type = "Flan T5 Large Stacked Samsum 1k"

      gr.Markdown(
      """
      # Text summariser
+     Enter open text below to get a summary. You can copy and paste text directly, or upload a file and specify the column that you want to summarise. The default small model can summarise up to about 12,000 words, but the quality may not be great. The larger model handles around 800 words with better quality. Summarisation with Phi 3 128k works on up to around 20,000 words (suitable for a 12GB graphics card without out-of-memory issues) and may give a higher quality summary, but it will be slow and may not respect your desired maximum word count.
      """)

      with gr.Tab("Summariser"):

          with gr.Row():
              summarise_btn = gr.Button("Summarise", variant="primary")
              stop = gr.Button(value="Interrupt processing", variant="secondary", scale=0)
+             length_slider = gr.Slider(minimum = 30, maximum = 1000, value = 500, step = 10, label = "Maximum length of summary (in words)")

          with gr.Row():
              output_single_text = gr.Textbox(label="Output example (first example in dataset)")


      with gr.Tab("Advanced features"):
          with gr.Row():
+             model_choice = gr.Radio(label="Choose a summariser model", value="Long T5 Global Base 16k Book Summary", choices = ["Long T5 Global Base 16k Book Summary", "Flan T5 Large Stacked Samsum 1k", "Phi 3 128k (24k tokens max)"])
              change_model_button = gr.Button(value="Load model", scale=0)
          with gr.Accordion("Choose number of model layers to send to GPU (WARNING: please don't modify unless you are sure you have a GPU).", open = False):
              gpu_layer_choice = gr.Slider(label="Choose number of model layers to send to GPU.", value=0, minimum=0, maximum=100, step = 1, visible=True)
          with gr.Accordion("LLM parameters"):
+             temp_slide = gr.Slider(minimum=0.1, value = 0.5, maximum=1, step=0.1, label="Choose temperature setting for response generation.", interactive=True)

          load_text = gr.Text(label="Load status")

      change_model_button.click(fn=load_model, inputs=[model_choice, gpu_layer_choice], outputs = [model_type_state, load_text, current_model])

      summarise_click = summarise_btn.click(fn=summarise_text, inputs=[in_text, data_state, length_slider, in_colname, model_type_state],
+                 outputs=[output_single_text, output_file], api_name="summarise")
      # summarise_enter = summarise_btn.submit(fn=summarise_text, inputs=[in_text, data_state, length_slider, in_colname, model_type_state],
      #                 outputs=[output_single_text, output_file])

      # Dummy function to allow dropdown modification to work correctly (strange thing needed for Gradio 3.50, will be deprecated upon upgrading Gradio version)
      in_colname.change(dummy_function, in_colname, None)

+ block.queue().launch(show_error=True)

  # def load_model(model_type, gpu_layers, gpu_config=None, cpu_config=None, torch_device=None):
  #     print("Loading model ", model_type)

  #     if torch_device is None:
  #         torch_device = chatf.torch_device

+ #     if model_type == "Phi 3 128k (24k tokens max)":
  #         hf_checkpoint = 'NousResearch/Nous-Capybara-7B-V1.9-GGUF'

  #     if torch_device == "cuda":
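
create_hf_model() now looks for each Transformers checkpoint under os.path.join(local_model_dir, model_name) before falling back to the Hub. A rough sketch of pre-populating those folders outside Docker (this uses snapshot_download rather than the git-lfs clone in the Dockerfile, and the CHECKPOINTS mapping is illustrative, not part of the commit):

```python
import os
from huggingface_hub import snapshot_download

# The (checkpoint, folder) pairs used by load_model() above
CHECKPOINTS = {
    "stacked-summaries/flan-t5-large-stacked-samsum-1024": "model/t5_stacked",
    "pszemraj/long-t5-tglobal-base-16384-book-summary": "model/t5_long",
}

for repo_id, local_dir in CHECKPOINTS.items():
    # Mirror the os.path.join(local_model_dir, model_name) lookup in create_hf_model()
    target = os.path.join(local_dir, repo_id)
    if not os.path.exists(target):
        snapshot_download(repo_id=repo_id, local_dir=target)
```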
chatfuncs/chatfuncs.py CHANGED
@@ -52,7 +52,7 @@ class CtransInitConfig_gpu:
                   seed=seed,
                   n_threads=threads,
                   n_batch=batch_size,
-                  n_ctx=32768,
+                  n_ctx=24576,
                   n_gpu_layers=gpu_layers):

          self.last_n_tokens = last_n_tokens
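
The reduced n_ctx is what the renamed "Phi 3 128k (24k tokens max)" option refers to: llama-cpp-python is now initialised with a 24,576-token context window. A minimal sketch of how that setting reaches the model, assuming the GGUF path baked in by the Dockerfile (the keyword arguments are standard llama-cpp-python parameters, here expanded the way vars(gpu_config) is in load_model()):

```python
from llama_cpp import Llama

# vars(gpu_config) in load_model() expands to keyword arguments like these
llm = Llama(
    model_path="/model/phi/Phi-3-mini-128k-instruct.Q4_K_M.gguf",
    n_ctx=24576,       # 24k-token context window, matching the new CtransInitConfig_gpu default
    n_gpu_layers=0,    # raised via the "layers to send to GPU" slider when a GPU is available
)
```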
chatfuncs/helper_functions.py CHANGED
@@ -12,6 +12,36 @@ import getpass
  import gzip
  import pickle

+ def get_or_create_env_var(var_name, default_value):
+     # Get the environment variable if it exists
+     value = os.environ.get(var_name)
+
+     # If it doesn't exist, set it to the default value
+     if value is None:
+         os.environ[var_name] = default_value
+         value = default_value
+
+     return value
+
+ # Retrieving or setting output folder
+ env_var_name = 'GRADIO_OUTPUT_FOLDER'
+ default_value = 'output/'
+
+ output_folder = get_or_create_env_var(env_var_name, default_value)
+ print(f'The value of {env_var_name} is {output_folder}')
+
+ def ensure_output_folder_exists(output_folder):
+     """Checks if the output folder exists, creates it if not."""
+
+     folder_name = output_folder
+
+     if not os.path.exists(folder_name):
+         # Create the folder if it doesn't exist
+         os.makedirs(folder_name)
+         print(f"Created the output folder:", folder_name)
+     else:
+         print(f"The output folder already exists:", folder_name)
+
  # Attempt to delete content of gradio temp folder
  def get_temp_folder_path():
      username = getpass.getuser()
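
Since output_folder is resolved through get_or_create_env_var at import time, using a custom output folder only requires setting GRADIO_OUTPUT_FOLDER before the helpers are imported. A small sketch, with /data/summaries/ as a purely illustrative path:

```python
import os

# Must be set before chatfuncs.helper_functions is imported,
# since output_folder is resolved at import time
os.environ["GRADIO_OUTPUT_FOLDER"] = "/data/summaries/"

from chatfuncs.helper_functions import output_folder, ensure_output_folder_exists

ensure_output_folder_exists(output_folder)  # creates /data/summaries/ if missing
print(output_folder)                        # "/data/summaries/"
```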
chatfuncs/summarise_funcs.py CHANGED
@@ -3,158 +3,175 @@ import concurrent.futures
  import gradio as gr
  from chatfuncs.chatfuncs import model, CtransGenGenerationConfig, temperature
  from datetime import datetime

  today = datetime.now().strftime("%d%m%Y")
  today_rev = datetime.now().strftime("%Y%m%d")

- def summarise_text(text, text_df, length_slider, in_colname, model_type, progress=gr.Progress()):
-
-     if text_df.empty:
-         in_colname="text"
-         in_colname_list_first = in_colname

-         in_text_df = pd.DataFrame({in_colname_list_first:[text]})
-
-     else:
-         in_text_df = text_df
-         in_colname_list_first = in_colname

-     print(model_type)

-     texts_list = list(in_text_df[in_colname_list_first])

-     if model_type != "Phi 3 128k (larger, slow)":
-         summarised_texts = []

-         for single_text in progress.tqdm(texts_list, desc = "Summarising texts", unit = "texts"):

-             summarised_text = model(single_text, max_length=length_slider)

-             #print(summarised_text)

-             summarised_text_str = summarised_text[0]['summary_text']

-             summarised_texts.append(summarised_text_str)

-             print(summarised_text_str)

-     #pd.Series(summarised_texts).to_csv("summarised_texts_out.csv")

-     #print(summarised_texts)

-     if model_type == "Phi 3 128k (larger, slow)":

-         gen_config = CtransGenGenerationConfig()
-         gen_config.update_temp(temperature)

-         print(gen_config)

-         # Define a function that calls your model
-         # def call_model(formatted_string):#, vars):
-         # return model(formatted_string)#, vars)
-
-         def call_model(formatted_string, gen_config):
-             """
-             Calls your generation model with parameters from the CtransGenGenerationConfig object.
-
-             Args:
-                 formatted_string (str): The formatted input text for the model.
-                 gen_config (CtransGenGenerationConfig): An object containing generation parameters.
-             """
-             # Extracting parameters from the gen_config object
-             temperature = gen_config.temperature
-             top_k = gen_config.top_k
-             top_p = gen_config.top_p
-             repeat_penalty = gen_config.repeat_penalty
-             seed = gen_config.seed
-             max_tokens = gen_config.max_tokens
-             stream = gen_config.stream
-
-             # Now you can call your model directly, passing the parameters:
-             output = model(
-                 formatted_string,
-                 temperature=temperature,
-                 top_k=top_k,
-                 top_p=top_p,
-                 repeat_penalty=repeat_penalty,
-                 seed=seed,
-                 max_tokens=max_tokens,
-                 stream=stream,
-             )
-
-             return output
-
-         # Set your timeout duration (in seconds)
-         timeout_duration = 300 # Adjust this value as needed
-
-         length = str(length_slider)
-
-         from chatfuncs.prompts import instruction_prompt_phi3
-
-         summarised_texts = []
-
-         for single_text in progress.tqdm(texts_list, desc = "Summarising texts", unit = "texts"):
-
-             formatted_string = instruction_prompt_phi3.format(length=length, text=single_text)
-
-             # Use ThreadPoolExecutor to enforce a timeout
-             with concurrent.futures.ThreadPoolExecutor() as executor:
-                 #future = executor.submit(call_model, formatted_string)#, **vars(gen_config))
-                 future = executor.submit(call_model, formatted_string, gen_config)
-                 try:
-                     output = future.result(timeout=timeout_duration)
-                     # Process the output here
-                 except concurrent.futures.TimeoutError:
-                     error_text = f"Timeout (five minutes) occurred for text: {single_text}. Consider using a smaller model."
-                     print(error_text)
-                     return error_text, None
-
-             print(output)
-
-             output_str = output['choices'][0]['text']
-
-             # Find the index of 'ASSISTANT: ' to select only text after this location
-             # index = output_str.find('ASSISTANT: ')
-
-             # # Check if 'ASSISTANT: ' is found in the string
-             # if index != -1:
-             # # Add the length of 'ASSISTANT: ' to the index to start from the end of this substring
-             # start_index = index + len('ASSISTANT: ')

-             # # Slice the string from this point to the end
-             # assistant_text = output_str[start_index:]
-             # else:
-             # assistant_text = "ASSISTANT: not found in text"

-             # print(assistant_text)

-             #summarised_texts.append(assistant_text)

-             summarised_texts.append(output_str)

-             #print(summarised_text)

-     #pd.Series(summarised_texts).to_csv("summarised_texts_out.csv")

-     if text_df.empty:
-         #if model_type != "Phi 3 128k (larger, slow)":
-         summarised_text_out = summarised_texts[0]#.values()

-         #if model_type == "Phi 3 128k (larger, slow)":
-         # summarised_text_out = summarised_texts[0]

-     else:
-         summarised_text_out = summarised_texts #[d['summary_text'] for d in summarised_texts] #summarised_text[0].values()

-     output_name = "summarise_output_" + today_rev + ".csv"
-     output_df = pd.DataFrame({"Original text":in_text_df[in_colname_list_first],
-                               "Summarised text":summarised_text_out})

-     summarised_text_out_str = str(output_df["Summarised text"][0])#.str.replace("dict_values([","").str.replace("])",""))

-     output_df.to_csv(output_name, index = None)

-     return summarised_text_out_str, output_name


  # def summarise_text(text, text_df, length_slider, in_colname, model_type, progress=gr.Progress()):
@@ -173,7 +190,7 @@ def summarise_text(text, text_df, length_slider, in_colname, model_type, progres

  #     texts_list = list(in_text_df[in_colname_list_first])

- #     if model_type != "Phi 3 128k (larger, slow)":
  #         summarised_texts = []

  #         for single_text in progress.tqdm(texts_list, desc = "Summarising texts", unit = "texts"):
@@ -191,7 +208,7 @@ def summarise_text(text, text_df, length_slider, in_colname, model_type, progres

  #         #print(summarised_texts)

- #     if model_type == "Phi 3 128k (larger, slow)":


  #         # Define a function that calls your model
@@ -248,10 +265,10 @@ def summarise_text(text, text_df, length_slider, in_colname, model_type, progres
  #     #pd.Series(summarised_texts).to_csv("summarised_texts_out.csv")

  #     if text_df.empty:
- #         #if model_type != "Phi 3 128k (larger, slow)":
  #         summarised_text_out = summarised_texts[0]#.values()

- #         #if model_type == "Phi 3 128k (larger, slow)":
  #         # summarised_text_out = summarised_texts[0]

  #     else:
 
  import gradio as gr
  from chatfuncs.chatfuncs import model, CtransGenGenerationConfig, temperature
  from datetime import datetime
+ from typing import Type
+
+ from chatfuncs.helper_functions import output_folder

  today = datetime.now().strftime("%d%m%Y")
  today_rev = datetime.now().strftime("%Y%m%d")

+ PandasDataFrame = Type[pd.DataFrame]

+ def summarise_text(text:str, text_df:PandasDataFrame, length_slider:int, in_colname:str, model_type:str, progress=gr.Progress()):
+     '''
+     Summarise a text or series of texts using Transformers or Llama.cpp
+     '''

+     outputs = []
+     output_name = ""
+     output_name_parquet = ""
+
+     if text_df.empty:
+         in_colname="text"
+         in_colname_list_first = in_colname

+         in_text_df = pd.DataFrame({in_colname_list_first:[text]})
+
+     else:
+         in_text_df = text_df
+         in_colname_list_first = in_colname

+     print(model_type)

+     texts_list = list(in_text_df[in_colname_list_first])

+     if model_type != "Phi 3 128k (24k tokens max)":
+         summarised_texts = []

+         for single_text in progress.tqdm(texts_list, desc = "Summarising texts", unit = "texts"):

+             summarised_text = model(single_text, max_length=length_slider)

+             #print(summarised_text)

+             summarised_text_str = summarised_text[0]['summary_text']

+             summarised_texts.append(summarised_text_str)

+             print(summarised_text_str)

+     #pd.Series(summarised_texts).to_csv("summarised_texts_out.csv")

+     #print(summarised_texts)

+     if model_type == "Phi 3 128k (24k tokens max)":

+         gen_config = CtransGenGenerationConfig()
+         gen_config.update_temp(temperature)
+
+         print(gen_config)
+
+         # Define a function that calls your model
+         # def call_model(formatted_string):#, vars):
+         # return model(formatted_string)#, vars)
+
+         def call_model(formatted_string, gen_config):
+             """
+             Calls your generation model with parameters from the CtransGenGenerationConfig object.
+
+             Args:
+                 formatted_string (str): The formatted input text for the model.
+                 gen_config (CtransGenGenerationConfig): An object containing generation parameters.
+             """
+             # Extracting parameters from the gen_config object
+             temperature = gen_config.temperature
+             top_k = gen_config.top_k
+             top_p = gen_config.top_p
+             repeat_penalty = gen_config.repeat_penalty
+             seed = gen_config.seed
+             max_tokens = gen_config.max_tokens
+             stream = gen_config.stream
+
+             # Now you can call your model directly, passing the parameters:
+             output = model(
+                 formatted_string,
+                 temperature=temperature,
+                 top_k=top_k,
+                 top_p=top_p,
+                 repeat_penalty=repeat_penalty,
+                 seed=seed,
+                 max_tokens=max_tokens,
+                 stream=stream,
+             )
+
+             return output
+
+         # Set your timeout duration (in seconds)
+         timeout_duration = 300 # Adjust this value as needed
+
+         length = str(length_slider)

+         from chatfuncs.prompts import instruction_prompt_phi3

+         summarised_texts = []

+         for single_text in progress.tqdm(texts_list, desc = "Summarising texts", unit = "texts"):

+             formatted_string = instruction_prompt_phi3.format(length=length, text=single_text)
+
+             # Use ThreadPoolExecutor to enforce a timeout
+             with concurrent.futures.ThreadPoolExecutor() as executor:
+                 #future = executor.submit(call_model, formatted_string)#, **vars(gen_config))
+                 future = executor.submit(call_model, formatted_string, gen_config)
+                 try:
+                     output = future.result(timeout=timeout_duration)
+                     # Process the output here
+                 except concurrent.futures.TimeoutError:
+                     error_text = f"Timeout (five minutes) occurred for text: {single_text}. Consider using a smaller model."
+                     print(error_text)
+                     return error_text, None
+
+             print(output)
+
+             output_str = output['choices'][0]['text']
+
+             # Find the index of 'ASSISTANT: ' to select only text after this location
+             # index = output_str.find('ASSISTANT: ')
+
+             # # Check if 'ASSISTANT: ' is found in the string
+             # if index != -1:
+             # # Add the length of 'ASSISTANT: ' to the index to start from the end of this substring
+             # start_index = index + len('ASSISTANT: ')

+             # # Slice the string from this point to the end
+             # assistant_text = output_str[start_index:]
+             # else:
+             # assistant_text = "ASSISTANT: not found in text"
+
+             # print(assistant_text)
+
+             #summarised_texts.append(assistant_text)
+
+             summarised_texts.append(output_str)
+
+             #print(summarised_text)
+
+     #pd.Series(summarised_texts).to_csv("summarised_texts_out.csv")
+
+     if text_df.empty:
+         #if model_type != "Phi 3 128k (24k tokens max)":
+         summarised_text_out = summarised_texts[0]#.values()

+         #if model_type == "Phi 3 128k (24k tokens max)":
+         # summarised_text_out = summarised_texts[0]

+     else:
+         summarised_text_out = summarised_texts #[d['summary_text'] for d in summarised_texts] #summarised_text[0].values()

+     output_name = output_folder + "summarise_output_" + today_rev + ".csv"
+     output_name_parquet = output_folder + "summarise_output_" + today_rev + ".parquet"
+     output_df = pd.DataFrame({"Original text":in_text_df[in_colname_list_first],
+                               "Summarised text":summarised_text_out})

+     summarised_text_out_str = str(output_df["Summarised text"][0])#.str.replace("dict_values([","").str.replace("])",""))

+     output_df.to_csv(output_name, index = None)
+     output_df.to_parquet(output_name_parquet, index = None)

+     outputs.append(output_name)
+     outputs.append(output_name_parquet)

+     return summarised_text_out_str, outputs


  # def summarise_text(text, text_df, length_slider, in_colname, model_type, progress=gr.Progress()):

  #     texts_list = list(in_text_df[in_colname_list_first])

+ #     if model_type != "Phi 3 128k (24k tokens max)":
  #         summarised_texts = []

  #         for single_text in progress.tqdm(texts_list, desc = "Summarising texts", unit = "texts"):

  #         #print(summarised_texts)

+ #     if model_type == "Phi 3 128k (24k tokens max)":


  #         # Define a function that calls your model

  #     #pd.Series(summarised_texts).to_csv("summarised_texts_out.csv")

  #     if text_df.empty:
+ #         #if model_type != "Phi 3 128k (24k tokens max)":
  #         summarised_text_out = summarised_texts[0]#.values()

+ #         #if model_type == "Phi 3 128k (24k tokens max)":
  #         # summarised_text_out = summarised_texts[0]

  #     else:
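
summarise_text() now writes both a CSV and a Parquet copy of the results into output_folder and returns both file paths. A quick sketch of reading them back, assuming the default output/ folder and today's date suffix as used in the code:

```python
from datetime import datetime
import pandas as pd

today_rev = datetime.now().strftime("%Y%m%d")

# Same naming scheme as summarise_text(): output_folder + "summarise_output_" + today_rev
csv_path = f"output/summarise_output_{today_rev}.csv"
parquet_path = f"output/summarise_output_{today_rev}.parquet"

results = pd.read_parquet(parquet_path)          # preserves dtypes better than CSV
print(results[["Original text", "Summarised text"]].head())

# The CSV copy holds the same two columns
results_csv = pd.read_csv(csv_path)
```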
requirements.txt CHANGED
@@ -2,5 +2,5 @@ gradio==4.36.0
  transformers
  pyarrow
  openpyxl
- llama-cpp-python==0.2.77 --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu121
- torch==2.3.1 --extra-index-url https://download.pytorch.org/whl/cu121
+ llama-cpp-python==0.2.77
+ torch==2.3.1
requirements_gpu.txt ADDED
@@ -0,0 +1,6 @@
+ gradio==4.36.0
+ transformers
+ pyarrow
+ openpyxl
+ llama-cpp-python==0.2.77 --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu121
+ torch==2.3.1 --extra-index-url https://download.pytorch.org/whl/cu121