dron3flyv3r committed on
Commit
6d1b7ca
1 Parent(s): a1d7b67

Add AutoTokenizer for text summarization

Browse files
Files changed (2) hide show
  1. app.py +15 -2
  2. requirements.txt +1 -0
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import gradio as gr
2
  import os
3
  from huggingface_hub import InferenceClient
 
4
 
5
  HUGGINGFACE_API_KEY = os.environ["HUGGINGFACE_API_KEY"]
6
 
@@ -15,19 +16,31 @@ def transcript_audio(audio_file) -> str:
15
  def summarize_text(text: str, bullet_points: int, conclusion: bool) -> str:
16
  llm_model = "google/gemma-7b-it"
17
  api = InferenceClient(llm_model, token=HUGGINGFACE_API_KEY)
 
18
  if conclusion:
19
- prompt = f"Summarize the following text into {bullet_points} bullet points and a conclusion: {text}"
20
  else:
21
- prompt = f"Summarize the following text into {bullet_points} bullet points: {text}"
 
 
 
 
 
 
 
 
22
  summary = api.text_generation(prompt, max_new_tokens=250, do_sample=True)
23
  print(summary)
24
  return summary
25
 
 
26
  def control(audio_file, text: str, bullet_points: int, conclusion: bool) -> str:
27
  if audio_file:
28
  text = transcript_audio(audio_file)
29
  summary = summarize_text(text, bullet_points, conclusion)
30
  return summary
 
 
31
  # make a simeple interface, where the user can input a text and get a summary or input an audio file and get a transcript and a summary
32
  iface = gr.Interface(
33
  fn=control,
 
1
  import gradio as gr
2
  import os
3
  from huggingface_hub import InferenceClient
4
+ from transformers import AutoTokenizer
5
 
6
  HUGGINGFACE_API_KEY = os.environ["HUGGINGFACE_API_KEY"]
7
 
 
16
  def summarize_text(text: str, bullet_points: int, conclusion: bool) -> str:
17
  llm_model = "google/gemma-7b-it"
18
  api = InferenceClient(llm_model, token=HUGGINGFACE_API_KEY)
19
+ tokenizer = AutoTokenizer.from_pretrained(llm_model, token=HUGGINGFACE_API_KEY)
20
  if conclusion:
21
+ user_chat = f"Summarize the following text into {bullet_points} bullet points and a conclusion: {text}"
22
  else:
23
+ user_chat = (
24
+ f"Summarize the following text into {bullet_points} bullet points: {text}"
25
+ )
26
+ chat = [
27
+ {"role": "user", "content": user_chat},
28
+ ]
29
+ prompt = tokenizer.apply_chat_template(
30
+ chat, tokenize=False, add_generation_prompt=True
31
+ )
32
  summary = api.text_generation(prompt, max_new_tokens=250, do_sample=True)
33
  print(summary)
34
  return summary
35
 
36
+
37
  def control(audio_file, text: str, bullet_points: int, conclusion: bool) -> str:
38
  if audio_file:
39
  text = transcript_audio(audio_file)
40
  summary = summarize_text(text, bullet_points, conclusion)
41
  return summary
42
+
43
+
44
  # make a simeple interface, where the user can input a text and get a summary or input an audio file and get a transcript and a summary
45
  iface = gr.Interface(
46
  fn=control,
requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ transformers