Spaces:

dron3flyv3r
/

Meeting-Summarizer

Runtime error

dron3flyv3r committed on Mar 22

Commit

6d1b7ca

•

1 Parent(s): a1d7b67

Add AutoTokenizer for text summarization

Files changed (2) hide show

app.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import gradio as gr
 import os
 from huggingface_hub import InferenceClient
 HUGGINGFACE_API_KEY = os.environ["HUGGINGFACE_API_KEY"]
@@ -15,19 +16,31 @@ def transcript_audio(audio_file) -> str:
 def summarize_text(text: str, bullet_points: int, conclusion: bool) -> str:
     # Ask the "google/gemma-7b-it" model, through InferenceClient, to summarize `text` into
     # `bullet_points` bullet points (plus a conclusion when `conclusion` is true) and return
     # the generated text.
     llm_model = "google/gemma-7b-it"
     api = InferenceClient(llm_model, token=HUGGINGFACE_API_KEY)
     # Choose the instruction wording depending on whether a conclusion was requested;
     # the instruction string itself is used directly as the prompt.
     if conclusion:
-        prompt = f"Summarize the following text into {bullet_points} bullet points and a conclusion: {text}"
     else:
-        prompt = f"Summarize the following text into {bullet_points} bullet points: {text}"
     # Generate at most 250 new tokens with sampling enabled.
     summary = api.text_generation(prompt, max_new_tokens=250, do_sample=True)
     # Echo the result to stdout before returning it.
     print(summary)
     return summary
 def control(audio_file, text: str, bullet_points: int, conclusion: bool) -> str:
     """Summarize an audio recording or, when none is supplied, the given text.

     A truthy ``audio_file`` is passed to ``transcript_audio`` and its result is
     used in place of ``text``; the chosen text is then forwarded to
     ``summarize_text`` together with ``bullet_points`` and ``conclusion``.
     """
     source_text = transcript_audio(audio_file) if audio_file else text
     return summarize_text(source_text, bullet_points, conclusion)
 # Build a simple interface where the user can either enter text to get a summary, or upload an audio file to get a transcript and a summary.
 iface = gr.Interface(
     fn=control,

 import gradio as gr
 import os
 from huggingface_hub import InferenceClient
+from transformers import AutoTokenizer
 HUGGINGFACE_API_KEY = os.environ["HUGGINGFACE_API_KEY"]
 def summarize_text(text: str, bullet_points: int, conclusion: bool) -> str:
     # Ask the "google/gemma-7b-it" model, through InferenceClient, to summarize `text` into
     # `bullet_points` bullet points (plus a conclusion when `conclusion` is true) and return
     # the generated text.
     llm_model = "google/gemma-7b-it"
     api = InferenceClient(llm_model, token=HUGGINGFACE_API_KEY)
     # NOTE(review): the tokenizer is created inside the function, so from_pretrained runs on
     # every call; consider creating it once at module level.
+    tokenizer = AutoTokenizer.from_pretrained(llm_model, token=HUGGINGFACE_API_KEY)
     # Choose the instruction wording depending on whether a conclusion was requested.
     if conclusion:
+        user_chat = f"Summarize the following text into {bullet_points} bullet points and a conclusion: {text}"
     else:
+        user_chat = (
+            f"Summarize the following text into {bullet_points} bullet points: {text}"
+        )
     # Wrap the instruction as a single user turn so it can be run through the chat template.
+    chat = [
+        {"role": "user", "content": user_chat},
+    ]
     # tokenize=False requests the rendered prompt text rather than token ids, and
     # add_generation_prompt=True requests the template's reply-turn prefix, so generation
     # starts as the model's answer (TODO confirm against the transformers docs).
+    prompt = tokenizer.apply_chat_template(
+        chat, tokenize=False, add_generation_prompt=True
+    )
     # Generate at most 250 new tokens with sampling enabled.
     summary = api.text_generation(prompt, max_new_tokens=250, do_sample=True)
     # Echo the result to stdout before returning it.
     print(summary)
     return summary
 def control(audio_file, text: str, bullet_points: int, conclusion: bool) -> str:
     """Summarize an audio recording or, when none is supplied, the given text.

     A truthy ``audio_file`` is passed to ``transcript_audio`` and its result is
     used in place of ``text``; the chosen text is then forwarded to
     ``summarize_text`` together with ``bullet_points`` and ``conclusion``.
     """
     source_text = transcript_audio(audio_file) if audio_file else text
     return summarize_text(source_text, bullet_points, conclusion)
 # Build a simple interface where the user can either enter text to get a summary, or upload an audio file to get a transcript and a summary.
 iface = gr.Interface(
     fn=control,

requirements.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ transformers