added openai
Browse files
app.py
CHANGED
@@ -4,6 +4,7 @@ import whisperx
|
|
4 |
import torch
|
5 |
from utils import convert_segments_object_to_text, check_password
|
6 |
from gigiachat_requests import get_access_token, get_completion_from_gigachat, get_number_of_tokens
|
|
|
7 |
|
8 |
if check_password():
|
9 |
st.title('Audio Transcription App')
|
@@ -13,54 +14,69 @@ if check_password():
|
|
13 |
batch_size = int(os.getenv('BATCH_SIZE'))
|
14 |
compute_type = os.getenv('COMPUTE_TYPE')
|
15 |
|
16 |
-
|
17 |
-
|
18 |
|
19 |
-
|
20 |
-
|
|
|
21 |
|
22 |
enable_summarization = st.sidebar.checkbox("Добавить обработку транскрибации", value=False)
|
23 |
-
|
24 |
|
25 |
ACCESS_TOKEN = st.secrets["HF_TOKEN"]
|
26 |
|
27 |
uploaded_file = st.file_uploader("Загрузите аудиофайл", type=["mp4", "wav", "m4a"])
|
28 |
|
29 |
if uploaded_file is not None:
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
st.audio(uploaded_file)
|
31 |
file_extension = uploaded_file.name.split(".")[-1] # Получаем расширение файла
|
32 |
temp_file_path = f"temp_file.{file_extension}" # Создаем временное имя файла с правильным расширением
|
33 |
|
34 |
with open(temp_file_path, "wb") as f:
|
35 |
f.write(uploaded_file.getbuffer())
|
|
|
|
|
36 |
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
|
53 |
-
|
54 |
-
|
|
|
|
|
|
|
55 |
st.text(transcript)
|
56 |
|
57 |
access_token = get_access_token()
|
58 |
|
59 |
if (enable_summarization):
|
60 |
with st.spinner('Обрабатываем транскрибацию...'):
|
61 |
-
|
62 |
-
|
63 |
-
|
|
|
|
|
|
|
64 |
|
65 |
st.write("Результат обработки:")
|
66 |
st.text(transcript)
|
@@ -68,7 +84,10 @@ if check_password():
|
|
68 |
|
69 |
|
70 |
with st.spinner('Резюмируем...'):
|
71 |
-
|
|
|
|
|
|
|
72 |
|
73 |
st.write("Результат резюмирования:")
|
74 |
st.text(summary_answer)
|
|
|
4 |
import torch
|
5 |
from utils import convert_segments_object_to_text, check_password
|
6 |
from gigiachat_requests import get_access_token, get_completion_from_gigachat, get_number_of_tokens
|
7 |
+
from openai_requests import get_completion_from_openai
|
8 |
|
9 |
if check_password():
|
10 |
st.title('Audio Transcription App')
|
|
|
14 |
# --- Runtime configuration (environment) ---
batch_size = int(os.getenv('BATCH_SIZE'))
compute_type = os.getenv('COMPUTE_TYPE')

# Default prompt texts come from the environment; the sidebar lets the user
# override them per session. NOTE(review): the env var is spelled
# 'PROCCESS_PROMPT' — presumably matching the deployment config; confirm
# before "fixing" the spelling.
default_summary_prompt = os.getenv('BASE_PROMPT')
default_processing_prompt = os.getenv('PROCCESS_PROMPT')

# --- Sidebar controls ---
llm = st.sidebar.selectbox("LLM", ["GigaChat", "Chat GPT"], index=0)
base_prompt = st.sidebar.text_area("Промпт для резюмирования", value=default_summary_prompt)
max_tokens_summary = st.sidebar.number_input("Максимальное количество токенов при резюмировании", min_value=1, value=1024)

enable_summarization = st.sidebar.checkbox("Добавить обработку транскрибации", value=False)
processing_prompt = st.sidebar.text_area("Промпт для обработки транскрибации", value=default_processing_prompt)

# Hugging Face token (used later for diarization).
ACCESS_TOKEN = st.secrets["HF_TOKEN"]
|
28 |
|
29 |
uploaded_file = st.file_uploader("Загрузите аудиофайл", type=["mp4", "wav", "m4a"])

if uploaded_file is not None:
    file_name = uploaded_file.name

    # A new upload (or a different file than last run) invalidates the
    # cached transcript so it is recomputed for this file.
    if 'file_name' not in st.session_state or st.session_state.file_name != file_name:
        st.session_state.transcript = ''
        st.session_state.file_name = file_name

    st.audio(uploaded_file)

    # Spill the upload to a local temp file, keeping the original
    # extension so downstream audio loading can detect the container.
    file_extension = file_name.split(".")[-1]
    temp_file_path = f"temp_file.{file_extension}"
    with open(temp_file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())
|
44 |
+
|
45 |
+
# Transcribe the uploaded audio once per file. The result is cached in
# st.session_state.transcript so Streamlit reruns (any widget interaction
# re-executes the script) do not trigger a fresh WhisperX pass.
if 'transcript' not in st.session_state or st.session_state.transcript == '':
    with st.spinner('Транскрибируем...'):
        # Load model
        model = whisperx.load_model(os.getenv('WHISPER_MODEL_SIZE'), device, compute_type=compute_type)
        # Load and transcribe audio
        audio = whisperx.load_audio(temp_file_path)
        result = model.transcribe(audio, batch_size=batch_size, language="ru")
        print('Transcribed, now aligning')

        # Word-level alignment for accurate timestamps.
        model_a, metadata = whisperx.load_align_model(language_code=result["language"], device=device)
        result = whisperx.align(result["segments"], model_a, metadata, audio, device, return_char_alignments=False)
        print('Aligned, now diarizing')

        # Speaker diarization; requires a Hugging Face auth token.
        diarize_model = whisperx.DiarizationPipeline(use_auth_token=st.secrets["HF_TOKEN"], device=device)
        diarize_segments = diarize_model(audio)
        result_diar = whisperx.assign_word_speakers(diarize_segments, result)

        st.write("Результат транскрибации:")
        transcript = convert_segments_object_to_text(result_diar)
        # BUG FIX: persist the transcript. Previously it was never written
        # back to session state, so the emptiness check above was always
        # true and every rerun re-transcribed the same file.
        st.session_state.transcript = transcript
else:
    transcript = st.session_state.transcript

st.text(transcript)
|
69 |
|
70 |
# GigaChat access token is fetched unconditionally; it is also needed by
# the summarization step further down.
access_token = get_access_token()

if enable_summarization:
    with st.spinner('Обрабатываем транскрибацию...'):
        if llm == 'GigaChat':
            # GigaChat needs an explicit completion-size budget, so measure
            # the transcript first and allow 500 tokens of headroom.
            number_of_tokens = get_number_of_tokens(transcript, access_token)
            print('Количество токенов в транскрибации: ' + str(number_of_tokens))
            transcript = get_completion_from_gigachat(processing_prompt + transcript, number_of_tokens + 500, access_token)
        elif llm == 'Chat GPT':
            transcript = get_completion_from_openai(processing_prompt + transcript)

    # NOTE(review): placement reconstructed from diff context — these result
    # lines appear to sit inside the summarization branch; confirm against
    # the full file.
    st.write("Результат обработки:")
    st.text(transcript)
|
|
|
84 |
|
85 |
|
86 |
with st.spinner('Резюмируем...'):
    # Route the summary request to whichever LLM backend was chosen in
    # the sidebar; the user-editable base prompt is prepended.
    if llm == 'GigaChat':
        summary_answer = get_completion_from_gigachat(base_prompt + transcript, max_tokens_summary, access_token)
    elif llm == 'Chat GPT':
        summary_answer = get_completion_from_openai(base_prompt + transcript)

st.write("Результат резюмирования:")
st.text(summary_answer)
|