import os

import streamlit as st
import torch
import whisperx

from utils import convert_segments_object_to_text, check_password, convert_segments_object_to_text_simple
from gigiachat_requests import get_access_token, get_completion_from_gigachat, get_number_of_tokens, process_transcribation_with_gigachat
from openai_requests import get_completion_from_openai, process_transcribation_with_assistant


def _transcribe_with_diarization(audio_path, device, batch_size, compute_type,
                                 hf_token, min_speakers, max_speakers):
    """Run the full WhisperX pipeline on *audio_path* and return the transcript text.

    Pipeline: ASR -> word-level alignment -> speaker diarization -> plain-text
    rendering via ``convert_segments_object_to_text_simple``.

    Args:
        audio_path: Path of the audio file on disk.
        device: Torch device string from the DEVICE env var.
        batch_size: Inference batch size.
        compute_type: WhisperX compute type (e.g. "float16", "int8").
        hf_token: Hugging Face token required by the diarization pipeline.
        min_speakers / max_speakers: Bounds passed to the diarizer.
    """
    # ASR with the configured Whisper model; the language is fixed to Russian.
    model = whisperx.load_model(os.getenv('WHISPER_MODEL_SIZE'), device, compute_type=compute_type)
    audio = whisperx.load_audio(audio_path)
    result = model.transcribe(audio, batch_size=batch_size, language="ru")
    print('Transcribed, now aligning')

    # Word-level timestamps are required before speakers can be assigned.
    model_a, metadata = whisperx.load_align_model(language_code=result["language"], device=device)
    result = whisperx.align(result["segments"], model_a, metadata, audio, device,
                            return_char_alignments=False)
    print('Aligned, now diarizing')

    diarize_model = whisperx.DiarizationPipeline(use_auth_token=hf_token, device=device)
    diarize_segments = diarize_model(audio, min_speakers=min_speakers, max_speakers=max_speakers)
    result_diar = whisperx.assign_word_speakers(diarize_segments, result)
    return convert_segments_object_to_text_simple(result_diar)


if check_password():
    st.title('Audio Transcription App')
    st.sidebar.title("Settings")

    # Runtime configuration comes from the environment.
    device = os.getenv('DEVICE')
    batch_size = int(os.getenv('BATCH_SIZE'))
    compute_type = os.getenv('COMPUTE_TYPE')
    initial_base_prompt = os.getenv('BASE_PROMPT')
    # NOTE: 'PROCCESS_PROMPT' is misspelled, but it must match the deployed
    # environment variable name — do not "fix" without renaming the env var.
    initial_processing_prompt = os.getenv('PROCCESS_PROMPT')

    min_speakers = st.sidebar.number_input("Минимальное количество спикеров", min_value=1, value=2)
    max_speakers = st.sidebar.number_input("Максимальное количество спикеров", min_value=1, value=2)

    llm = st.sidebar.selectbox("Производитель LLM", ["Сбер", "OpenAI"], index=0)
    if llm == "Сбер":
        options = ["GigaChat-Plus", "GigaChat", "GigaChat-Pro"]
    elif llm == "OpenAI":
        options = ["gpt-4o", "gpt-4o-mini", "gpt-4-turbo", "gpt-4", "gpt-3.5-turbo"]
    else:
        options = []
    llm_model = st.sidebar.selectbox("Модель", options, index=0)

    base_prompt = st.sidebar.text_area("Промпт для резюмирования", value=initial_base_prompt)
    enable_processing = st.sidebar.checkbox("Добавить обработку транскрибации", value=False)
    processing_prompt = st.sidebar.text_area("Промпт для обработки транскрибации", value=initial_processing_prompt)

    # Hugging Face token used by the diarization pipeline (single read of secrets).
    hf_token = st.secrets["HF_TOKEN"]

    uploaded_file = st.file_uploader("Загрузите аудиофайл", type=["mp4", "wav", "m4a"])

    if uploaded_file is not None:
        # Invalidate the cached transcript whenever a different file is uploaded,
        # so a rerun with the same file reuses the previous (expensive) result.
        file_name = uploaded_file.name
        if 'file_name' not in st.session_state or st.session_state.file_name != file_name:
            st.session_state.transcript = ''
            st.session_state.file_name = file_name

        st.audio(uploaded_file)

        # Persist the upload to a temp file with the correct extension so
        # whisperx can load it from disk.
        file_extension = uploaded_file.name.split(".")[-1]
        temp_file_path = f"temp_file.{file_extension}"
        with open(temp_file_path, "wb") as f:
            f.write(uploaded_file.getbuffer())

        try:
            if 'transcript' not in st.session_state or st.session_state.transcript == '':
                with st.spinner('Транскрибируем...'):
                    transcript = _transcribe_with_diarization(
                        temp_file_path, device, batch_size, compute_type,
                        hf_token, min_speakers, max_speakers,
                    )
                st.session_state.transcript = transcript
            else:
                transcript = st.session_state.transcript
        finally:
            # Don't leave the uploaded payload on disk between Streamlit reruns.
            if os.path.exists(temp_file_path):
                os.remove(temp_file_path)

        st.write("Результат транскрибации:")
        st.text(transcript)

        # GigaChat requires a short-lived access token; the OpenAI helpers
        # handle authentication internally.
        if llm == 'Сбер':
            access_token = get_access_token()

        if enable_processing:
            with st.spinner('Обрабатываем транскрибацию...'):
                if llm == 'Сбер':
                    number_of_tokens = get_number_of_tokens(transcript, access_token, llm_model)
                    print('Количество токенов в транскрибации: ' + str(number_of_tokens))
                    # Reserve headroom above the input size for the rewritten text.
                    transcript = process_transcribation_with_gigachat(
                        processing_prompt, transcript, number_of_tokens + 1000,
                        access_token, llm_model)
                    print(transcript)
                elif llm == 'OpenAI':
                    transcript = process_transcribation_with_assistant(processing_prompt, transcript)
                    print(transcript)

        with st.spinner('Резюмируем...'):
            if llm == 'Сбер':
                summary_answer = get_completion_from_gigachat(base_prompt + transcript, 1024, access_token, llm_model)
            elif llm == 'OpenAI':
                summary_answer = get_completion_from_openai(base_prompt + transcript, llm_model, 1024)
            st.write("Результат резюмирования:")
            st.text(summary_answer)