Sabbah13 committed on
Commit
4b331f0
1 Parent(s): 9f68a6e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -47
app.py CHANGED
@@ -5,68 +5,68 @@ import torch
5
  from utils import convert_segments_object_to_text, check_password
6
  from gigiachat_requests import get_access_token, get_completion_from_gigachat
7
 
8
-
9
- st.title('Audio Transcription App')
10
- st.sidebar.title("Settings")
11
- # Sidebar inputs
12
- device = st.sidebar.selectbox("Device", ["cpu", "cuda"], index=1)
13
- batch_size = st.sidebar.number_input("Batch Size", min_value=1, value=16)
14
- compute_type = st.sidebar.selectbox("Compute Type", ["float16", "int8"], index=0)
15
 
16
- initial_giga_base_prompt = os.getenv('GIGA_BASE_PROMPT')
17
- initial_giga_processing_prompt = os.getenv('GIGA_PROCCESS_PROMPT')
18
 
19
- giga_base_prompt = st.sidebar.text_area("Промпт ГигаЧата для резюмирования", value=initial_giga_base_prompt)
20
- giga_max_tokens = st.sidebar.number_input("Максимальное количество токенов при резюмировании", min_value=1, value=1024)
21
 
22
- enable_summarization = st.sidebar.checkbox("Добавить обработку транскрибации", value=False)
23
- giga_processing_prompt = st.sidebar.text_area("Промпт ГигаЧата для обработки транскрибации", value=initial_giga_processing_prompt)
24
 
25
- ACCESS_TOKEN = st.secrets["HF_TOKEN"]
26
 
27
- uploaded_file = st.file_uploader("Загрузите аудиофайл", type=["mp4", "wav", "m4a"])
28
 
29
- if uploaded_file is not None:
30
- st.audio(uploaded_file)
31
- file_extension = uploaded_file.name.split(".")[-1] # Получаем расширение файла
32
- temp_file_path = f"temp_file.{file_extension}" # Создаем временное имя файла с правильным расширением
33
 
34
- with open(temp_file_path, "wb") as f:
35
- f.write(uploaded_file.getbuffer())
36
 
37
- with st.spinner('Транскрибируем...'):
38
- # Load model
39
- model = whisperx.load_model(os.getenv('WHISPER_MODEL_SIZE'), device, compute_type=compute_type)
40
- # Load and transcribe audio
41
- audio = whisperx.load_audio(temp_file_path)
42
- result = model.transcribe(audio, batch_size=batch_size, language="ru")
43
- print('Transcribed, now aligning')
44
 
45
- model_a, metadata = whisperx.load_align_model(language_code=result["language"], device=device)
46
- result = whisperx.align(result["segments"], model_a, metadata, audio, device, return_char_alignments=False)
47
- print('Aligned, now diarizing')
48
 
49
- diarize_model = whisperx.DiarizationPipeline(use_auth_token=st.secrets["HF_TOKEN"], device=device)
50
- diarize_segments = diarize_model(audio)
51
- result_diar = whisperx.assign_word_speakers(diarize_segments, result)
52
 
53
- st.write("Результат транскрибации:")
54
- transcript = convert_segments_object_to_text(result_diar)
55
- st.text(transcript)
56
 
57
- access_token = get_access_token()
58
 
59
- if (enable_summarization):
60
- with st.spinner('Обрабатываем транскрибацию...'):
61
- transcript = get_completion_from_gigachat(giga_processing_prompt + transcript, 32768, access_token)
62
 
63
- st.write("Результат обработки:")
64
- st.text(transcript)
65
 
66
 
67
 
68
- with st.spinner('Резюмируем...'):
69
- summary_answer = get_completion_from_gigachat(giga_base_prompt + transcript, giga_max_tokens, access_token)
70
 
71
- st.write("Результат резюмирования:")
72
- st.text(summary_answer)
 
5
  from utils import convert_segments_object_to_text, check_password
6
  from gigiachat_requests import get_access_token, get_completion_from_gigachat
7
 
8
+ if check_password():
9
+ st.title('Audio Transcription App')
10
+ st.sidebar.title("Settings")
11
+ # Sidebar inputs
12
+ device = st.sidebar.selectbox("Device", ["cpu", "cuda"], index=1)
13
+ batch_size = st.sidebar.number_input("Batch Size", min_value=1, value=16)
14
+ compute_type = st.sidebar.selectbox("Compute Type", ["float16", "int8"], index=0)
15
 
16
+ initial_giga_base_prompt = os.getenv('GIGA_BASE_PROMPT')
17
+ initial_giga_processing_prompt = os.getenv('GIGA_PROCCESS_PROMPT')
18
 
19
+ giga_base_prompt = st.sidebar.text_area("Промпт ГигаЧата для резюмирования", value=initial_giga_base_prompt)
20
+ giga_max_tokens = st.sidebar.number_input("Максимальное количество токенов при резюмировании", min_value=1, value=1024)
21
 
22
+ enable_summarization = st.sidebar.checkbox("Добавить обработку транскрибации", value=False)
23
+ giga_processing_prompt = st.sidebar.text_area("Промпт ГигаЧата для обработки транскрибации", value=initial_giga_processing_prompt)
24
 
25
+ ACCESS_TOKEN = st.secrets["HF_TOKEN"]
26
 
27
+ uploaded_file = st.file_uploader("Загрузите аудиофайл", type=["mp4", "wav", "m4a"])
28
 
29
+ if uploaded_file is not None:
30
+ st.audio(uploaded_file)
31
+ file_extension = uploaded_file.name.split(".")[-1] # Получаем расширение файла
32
+ temp_file_path = f"temp_file.{file_extension}" # Создаем временное имя файла с правильным расширением
33
 
34
+ with open(temp_file_path, "wb") as f:
35
+ f.write(uploaded_file.getbuffer())
36
 
37
+ with st.spinner('Транскрибируем...'):
38
+ # Load model
39
+ model = whisperx.load_model(os.getenv('WHISPER_MODEL_SIZE'), device, compute_type=compute_type)
40
+ # Load and transcribe audio
41
+ audio = whisperx.load_audio(temp_file_path)
42
+ result = model.transcribe(audio, batch_size=batch_size, language="ru")
43
+ print('Transcribed, now aligning')
44
 
45
+ model_a, metadata = whisperx.load_align_model(language_code=result["language"], device=device)
46
+ result = whisperx.align(result["segments"], model_a, metadata, audio, device, return_char_alignments=False)
47
+ print('Aligned, now diarizing')
48
 
49
+ diarize_model = whisperx.DiarizationPipeline(use_auth_token=st.secrets["HF_TOKEN"], device=device)
50
+ diarize_segments = diarize_model(audio)
51
+ result_diar = whisperx.assign_word_speakers(diarize_segments, result)
52
 
53
+ st.write("Результат транскрибации:")
54
+ transcript = convert_segments_object_to_text(result_diar)
55
+ st.text(transcript)
56
 
57
+ access_token = get_access_token()
58
 
59
+ if (enable_summarization):
60
+ with st.spinner('Обрабатываем транскрибацию...'):
61
+ transcript = get_completion_from_gigachat(giga_processing_prompt + transcript, 32768, access_token)
62
 
63
+ st.write("Результат обработки:")
64
+ st.text(transcript)
65
 
66
 
67
 
68
+ with st.spinner('Резюмируем...'):
69
+ summary_answer = get_completion_from_gigachat(giga_base_prompt + transcript, giga_max_tokens, access_token)
70
 
71
+ st.write("Результат резюмирования:")
72
+ st.text(summary_answer)