Update app.py
Browse files
app.py
CHANGED
@@ -6,22 +6,48 @@ import streamlit as st
|
|
6 |
import whisperx
|
7 |
import torch
|
8 |
|
9 |
-
def
|
10 |
result = []
|
|
|
11 |
for segment in data['segments']:
|
12 |
words = segment['words']
|
|
|
|
|
|
|
13 |
current_speaker = None
|
14 |
current_start = None
|
15 |
current_end = None
|
16 |
current_text = []
|
17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
for word_info in words:
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
end = word_info['end']
|
24 |
-
speaker = word_info['speaker']
|
25 |
|
26 |
if current_speaker is None:
|
27 |
current_speaker = speaker
|
@@ -32,7 +58,10 @@ def convert_segments_to_text(data):
|
|
32 |
current_end = end
|
33 |
else:
|
34 |
# Finish current segment
|
35 |
-
|
|
|
|
|
|
|
36 |
result.append(formatted_text)
|
37 |
|
38 |
# Start new segment
|
@@ -43,7 +72,10 @@ def convert_segments_to_text(data):
|
|
43 |
|
44 |
# Append the last segment
|
45 |
if current_text:
|
46 |
-
|
|
|
|
|
|
|
47 |
result.append(formatted_text)
|
48 |
|
49 |
return '\n'.join(result)
|
|
|
6 |
import whisperx
|
7 |
import torch
|
8 |
|
9 |
+
def convert_segments_object_to_text(data):
|
10 |
result = []
|
11 |
+
|
12 |
for segment in data['segments']:
|
13 |
words = segment['words']
|
14 |
+
segment_speaker = segment.get('speaker', None)
|
15 |
+
segment_start = segment.get('start', None)
|
16 |
+
segment_end = segment.get('end', None)
|
17 |
current_speaker = None
|
18 |
current_start = None
|
19 |
current_end = None
|
20 |
current_text = []
|
21 |
|
22 |
+
# Forward fill speaker, start and end if missing
|
23 |
+
for i, word_info in enumerate(words):
|
24 |
+
if 'speaker' not in word_info:
|
25 |
+
if i > 0 and 'speaker' in words[i - 1]:
|
26 |
+
word_info['speaker'] = words[i - 1]['speaker']
|
27 |
+
elif i < len(words) - 1 and 'speaker' in words[i + 1]:
|
28 |
+
word_info['speaker'] = words[i + 1]['speaker']
|
29 |
+
else:
|
30 |
+
word_info['speaker'] = segment_speaker
|
31 |
+
|
32 |
+
if 'start' not in word_info:
|
33 |
+
if i > 0 and 'end' in words[i - 1]:
|
34 |
+
word_info['start'] = words[i - 1]['end']
|
35 |
+
else:
|
36 |
+
word_info['start'] = segment_start
|
37 |
+
|
38 |
+
if 'end' not in word_info:
|
39 |
+
if i < len(words) - 1 and 'start' in words[i + 1]:
|
40 |
+
word_info['end'] = words[i - 1]['start']
|
41 |
+
elif i == len(words) - 1:
|
42 |
+
word_info['end'] = segment_end
|
43 |
+
else:
|
44 |
+
word_info['end'] = word_info['start']
|
45 |
+
|
46 |
for word_info in words:
|
47 |
+
word = word_info.get('word', '')
|
48 |
+
start = word_info.get('start', None)
|
49 |
+
end = word_info.get('end', None)
|
50 |
+
speaker = word_info.get('speaker', None)
|
|
|
|
|
51 |
|
52 |
if current_speaker is None:
|
53 |
current_speaker = speaker
|
|
|
58 |
current_end = end
|
59 |
else:
|
60 |
# Finish current segment
|
61 |
+
if current_start is not None and current_end is not None:
|
62 |
+
formatted_text = f'{current_speaker} ({current_start} : {current_end}) : {" ".join(current_text)}'
|
63 |
+
else:
|
64 |
+
formatted_text = f'{current_speaker} : {" ".join(current_text)}'
|
65 |
result.append(formatted_text)
|
66 |
|
67 |
# Start new segment
|
|
|
72 |
|
73 |
# Append the last segment
|
74 |
if current_text:
|
75 |
+
if current_start is not None and current_end is not None:
|
76 |
+
formatted_text = f'{current_speaker} ({current_start} : {current_end}) : {" ".join(current_text)}'
|
77 |
+
else:
|
78 |
+
formatted_text = f'{current_speaker} : {" ".join(current_text)}'
|
79 |
result.append(formatted_text)
|
80 |
|
81 |
return '\n'.join(result)
|