Sabbah13 committed on
Commit
350d733
1 Parent(s): ab98593

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -9
app.py CHANGED
@@ -6,22 +6,48 @@ import streamlit as st
6
  import whisperx
7
  import torch
8
 
9
- def convert_segments_to_text(data):
10
  result = []
 
11
  for segment in data['segments']:
12
  words = segment['words']
 
 
 
13
  current_speaker = None
14
  current_start = None
15
  current_end = None
16
  current_text = []
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  for word_info in words:
19
- print('Word info: ')
20
- print(word_info)
21
- word = word_info['word']
22
- start = word_info['start']
23
- end = word_info['end']
24
- speaker = word_info['speaker']
25
 
26
  if current_speaker is None:
27
  current_speaker = speaker
@@ -32,7 +58,10 @@ def convert_segments_to_text(data):
32
  current_end = end
33
  else:
34
  # Finish current segment
35
- formatted_text = f'{current_speaker} ({current_start} : {current_end}) : {" ".join(current_text)}'
 
 
 
36
  result.append(formatted_text)
37
 
38
  # Start new segment
@@ -43,7 +72,10 @@ def convert_segments_to_text(data):
43
 
44
  # Append the last segment
45
  if current_text:
46
- formatted_text = f'{current_speaker} ({current_start} : {current_end}) : {" ".join(current_text)}'
 
 
 
47
  result.append(formatted_text)
48
 
49
  return '\n'.join(result)
 
6
  import whisperx
7
  import torch
8
 
9
+ def convert_segments_object_to_text(data):
10
  result = []
11
+
12
  for segment in data['segments']:
13
  words = segment['words']
14
+ segment_speaker = segment.get('speaker', None)
15
+ segment_start = segment.get('start', None)
16
+ segment_end = segment.get('end', None)
17
  current_speaker = None
18
  current_start = None
19
  current_end = None
20
  current_text = []
21
 
22
+ # Forward fill speaker, start and end if missing
23
+ for i, word_info in enumerate(words):
24
+ if 'speaker' not in word_info:
25
+ if i > 0 and 'speaker' in words[i - 1]:
26
+ word_info['speaker'] = words[i - 1]['speaker']
27
+ elif i < len(words) - 1 and 'speaker' in words[i + 1]:
28
+ word_info['speaker'] = words[i + 1]['speaker']
29
+ else:
30
+ word_info['speaker'] = segment_speaker
31
+
32
+ if 'start' not in word_info:
33
+ if i > 0 and 'end' in words[i - 1]:
34
+ word_info['start'] = words[i - 1]['end']
35
+ else:
36
+ word_info['start'] = segment_start
37
+
38
+ if 'end' not in word_info:
39
+ if i < len(words) - 1 and 'start' in words[i + 1]:
40
+ word_info['end'] = words[i - 1]['start']
41
+ elif i == len(words) - 1:
42
+ word_info['end'] = segment_end
43
+ else:
44
+ word_info['end'] = word_info['start']
45
+
46
  for word_info in words:
47
+ word = word_info.get('word', '')
48
+ start = word_info.get('start', None)
49
+ end = word_info.get('end', None)
50
+ speaker = word_info.get('speaker', None)
 
 
51
 
52
  if current_speaker is None:
53
  current_speaker = speaker
 
58
  current_end = end
59
  else:
60
  # Finish current segment
61
+ if current_start is not None and current_end is not None:
62
+ formatted_text = f'{current_speaker} ({current_start} : {current_end}) : {" ".join(current_text)}'
63
+ else:
64
+ formatted_text = f'{current_speaker} : {" ".join(current_text)}'
65
  result.append(formatted_text)
66
 
67
  # Start new segment
 
72
 
73
  # Append the last segment
74
  if current_text:
75
+ if current_start is not None and current_end is not None:
76
+ formatted_text = f'{current_speaker} ({current_start} : {current_end}) : {" ".join(current_text)}'
77
+ else:
78
+ formatted_text = f'{current_speaker} : {" ".join(current_text)}'
79
  result.append(formatted_text)
80
 
81
  return '\n'.join(result)