jhj0517 committed on
Commit
6f0e822
1 Parent(s): 0b56157

updated subtitle manager

Browse files
Files changed (1) hide show
  1. modules/subtitle_manager.py +89 -17
modules/subtitle_manager.py CHANGED
@@ -1,43 +1,115 @@
1
  import re
2
 
 
3
  def timeformat_srt(time):
4
- hours = time//3600
5
- minutes = (time - hours*3600)//60
6
- seconds = time - hours*3600 - minutes*60
7
- milliseconds = (time - int(time))*1000
8
  return f"{int(hours):02d}:{int(minutes):02d}:{int(seconds):02d},{int(milliseconds):03d}"
9
 
 
10
  def timeformat_vtt(time):
11
- hours = time//3600
12
- minutes = (time - hours*3600)//60
13
- seconds = time - hours*3600 - minutes*60
14
- milliseconds = (time - int(time))*1000
15
  return f"{int(hours):02d}:{int(minutes):02d}:{int(seconds):02d}.{int(milliseconds):03d}"
16
 
17
- def write_srt(subtitle,output_file):
 
18
  with open(output_file, 'w', encoding='utf-8') as f:
19
  f.write(subtitle)
20
 
21
- def write_vtt(subtitle,output_file):
 
22
  with open(output_file, 'w', encoding='utf-8') as f:
23
- f.write(subtitle)
 
24
 
25
  def get_srt(segments):
26
  output = ""
27
  for i, segment in enumerate(segments):
28
- output += f"{i+1}\n"
29
  output += f"{timeformat_srt(segment['start'])} --> {timeformat_srt(segment['end'])}\n"
30
- output += f"{segment['text']}\n\n"
31
- return output
 
32
 
33
  def get_vtt(segments):
34
  output = "WebVTT\n\n"
35
  for i, segment in enumerate(segments):
36
- output += f"{i+1}\n"
37
  output += f"{timeformat_vtt(segment['start'])} --> {timeformat_vtt(segment['end'])}\n"
38
- output += f"{segment['text']}\n\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  return output
40
 
 
 
 
 
 
 
 
 
 
 
41
  def safe_filename(name):
42
  INVALID_FILENAME_CHARS = r'[<>:"/\\|?*\x00-\x1f]'
43
- return re.sub(INVALID_FILENAME_CHARS, '_', name)
 
1
  import re
2
 
3
+
4
def timeformat_srt(time):
    """Format a time offset in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    Args:
        time: Offset in seconds (int or float).

    Returns:
        str: Zero-padded timestamp using a comma before the milliseconds.
    """
    # Round to whole milliseconds once, then split with integer arithmetic.
    # Truncating (time - int(time)) * 1000 loses a millisecond whenever the
    # float lands just below the intended value (e.g. 1.001 -> ",000").
    total_ms = int(round(time * 1000))
    hours, remainder = divmod(total_ms, 3600000)
    minutes, remainder = divmod(remainder, 60000)
    seconds, milliseconds = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d},{milliseconds:03d}"
10
 
11
+
12
def timeformat_vtt(time):
    """Format a time offset in seconds as a WebVTT timestamp ``HH:MM:SS.mmm``.

    Args:
        time: Offset in seconds (int or float).

    Returns:
        str: Zero-padded timestamp using a dot before the milliseconds.
    """
    # Round to whole milliseconds once, then split with integer arithmetic.
    # Truncating (time - int(time)) * 1000 loses a millisecond whenever the
    # float lands just below the intended value (e.g. 1.001 -> ".000").
    total_ms = int(round(time * 1000))
    hours, remainder = divmod(total_ms, 3600000)
    minutes, remainder = divmod(remainder, 60000)
    seconds, milliseconds = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d}.{milliseconds:03d}"
18
 
19
+
20
def write_srt(subtitle, output_file):
    """Write the SRT text ``subtitle`` to ``output_file`` as UTF-8.

    The file is opened in write mode, so existing content is replaced.
    """
    with open(output_file, mode='w', encoding='utf-8') as srt_file:
        srt_file.write(subtitle)
23
 
24
+
25
def write_vtt(subtitle, output_file):
    """Write the WebVTT text ``subtitle`` to ``output_file`` as UTF-8.

    The file is opened in write mode, so existing content is replaced.
    """
    with open(output_file, mode='w', encoding='utf-8') as vtt_file:
        vtt_file.write(subtitle)
28
+
29
 
30
def get_srt(segments):
    """Render segments as SRT text.

    Args:
        segments: Iterable of mappings providing ``'start'`` and ``'end'``
            (passed to ``timeformat_srt``) and ``'text'``.

    Returns:
        str: One block per segment -- a 1-based counter line, a
        ``start --> end`` line and the text -- each followed by a blank
        line. Empty string when there are no segments.
    """
    # Build every block first and join once instead of growing a string
    # with += inside the loop.
    return "".join(
        f"{number}\n"
        f"{timeformat_srt(segment['start'])} --> {timeformat_srt(segment['end'])}\n"
        f"{segment['text']}\n\n"
        for number, segment in enumerate(segments, start=1)
    )
37
+
38
 
39
def get_vtt(segments):
    """Render segments as WebVTT text.

    Args:
        segments: Iterable of mappings providing ``'start'`` and ``'end'``
            (passed to ``timeformat_vtt``) and ``'text'``.

    Returns:
        str: The header line followed by one block per segment -- a 1-based
        counter line, a ``start --> end`` line and the text -- each followed
        by a blank line.
    """
    # NOTE(review): the WebVTT spec spells the file signature "WEBVTT";
    # strict parsers may reject this mixed-case header. It is kept as-is
    # because parse_vtt in this file matches this exact spelling -- change
    # both together.
    header = "WebVTT\n\n"
    # Build every block first and join once instead of growing a string
    # with += inside the loop.
    cues = "".join(
        f"{number}\n"
        f"{timeformat_vtt(segment['start'])} --> {timeformat_vtt(segment['end'])}\n"
        f"{segment['text']}\n\n"
        for number, segment in enumerate(segments, start=1)
    )
    return header + cues
46
+
47
+
48
def parse_srt(file_path):
    """Read an SRT file and return its cues as a list of dicts.

    Each dict has the string keys ``"index"`` (first line of a block),
    ``"timestamp"`` (second line) and ``"sentence"`` (all remaining lines
    joined with single spaces).

    Args:
        file_path: Path of the UTF-8 encoded SRT file.

    Returns:
        list[dict]: One dict per block, in file order. Blocks with fewer
        than two lines (no timestamp line) are skipped.
    """
    # 'utf-8-sig' drops a leading byte-order mark so it cannot end up glued
    # to the first cue's index; files without a BOM decode as plain UTF-8.
    with open(file_path, 'r', encoding='utf-8-sig') as file:
        srt_data = file.read()

    data = []
    # Split on blank lines, including "blank" lines that hold stray spaces.
    for block in re.split(r'\n\s*\n', srt_data):
        lines = block.strip().split('\n')
        if len(lines) < 2:
            # Empty chunk or a fragment without a timestamp line; indexing
            # lines[1] below would raise IndexError.
            continue

        data.append({
            "index": lines[0],
            "timestamp": lines[1],
            "sentence": ' '.join(lines[2:])
        })
    return data
69
+
70
+
71
def parse_vtt(file_path):
    """Read a WebVTT file and return its cues as a list of dicts.

    Each dict has the string keys ``"index"`` (the cue identifier line, or
    ``""`` when the cue has none), ``"timestamp"`` (the ``-->`` line) and
    ``"sentence"`` (all remaining lines joined with single spaces).

    Args:
        file_path: Path of the UTF-8 encoded WebVTT file.

    Returns:
        list[dict]: One dict per cue block, in file order. The header block
        and blocks with fewer than two lines are skipped.
    """
    # 'utf-8-sig' drops a leading byte-order mark so the header check below
    # still matches; files without a BOM decode as plain UTF-8.
    with open(file_path, 'r', encoding='utf-8-sig') as file:
        webvtt_data = file.read()

    data = []
    # Split on blank lines, including "blank" lines that hold stray spaces.
    for block in re.split(r'\n\s*\n', webvtt_data):
        stripped = block.strip()
        # Match the header case-insensitively: this module writes "WebVTT",
        # while files from other tools start with "WEBVTT".
        if not stripped or stripped.upper().startswith("WEBVTT"):
            continue

        lines = stripped.split('\n')
        if '-->' in lines[0]:
            # Cue without an identifier line: keep the timestamp in the
            # "timestamp" slot instead of shifting every field up by one.
            lines.insert(0, '')
        if len(lines) < 2:
            # Fragment without a timestamp line; lines[1] would raise.
            continue

        data.append({
            "index": lines[0],
            "timestamp": lines[1],
            "sentence": ' '.join(lines[2:])
        })

    return data
93
+
94
+
95
def get_serialized_srt(dicts):
    """Serialize cue dicts back into SRT text.

    Args:
        dicts: Iterable of mappings with the keys ``"index"``,
            ``"timestamp"`` and ``"sentence"``.

    Returns:
        str: For each entry, the three values on consecutive lines followed
        by a blank line. Empty string when ``dicts`` is empty.
    """
    # Join once instead of growing a string with += inside the loop.
    return "".join(
        f'{dic["index"]}\n{dic["timestamp"]}\n{dic["sentence"]}\n\n'
        for dic in dicts
    )
102
 
103
+
104
def get_serialized_vtt(dicts):
    """Serialize cue dicts back into WebVTT text.

    Args:
        dicts: Iterable of mappings with the keys ``"index"``,
            ``"timestamp"`` and ``"sentence"``.

    Returns:
        str: The header line and a blank line, then for each entry the three
        values on consecutive lines followed by a blank line.
    """
    # NOTE(review): the WebVTT spec spells the file signature "WEBVTT";
    # strict parsers may reject this mixed-case header. It is kept as-is
    # because parse_vtt in this file matches this exact spelling -- change
    # both together.
    header = "WebVTT\n\n"
    # Join once instead of growing a string with += inside the loop.
    cues = "".join(
        f'{dic["index"]}\n{dic["timestamp"]}\n{dic["sentence"]}\n\n'
        for dic in dicts
    )
    return header + cues
111
+
112
+
113
def safe_filename(name):
    """Return ``name`` with characters that are invalid in file names replaced.

    Every occurrence of ``< > : " / \\ | ? *`` and of a control character in
    the range ``\\x00``-``\\x1f`` becomes an underscore.
    """
    invalid_chars = re.compile(r'[<>:"/\\|?*\x00-\x1f]')
    return invalid_chars.sub('_', name)