Spaces:
Running
Running
jhj0517
committed on
Commit
•
6f0e822
1
Parent(s):
0b56157
updated subtitle manager
Browse files- modules/subtitle_manager.py +89 -17
modules/subtitle_manager.py
CHANGED
@@ -1,43 +1,115 @@
|
|
1 |
import re
|
2 |
|
|
|
3 |
def timeformat_srt(time):
    """Format a time offset in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    Args:
        time: Offset in seconds (int or float).

    Returns:
        The timestamp string, with a comma before the milliseconds.
    """
    # Work in whole milliseconds. Truncating the float remainder instead
    # loses a millisecond to rounding error, e.g. (1.001 - 1) * 1000 is
    # 0.9999..., which int() would turn into 0.
    total_ms = int(round(time * 1000))
    total_seconds, milliseconds = divmod(total_ms, 1000)
    total_minutes, seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d},{milliseconds:03d}"
|
9 |
|
|
|
10 |
def timeformat_vtt(time):
    """Format a time offset in seconds as a WebVTT timestamp ``HH:MM:SS.mmm``.

    Args:
        time: Offset in seconds (int or float).

    Returns:
        The timestamp string, with a dot before the milliseconds.
    """
    # Work in whole milliseconds. Truncating the float remainder instead
    # loses a millisecond to rounding error, e.g. (1.001 - 1) * 1000 is
    # 0.9999..., which int() would turn into 0.
    total_ms = int(round(time * 1000))
    total_seconds, milliseconds = divmod(total_ms, 1000)
    total_minutes, seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d}.{milliseconds:03d}"
|
16 |
|
17 |
-
|
|
|
18 |
with open(output_file, 'w', encoding='utf-8') as f:
|
19 |
f.write(subtitle)
|
20 |
|
21 |
-
|
|
|
22 |
with open(output_file, 'w', encoding='utf-8') as f:
|
23 |
-
f.write(subtitle)
|
|
|
24 |
|
25 |
def get_srt(segments):
    """Render segments as SRT subtitle text.

    Args:
        segments: Iterable of mappings read through their ``'start'``,
            ``'end'`` and ``'text'`` keys.

    Returns:
        One string holding a numbered cue per segment (numbering starts at
        1), each followed by a blank line. Empty input gives ``""``.
    """
    cues = (
        f"{number}\n"
        f"{timeformat_srt(entry['start'])} --> {timeformat_srt(entry['end'])}\n"
        f"{entry['text']}\n\n"
        for number, entry in enumerate(segments, start=1)
    )
    return "".join(cues)
|
|
|
32 |
|
33 |
def get_vtt(segments):
    """Render segments as WebVTT subtitle text.

    Args:
        segments: Iterable of mappings read through their ``'start'``,
            ``'end'`` and ``'text'`` keys.

    Returns:
        The header block followed by one numbered cue per segment
        (numbering starts at 1), each followed by a blank line.
    """
    # NOTE(review): the WebVTT file signature is upper-case "WEBVTT"; the
    # mixed-case header is kept here to preserve the existing output.
    header = "WebVTT\n\n"
    cues = (
        f"{number}\n"
        f"{timeformat_vtt(entry['start'])} --> {timeformat_vtt(entry['end'])}\n"
        f"{entry['text']}\n\n"
        for number, entry in enumerate(segments, start=1)
    )
    return header + "".join(cues)
|
40 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
def safe_filename(name):
    """Replace characters that are unsafe in file names with underscores.

    The replaced characters are ``< > : " / | ? *``, the backslash, and the
    control characters 0x00-0x1f. Every occurrence becomes one ``_``.

    Args:
        name: The candidate file name.

    Returns:
        ``name`` with each matching character replaced.
    """
    invalid_chars = re.compile(r'[<>:"/\\|?*\x00-\x1f]')
    return invalid_chars.sub('_', name)
|
|
|
1 |
import re
|
2 |
|
3 |
+
|
4 |
def timeformat_srt(time):
    """Format a time offset in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    Args:
        time: Offset in seconds (int or float).

    Returns:
        The timestamp string, with a comma before the milliseconds.
    """
    # Work in whole milliseconds. Truncating the float remainder instead
    # loses a millisecond to rounding error, e.g. (1.001 - 1) * 1000 is
    # 0.9999..., which int() would turn into 0.
    total_ms = int(round(time * 1000))
    total_seconds, milliseconds = divmod(total_ms, 1000)
    total_minutes, seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d},{milliseconds:03d}"
|
10 |
|
11 |
+
|
12 |
def timeformat_vtt(time):
    """Format a time offset in seconds as a WebVTT timestamp ``HH:MM:SS.mmm``.

    Args:
        time: Offset in seconds (int or float).

    Returns:
        The timestamp string, with a dot before the milliseconds.
    """
    # Work in whole milliseconds. Truncating the float remainder instead
    # loses a millisecond to rounding error, e.g. (1.001 - 1) * 1000 is
    # 0.9999..., which int() would turn into 0.
    total_ms = int(round(time * 1000))
    total_seconds, milliseconds = divmod(total_ms, 1000)
    total_minutes, seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d}.{milliseconds:03d}"
|
18 |
|
19 |
+
|
20 |
+
def write_srt(subtitle, output_file):
    """Write SRT subtitle text to a file as UTF-8.

    Args:
        subtitle: The complete subtitle text to store.
        output_file: Destination path; it is opened in ``'w'`` mode, so any
            existing content is replaced.
    """
    with open(output_file, mode='w', encoding='utf-8') as srt_file:
        srt_file.write(subtitle)
|
23 |
|
24 |
+
|
25 |
+
def write_vtt(subtitle, output_file):
    """Write WebVTT subtitle text to a file as UTF-8.

    Args:
        subtitle: The complete subtitle text to store.
        output_file: Destination path; it is opened in ``'w'`` mode, so any
            existing content is replaced.
    """
    with open(output_file, mode='w', encoding='utf-8') as vtt_file:
        vtt_file.write(subtitle)
|
28 |
+
|
29 |
|
30 |
def get_srt(segments):
    """Render segments as SRT subtitle text.

    Args:
        segments: Iterable of mappings read through their ``'start'``,
            ``'end'`` and ``'text'`` keys.

    Returns:
        One string holding a numbered cue per segment (numbering starts at
        1), each followed by a blank line. Empty input gives ``""``.
    """
    cues = (
        f"{number}\n"
        f"{timeformat_srt(entry['start'])} --> {timeformat_srt(entry['end'])}\n"
        f"{entry['text']}\n\n"
        for number, entry in enumerate(segments, start=1)
    )
    return "".join(cues)
|
37 |
+
|
38 |
|
39 |
def get_vtt(segments):
    """Render segments as WebVTT subtitle text.

    Args:
        segments: Iterable of mappings read through their ``'start'``,
            ``'end'`` and ``'text'`` keys.

    Returns:
        The header block followed by one numbered cue per segment
        (numbering starts at 1), each followed by a blank line.
    """
    # NOTE(review): the WebVTT file signature is upper-case "WEBVTT"; the
    # mixed-case header is kept here to preserve the existing output.
    header = "WebVTT\n\n"
    cues = (
        f"{number}\n"
        f"{timeformat_vtt(entry['start'])} --> {timeformat_vtt(entry['end'])}\n"
        f"{entry['text']}\n\n"
        for number, entry in enumerate(segments, start=1)
    )
    return header + "".join(cues)
|
46 |
+
|
47 |
+
|
48 |
+
def parse_srt(file_path):
    """Read an SRT file and return its cues as a list of dicts.

    The file content is split on blank lines. In every non-empty block the
    first line is taken as the index, the second as the timestamp line, and
    the remaining lines are joined with single spaces into the sentence.

    Args:
        file_path: Path of the SRT file to read.

    Returns:
        A list of ``{"index", "timestamp", "sentence"}`` dicts (all values
        are strings), in file order.

    Raises:
        IndexError: If a non-empty block has fewer than two lines.
    """
    # 'utf-8-sig' reads plain UTF-8 and also drops a leading BOM, which
    # would otherwise stay glued to the first cue's index.
    with open(file_path, 'r', encoding='utf-8-sig') as file:
        srt_data = file.read()

    data = []
    for block in srt_data.split('\n\n'):
        stripped = block.strip()
        if not stripped:
            continue

        lines = stripped.split('\n')
        data.append({
            "index": lines[0],
            "timestamp": lines[1],
            "sentence": ' '.join(lines[2:]),
        })
    return data
|
69 |
+
|
70 |
+
|
71 |
+
def parse_vtt(file_path):
    """Read a WebVTT file and return its cues as a list of dicts.

    The file content is split on blank lines. Empty blocks and the header
    block are skipped. In every other block the first line is taken as the
    index, the second as the timestamp line, and the remaining lines are
    joined with single spaces into the sentence.

    Args:
        file_path: Path of the WebVTT file to read.

    Returns:
        A list of ``{"index", "timestamp", "sentence"}`` dicts (all values
        are strings), in file order.

    Raises:
        IndexError: If a non-header, non-empty block has fewer than two
            lines.
    """
    # 'utf-8-sig' reads plain UTF-8 and also drops a leading BOM, which
    # would otherwise hide the header from the check below.
    with open(file_path, 'r', encoding='utf-8-sig') as file:
        webvtt_data = file.read()

    data = []
    for block in webvtt_data.split('\n\n'):
        stripped = block.strip()
        if not stripped:
            continue
        # Match the header case-insensitively: this module writes "WebVTT",
        # while the format's own signature is upper-case "WEBVTT".
        if stripped.upper().startswith("WEBVTT"):
            continue

        lines = stripped.split('\n')
        data.append({
            "index": lines[0],
            "timestamp": lines[1],
            "sentence": ' '.join(lines[2:]),
        })

    return data
|
93 |
+
|
94 |
+
|
95 |
+
def get_serialized_srt(dicts):
    """Serialize cue dicts to SRT text.

    Args:
        dicts: Iterable of mappings read through their ``"index"``,
            ``"timestamp"`` and ``"sentence"`` keys.

    Returns:
        The three fields of every cue on consecutive lines, each cue
        followed by a blank line. Empty input gives ``""``.
    """
    return "".join(
        f'{cue["index"]}\n'
        f'{cue["timestamp"]}\n'
        f'{cue["sentence"]}\n\n'
        for cue in dicts
    )
|
102 |
|
103 |
+
|
104 |
+
def get_serialized_vtt(dicts):
    """Serialize cue dicts to WebVTT text.

    Args:
        dicts: Iterable of mappings read through their ``"index"``,
            ``"timestamp"`` and ``"sentence"`` keys.

    Returns:
        The header block, then the three fields of every cue on consecutive
        lines, each cue followed by a blank line.
    """
    # NOTE(review): the WebVTT file signature is upper-case "WEBVTT"; the
    # mixed-case header is kept here to preserve the existing output.
    header = "WebVTT\n\n"
    body = "".join(
        f'{cue["index"]}\n'
        f'{cue["timestamp"]}\n'
        f'{cue["sentence"]}\n\n'
        for cue in dicts
    )
    return header + body
|
111 |
+
|
112 |
+
|
113 |
def safe_filename(name):
    """Replace characters that are unsafe in file names with underscores.

    The replaced characters are ``< > : " / | ? *``, the backslash, and the
    control characters 0x00-0x1f. Every occurrence becomes one ``_``.

    Args:
        name: The candidate file name.

    Returns:
        ``name`` with each matching character replaced.
    """
    invalid_chars = re.compile(r'[<>:"/\\|?*\x00-\x1f]')
    return invalid_chars.sub('_', name)
|