lanbogao committed on
Commit
d53a7e5
1 Parent(s): 5f54ad5

Set user requested lang to requested_subtitles

Browse files

Get subtitle from subtitles if the subtitle fetched by lang is empty, e.g. uwIHyLPBZkY: this video has jp and jp to others, but jp to en is empty.

Files changed (1) hide show
  1. fetchYoutubeSubtitle.py +39 -27
fetchYoutubeSubtitle.py CHANGED
@@ -121,7 +121,7 @@ async def fetchSubtitlebyType(
121
  proxy: Optional[str] = None,
122
  ) -> dict:
123
  # lang-code or lang.* .* is regex
124
- reqLang = lang if len(lang.split("-")) > 1 or lang.endswith(".*") else lang + ".*"
125
 
126
  ydl_opts = {
127
  "noplaylist": True,
@@ -129,7 +129,10 @@ async def fetchSubtitlebyType(
129
  "writeautomaticsub": True,
130
  # "listsubtitles": True,
131
  # "subtitlesformat": subType, # mark due to default youtube no srt and xml format
132
- "subtitleslangs": [reqLang],
 
 
 
133
  "skip_download": True,
134
  "socket_timeout": 10,
135
  "extractor_retries": 0,
@@ -164,37 +167,46 @@ async def fetchSubtitlebyType(
164
  isSrt = True
165
  if debug:
166
  print(
167
- "subtitles.keys(): {} automatic_captions: {}".format(
168
  info_dict.get("subtitles").keys(),
169
  info_dict.get("automatic_captions").keys(),
 
170
  )
171
  )
172
 
173
- subtitle_url = getRequestedSubtitlesUrl(info_dict, lang, subType)
174
- if not subtitle_url:
175
- subtitle_url = getSubtitleLangUrl(info_dict, lang, subType)
176
- if not subtitle_url:
177
- subtitle_url = getSubtitleOtherUrl(info_dict, lang, subType)
178
-
179
- if subtitle_url:
180
- # print("subtitle_url: {}".format(subtitle_url))
181
- with ydl.urlopen(subtitle_url) as response:
182
- subtitle = (
183
- xml_caption_to_srt(response.read().decode())
184
- if isSrt
185
- else response.read().decode()
186
- )
187
- print(
188
- "url:{}, title:{}, duration:{} len(subtitle): {}".format(
189
- url, title, duration, len(subtitle or "")
 
 
 
190
  )
191
- )
192
- return {
193
- "title": title,
194
- "duration": duration,
195
- "subtitle": subtitle,
196
- "chapters": info_dict.get("chapters", None),
197
- }
 
 
 
 
 
198
  except Exception as e:
199
  print(e)
200
  traceback.print_exc()
 
121
  proxy: Optional[str] = None,
122
  ) -> dict:
123
  # lang-code or lang.* .* is regex
124
+ # reqLang = lang if len(lang.split("-")) > 1 or lang.endswith(".*") else lang + ".*"
125
 
126
  ydl_opts = {
127
  "noplaylist": True,
 
129
  "writeautomaticsub": True,
130
  # "listsubtitles": True,
131
  # "subtitlesformat": subType, # mark due to default youtube no srt and xml format
132
+ "subtitleslangs": [
133
+ lang,
134
+ "-live_chat",
135
+ ], # filter live chat to requested_subtitles
136
  "skip_download": True,
137
  "socket_timeout": 10,
138
  "extractor_retries": 0,
 
167
  isSrt = True
168
  if debug:
169
  print(
170
+ "subtitles.keys(): {} automatic_captions: {} requested_subtitles: {}".format(
171
  info_dict.get("subtitles").keys(),
172
  info_dict.get("automatic_captions").keys(),
173
+ info_dict.get("requested_subtitles").keys(),
174
  )
175
  )
176
 
177
+ # subtitle_url = getRequestedSubtitlesUrl(info_dict, lang, subType)
178
+ # if not subtitle_url:
179
+ # subtitle_url = getSubtitleLangUrl(info_dict, lang, subType)
180
+ # if not subtitle_url:
181
+ # subtitle_url = getSubtitleOtherUrl(info_dict, lang, subType)
182
+
183
+ subtitle_funcs = [
184
+ getRequestedSubtitlesUrl,
185
+ getSubtitleLangUrl,
186
+ getSubtitleOtherUrl,
187
+ ]
188
+ for index in range(len(subtitle_funcs)):
189
+ subtitle_url = subtitle_funcs[index](info_dict, lang, subType)
190
+ if subtitle_url:
191
+ # print("subtitle_url: {}".format(subtitle_url))
192
+ with ydl.urlopen(subtitle_url) as response:
193
+ subtitle = (
194
+ xml_caption_to_srt(response.read().decode())
195
+ if isSrt
196
+ else response.read().decode()
197
  )
198
+ print(
199
+ "function index:{}, url:{}, title:{}, duration:{} len(subtitle): {}".format(
200
+ index, url, title, duration, len(subtitle or "")
201
+ )
202
+ )
203
+ if subtitle is not None:
204
+ return {
205
+ "title": title,
206
+ "duration": duration,
207
+ "subtitle": subtitle,
208
+ "chapters": info_dict.get("chapters", None),
209
+ }
210
  except Exception as e:
211
  print(e)
212
  traceback.print_exc()