lanbogao committed on
Commit
0d59eb5
1 Parent(s): 3965709

Add support for downloading subtitles with subType "xml".

Browse files
Files changed (1) hide show
  1. fetchYoutubeSubtitle.py +6 -2
fetchYoutubeSubtitle.py CHANGED
@@ -30,7 +30,8 @@ def getUrlFromSubtitles(item, lang='en', subType="vtt"):
30
  for subtitle in item[l]:
31
  # print("getUrlFromSubtitles subtitle: %s" % subtitle)
32
  if l != "live_chat" and subType =="xml":
33
- return subtitle.get("url").replace("fmt="+subtitle.get("ext"),"")
 
34
  if subtitle.get("ext") == subType:
35
  return subtitle.get("url")
36
  return None
@@ -56,15 +57,18 @@ async def fetchSubtitlebyType(url: str, lang: Optional[str] = 'en', subType="vtt
56
  title = info_dict.get("title", "unknow")
57
  seconds = info_dict.get("duration")
58
  duration = str(seconds) if seconds else ""
 
59
  if info_dict.get("extractor") == "youtube" and subType == "srt":
60
  subType = "xml"
 
61
 
62
  for subtitle_item in ["subtitles", "automatic_captions"]: # "requested_subtitles" item is dict
63
  if info_dict.get(subtitle_item):
64
  subtitle_url = getUrlFromSubtitles(info_dict.get(subtitle_item), lang, subType)
65
  if subtitle_url:
 
66
  with ydl.urlopen(subtitle_url) as response:
67
- subtitle = xml_caption_to_srt(response.read().decode()) if subType == "xml" else response.read().decode()
68
  print("url{}, title:{}, duration:{} len(subtitle): {}".format(url, title, duration, len(subtitle)))
69
  return {"title": title, "duration": duration,"subtitle": subtitle}
70
  except Exception as e:
 
30
  for subtitle in item[l]:
31
  # print("getUrlFromSubtitles subtitle: %s" % subtitle)
32
  if l != "live_chat" and subType =="xml":
33
+ # print("subtitle source url: {}".format(subtitle.get("url")))
34
+ return subtitle.get("url").replace("&fmt="+subtitle.get("ext"),"")
35
  if subtitle.get("ext") == subType:
36
  return subtitle.get("url")
37
  return None
 
57
  title = info_dict.get("title", "unknow")
58
  seconds = info_dict.get("duration")
59
  duration = str(seconds) if seconds else ""
60
+ isSrt = False
61
  if info_dict.get("extractor") == "youtube" and subType == "srt":
62
  subType = "xml"
63
+ isSrt = True
64
 
65
  for subtitle_item in ["subtitles", "automatic_captions"]: # "requested_subtitles" item is dict
66
  if info_dict.get(subtitle_item):
67
  subtitle_url = getUrlFromSubtitles(info_dict.get(subtitle_item), lang, subType)
68
  if subtitle_url:
69
+ # print("subtitle_url: {}".format(subtitle_url))
70
  with ydl.urlopen(subtitle_url) as response:
71
+ subtitle = xml_caption_to_srt(response.read().decode()) if isSrt else response.read().decode()
72
  print("url{}, title:{}, duration:{} len(subtitle): {}".format(url, title, duration, len(subtitle)))
73
  return {"title": title, "duration": duration,"subtitle": subtitle}
74
  except Exception as e: