Spaces:
Sleeping
Sleeping
import json
import re
from typing import Optional

import yt_dlp
def getVttUrlFromSubtitles(item, lang='en', vttType="vtt"):
    """Pick the URL of a subtitle track in the requested format.

    Args:
        item: Mapping of language code -> list of track dicts. Each track
            dict is read via its "ext" and "url" keys.
        lang: Preferred language code. If it is not present, 'en' is used;
            if 'en' is missing too, the first language in the mapping is used.
        vttType: Value the track's "ext" field must equal (default "vtt").

    Returns:
        The "url" of the first matching track, or None when the mapping is
        empty or the chosen language has no track in the requested format.
    """
    if not item:
        return None

    if lang in item:
        key = lang
    elif 'en' in item:
        key = 'en'
    else:
        # dict views are not subscriptable; take the first key via iteration.
        key = next(iter(item))

    # Look the tracks up on the mapping itself (not on its keys view).
    for track in item[key] or ():
        if track.get("ext") == vttType:
            return track.get("url")
    return None
async def fetchSubtitle(url: str, lang: Optional[str] = 'en', vttType="vtt") -> Optional[str]:
    """Fetch the text of a subtitle track for the video at ``url``.

    The video's metadata is extracted with yt-dlp (no media download), then
    the "subtitles" entry of the info dict is tried first and
    "automatic_captions" second. A source is skipped when it is missing or
    has no track in the requested format.

    Args:
        url: Video URL handed to ``YoutubeDL.extract_info``.
        lang: Preferred language code; forwarded to
            ``getVttUrlFromSubtitles``, which applies its own fallbacks.
        vttType: Subtitle format extension to look for (default "vtt").

    Returns:
        The decoded subtitle body, or None when no matching track is found.

    Note:
        The yt-dlp calls below are ordinary blocking calls; nothing in this
        coroutine is awaited.
    """
    ydl_opts = {
        "writesubtitles": True,
        "allsubtitles": True,
        "subtitleslangs": [lang] if lang else [],
        "skip_download": True,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info_dict = ydl.extract_info(url, download=False)
        if not info_dict:
            return None

        # Prefer manual subtitles, then fall back to automatic captions.
        for source in ("subtitles", "automatic_captions"):
            tracks = info_dict.get(source)
            if not tracks:
                continue
            subtitle_url = getVttUrlFromSubtitles(tracks, lang, vttType)
            if subtitle_url is None:
                # No track in the requested format here; never urlopen(None).
                continue
            with ydl.urlopen(subtitle_url) as subtitle:
                return subtitle.read().decode()
    return None
async def fetchSubtitleUrls(url: str) -> Optional[dict]:
    """Return the subtitle track listing for the video at ``url``.

    The video's metadata is extracted with yt-dlp (no media download).
    The info dict's "subtitles" mapping is returned unless it is empty or
    its only key is "live_chat"; in that case the "automatic_captions"
    mapping is returned when non-empty.

    Args:
        url: Video URL handed to ``YoutubeDL.extract_info``.

    Returns:
        The chosen mapping exactly as found in the info dict, or None when
        neither source is usable.

    Note:
        The yt-dlp calls below are ordinary blocking calls; nothing in this
        coroutine is awaited.
    """
    ydl_opts = {
        "writesubtitles": True,
        "allsubtitles": True,
        "skip_download": True,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info_dict = ydl.extract_info(url, download=False)
        if not info_dict:
            return None

        subtitles = info_dict.get("subtitles")
        # A mapping whose only key is "live_chat" is treated as having no
        # usable subtitles, so fall through to automatic captions.
        if subtitles and set(subtitles) != {"live_chat"}:
            return subtitles

        automatic = info_dict.get("automatic_captions")
        if automatic:
            return automatic
    return None
def get_subtitle(url, lang='en'):
    """Download a video's subtitle file and return its text without tags.

    yt-dlp is run with ``skip_download`` and ``writesubtitles`` so that only
    the subtitle file is written, using the output template
    ``%(id)s.%(ext)s``. The file ``<video id>.<lang>.vtt`` is then read and
    every ``<...>`` tag is stripped from its content.

    Args:
        url: Video URL handed to ``YoutubeDL.extract_info``.
        lang: Subtitle language code; None is treated as 'en'.

    Returns:
        The subtitle text with tags removed, or None when the video id is
        missing or any step fails (the error is printed).
    """
    if lang is None:
        lang = 'en'

    # Download subtitles if available
    ydl_opts = {
        'writesubtitles': True,
        'outtmpl': '%(id)s.%(ext)s',
        'subtitleslangs': [lang],
        'skip_download': True,
    }
    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info_dict = ydl.extract_info(url, download=True)
        video_id = info_dict.get("id", None)
        if video_id is None:
            return None

        # Assumes yt-dlp wrote the subtitle as "<id>.<lang>.vtt" per the
        # template above; a missing file is reported by the handler below.
        subtitle_file = f"{video_id}.{lang}.vtt"
        with open(subtitle_file, 'r', encoding='utf-8') as f:
            subtitle_content = f.read()

        # Strip inline markup such as <c> or <00:00:01.000> from the cues.
        return re.sub(r"<[^>]+>", "", subtitle_content)
    except Exception as error:
        # Best-effort API: report the failure and signal it with None.
        print(error)
        return None