File size: 2,680 Bytes
093a866
ba9fae4
 
093a866
ba9fae4
 
 
 
 
 
 
093a866
ba9fae4
 
 
 
 
 
093a866
 
ba9fae4
 
 
 
 
 
 
 
 
 
 
 
093a866
ba9fae4
 
 
 
 
 
093a866
ba9fae4
 
 
 
 
 
 
 
 
093a866
ba9fae4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
093a866
ba9fae4
 
 
 
 
 
 
 
093a866
ba9fae4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import json
import re
from typing import Optional

import yt_dlp

def getVttUrlFromSubtitles(item, lang='en', vttType="vtt"):
    """Return the URL of the subtitle track with the requested format.

    Args:
        item: Mapping of language code to a list of track dicts; each track
            dict is read for its ``"ext"`` and ``"url"`` keys.
        lang: Preferred language code. When it is missing (or has no
            tracks), ``'en'`` is tried, then the first language in ``item``.
        vttType: Track extension to look for (compared against ``"ext"``).

    Returns:
        The ``"url"`` of the first matching track, or ``None`` when ``item``
        is empty or the chosen language has no track with that extension.
    """
    if not item:
        return None

    # Pick the language: requested one, then English, then whatever is first.
    if item.get(lang):
        key = lang
    elif item.get('en'):
        key = 'en'
    else:
        key = next(iter(item))

    for track in item[key] or ():
        if track.get("ext") == vttType:
            return track.get("url")
    return None

def getSubtitle(url: str, lang: Optional[str] = 'en', vttType="vtt") -> Optional[str]:
    """Fetch the subtitle text for a video without downloading the media.

    Manually provided subtitles are tried first; automatic captions are used
    when no manual track in the requested format is available.

    Args:
        url: Video URL handed to ``yt_dlp``.
        lang: Preferred subtitle language; the language fallback is done by
            ``getVttUrlFromSubtitles``.
        vttType: Subtitle track extension to fetch.

    Returns:
        The decoded subtitle document, or ``None`` when no matching track
        exists in either the subtitles or the automatic captions.
    """
    ydl_opts = {
        "writesubtitles": True,
        "allsubtitles": True,
        "subtitleslangs": [lang] if lang else [],
        "skip_download": True,
    }

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info_dict = ydl.extract_info(url, download=False)
        if not info_dict:
            return None

        for source in ("subtitles", "automatic_captions"):
            tracks = info_dict.get(source)
            if not tracks:
                continue
            subtitle_url = getVttUrlFromSubtitles(tracks, lang, vttType)
            if subtitle_url is None:
                # No track in the requested format here; try the next source
                # instead of calling urlopen(None).
                continue
            with ydl.urlopen(subtitle_url) as subtitle:
                return subtitle.read().decode()
    return None

def fetchSubtitleUrls(url: str) -> Optional[dict]:
    """Return the subtitle track listing for a video.

    Args:
        url: Video URL handed to ``yt_dlp``.

    Returns:
        The ``"subtitles"`` entry of the extracted info when it holds
        anything other than just ``"live_chat"``; otherwise the
        ``"automatic_captions"`` entry; ``None`` when neither is available.
    """
    ydl_opts = {
        "writesubtitles": True,
        "allsubtitles": True,
        "skip_download": True,
    }

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info_dict = ydl.extract_info(url, download=False)

    if not info_dict:
        return None

    subtitles = info_dict.get("subtitles")
    # A listing that contains only "live_chat" has no real subtitle track,
    # so it is treated the same as having no subtitles at all.
    if subtitles and set(subtitles) != {"live_chat"}:
        return subtitles

    return info_dict.get("automatic_captions") or None

def get_subtitle(url, lang='en'):
    """Download a video's subtitle file and return its text with tags removed.

    The subtitle is written to disk by ``yt_dlp`` using the
    ``'%(id)s.%(ext)s'`` output template and then read back from
    ``<video id>.<lang>.vtt``. The file is left on disk afterwards.

    Args:
        url: Video URL handed to ``yt_dlp``.
        lang: Subtitle language code; ``None`` is treated as ``'en'``.

    Returns:
        The subtitle file content with every ``<...>`` tag stripped, or
        ``None`` when the video id is missing or any step fails (the error
        is printed).
    """
    if lang is None:
        lang = 'en'
    # Download subtitles if available
    ydl_opts = {
        'writesubtitles': True,
        'outtmpl': '%(id)s.%(ext)s',
        'subtitleslangs': [lang],
        'skip_download': True,
    }
    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info_dict = ydl.extract_info(url, download=True)
            video_id = info_dict.get("id", None)
            if video_id is None:
                return None

            subtitle_file = f"{video_id}.{lang}.vtt"
            # NOTE(review): assumes the subtitle file is UTF-8 encoded -- confirm.
            with open(subtitle_file, 'r', encoding='utf-8') as f:
                subtitle_content = f.read()
            # Strip inline markup such as <c> or <00:00:01.000> cue tags.
            return re.sub(r"<[^>]+>", "", subtitle_content)
    except Exception as error:
        # Best-effort: a missing subtitle file or an extraction failure is
        # reported and turned into None rather than propagated.
        print(error)
        return None