""" Module for working with YouTube service """ import gzip import json import re import urllib from html.parser import HTMLParser from json import JSONDecodeError from urllib.request import Request from app.commons import log _YT_PATTERN = re.compile(r"https://www.youtube.com/.+(?:v=)([\w-]{11}).*") _YT_LIST_PATTERN = re.compile(r"https://www.youtube.com/.+?(?:list=)([\w-]{23,})?.*") _YT_VIDEO_PATTERN = re.compile(r"https://r\d+---sn-[\w]{10}-[\w]{3,5}.googlevideo.com/videoplayback?.*") _HEADERS = {"User-Agent": "Mozilla/5.0 (X11; Linux i586; rv:31.0) Gecko/20100101 Firefox/69.0", "DNT": "1", "Accept-Encoding": "gzip, deflate"} Quality = {137: "1080p", 136: "720p", 135: "480p", 134: "360p", 133: "240p", 160: "144p", 0: "0p", 18: "360p", 22: "720p"} class YouTube: """ Helper class for working with YouTube service. """ @staticmethod def is_yt_video_link(url): return re.match(_YT_VIDEO_PATTERN, url) @staticmethod def get_yt_id(url): """ Returns video id or None """ yt = re.search(_YT_PATTERN, url) if yt: return yt.group(1) @staticmethod def get_yt_list_id(url): """ Returns playlist id or None """ yt = re.search(_YT_LIST_PATTERN, url) if yt: return yt.group(1) @staticmethod def get_yt_link(video_id): """ Getting link to YouTube video by id. returns tuple from the video links dict and title """ req = Request("https://youtube.com/get_video_info?video_id={}&hl=en".format(video_id), headers=_HEADERS) with urllib.request.urlopen(req, timeout=2) as resp: data = urllib.request.unquote(gzip.decompress(resp.read()).decode("utf-8")).split("&") out = {k: v for k, sep, v in (str(d).partition("=") for d in map(urllib.request.unquote, data))} player_resp = out.get("player_response", None) if player_resp: try: resp = json.loads(player_resp) except JSONDecodeError as e: log("{}: Parsing player response error: {}".format(__class__.__name__, e)) else: det = resp.get("videoDetails", None) title = det.get("title", None) if det else None streaming_data = resp.get("streamingData", None) fmts = streaming_data.get("formats", None) if streaming_data else None if fmts: urls = {Quality[i["itag"]]: i["url"] for i in filter(lambda i: i.get("itag", -1) in Quality, fmts)} if urls and title: return urls, title.replace("+", " ") stream_map = out.get("url_encoded_fmt_stream_map", None) if stream_map: s_map = {k: v for k, sep, v in (str(d).partition("=") for d in stream_map.split("&"))} url, title = s_map.get("url", None), out.get("title", None) url, title = urllib.request.unquote(url) if url else "", title.replace("+", " ") if title else "" if url and title: return {Quality[0]: url}, title.replace("+", " ") rsn = out.get("reason", None) rsn = rsn.replace("+", " ") if rsn else "" log("{}: Getting link to video with id {} filed! Cause: {}".format(__class__.__name__, video_id, rsn)) return None, rsn class PlayListParser(HTMLParser): """ Very simple parser to handle YouTube playlist pages. """ def __init__(self): super().__init__() self._is_header = False self._header = "" self._playlist = [] self._is_script = False def handle_starttag(self, tag, attrs): if tag == "script": self._is_script = True def handle_data(self, data): if self._is_script: data = data.lstrip() if data.startswith('window["ytInitialData"] = '): data = data.split(";")[0].lstrip('window["ytInitialData"] = ') try: resp = json.loads(data) except JSONDecodeError as e: log("{}: Parsing data error: {}".format(__class__.__name__, e)) else: sb = resp.get("sidebar", None) if sb: for t in [t["runs"][0] for t in flat("title", sb) if "runs" in t]: txt = t.get("text", None) if txt: self._header = txt break ct = resp.get("contents", None) if ct: for d in [(d["title"]["simpleText"], d["videoId"]) for d in flat("playlistVideoRenderer", ct)]: self._playlist.append(d) self._is_script = False def error(self, message): log("{} Parsing error: {}".format(__class__.__name__, message)) @property def header(self): return self._header @property def playlist(self): return self._playlist @staticmethod def get_yt_playlist(play_list_id): """ Getting YouTube playlist by id. returns tuple from the playlist header and list of tuples (title, video id) """ request = Request("https://www.youtube.com/playlist?list={}&hl=en".format(play_list_id), headers=_HEADERS) with urllib.request.urlopen(request, timeout=2) as resp: data = gzip.decompress(resp.read()).decode("utf-8") parser = PlayListParser() parser.feed(data) return parser.header, parser.playlist def flat(key, d): for k, v in d.items(): if k == key: yield v elif isinstance(v, dict): yield from flat(key, v) elif isinstance(v, list): for el in v: if isinstance(el, dict): yield from flat(key, el) if __name__ == "__main__": pass