# Mirror of https://github.com/DYefremov/DemonEditor.git
# Synced 2026-01-19 22:13:10 +01:00
# 168 lines, 6.0 KiB, Python
""" Module for working with YouTube service """
|
|
import gzip
|
|
import json
|
|
import re
|
|
import urllib
|
|
from html.parser import HTMLParser
|
|
from json import JSONDecodeError
|
|
from urllib.request import Request
|
|
|
|
from app.commons import log
|
|
|
|
# Watch-page URL; group 1 captures the 11-character video id that follows "v=".
# The dots of the host name are escaped so that they only match a literal ".".
_YT_PATTERN = re.compile(r"https://www\.youtube\.com/.+(?:v=)([\w-]{11}).*")
# URL containing "list="; group 1 captures the playlist id (23 or more characters)
# and is None when "list=" is not followed by such an id.
_YT_LIST_PATTERN = re.compile(r"https://www\.youtube\.com/.+?(?:list=)([\w-]{23,})?.*")
# Direct "videoplayback" link on a googlevideo.com host.
# NOTE: the "?" after "videoplayback" is a regex quantifier (it makes the final "k" optional),
# not a literal question mark. It is left unchanged so that the set of accepted paths stays the same.
_YT_VIDEO_PATTERN = re.compile(r"https://r\d+---sn-[\w]{10}-[\w]{3,5}\.googlevideo\.com/videoplayback?.*")
# Headers sent with every request; the responses are read with gzip.decompress().
_HEADERS = {"User-Agent": "Mozilla/5.0 (X11; Linux i586; rv:31.0) Gecko/20100101 Firefox/69.0",
            "DNT": "1",
            "Accept-Encoding": "gzip, deflate"}

# Maps the "itag" value of a stream format to its quality label.
# Key 0 ("0p") is the label used for the single link taken from the legacy stream map.
Quality = {137: "1080p", 136: "720p", 135: "480p", 134: "360p",
           133: "240p", 160: "144p", 0: "0p", 18: "360p", 22: "720p"}
|
|
|
|
|
|
class YouTube:
    """ Helper class for working with YouTube service. """

    @staticmethod
    def is_yt_video_link(url):
        """ Returns a match object if the url begins like a direct video playback link, otherwise None. """
        return re.match(_YT_VIDEO_PATTERN, url)

    @staticmethod
    def get_yt_id(url):
        """ Returns video id or None """
        yt = re.search(_YT_PATTERN, url)
        return yt.group(1) if yt else None

    @staticmethod
    def get_yt_list_id(url):
        """ Returns playlist id or None """
        yt = re.search(_YT_LIST_PATTERN, url)
        return yt.group(1) if yt else None

    @staticmethod
    def get_yt_link(video_id):
        """ Getting link to YouTube video by id.

            Returns a tuple (links, title), where links is a dict {quality label: url}.
            If no link could be obtained, returns (None, reason), where reason is the
            "reason" text found in the service response or an empty string.

            Errors raised while fetching or decompressing the response are not handled here.
        """
        req = Request("https://youtube.com/get_video_info?video_id={}&hl=en".format(video_id), headers=_HEADERS)

        with urllib.request.urlopen(req, timeout=2) as resp:
            data = urllib.request.unquote(gzip.decompress(resp.read()).decode("utf-8")).split("&")

        # The response is a list of "key=value" pairs; each pair is unquoted once more before splitting.
        out = {k: v for k, sep, v in (str(d).partition("=") for d in map(urllib.request.unquote, data))}
        player_resp = out.get("player_response", None)

        if player_resp:
            try:
                # A separate name is used so that the HTTP response object is not shadowed.
                info = json.loads(player_resp)
            except JSONDecodeError as e:
                log("{}: Parsing player response error: {}".format(__class__.__name__, e))
            else:
                det = info.get("videoDetails", None)
                title = det.get("title", None) if det else None
                streaming_data = info.get("streamingData", None)
                fmts = streaming_data.get("formats", None) if streaming_data else None

                if fmts:
                    # Only formats with a known itag AND a direct "url" are usable. An entry
                    # without the "url" key is skipped instead of raising KeyError for the whole request.
                    urls = {Quality[fmt["itag"]]: fmt["url"] for fmt in fmts
                            if fmt.get("itag", -1) in Quality and "url" in fmt}

                    if urls and title:
                        return urls, title.replace("+", " ")

        # Fallback: a single link from the legacy stream map, labeled with Quality[0].
        stream_map = out.get("url_encoded_fmt_stream_map", None)
        if stream_map:
            s_map = {k: v for k, sep, v in (str(d).partition("=") for d in stream_map.split("&"))}
            url, title = s_map.get("url", None), out.get("title", None)
            url = urllib.request.unquote(url) if url else ""
            title = title.replace("+", " ") if title else ""
            if url and title:
                return {Quality[0]: url}, title

        rsn = out.get("reason", None)
        rsn = rsn.replace("+", " ") if rsn else ""
        log("{}: Getting link to video with id {} failed! Cause: {}".format(__class__.__name__, video_id, rsn))

        return None, rsn
|
|
|
|
|
|
class PlayListParser(HTMLParser):
    """ Very simple parser to handle YouTube playlist pages.

        Looks for the inline script that assigns window["ytInitialData"] and extracts
        the playlist header and the list of (title, video id) tuples from its JSON value.
    """

    # Beginning of the script statement that carries the page data as a JSON value.
    _DATA_PREFIX = 'window["ytInitialData"] = '

    def __init__(self):
        super().__init__()
        self._is_header = False
        self._header = ""
        self._playlist = []
        self._is_script = False

    def handle_starttag(self, tag, attrs):
        """ Marks that the data which follows belongs to a script element. """
        if tag == "script":
            self._is_script = True

    def handle_data(self, data):
        """ Parses the data of a script element if it holds the initial page data. """
        if not self._is_script:
            return
        # Only the first chunk of data after a script start tag is inspected.
        self._is_script = False

        data = data.lstrip()
        if not data.startswith(self._DATA_PREFIX):
            return

        try:
            # The prefix is removed by slicing (str.lstrip() would treat it as a set of characters),
            # and raw_decode() parses exactly one JSON value, ignoring the ";" and anything else
            # that follows it. Semicolons inside JSON strings therefore no longer cut the data.
            resp, _ = json.JSONDecoder().raw_decode(data[len(self._DATA_PREFIX):].lstrip())
        except JSONDecodeError as e:
            log("{}: Parsing data error: {}".format(__class__.__name__, e))
            return

        sb = resp.get("sidebar", None)
        if sb:
            # The header is the first non-empty text among the first "runs" of the sidebar titles.
            for run in [t["runs"][0] for t in flat("title", sb) if "runs" in t]:
                txt = run.get("text", None)
                if txt:
                    self._header = txt
                    break

        ct = resp.get("contents", None)
        if ct:
            self._playlist.extend([(r["title"]["simpleText"], r["videoId"])
                                   for r in flat("playlistVideoRenderer", ct)])

    def error(self, message):
        """ Logs the parsing error message. """
        log("{} Parsing error: {}".format(__class__.__name__, message))

    @property
    def header(self):
        """ Playlist header text or an empty string if it was not found. """
        return self._header

    @property
    def playlist(self):
        """ List of tuples (title, video id) collected so far. """
        return self._playlist

    @staticmethod
    def get_yt_playlist(play_list_id):
        """ Getting YouTube playlist by id.

            returns tuple from the playlist header and list of tuples (title, video id)
        """
        request = Request("https://www.youtube.com/playlist?list={}&hl=en".format(play_list_id), headers=_HEADERS)

        with urllib.request.urlopen(request, timeout=2) as resp:
            data = gzip.decompress(resp.read()).decode("utf-8")

        parser = PlayListParser()
        parser.feed(data)
        return parser.header, parser.playlist
|
|
|
|
|
|
def flat(key, d):
    """ Yields every value stored under the given key in a nested structure.

        Walks the dict depth-first in item order. A value found under the key is
        yielded as is and is not searched any further. Other values are searched
        only if they are dicts, or lists (then only their dict elements are searched).
    """
    for name, value in d.items():
        if name == key:
            yield value
            continue

        if isinstance(value, dict):
            nested = (value,)
        elif isinstance(value, list):
            nested = (item for item in value if isinstance(item, dict))
        else:
            continue

        for child in nested:
            yield from flat(key, child)
|
|
|
|
|
|
# Nothing is executed when the module is run directly.
if __name__ == "__main__":
    pass
|