# Mirror of https://github.com/DYefremov/DemonEditor.git
# Synced 2026-05-06 05:47:12 +02:00
# 112 lines, 3.7 KiB, Python
""" Module for working with YouTube service """
|
|
import re
import urllib
import urllib.parse
import urllib.request
from html.parser import HTMLParser
from urllib.request import Request

from app.commons import log
|
|
|
|
_YT_PATTERN = re.compile(r"https://www.youtube.com/.+(?:v=)([\w-]{11}).*")
|
|
_YT_LIST_PATTERN = re.compile(r"https://www.youtube.com/.+?(?:list=)([\w-]{23,})?.*")
|
|
_YT_VIDEO_PATTERN = re.compile(r"https://r\d+---sn-[\w]{10}-[\w]{3,5}.googlevideo.com/videoplayback?.*")
|
|
_HEADERS = {"User-Agent": "Mozilla/5.0"}
|
|
|
|
|
|
class YouTube:
    """ Helper class for working with YouTube service. """

    @staticmethod
    def is_yt_video_link(url):
        """ Checks if the url is a direct link to a video stream.

            Returns a match object if the url matches the stream pattern from its start, None otherwise.
            An empty or missing url is never a stream link.
        """
        if not url:
            return None
        return _YT_VIDEO_PATTERN.match(url)

    @staticmethod
    def get_yt_id(url):
        """ Returns video id or None """
        if not url:
            return None
        found = _YT_PATTERN.search(url)
        return found.group(1) if found else None

    @staticmethod
    def get_yt_list_id(url):
        """ Returns playlist id or None """
        if not url:
            return None
        found = _YT_LIST_PATTERN.search(url)
        return found.group(1) if found else None

    @staticmethod
    def _split_pairs(query):
        """ Splits a 'key=value&key=value' string into a dict. Values are left undecoded. """
        return {key: value for key, _, value in (pair.partition("=") for pair in query.split("&"))}

    @staticmethod
    def get_yt_link(video_id, timeout=2):
        """ Getting link to YouTube video by id.

            video_id - id of the video.
            timeout - timeout (in seconds) for the request.

            returns tuple from the video link and title.
            If no stream map is present in the response, the failure is logged
            and a tuple from an empty link and the reported reason is returned.
        """
        req = Request("https://youtube.com/get_video_info?video_id={}".format(video_id), headers=_HEADERS)
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            fields = YouTube._split_pairs(resp.read().decode("utf-8"))

        # The stream map is itself a percent-encoded 'key=value&...' string, so it is decoded once
        # before being split and its "url" value is decoded once more below.
        stream_map = urllib.parse.unquote(fields.get("url_encoded_fmt_stream_map", ""))
        if stream_map:
            url = YouTube._split_pairs(stream_map).get("url", None)
            # unquote_plus turns "+" into a space before decoding, so an encoded literal plus (%2B) survives.
            title = urllib.parse.unquote_plus(fields.get("title", ""))
            return urllib.parse.unquote(url) if url else "", title

        rsn = urllib.parse.unquote_plus(fields.get("reason", ""))
        log("{}: Getting link to video with id {} failed! Cause: {}".format(__class__.__name__, video_id, rsn))

        return "", rsn
|
|
|
|
|
|
class PlayListParser(HTMLParser):
    """ Very simple parser to handle YouTube playlist pages.

        Collects the text of the <h1 class="pl-header-title"> element as the playlist header
        and a (title, video id) tuple for every <tr class="pl-video yt-uix-tile "> element.
    """

    def __init__(self):
        super().__init__()
        self._is_header = False
        # Text nodes seen so far inside the header element.
        self._header_parts = []
        self._header = ""
        self._playlist = []

    def handle_starttag(self, tag, attrs):
        """ Starts header capturing or stores the data attributes of a playlist row. """
        if tag == "h1" and ("class", "pl-header-title") in attrs:
            self._is_header = True
            self._header_parts = []
        elif tag == "tr" and ("class", "pl-video yt-uix-tile ") in attrs:
            row = dict(attrs)
            self._playlist.append((row.get("data-title", None), row.get("data-video-id", None)))

    def handle_data(self, data):
        """ Accumulates the header text while inside the header element. """
        if self._is_header:
            # The header may consist of several text nodes (e.g. when it contains nested tags),
            # so the parts are joined instead of keeping only the last one.
            self._header_parts.append(data)
            self._header = "".join(self._header_parts).strip()

    def handle_endtag(self, tag):
        """ Stops header capturing on the closing h1 tag. """
        # Closing tags of elements nested in the header must not end the capturing.
        if self._is_header and tag == "h1":
            self._is_header = False

    def error(self, message):
        """ Logs the given parsing error message. """
        log("{} Parsing error: {}".format(__class__.__name__, message))

    @property
    def header(self):
        """ Playlist header text ("" if no header element was found). """
        return self._header

    @property
    def playlist(self):
        """ List of tuples (title, video id) in the order they were found. """
        return self._playlist

    @staticmethod
    def get_yt_playlist(play_list_id, timeout=2):
        """ Getting YouTube playlist by id.

            play_list_id - id of the playlist.
            timeout - timeout (in seconds) for the request.

            returns tuple from the playlist header and list of tuples (title, video id)
        """
        request = Request("https://www.youtube.com/playlist?list={}".format(play_list_id), headers=_HEADERS)

        with urllib.request.urlopen(request, timeout=timeout) as resp:
            data = resp.read().decode("utf-8")
            parser = PlayListParser()
            parser.feed(data)
            return parser.header, parser.playlist
|
|
|
|
|
|
# Script entry point: intentionally does nothing when the module is run directly.
if __name__ == "__main__":
    pass
|