Files
DemonEditor/app/tools/yt.py

482 lines
18 KiB
Python
Raw Normal View History

2021-08-30 15:04:15 +03:00
# -*- coding: utf-8 -*-
#
# The MIT License (MIT)
#
2025-11-01 17:07:53 +03:00
# Copyright (c) 2018-2025 Dmitriy Yefremov
2021-08-30 15:04:15 +03:00
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# Author: Dmitriy Yefremov
#
""" Module for working with YouTube service. """
2019-09-28 17:44:33 +03:00
import gzip
2019-08-13 19:22:08 +03:00
import json
2020-06-10 11:10:41 +03:00
import os
import re
2020-06-10 11:10:41 +03:00
import shutil
import sys
from html.parser import HTMLParser
2019-08-13 19:22:08 +03:00
from json import JSONDecodeError
from urllib import parse
2020-06-10 11:10:41 +03:00
from urllib.error import URLError
from urllib.request import Request, urlopen, urlretrieve
2025-11-01 17:07:53 +03:00
from app.commons import log, run_task
2021-08-30 15:04:15 +03:00
from app.settings import SEP
2020-07-15 11:16:09 +03:00
from app.ui.uicommons import show_notification
2019-06-28 08:58:33 +03:00
# Default network timeout (seconds) for the HTTP requests made by this module.
_TIMEOUT = 5
# Browser-like headers used when requesting YouTube pages.
_HEADERS = {"User-Agent": "Mozilla/5.0 (Linux x86_64; rv:92.0) Gecko/20100101 Firefox/92.0",
            "DNT": "1",
            "Accept-Encoding": "gzip, deflate"}

# The dots of the host names are escaped so that they match a literal "."
# only; an unescaped "." would also accept look-alike hosts.
# Group 1 -> 11-character video id taken from the "v=" parameter.
_YT_PATTERN = re.compile(r"https://www\.youtube\.com/.+(?:v=)([\w-]{11}).*")
# Group 1 -> playlist id taken from the "list=" parameter (None if shorter than 18 characters).
_YT_LIST_PATTERN = re.compile(r"https://www\.youtube\.com/.+?(?:list=)([\w-]{18,})?.*")
# Direct "googlevideo" stream link. The tail after "videoplayback" is intentionally left as is.
_YT_VIDEO_PATTERN = re.compile(r"https://r\d+---sn-[\w]{10}-[\w]{3,5}\.googlevideo\.com/videoplayback?.*")

# Maps a stream format id (itag) to a human-readable quality label.
Quality = {137: "1080p", 136: "720p", 135: "480p", 134: "360p",
           133: "240p", 160: "144p", 0: "0p", 18: "360p", 22: "720p"}
2020-06-13 20:57:37 +03:00
class YouTubeException(Exception):
    """ Error raised by the YouTube helpers of this module. """
class YouTube:
    """ Helper class for working with YouTube service.

        Video links and playlists are resolved through yt-dlp when it is enabled
        in the settings and a URL is given, otherwise through the InnerTube
        client and the playlist page parser of this module.
    """

    _YT_INSTANCE = None

    _VIDEO_INFO_LINK = "https://youtube.com/get_video_info?video_id={}&hl=en"
    VIDEO_LINK = "https://www.youtube.com/watch?v={}"

    def __init__(self, settings, callback):
        """ @param settings: settings object; 'enable_yt_dl' is read here.
            @param callback: callable handed over to the yt-dlp helper.
        """
        self._settings = settings
        self._yt_dl = None
        self._callback = callback

        if self._settings.enable_yt_dl:
            try:
                self._yt_dl = YouTubeDL.get_instance(self._settings, callback=self._callback)
            except YouTubeException as e:
                # Best effort: the object stays usable without yt-dlp and
                # get_yt_link() tries to obtain the helper again on demand.
                log(f"{type(self).__name__}: yt-dlp is not available: {e}")

    @classmethod
    def get_instance(cls, settings, callback=log):
        """ Returns the shared instance, creating it on the first call. """
        if not cls._YT_INSTANCE:
            cls._YT_INSTANCE = YouTube(settings, callback)
        return cls._YT_INSTANCE

    @staticmethod
    def is_yt_video_link(url):
        """ Returns a match object if the URL looks like a direct video stream link, None otherwise. """
        return re.match(_YT_VIDEO_PATTERN, url)

    @staticmethod
    def get_yt_id(url):
        """ Returns video id or None """
        yt = re.search(_YT_PATTERN, url)
        if yt:
            return yt.group(1)
        return None

    @staticmethod
    def get_yt_list_id(url):
        """ Returns playlist id or None """
        yt = re.search(_YT_LIST_PATTERN, url)
        if yt:
            return yt.group(1)
        return None

    def get_yt_link(self, video_id, url=None, skip_errors=False):
        """ Getting link to YouTube video by id or URL.

            Returns tuple from the video links dict and title.
            Raises YouTubeException if yt-dlp is requested but can not be initialized.
        """
        if self._settings.enable_yt_dl and url:
            if not self._yt_dl:
                self._yt_dl = YouTubeDL.get_instance(self._settings, self._callback)
                if not self._yt_dl:
                    raise YouTubeException("yt-dlp initialization error.")
            return self._yt_dl.get_yt_link(url, skip_errors)

        return self.get_yt_link_by_id(video_id)

    @staticmethod
    def get_yt_link_by_id(video_id):
        """ Getting link to YouTube video by id.

            Returns tuple from the video links dict and title.
            On failure returns (None, cause), where cause is built from the
            'playabilityStatus' of the response (None if it is absent).
        """
        info = InnerTube().player(video_id)
        det = info.get("videoDetails", None)
        title = det.get("title", None) if det else None
        streaming_data = info.get("streamingData", None)
        fmts = streaming_data.get("formats", None) if streaming_data else None

        if fmts:
            # Only formats with a known itag and a ready-to-use URL are kept.
            links = {Quality[i["itag"]]: i["url"] for i in fmts if i.get("itag", -1) in Quality and "url" in i}
            if links and title:
                return links, title.replace("+", " ")

        cause = None
        status = info.get("playabilityStatus", None)
        if status:
            cause = f"[{status.get('status', '')}] {status.get('reason', '')}"

        log(f"{__class__.__name__}: Getting link to video with id '{video_id}' failed! Cause: {cause}")

        return None, cause

    def get_yt_playlist(self, list_id, url=None):
        """ Returns tuple from the playlist header and list of tuples (title, video id).

            Raises YouTubeException if yt-dlp is enabled and a URL is given, but yt-dlp is not initialized.
        """
        if self._settings.enable_yt_dl and url:
            try:
                if not self._yt_dl:
                    raise YouTubeException("yt-dlp is not initialized!")

                # Playlist mode: flat extraction of the playlist entries.
                self._yt_dl.update_options({"noplaylist": False, "extract_flat": True})
                info = self._yt_dl.get_info(url, skip_errors=False)
                if "url" in info:
                    info = self._yt_dl.get_info(info.get("url"), skip_errors=False)

                return info.get("title", ""), [(e.get("title", ""), e.get("id", "")) for e in info.get("entries", [])]
            finally:
                # Restoring default options
                if self._yt_dl:
                    self._yt_dl.update_options({"noplaylist": True, "extract_flat": False})

        return PlayListParser.get_yt_playlist(list_id)
class InnerTube:
    """ Object for interacting with the innertube API.

        Based on InnerTube class from pytube [https://github.com/pytube/pytube] project!
    """
    _BASE_URI = "https://www.youtube.com/youtubei/v1"
    _DEFAULT_CLIENTS = {
        # The client is taken from -> https://github.com/JuanBindez/pytubefix
        "ANDROID": {"context": {"client": {"clientName": "ANDROID",
                                           "clientVersion": "19.44.38",
                                           "platform": "MOBILE",
                                           "osName": "Android",
                                           "osVersion": "14",
                                           "androidSdkVersion": "34"}},
                    "header": {"User-Agent": "com.google.android.youtube/",
                               "X-Youtube-Client-Name": "3"},
                    "api_key": "AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8",
                    "require_js_player": False,
                    "require_po_token": True}
    }

    def __init__(self, client="ANDROID"):
        """ Initialize an InnerTube object.

            @param client: Key of the client description in _DEFAULT_CLIENTS. Defaults to "ANDROID".
            Raises KeyError for an unknown client name.
        """
        self.context = self._DEFAULT_CLIENTS[client]["context"]
        self.api_key = self._DEFAULT_CLIENTS[client]["api_key"]

    @property
    def base_data(self):
        """Return the base json data to transmit to the innertube API."""
        return {"context": self.context}

    @property
    def base_params(self):
        """Return the base query parameters to transmit to the innertube API."""
        return {"key": self.api_key, "contentCheckOk": True, "racyCheckOk": True}

    def player(self, video_id):
        """ Make a request to the player endpoint. Returns raw player info results.

            An empty dict is returned if the response could not be parsed.
        """
        endpoint = f"{self._BASE_URI}/player"
        query = {"videoId": video_id}
        query.update(self.base_params)
        return self._call_api(endpoint, query, self.base_data) or {}

    @staticmethod
    def _call_api(endpoint, query, data):
        """ Make a request to a given endpoint with the provided query parameters and data.

            Returns the decoded JSON response or None if it is not valid JSON.
        """
        headers = {"Content-Type": "application/json"}
        url = f"{endpoint}?{parse.urlencode(query)}"
        # The response is closed as soon as the payload is read, so the connection is not leaked.
        with InnerTube._execute(url, "POST", headers=headers, data=data) as response:
            payload = response.read()

        try:
            return json.loads(payload)
        except JSONDecodeError as e:
            log(f"{__class__.__name__}: Parsing response error: {e}")
        return None

    @staticmethod
    def _execute(url, method=None, headers=None, data=None, timeout=_TIMEOUT):
        """ Opens the URL and returns the response object. The caller is responsible for closing it.

            @param headers: extra headers; they override the base ones.
            @param data: request body; anything but bytes is serialized to JSON.
        """
        base_headers = {"User-Agent": "Mozilla/5.0", "accept-language": "en-US,en"}
        if headers:
            base_headers.update(headers)

        if data:
            # Encoding data for request.
            if not isinstance(data, bytes):
                data = bytes(json.dumps(data), encoding="utf-8")

        return urlopen(Request(url, headers=base_headers, method=method, data=data), timeout=timeout)
class PlayListParser(HTMLParser):
    """ Very simple parser to handle YouTube playlist pages.

        The playlist is taken from the 'ytInitialData' JSON object that is
        assigned inside one of the <script> elements of the page.
    """

    def __init__(self):
        super().__init__()
        self._is_header = False
        self._header = ""
        self._playlist = []
        # True between a <script> start tag and the first data chunk after it.
        self._is_script = False
        # Known forms of the assignment that precedes the JSON object.
        self._scr_start = ('var ytInitialData = ', 'window["ytInitialData"] = ')

    def handle_starttag(self, tag, attrs):
        if tag == "script":
            self._is_script = True

    def handle_data(self, data):
        """ Extracts the playlist header and the (title, video id) pairs from the script data. """
        if not self._is_script:
            return
        # Only the first data chunk after a <script> tag is examined.
        self._is_script = False

        data = data.lstrip()
        if not data.startswith(self._scr_start):
            return

        # Cutting off the assignment as an exact prefix
        # [str.lstrip() would strip a set of characters, not the prefix].
        for prefix in self._scr_start:
            if data.startswith(prefix):
                data = data[len(prefix):]
                break

        try:
            # raw_decode() reads one JSON value and ignores what follows it [e.g. the closing ';'],
            # so semicolons inside the JSON strings do not truncate the document.
            resp, _ = json.JSONDecoder().raw_decode(data.lstrip())
        except JSONDecodeError as e:
            log(f"{__class__.__name__}: Parsing data error: {e}")
            return

        if not isinstance(resp, dict):
            return

        sb = resp.get("sidebar", None)
        if sb:
            for t in flat("title", sb):
                runs = t.get("runs", None) if isinstance(t, dict) else None
                txt = runs[0].get("text", None) if runs else None
                if txt:
                    # The first non-empty title is used as the header.
                    self._header = txt
                    break

        ct = resp.get("contents", None)
        if ct:
            for d in flat("playlistVideoRenderer", ct):
                self._playlist.append((d.get("title", {}).get("runs", [{}])[0].get("text", ""),
                                       d.get("videoId", "")))

    def error(self, message):
        log(f"{__class__.__name__} Parsing error: {message}")

    @property
    def header(self):
        return self._header

    @property
    def playlist(self):
        return self._playlist

    @staticmethod
    def get_yt_playlist(play_list_id):
        """ Getting YouTube playlist by id.

            returns tuple from the playlist header and list of tuples (title, video id)
        """
        request = Request(f"https://www.youtube.com/playlist?list={play_list_id}&hl=en", headers=_HEADERS)

        with urlopen(request, timeout=_TIMEOUT) as resp:
            raw = resp.read()

        # Decompressing only if the body starts with the gzip magic number.
        if raw[:2] == b"\x1f\x8b":
            raw = gzip.decompress(raw)

        parser = PlayListParser()
        parser.feed(raw.decode("utf-8"))
        return parser.header, parser.playlist
2020-06-10 11:10:41 +03:00
class YouTubeDL:
    """ Utility class [experimental] for working with yt-dlp.

        [https://github.com/yt-dlp/yt-dlp]

        The yt-dlp package is kept in the 'tools' sub-folder of the default data
        path and is downloaded from the latest GitHub release when it is missing.
    """

    _DL_INSTANCE = None
    _DownloadError = None
    _LATEST_RELEASE_URL = "https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest"
    _OPTIONS = {"noplaylist": True,  # Single video instead of a playlist [ignoring playlist in URL].
                "extract_flat": False,  # Do not resolve URLs, return the immediate result.
                "quiet": True,  # Do not print messages to stdout.
                "simulate": True,  # Do not download the video files.
                "cookiefile": "cookies.txt"}  # File name where cookies should be read from and dumped to.

    def __init__(self, settings, callback):
        """ @param settings: settings object; 'default_data_path' and 'enable_yt_dl_update' are read.
            @param callback: callable invoked as callback(message, False) to report the state.
        """
        self._path = f"{settings.default_data_path}tools{SEP}"
        self._update = settings.enable_yt_dl_update
        self._supported = {"22", "18"}  # Format ids that get_yt_link() returns.
        self._dl = None
        self._callback = callback
        self._download_exception = None
        self._is_update_process = False

        self.init()

    @classmethod
    def get_instance(cls, settings, callback=print):
        """ Returns the shared instance, creating it on the first call. """
        if not cls._DL_INSTANCE:
            cls._DL_INSTANCE = YouTubeDL(settings, callback)
        return cls._DL_INSTANCE

    def init(self):
        """ Initializes yt-dlp if it is already present in the tools folder, otherwise starts its download. """
        if os.path.isfile(f"{self._path}yt_dlp{SEP}version.py"):
            if self._path not in sys.path:
                sys.path.append(self._path)

            self.init_dl()
        else:
            self.get_latest_release()

    def init_dl(self):
        """ Imports yt-dlp and creates the downloader object.

            Raises YouTubeException if the module is not found or is imported from outside the tools folder.
        """
        try:
            import yt_dlp
        except ModuleNotFoundError as e:
            log(f"YouTubeDLHelper error: {e}")
            raise YouTubeException(e)
        except ImportError as e:
            # Other import errors are only logged; the helper stays uninitialized.
            log(f"YouTubeDLHelper error: {e}")
        else:
            if self._path not in yt_dlp.__file__:
                msg = "Another version of yt-dlp was found on your system!"
                log(msg)
                raise YouTubeException(msg)

            self._DownloadError = yt_dlp.utils.DownloadError
            self._dl = yt_dlp.YoutubeDL(self._OPTIONS)
            msg = "yt-dlp initialized..."
            show_notification(msg)
            log(msg)

            if self._update:
                if hasattr(yt_dlp.version, "__version__"):
                    self.update(yt_dlp.version.__version__)

    @staticmethod
    def get_last_release_id():
        """ Getting last release id.

            Returns the 'tag_name' of the latest release ["0" if it is absent] or None on error.
        """
        try:
            with urlopen(YouTubeDL._LATEST_RELEASE_URL, timeout=10) as resp:
                return json.loads(resp.read().decode("utf-8")).get("tag_name", "0")
        except (URLError, JSONDecodeError) as e:
            log(f"YouTubeDLHelper error [get last release id]: {e}")
        return None

    @run_task
    def update(self, current_version):
        """ Downloads the latest release if it is newer than the current version. """
        l_ver = self.get_last_release_id()
        # NOTE(review): the versions are compared as plain strings - confirm that this suits the tag format.
        if l_ver and current_version < l_ver:
            msg = f"yt-dlp has new release!\nCurrent: {current_version}. Last: {l_ver}."
            show_notification(msg)
            log(msg)
            self._callback(msg, False)
            self.get_latest_release(update=True)

    @run_task
    def get_latest_release(self, update=False):
        """ Downloads the latest release archive and unpacks its 'yt_dlp' package into the tools folder.

            @param update: if False, init() is called again when the attempt is over.
            Raises YouTubeException on a network error.
        """
        try:
            self._is_update_process = True
            log("Getting the last yt-dlp release...")

            with urlopen(YouTubeDL._LATEST_RELEASE_URL, timeout=10) as resp:
                r = json.loads(resp.read().decode("utf-8"))
                zip_url = r.get("zipball_url", None)
                if zip_url:
                    if os.path.isdir(self._path):
                        shutil.rmtree(self._path)

                    zip_file = f"{self._path}yt.zip"
                    os.makedirs(os.path.dirname(self._path), exist_ok=True)
                    f_name, headers = urlretrieve(zip_url, filename=zip_file)

                    import zipfile

                    # Archive root folder that is left in the working directory after moving the package.
                    extracted_root = None
                    with zipfile.ZipFile(f_name) as arch:
                        for info in arch.infolist():
                            pref, sep, f = info.filename.partition("/yt_dlp/")
                            if sep:
                                arch.extract(info.filename)
                                shutil.move(info.filename, f"{self._path}{sep}{f}")
                                extracted_root = pref

                    # Removed only if something was really extracted, regardless of the last archive entry.
                    if extracted_root and os.path.isdir(extracted_root):
                        shutil.rmtree(extracted_root)

                    msg = "Getting the last yt-dlp release is done!"
                    show_notification(msg)
                    log(msg)
                    self._callback(msg, False)

                    if os.path.isfile(zip_file):
                        os.remove(zip_file)

                    return True
        except URLError as e:
            log(f"YouTubeDLHelper error: {e}")
            raise YouTubeException(e)
        finally:
            self._is_update_process = False
            # NOTE(review): init() starts this method again while 'version.py' is missing,
            # i.e. also after a failed attempt - confirm that the repeated retries are intended.
            if not update:
                self.init()

    def get_yt_link(self, url, skip_errors=False):
        """ Returns tuple from the video links [dict] and title.

            An empty dict and an empty title are returned during the update process
            or if no info was obtained [download error with skip_errors=True].
        """
        if self._is_update_process:
            self._callback("Update process. Please wait.", False)
            return {}, ""

        # get_info() returns None for a skipped download error.
        info = self.get_info(url, skip_errors) or {}
        fmts = info.get("formats", None)
        if fmts:
            return {Quality.get(int(fm["format_id"])): fm.get("url", "") for fm in fmts if
                    fm.get("format_id", "") in self._supported}, info.get("title", "")

        return {}, info.get("title", "")

    def get_info(self, url, skip_errors=False):
        """ Returns the info extracted by yt-dlp for the given URL.

            Returns None on a download error if skip_errors is True.
            Raises YouTubeException if yt-dlp is not initialized, on a network error
            or on a download error if skip_errors is False.
        """
        # Without this check a missing downloader ends up in 'except None' below [TypeError].
        dl = self._require_dl()
        try:
            return dl.extract_info(url, download=False)
        except URLError as e:
            log(f"YouTubeDLHelper error [get info]: {e}")
            raise YouTubeException(e)
        except self._DownloadError as e:
            log(f"YouTubeDLHelper error [get info]: {e}")
            if not skip_errors:
                raise YouTubeException(e)
        return None

    def update_options(self, options):
        """ Updates the downloader parameters. Raises YouTubeException if yt-dlp is not initialized. """
        self._require_dl().params.update(options)

    @property
    def options(self):
        """ Current downloader parameters. Raises YouTubeException if yt-dlp is not initialized. """
        return self._require_dl().params

    def _require_dl(self):
        """ Returns the downloader object or raises YouTubeException if it is not initialized yet. """
        if self._dl is None or self._DownloadError is None:
            raise YouTubeException("yt-dlp is not initialized!")
        return self._dl
2020-06-10 11:10:41 +03:00
2019-09-22 16:54:20 +03:00
def flat(key, d):
    """ Yields every value stored under the given key in a nested dict/list structure.

        A matching value is yielded as is [it is not searched further].
        Other values are searched only if they are dicts or lists, and only
        the dict elements of a list are taken into account.
    """
    for name, value in d.items():
        if name == key:
            yield value
            continue

        # A single dict is handled like a list with one element.
        if isinstance(value, dict):
            candidates = (value,)
        elif isinstance(value, list):
            candidates = value
        else:
            candidates = ()

        for item in candidates:
            if isinstance(item, dict):
                yield from flat(key, item)
# The module does nothing when it is executed as a script: the guard body is a no-op.
if __name__ == "__main__":
    pass