diff --git a/README.md b/README.md index a3b8604..d27c290 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,8 @@

`squid-dl` is a massively parallel -[yt-dlp](https://github.com/yt-dlp/yt-dlp)-based YouTube downloader. +[yt-dlp](https://github.com/yt-dlp/yt-dlp)-based YouTube downloader useful for +downloading large video playlists over a fast internet connection. ## Installation Run the `setup.py`, which will install `squid-dl` and its two dependencies: @@ -22,7 +23,7 @@ Run the `setup.py`, which will install `squid-dl` and its two dependencies: $ python3 setup.py install ``` -### Linode Setup +### Linode Proxy Setup If you want to use the Linode SOCKS proxy feature, be sure to configure the `linode-cli` first: ``` @@ -87,7 +88,43 @@ playlist example, we'll spawn 12 workers: For more information see the built-in help by running `squid-dl -h`. -## Linode Proxying +### SOCKS Proxying +For those with access to a dedicated SOCKS proxy already, you can use +`squid-dl`'s `-S` option and a fully-qualified SOCKS4, SOCKS4A, or SOCKS5 proxy URL +to download your playlists through a proxy! Here's an example using NordVPN's +SOCKS5 proxy: +``` +(.venv) $ squid-dl -S socks5://us.socks.nordhold.net:1080 -n 12 Mems.json + +[INFO]: Starting squid-dl... +[INFO]: saving videos to "Mems" directory +SOCKS username: 0asFVrZt0bw1ucPvQRKiUe87 +SOCKS password: + +... +[download] Download completed +[INFO]: Worker 1667326 done... +[INFO]: Worker 1667396 done... +[INFO]: Worker 1667484 done... +[download] Download completed +[download] Download completed +[INFO]: Worker 1667352 done... +[INFO]: Worker 1667421 done... +[INFO]: All done! +``` + +You can also add a username and password to SOCKS5 and SOCKS4A proxy URLs in a +format like this: +``` +socks5://username:password@hostname:port +``` + +**SECURITY NOTE:** typing in usernames and passwords this way is considered +insecure, as they will likely end up in your shell's history file completely +unprotected and in the clear (☹). It is generally recommended to input the +username and password interactively unless you are scripting `squid-dl`.
+ +### Linode Proxying With the `-L` option, you can run each worker through its own Linode-powered SSH-tunneled SOCKSv5 proxy! `squid-dl` will make an temporary SSH key in the current working directory and then get to work spinning up Linodes and diff --git a/squid_dl/downloader.py b/squid_dl/downloader.py index aac6dce..7ab2cae 100644 --- a/squid_dl/downloader.py +++ b/squid_dl/downloader.py @@ -42,6 +42,8 @@ from yt_dlp.utils import encodeFilename, sanitize_path from yt_dlp.extractor.common import InfoExtractor as IE from .linode import LinodeProxy +from .proxy import Proxy +from .socks import SocksProxy from .util import die, eprint, runcmd @@ -49,7 +51,7 @@ def do_download( entry_q: Queue, opts: argparse.Namespace, sub_langs: [str], - proxy: LinodeProxy = None, + proxy: Proxy = None, ): sub_opts = { @@ -330,7 +332,7 @@ def resume_preprocess(entries: [dict]) -> list: return unfinished_entries -def validate_proxy(proxy: LinodeProxy) -> LinodeProxy: +def validate_linode_proxy(proxy: LinodeProxy) -> LinodeProxy: if not proxy.start(): eprint( "[WARN]: " @@ -339,7 +341,7 @@ def validate_proxy(proxy: LinodeProxy) -> LinodeProxy: port = proxy.proxy_port proxy.cleanup() proxy = LinodeProxy(proxy_port=port) - return validate_proxy(proxy) + return validate_linode_proxy(proxy) else: print( "[INFO]: SOCKS validation succeeded on port {} from ID {}".format( @@ -349,7 +351,7 @@ def validate_proxy(proxy: LinodeProxy) -> LinodeProxy: return proxy -def cleanup(workers: [Process], proxies: [LinodeProxy]) -> None: +def cleanup(workers: [Process], linode_proxies: [LinodeProxy]) -> None: if len(workers) > 0: for worker in workers: if worker.is_alive(): @@ -360,16 +362,16 @@ def cleanup(workers: [Process], proxies: [LinodeProxy]) -> None: ) worker.terminate() - if len(proxies) > 0: + if len(linode_proxies) > 0: print("[CLEANUP]: Deleting Linode proxies...") - for proxy in proxies: + for proxy in linode_proxies: proxy.cleanup() def parse_args(args: list, name: str): parser = 
argparse.ArgumentParser(prog=name) - group = parser.add_argument_group("Proxy settings") + group = parser.add_mutually_exclusive_group(required=False) group.add_argument( "-L", "--linode-proxy", @@ -380,12 +382,26 @@ def parse_args(args: list, name: str): + "for more information.", ) group.add_argument( + "-S", + "--socks-proxy", + type=str, + default=None, + help="Run workers through a SOCKS proxy. Requires a fully-qualified " + + 'proxy URL (e.g. "socks5://user:pass@hostname:port" or ' + + '"socks5://hostname:port").\n' + + "Be mindful of your shell's history file when entering passwords on " + + "the command line. If this script encounters a proxy that requires " + + "authentication, it will prompt the user for a password " + + "interactively, as well.", + ) + parser.add_argument( "-p", "--proxy-base-port", type=int, default=1337, - help="Port number proxy ports are derived from, does nothing without " - "enabling a type of proxy (like --linode-proxy).", + help="Port number that local Linode-powered proxy ports are derived " + + "from, does nothing without " + + "enabling --linode-proxy (aka. 
-L).", ) parser.add_argument( "--resume-dump", @@ -446,9 +462,7 @@ def main(args: [str], name: str) -> int: print('[INFO]: saving videos to "{}" directory'.format(dirname)) if not (os.path.exists(dirname) and os.path.isdir(dirname)): os.mkdir(dirname) - os.chdir(dirname) else: - os.chdir(dirname) playlist_size = len(info_dict["entries"]) info_dict["entries"] = resume_preprocess(info_dict["entries"]) @@ -462,7 +476,7 @@ def main(args: [str], name: str) -> int: ) ) if opts.resume_dump: - rdump = open("resume.json", mode="w") + rdump = open(info_dict["title"] + ".resume.json", mode="w") rdump.write(j.dumps(info_dict, sort_keys=True, indent=2)) rdump.close() @@ -476,16 +490,20 @@ def main(args: [str], name: str) -> int: base_port = 1337 workers = [] - proxies = [] + linode_proxies = [] + if opts.socks_proxy is not None: + socks_proxy = SocksProxy(url=opts.socks_proxy) try: for n in range(n_workers): port = base_port + n if opts.linode_proxy: - proxies.append( + linode_proxies.append( LinodeProxy(proxy_port=port, pubkey_path=pubkey_path) ) - worker_args = (entry_q, opts, sub_langs, proxies[n]) + worker_args = (entry_q, opts, sub_langs, linode_proxies[n]) + elif opts.socks_proxy is not None: + worker_args = (entry_q, opts, sub_langs, socks_proxy) else: worker_args = (entry_q, opts, sub_langs) @@ -496,7 +514,7 @@ def main(args: [str], name: str) -> int: ) ) - if len(proxies) > 0: + if len(linode_proxies) > 0: if not ( os.path.isfile(pubkey_path) or os.path.isfile(os.path.splitext(pubkey_path)[0]) @@ -512,7 +530,7 @@ def main(args: [str], name: str) -> int: print(".", end="") temp_list = [] for proxy_idx in nodes_to_ping: - if proxies[proxy_idx].get_status() != "running": + if linode_proxies[proxy_idx].get_status() != "running": temp_list.append(proxy_idx) sleep(0.2) nodes_to_ping = temp_list @@ -521,9 +539,11 @@ def main(args: [str], name: str) -> int: while not entry_q.full(): sleep(0.2) + os.chdir(dirname) + for i in range(n_workers): - if len(proxies) > 0: - proxies[i] 
= validate_proxy(proxies[i]) + if len(linode_proxies) > 0: + linode_proxies[i] = validate_linode_proxy(linode_proxies[i]) seconds = randint(0, 1) else: seconds = randint(1, 6) @@ -535,7 +555,7 @@ def main(args: [str], name: str) -> int: except KeyboardInterrupt: eprint("\n[CLEANUP]: Interrupted, cleaning up...") - cleanup(workers, proxies) + cleanup(workers, linode_proxies) if entry_getter.is_alive(): print( "[CLEANUP]: Terminating queue worker {}".format( @@ -546,6 +566,6 @@ def main(args: [str], name: str) -> int: return 1 print("[INFO]: All done!") - cleanup(workers, proxies) + cleanup(workers, linode_proxies) return 0 diff --git a/squid_dl/linode.py b/squid_dl/linode.py index 0cda81a..f6cbdc7 100644 --- a/squid_dl/linode.py +++ b/squid_dl/linode.py @@ -25,10 +25,11 @@ import subprocess from time import sleep import typing +from .proxy import Proxy from .util import eprint, runcmd -class LinodeProxy: +class LinodeProxy(Proxy): user_made = False def __init__( @@ -39,42 +40,45 @@ class LinodeProxy: debug: bool = False, exclusive: bool = True, ): - self.proxy_port = proxy_port - self.proxy_user = proxy_user - self.pubkey_path = pubkey_path - self.debug = debug - self.exclusive = exclusive + try: + self.proxy_port = proxy_port + self.proxy_user = proxy_user + self.pubkey_path = pubkey_path + self.debug = debug + self.exclusive = exclusive - self.proxy_url = "socks5://127.0.0.1:" + str(self.proxy_port) + self.proxy_url = "socks5://127.0.0.1:" + str(self.proxy_port) - self.ssh_prefix = ( - 'ssh -o "UserKnownHostsFile=/dev/null" ' - + '-o "StrictHostKeyChecking=no" -i ' - + splitext(self.pubkey_path)[0] - + " " - ) - pubfile = open(self.pubkey_path, mode="r") - self.pubkey = pubfile.readline().rstrip() - pubfile.close() + self.ssh_prefix = ( + 'ssh -o "UserKnownHostsFile=/dev/null" ' + + '-o "StrictHostKeyChecking=no" -i ' + + splitext(self.pubkey_path)[0] + + " " + ) + pubfile = open(self.pubkey_path, mode="r") + self.pubkey = pubfile.readline().rstrip() + 
pubfile.close() - self.passwd = runcmd( - "echo $(cat /dev/random | strings | head -c 512 | " - + "grep -oE '[a-zA-Z0-9#%!]') | sed 's/\s//g' | head -c 32;" - ).decode() + self.passwd = runcmd( + "echo $(cat /dev/random | strings | head -c 512 | " + + "grep -oE '[a-zA-Z0-9#%!]') | sed 's/\s//g' | head -c 32;" + ).decode() - create_cmd = ( - "linode-cli --json linodes create " - + "--image linode/arch " - + "--authorized_keys " - + '"' - + self.pubkey - + '"' - + ' --root_pass "' - + self.passwd - + '"' - ) - self.info = j.loads(runcmd(create_cmd).decode())[0] - print("[INFO]: Created Linode {}.".format(self.info["id"])) + create_cmd = ( + "linode-cli --json linodes create " + + "--image linode/arch " + + "--authorized_keys " + + '"' + + self.pubkey + + '"' + + ' --root_pass "' + + self.passwd + + '"' + ) + self.info = j.loads(runcmd(create_cmd).decode())[0] + print("[INFO]: Created Linode {}.".format(self.info["id"])) + except KeyboardInterrupt: + self.cleanup() def find_linode(self) -> bool: linodes = j.loads(runcmd("linode-cli --json linodes list").decode()) @@ -161,6 +165,8 @@ class LinodeProxy: def test_proxy(self) -> bool: sen = struct.pack("BBB", 0x05, 0x01, 0x00) s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + s.settimeout(2) + try: s.connect(("127.0.0.1", self.proxy_port)) except ConnectionRefusedError as e: @@ -170,8 +176,18 @@ class LinodeProxy: ) ) return False - s.sendall(sen) - data = s.recv(2) + + for n in range(3): + s.sendall(sen) + try: + data = s.recv(2) + break + except socket.timeout: + if n == 2: + eprint( + "[ERROR]: Linode SOCKS timed out after three attempts!", + ) + return False version, auth = struct.unpack("BB", data) if version == 5 and auth == 0: diff --git a/squid_dl/proxy.py b/squid_dl/proxy.py new file mode 100644 index 0000000..5b3f018 --- /dev/null +++ b/squid_dl/proxy.py @@ -0,0 +1,38 @@ +""" +squid_dl Proxy base class (for typing) + + ⣀⣠⣴⣷⣾⣿⣿⣿⣶⣦⣠ + ⣴⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡖ + ⣰⣿⣿⠟⠉⣀⠄⠻⣿⣿⣿⣿⣿⣿⡟ + ⣠⣿⣿⡯ ⠜⠁ ⢰⣿⣿⣿⣿⣿⡿⠋ + ⢀⣠⣼⣿⣿⣿⣿⣯⡄⡀⣠⣤⡿⡿⢿⣿⠿⠋ + 
⣀⣶⣿⣿⣿⡿⠿⠿⠿⢿⢿⣿⣿⣿⣿⣿⣿⣶ + ⣠⣿⣿⣿⣿⡿⠗⠁ ⠙⣿⣿⣿⣿⣿⣿⣷⡆ +⢀⣴⣿⣿⣿⣿⣿ ⢻⣿⣿⣿⣿⣿⣿⣿⣥⠄ +⣴⣿⣿⣿⣿⣿⡃ ⢨⣿⣿⣿⣿⣿⣿⣿⣿⡅ +⠼⣿⣿⣿⣿⠋ ⠈⣿⣿⣿⣿⣿⣿⣿⣿⣗⠄ + ⠹⠛⠏⠃ ⠸⣿⣿⣿⣿⣿⣿⣿⣿⣿⡁ + ⣨⣟⣿⣿⣿⣿⣿⡏⣿⣿⣗ + ⢠⣾⢷⣿⣿⣿⣿⣿⣯⢿⣿⣿⡂ + ⣠⣾⣟⣾⣿⣿⣿⣿⣿⣿⣸⣿⣿⣧⠄ + ⢀⣴⣿⣿⣽⣿⣿⣿⣿⣿⣿⣿⣧⢿⣿⣿⣷⣵⣀ + ⢾⣿⣻⣾⣿⣿⣿⣿⣿⣿⣿⣿⡟⠘⢿⣿⣿⣿⣷⠄ + ⢀⣤⣷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡗ ⠈⠘⠉⠋⠁ + ⢀⣴⣿⣿⡟⠋⠉⠁⠁⠁⠉⠹⣻⣿⣿⣧⡀ + ⣰⣿⣿⡟⠉ ⢻⣿⣿⡄ + ⢠⣾⣿⣿⠯ ⣻⣿⣷⣆ + ⢠⣿⣿⣿⠩ ⠙⣿⣿⣿⣦⡀ + ⢠⣿⣿⣟⡷⡕ ⠈⢻⣿⣿⣿⣷⡀ + ⢰⣿⣿⡳⡩⣇ ⢙⣿⣿⣿⣿⣞⡄ + ⢸⣿⣿⢧⡲⣧ ⢺⣿⣿⣿⣿⡽⡆ + ⠻⣿⣯⠛ ⠘⠟⠿⠿⡙ + +""" + + +class Proxy: + proxy_url = None + exclusive = False + + def cleanup(self) -> None: + pass diff --git a/squid_dl/socks.py b/squid_dl/socks.py new file mode 100644 index 0000000..dd966a5 --- /dev/null +++ b/squid_dl/socks.py @@ -0,0 +1,162 @@ +""" +Generic SOCKS proxy support. + ⢀⣀⣤⣴⣶⣶⣾⣿⣷⣶⣶⣦⣄⡀⠀⠀⠀ + ⠀⢠⣴⣿⣿⣿⣿⣿⣭⣭⣭⣭⣭⣿⣿⣿⣿⣧⣀⠀ + ⢰⣿⣿⣿⣿⣿⣯⣿⡶⠶⠶⠶⠶⣶⣭⣽⣿⣿⣷⣆ + ⢿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿ + ⠈⢿⣿⣿⡿⠋⠉⠁⠈⠉⠛⠉⠀⠀⠀⠈⠻⣿⡿⠃ + ⠀⠀⠀⠉⠁⠀⢴⣐⢦⠀⠀⠀⣴⡖⣦⠀⠀⠈⠀⠀ + ⠀⠀⠀⠀⠀⠀⠈⠛⠋⠀⠀⠀⠈⠛⠁⠀⠀⠀⠀⠀ + ⠀⠀⠀⠀⠀⣀⡀⠀⠀⠀⣀⠀⠀⠀⢀⡀⠀⠀⠀⠀ + ⠀⠀⢀⡔⣻⣭⡇⠀⣼⣿⣿⣿⡇⠦⣬⣟⢓⡄⠀⠀ + ⠀⠀⠀⠉⠁⠀⠀⠀⣿⣿⣿⣿⡇⠀⠀⠉⠉⠀⠀⠀ + ⠀⠀⠀⠀⠀⠀⠀⠀⠻⠿⠿⠟⠁⠀⠀ +""" + +from getpass import getpass +import json as j +import typing +import os +import socket +import struct +from urllib.parse import unquote_plus, urlparse +from yt_dlp.socks import ProxyType, Socks4Error, Socks5Error, sockssocket + +from .proxy import Proxy +from .util import die, eprint + + +class SocksUnknownSchemeError(Exception): + pass + + +def test_proxy(host: str, port: int): + sen = struct.pack("BBB", 0x05, 0x01, 0x00) + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + s.settimeout(2) + + try: + s.connect((host, port)) + except ConnectionRefusedError as e: + die( + '[ERROR]: Got "{}" when connecting to {} on port {}'.format( + e, host, port + ) + ) + + for n in range(3): + s.sendall(sen) + try: + data = s.recv(2) + break + except socket.timeout: + if n == 2: + eprint("[ERROR]: SOCKS proxy timed out after three attempts!") + exit(1) + + s.close() + return struct.unpack("BB", data) # (version, auth) + + +class SocksProxy(Proxy): + """ + The ``params`` dict can either contain the single element ``url`` or: + * ``host`` (string) + * ``port`` (string) + * ``scheme`` (string, 
optional) + * ``user`` (string, optional) + * ``password`` (string, optional) + """ + + def __init__( + self, url: str = None, params: dict = None, debug: bool = False + ): + self.debug = debug + + if url is not None: + self.init_from_url(url) + else: + self.init_from_params(params) + + self.setup() + + def get_socks_proxytype(self): + if self.scheme.lower() == "socks5": + return ProxyType.SOCKS5 + elif self.scheme.lower() in ("socks", "socks4"): + return ProxyType.SOCKS4 + elif self.scheme.lower() == "socks4a": + return ProxyType.SOCKS4A + else: + eprint("[ERROR]: Unknown scheme in proxy URL!") + raise SocksUnknownSchemeError + + def init_from_url(self, url: str): + self.proxy_url = url + url_pieces = urlparse(url) + + self.scheme = url_pieces.scheme + self.host = url_pieces.hostname + self.port = url_pieces.port or 1080 + self.user = url_pieces.username + self.password = url_pieces.password + + def init_from_params(self, params: dict): + self.host = params["host"] + self.port = params["port"] if "port" in params else 1080 + if "user" in params and "password" in params: + self.user = params["user"] + self.password = params["password"] + authstr = self.user + ":" + self.password + "@" + else: + self.user = None + self.password = None + authstr = "" + self.scheme = ( + params["scheme"].lower() if "scheme" in params else "socks5" + ) + self.proxy_url = ( + self.scheme + "://" + authstr + self.host + ":" + str(self.port) + ) + + def get_creds(self): + self.username = input("SOCKS username: ") + self.password = getpass(prompt="SOCKS password: ") + self.proxy_url = ( + self.scheme + + "://" + + self.username + + ":" + + self.password + + "@" + + self.host + + ":" + + str(self.port) + ) + + def setup(self): + version, auth = test_proxy(host=self.host, port=self.port) + if auth != 0 and (self.user is None or self.password is None): + self.get_creds() + + def unquote_if_non_empty(s): + if not s: + return s + return unquote_plus(s) + + proxy_args = ( + 
self.get_socks_proxytype(), + self.host, + self.port or 1080, + True, # Remote DNS + unquote_if_non_empty(self.username), + unquote_if_non_empty(self.password), + ) + + testsock = sockssocket() + testsock.setproxy(*proxy_args) + try: + testsock.connect((self.host, self.port)) + testsock.close() + except (Socks4Error, Socks5Error) as e: + die("[ERROR]: {}: {}".format(type(e).__name__, e))