Diff to getpot_bgutil_http def _validate_get_pot(self, client: str, ydl: YoutubeDL, visitor_data=None, data_sync_id=None, player_url=None, **kwargs): if client != 'ios': raise UnsupportedRequest(f'Client {client} is not supported') base_url = ydl.get_info_extractor('Youtube')._configuration_arg( 'getpot_bgutil_baseurl', ['http://127.0.0.1:4416'], casesense=True)[0] # Validate visitor data format for ios client if visitor_data and not visitor_data.startswith('Cg'): raise UnsupportedRequest('Invalid visitor data format for ios client') if not data_sync_id and not visitor_data: raise UnsupportedRequest( 'One of [data_sync_id, visitor_data] must be passed') >>>>>>> 559b875 (feat: Add support for pre-provided ios PO tokens and client-specific validation) try: self.logger.trace( f'Checking server availability at {self._base_url}/ping') response = json.load(self._request_webpage(Request( f'{self._base_url}/ping', extensions={'timeout': self._GET_SERVER_VSN_TIMEOUT}, proxies={'all': None}), note=False)) except TransportError as e: # the server may be down script_path_provided = self.ie._configuration_arg( ie_key='youtubepot-bgutilscript', key='script_path', default=[None])[0] is not None warning_base = f'Error reaching GET {self._base_url}/ping (caused by {e.__class__.__name__}). 
def is_available(self):
    """Report whether this provider should be tried.

    Returns the stored ``_server_available`` flag when it is truthy;
    otherwise returns whether the last recorded server check is more than
    60 seconds old.
    """
    if self._server_available:
        return self._server_available
    now = int(time.time())
    return self._last_server_check + 60 < now
self.logger.trace( f'Checking server availability at {self._base_url}/ping') response = json.load(self._request_webpage(Request( f'{self._base_url}/ping', extensions={'timeout': self._GET_SERVER_VSN_TIMEOUT}, proxies={'all': None}), note=False)) except TransportError as e: # the server may be down script_path_provided = self.ie._configuration_arg( ie_key='youtubepot-bgutilscript', key='script_path', default=[None])[0] is not None warning_base = f'Error reaching GET {self._base_url}/ping (caused by {e.__class__.__name__}). ' if script_path_provided: # server down is expected, log info self._info_and_raise( warning_base + 'This is expected if you are using the script method.') else: self._warn_and_raise( warning_base + f'Please make sure that the server is reachable at {self._base_url}.') return except HTTPError as e: # may be an old server, don't raise self.logger.warning( f'HTTP Error reaching GET /ping (caused by {e!r})', once=True) return except json.JSONDecodeError as e: # invalid server self._warn_and_raise( f'Error parsing ping response JSON (caused by {e!r})') return except Exception as e: self._warn_and_raise( f'Unknown error reaching GET /ping (caused by {e!r})', raise_from=e) return else: self._check_version(response.get('version', ''), name='HTTP server') self._server_available = True return True finally: self._last_server_check = time.time() <<<<<<< HEAD def is_available(self): return self._server_available or self._last_server_check + 60 < int(time.time()) def _real_request_pot( self, request: PoTokenRequest, ) -> PoTokenResponse: if not self._check_server_availability(request): raise PoTokenProviderRejectedRequest( f'{self.PROVIDER_NAME} server is not available') # used for CI check self.logger.trace('Generating POT via HTTP server') ======= def _get_pot(self, client: str, ydl: YoutubeDL, visitor_data=None, data_sync_id=None, player_url=None, **kwargs) -> str: # Check if we have a pre-provided token if client == 'ios' and kwargs.get('po_token'): 
self._logger.info('Using provided ios PO token') return kwargs['po_token'] self._logger.info(f'Generating POT via HTTP server for {client} client') if ((proxy := select_proxy('https://jnn-pa.googleapis.com', self.proxies)) != select_proxy('https://youtube.com', self.proxies)): self._logger.warning( 'Proxies for https://youtube.com and https://jnn-pa.googleapis.com are different. ' 'This is likely to cause subsequent errors.') >>>>>>> 559b875 (feat: Add support for pre-provided ios PO tokens and client-specific validation) try: response = self._request_webpage( request=Request( f'{self._base_url}/get_pot', data=json.dumps({ 'content_binding': get_webpo_content_binding(request)[0], 'proxy': request.request_proxy, 'bypass_cache': request.bypass_cache, 'source_address': request.request_source_address, 'disable_tls_verification': not request.request_verify_tls, }).encode(), headers={'Content-Type': 'application/json'}, extensions={'timeout': self._GETPOT_TIMEOUT}, proxies={'all': None}), note=f'Generating a {request.context.value} PO Token for ' f'{request.internal_client_name} client via bgutil HTTP server', ) except Exception as e: raise PoTokenProviderError( f'Error reaching POST /get_pot (caused by {e!r})') from e try: response_json = json.load(response) except Exception as e: raise PoTokenProviderError( f'Error parsing response JSON (caused by {e!r}). response = {response.read().decode()}') from e if error_msg := response_json.get('error'): raise PoTokenProviderError(error_msg) if 'poToken' not in response_json: raise PoTokenProviderError( f'Server did not respond with a poToken. 
@register_provider
class BgUtilScriptPTP(BgUtilPTPBase):
    """PO Token provider that runs the bgutil ``generate_once.js`` script with node.

    ``is_available`` validates the script path and the node binary;
    ``_real_request_pot`` runs the script and parses the JSON object printed
    on the last line of its stdout.
    """

    PROVIDER_NAME = 'bgutil:script'

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Per-instance memoisation of the script check, keyed by script path.
        self._check_script = functools.cache(self._check_script_impl)

    @functools.cached_property
    def _script_path(self):
        """Resolve the path of ``generate_once.js`` (computed once per instance).

        Order of precedence: the ``script_path`` provider argument (with
        environment variables expanded), then the default location under the
        user's home directory. The deprecated ``youtube:getpot_bgutil_script``
        argument is reported through ``_warn_and_raise``.
        """
        script_path = self._configuration_arg(
            'script_path', casesense=True, default=[None])[0]
        if script_path:
            return os.path.expandvars(script_path)

        # check deprecated arg
        deprecated_script_path = self.ie._configuration_arg(
            ie_key='youtube', key='getpot_bgutil_script', default=[None])[0]
        if deprecated_script_path:
            # NOTE(review): presumably this raises, so the default below is
            # only reached when no argument was passed at all -- confirm.
            self._warn_and_raise(
                "'youtube:getpot_bgutil_script' extractor arg is deprecated, use 'youtubepot-bgutilscript:script_path' instead")

        # default if no arg was passed
        home = os.path.expanduser('~')
        default_path = os.path.join(
            home, 'bgutil-ytdlp-pot-provider', 'server', 'build', 'generate_once.js')
        self.logger.debug(
            f'No script path passed, defaulting to {default_path}')
        return default_path

    def is_available(self):
        """Return the (cached) result of checking the resolved script path."""
        return self._check_script(self._script_path)

    @functools.cached_property
    def _node_path(self):
        """Path of a usable ``node`` binary, or ``None`` when there is none.

        ``None`` is returned both when node is not on PATH and when
        ``_check_node_version`` does not accept the installed version.
        """
        node_path = shutil.which('node')
        if node_path is None:
            self.logger.trace('node is not in PATH')
            # Nothing to probe; never hand None to the subprocess call.
            return None
        vsn = self._check_node_version(node_path)
        if not vsn:
            return None
        self.logger.trace(f'Node version: {vsn}')
        return node_path

    def _check_script_impl(self, script_path):
        """Check that ``script_path`` can be used to generate tokens.

        Returns ``False`` when the file is missing, is not named
        ``generate_once.js``, no usable node binary exists, or running the
        script with ``--version`` fails; ``True`` otherwise. The reported
        version is passed to ``_check_version``.
        """
        if not os.path.isfile(script_path):
            self.logger.debug(
                f"Script path doesn't exist: {script_path}")
            return False
        if os.path.basename(script_path) != 'generate_once.js':
            self.logger.warning(
                'Incorrect script passed to extractor args. Path to generate_once.js required', once=True)
            return False
        node_path = self._node_path
        if not node_path:
            return False
        try:
            stdout, stderr, returncode = Popen.run(
                [node_path, script_path, '--version'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True,
                timeout=self._GET_SERVER_VSN_TIMEOUT)
        except (subprocess.TimeoutExpired, OSError) as e:
            # An availability probe must answer False, not crash the caller.
            self.logger.warning(
                f'Failed to check script version (caused by {e!r})', once=True)
            return False
        if returncode:
            self.logger.warning(
                f'Failed to check script version. '
                f'Script returned {returncode} exit status. '
                f'Script stdout: {stdout}; Script stderr: {stderr}',
                once=True)
            return False
        self._check_version(stdout.strip(), name='script')
        return True

    def _check_node_version(self, node_path):
        """Return the node version as an int tuple if it is recent enough.

        Runs ``node --version`` and compares the parsed ``vX.Y.Z`` against
        ``_MIN_NODE_VSN``. Returns ``None`` (after logging a warning) when the
        binary cannot be run, its output cannot be parsed, or the version is
        too low.
        """
        if not node_path:
            return None
        try:
            stdout, stderr, returncode = Popen.run(
                [node_path, '--version'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True,
                timeout=self._GET_SERVER_VSN_TIMEOUT)
        except (subprocess.TimeoutExpired, OSError, ValueError) as e:
            # No output exists in this case, so report the cause instead of
            # formatting stdout/stderr/returncode that were never assigned.
            self.logger.warning(
                f'Failed to check node version (caused by {e!r})')
            return None

        stdout = stdout.strip()
        mobj = re.match(r'v(\d+)\.(\d+)\.(\d+)', stdout)
        if returncode or not mobj:
            self.logger.warning(
                f'Failed to check node version. '
                f'Node returned {returncode} exit status. '
                f'Node stdout: {stdout}; Node stderr: {stderr}')
            return None

        node_vsn = tuple(map(int, mobj.groups()))
        if node_vsn < self._MIN_NODE_VSN:
            min_vsn_str = 'v' + '.'.join(str(v) for v in self._MIN_NODE_VSN)
            self.logger.warning(
                f'Node version too low. '
                f'(got {stdout}, but at least {min_vsn_str} is required)')
            return None
        return node_vsn

    def _real_request_pot(
        self,
        request: PoTokenRequest,
    ) -> PoTokenResponse:
        """Generate a PO token by running the script for ``request``.

        Builds the node command line from the request (proxy, content
        binding, cache bypass, source address, TLS verification), runs it and
        reads the ``poToken`` field of the JSON printed on the last stdout
        line.

        Raises:
            PoTokenProviderError: when no node binary is usable, the script
                cannot be run or times out, it exits non-zero, or its output
                is empty, is not valid JSON, or lacks ``poToken``.
        """
        # used for CI check
        self.logger.trace(
            f'Generating POT via script: {self._script_path}')

        node_path = self._node_path
        if not node_path:
            # Without this the command below would contain None.
            raise PoTokenProviderError(
                '_get_pot_via_script failed: no usable node binary was found')

        command_args = [node_path, self._script_path]
        if proxy := request.request_proxy:
            command_args.extend(['-p', proxy])
        command_args.extend(['-c', get_webpo_content_binding(request)[0]])
        if request.bypass_cache:
            command_args.append('--bypass-cache')
        if request.request_source_address:
            command_args.extend(
                ['--source-address', request.request_source_address])
        if request.request_verify_tls is False:
            command_args.append('--disable-tls-verification')

        self.logger.info(
            f'Generating a {request.context.value} PO Token for '
            f'{request.internal_client_name} client via bgutil script',
        )
        self.logger.debug(
            f'Executing command to get POT via script: {" ".join(command_args)}')

        try:
            stdout, stderr, returncode = Popen.run(
                command_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True,
                timeout=self._GETPOT_TIMEOUT)
        except subprocess.TimeoutExpired as e:
            raise PoTokenProviderError(
                f'_get_pot_via_script failed: Timeout expired when trying to run script (caused by {e!r})') from e
        except Exception as e:
            raise PoTokenProviderError(
                f'_get_pot_via_script failed: Unable to run script (caused by {e!r})') from e

        msg = f'stdout:\n{stdout.strip()}'
        if stderr.strip():  # Empty strings are falsy
            msg += f'\nstderr:\n{stderr.strip()}'
        self.logger.trace(msg)
        if returncode:
            raise PoTokenProviderError(
                f'_get_pot_via_script failed with returncode {returncode}')

        # The JSON response is always the last line
        output_lines = stdout.splitlines()
        if not output_lines:
            raise PoTokenProviderError(
                'Error parsing JSON response from _get_pot_via_script (caused by empty stdout)')
        try:
            script_data_resp = json.loads(output_lines[-1])
        except json.JSONDecodeError as e:
            raise PoTokenProviderError(
                f'Error parsing JSON response from _get_pot_via_script (caused by {e!r})') from e

        if not isinstance(script_data_resp, dict) or 'poToken' not in script_data_resp:
            raise PoTokenProviderError(
                'The script did not respond with a po_token')
        return PoTokenResponse(po_token=script_data_resp['poToken'])