yt-dlp-dags/airflow/bgutil-diff.txt
2025-08-26 18:00:55 +03:00

408 lines
17 KiB
Plaintext

Diff to getpot_bgutil_http.py
# Request validation from the incoming side of an unresolved merge (commit 559b875).
# Rejects every client other than 'ios', rejects a truthy visitor_data that does not
# start with 'Cg', and requires at least one of data_sync_id / visitor_data.
# NOTE(review): indentation was lost in this dump and the body appears to continue
# after the conflict marker that follows -- confirm against the real source file.
def _validate_get_pot(self, client: str, ydl: YoutubeDL, visitor_data=None, data_sync_id=None, player_url=None, **kwargs):
if client != 'ios':
raise UnsupportedRequest(f'Client {client} is not supported')
# NOTE(review): base_url is not referenced again in the visible lines; the ping code
# further down reads self._base_url instead -- confirm which one is intended.
base_url = ydl.get_info_extractor('Youtube')._configuration_arg(
'getpot_bgutil_baseurl', ['http://127.0.0.1:4416'], casesense=True)[0]
# Validate visitor data format for ios client
if visitor_data and not visitor_data.startswith('Cg'):
raise UnsupportedRequest('Invalid visitor data format for ios client')
if not data_sync_id and not visitor_data:
raise UnsupportedRequest(
'One of [data_sync_id, visitor_data] must be passed')
>>>>>>> 559b875 (feat: Add support for pre-provided ios PO tokens and client-specific validation)
# Pings GET {self._base_url}/ping and records whether the server answered.
# NOTE(review): the enclosing `def` line is not present in this dump; the HEAD-side
# caller uses self._check_server_availability(request), which is presumably this
# body -- confirm against the real source file.
try:
self.logger.trace(
f'Checking server availability at {self._base_url}/ping')
response = json.load(self._request_webpage(Request(
f'{self._base_url}/ping', extensions={'timeout': self._GET_SERVER_VSN_TIMEOUT}, proxies={'all': None}),
note=False))
except TransportError as e:
# the server may be down
script_path_provided = self.ie._configuration_arg(
ie_key='youtubepot-bgutilscript', key='script_path', default=[None])[0] is not None
warning_base = f'Error reaching GET {self._base_url}/ping (caused by {e.__class__.__name__}). '
if script_path_provided: # server down is expected, log info
self._info_and_raise(
warning_base + 'This is expected if you are using the script method.')
else:
self._warn_and_raise(
warning_base + f'Please make sure that the server is reachable at {self._base_url}.')
return
except HTTPError as e:
# may be an old server, don't raise
self.logger.warning(
f'HTTP Error reaching GET /ping (caused by {e!r})', once=True)
return
except json.JSONDecodeError as e:
# invalid server
self._warn_and_raise(
f'Error parsing ping response JSON (caused by {e!r})')
return
except Exception as e:
self._warn_and_raise(
f'Unknown error reaching GET /ping (caused by {e!r})', raise_from=e)
return
else:
# Success path: check the version string reported by the server (empty string
# when the key is absent) and remember that the server is reachable.
self._check_version(response.get('version', ''), name='HTTP server')
self._server_available = True
return True
finally:
# Runs on every path, so failed checks update the timestamp as well.
self._last_server_check = time.time()
<<<<<<< HEAD
def is_available(self):
    """Tell whether this provider should be tried.

    Returns the stored availability flag when it is truthy; otherwise
    returns True only once more than 60 seconds have passed since the
    last recorded server check.
    """
    available = self._server_available
    if available:
        return available
    retry_after = self._last_server_check + 60
    return retry_after < int(time.time())
# HEAD-side start of the token request: raises PoTokenProviderRejectedRequest when
# the availability check returns a falsy value, then emits a trace line.
# NOTE(review): the method is cut off by the conflict separator right after the
# trace call, so the rest of its body is not visible here.
def _real_request_pot(
self,
request: PoTokenRequest,
) -> PoTokenResponse:
if not self._check_server_availability(request):
raise PoTokenProviderRejectedRequest(
f'{self.PROVIDER_NAME} server is not available')
# used for CI check
self.logger.trace('Generating POT via HTTP server')
=======
# Request validation from the incoming side of an unresolved merge (commit 559b875).
# Rejects every client other than 'ios', rejects a truthy visitor_data that does not
# start with 'Cg', and requires at least one of data_sync_id / visitor_data.
# NOTE(review): indentation was lost in this dump and the body appears to continue
# after the conflict marker that follows -- confirm against the real source file.
def _validate_get_pot(self, client: str, ydl: YoutubeDL, visitor_data=None, data_sync_id=None, player_url=None, **kwargs):
if client != 'ios':
raise UnsupportedRequest(f'Client {client} is not supported')
# NOTE(review): base_url is not referenced again in the visible lines; the ping code
# further down reads self._base_url instead -- confirm which one is intended.
base_url = ydl.get_info_extractor('Youtube')._configuration_arg(
'getpot_bgutil_baseurl', ['http://127.0.0.1:4416'], casesense=True)[0]
# Validate visitor data format for ios client
if visitor_data and not visitor_data.startswith('Cg'):
raise UnsupportedRequest('Invalid visitor data format for ios client')
if not data_sync_id and not visitor_data:
raise UnsupportedRequest(
'One of [data_sync_id, visitor_data] must be passed')
>>>>>>> 559b875 (feat: Add support for pre-provided ios PO tokens and client-specific validation)
# Pings GET {self._base_url}/ping and records whether the server answered.
# NOTE(review): the enclosing `def` line is not present in this dump; the HEAD-side
# caller uses self._check_server_availability(request), which is presumably this
# body -- confirm against the real source file.
try:
self.logger.trace(
f'Checking server availability at {self._base_url}/ping')
response = json.load(self._request_webpage(Request(
f'{self._base_url}/ping', extensions={'timeout': self._GET_SERVER_VSN_TIMEOUT}, proxies={'all': None}),
note=False))
except TransportError as e:
# the server may be down
script_path_provided = self.ie._configuration_arg(
ie_key='youtubepot-bgutilscript', key='script_path', default=[None])[0] is not None
warning_base = f'Error reaching GET {self._base_url}/ping (caused by {e.__class__.__name__}). '
if script_path_provided: # server down is expected, log info
self._info_and_raise(
warning_base + 'This is expected if you are using the script method.')
else:
self._warn_and_raise(
warning_base + f'Please make sure that the server is reachable at {self._base_url}.')
return
except HTTPError as e:
# may be an old server, don't raise
self.logger.warning(
f'HTTP Error reaching GET /ping (caused by {e!r})', once=True)
return
except json.JSONDecodeError as e:
# invalid server
self._warn_and_raise(
f'Error parsing ping response JSON (caused by {e!r})')
return
except Exception as e:
self._warn_and_raise(
f'Unknown error reaching GET /ping (caused by {e!r})', raise_from=e)
return
else:
# Success path: check the version string reported by the server (empty string
# when the key is absent) and remember that the server is reachable.
self._check_version(response.get('version', ''), name='HTTP server')
self._server_available = True
return True
finally:
# Runs on every path, so failed checks update the timestamp as well.
self._last_server_check = time.time()
<<<<<<< HEAD
def is_available(self):
    """Tell whether this provider should be tried.

    Returns the stored availability flag when it is truthy; otherwise
    returns True only once more than 60 seconds have passed since the
    last recorded server check.
    """
    available = self._server_available
    if available:
        return available
    retry_after = self._last_server_check + 60
    return retry_after < int(time.time())
# HEAD-side start of the token request: raises PoTokenProviderRejectedRequest when
# the availability check returns a falsy value, then emits a trace line.
# NOTE(review): the conflict separator follows the trace call; the POST to /get_pot
# after the closing conflict marker reads `request`, so it presumably continues this
# method -- confirm when resolving the conflict.
def _real_request_pot(
self,
request: PoTokenRequest,
) -> PoTokenResponse:
if not self._check_server_availability(request):
raise PoTokenProviderRejectedRequest(
f'{self.PROVIDER_NAME} server is not available')
# used for CI check
self.logger.trace('Generating POT via HTTP server')
=======
# Incoming-side (commit 559b875) start of token generation.
# Returns kwargs['po_token'] unchanged for the 'ios' client when one was supplied;
# otherwise logs the generation attempt and warns when the proxies selected for the
# two hosts differ.
# NOTE(review): this side uses self._logger and self.proxies, while the HEAD side
# uses self.logger and a `request` object -- confirm which API applies.
def _get_pot(self, client: str, ydl: YoutubeDL, visitor_data=None, data_sync_id=None, player_url=None, **kwargs) -> str:
# Check if we have a pre-provided token
if client == 'ios' and kwargs.get('po_token'):
self._logger.info('Using provided ios PO token')
return kwargs['po_token']
self._logger.info(f'Generating POT via HTTP server for {client} client')
# NOTE(review): `proxy` is bound by the walrus below but not used in the visible lines.
if ((proxy := select_proxy('https://jnn-pa.googleapis.com', self.proxies))
!= select_proxy('https://youtube.com', self.proxies)):
self._logger.warning(
'Proxies for https://youtube.com and https://jnn-pa.googleapis.com are different. '
'This is likely to cause subsequent errors.')
>>>>>>> 559b875 (feat: Add support for pre-provided ios PO tokens and client-specific validation)
try:
response = self._request_webpage(
request=Request(
f'{self._base_url}/get_pot', data=json.dumps({
'content_binding': get_webpo_content_binding(request)[0],
'proxy': request.request_proxy,
'bypass_cache': request.bypass_cache,
'source_address': request.request_source_address,
'disable_tls_verification': not request.request_verify_tls,
}).encode(), headers={'Content-Type': 'application/json'},
extensions={'timeout': self._GETPOT_TIMEOUT}, proxies={'all': None}),
note=f'Generating a {request.context.value} PO Token for '
f'{request.internal_client_name} client via bgutil HTTP server',
)
except Exception as e:
raise PoTokenProviderError(
f'Error reaching POST /get_pot (caused by {e!r})') from e
try:
response_json = json.load(response)
except Exception as e:
raise PoTokenProviderError(
f'Error parsing response JSON (caused by {e!r}). response = {response.read().decode()}') from e
if error_msg := response_json.get('error'):
raise PoTokenProviderError(error_msg)
if 'poToken' not in response_json:
raise PoTokenProviderError(
f'Server did not respond with a poToken. Received response: {json.dumps(response_json)}')
po_token = response_json['poToken']
self.logger.trace(f'Generated POT: {po_token}')
return PoTokenResponse(po_token=po_token)
@register_preference(BgUtilHTTPPTP)
def bgutil_HTTP_getpot_preference(provider, request):
    """Return the preference value for the HTTP provider.

    The value is the constant 100; neither ``provider`` nor ``request``
    is inspected.
    """
    return 100
# Export list, built from the __name__ attributes of the provider class and its
# preference function.
__all__ = [BgUtilHTTPPTP.__name__,
bgutil_HTTP_getpot_preference.__name__]
-------------------------
Diff to getpot_bgutil_script.py
from __future__ import annotations
import contextlib
import functools
import json
import os.path
import re
import shutil
import subprocess
from yt_dlp.extractor.youtube.pot.utils import get_webpo_content_binding
from yt_dlp.utils import Popen
with contextlib.suppress(ImportError):
from yt_dlp_plugins.extractor.getpot_bgutil import BgUtilPTPBase
from yt_dlp.extractor.youtube.pot.provider import (
PoTokenProviderError,
PoTokenRequest,
PoTokenResponse,
register_preference,
register_provider,
)
# PO token provider registered under the name 'bgutil:script'.
# NOTE(review): indentation was lost in this dump; the definitions that follow are
# presumably methods of this class.
@register_provider
class BgUtilScriptPTP(BgUtilPTPBase):
PROVIDER_NAME = 'bgutil:script'
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
# Per-instance memoised wrapper: results of _check_script_impl are cached by argument.
self._check_script = functools.cache(self._check_script_impl)
@functools.cached_property
def _script_path(self):
    """Resolve the path of the generate_once.js script.

    Uses the 'script_path' configuration argument (with environment
    variables expanded) when it is set. Otherwise checks the deprecated
    'youtube:getpot_bgutil_script' argument, reporting it through
    ``self._warn_and_raise`` if present, and finally falls back to
    ``~/bgutil-ytdlp-pot-provider/server/build/generate_once.js``.
    """
    configured = self._configuration_arg(
        'script_path', casesense=True, default=[None])[0]
    if configured:
        return os.path.expandvars(configured)

    # check deprecated arg
    legacy = self.ie._configuration_arg(
        ie_key='youtube', key='getpot_bgutil_script', default=[None])[0]
    if legacy:
        self._warn_and_raise(
            "'youtube:getpot_bgutil_script' extractor arg is deprecated, use 'youtubepot-bgutilscript:script_path' instead")

    # default if no arg was passed
    fallback = os.path.join(
        os.path.expanduser('~'),
        'bgutil-ytdlp-pot-provider', 'server', 'build', 'generate_once.js')
    self.logger.debug(
        f'No script path passed, defaulting to {fallback}')
    return fallback
<<<<<<< HEAD
def is_available(self):
    """Return what ``self._check_script`` reports for ``self._script_path``."""
    checker = self._check_script
    return checker(self._script_path)
@functools.cached_property
def _node_path(self):
    """Locate a usable ``node`` executable.

    Returns:
        The path found by ``shutil.which('node')`` when
        ``self._check_node_version`` accepts it, otherwise ``None``.
    """
    node_path = shutil.which('node')
    if node_path is None:
        self.logger.trace('node is not in PATH')
        # Nothing to version-check: passing None on would put None into the
        # node command line instead of reporting "node not found".
        return None
    vsn = self._check_node_version(node_path)
    if not vsn:
        return None
    self.logger.trace(f'Node version: {vsn}')
    return node_path
# HEAD side of an unresolved conflict: validates the script at script_path. The checks
# after the closing conflict marker return False on failure and True once the script's
# --version call has succeeded and its output was handed to self._check_version.
def _check_script_impl(self, script_path):
=======
# Incoming side (commit 046a994): older validation entry point that reads the script
# path from the 'getpot_bgutil_script' extractor argument.
# NOTE(review): requested_client is read from the 'formats' extractor argument and
# compared with `client` -- confirm that key and its value type are what was intended.
def _validate_get_pot(self, client: str, ydl: YoutubeDL, visitor_data=None, data_sync_id=None, player_url=None, **kwargs):
script_path = ydl.get_info_extractor('Youtube')._configuration_arg(
'getpot_bgutil_script', [self._default_script_path], casesense=True)[0]
# If a specific client is requested, validate it's supported
requested_client = ydl.params.get('extractor_args', {}).get('youtube', {}).get('formats')
if requested_client and client != requested_client:
raise UnsupportedRequest(f'Skipping {client} as {requested_client} was specifically requested')
if not data_sync_id and not visitor_data:
raise UnsupportedRequest(
'One of [data_sync_id, visitor_data] must be passed')
>>>>>>> 046a994 (refactor: support client-specific requests via extractor_args in POT providers)
# Shared tail: the path must be an existing file named generate_once.js.
if not os.path.isfile(script_path):
self.logger.debug(
f"Script path doesn't exist: {script_path}")
return False
if os.path.basename(script_path) != 'generate_once.js':
self.logger.warning(
'Incorrect script passed to extractor args. Path to generate_once.js required', once=True)
return False
node_path = self._node_path
if not node_path:
return False
# Ask the script for its version.
# NOTE(review): no handler for a timeout of this call is visible here -- confirm
# whether the caller deals with it.
stdout, stderr, returncode = Popen.run(
[self._node_path, script_path, '--version'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True,
timeout=self._GET_SERVER_VSN_TIMEOUT)
if returncode:
self.logger.warning(
f'Failed to check script version. '
f'Script returned {returncode} exit status. '
f'Script stdout: {stdout}; Script stderr: {stderr}',
once=True)
return False
else:
self._check_version(stdout.strip(), name='script')
return True
def _check_node_version(self, node_path):
    """Run ``node --version`` and compare the result with the minimum version.

    Args:
        node_path: Path of the node executable to run.

    Returns:
        The parsed ``(major, minor, patch)`` tuple when it is at least
        ``self._MIN_NODE_VSN``; otherwise ``None`` after logging a warning.
    """
    try:
        stdout, stderr, returncode = Popen.run(
            [node_path, '--version'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True,
            timeout=self._GET_SERVER_VSN_TIMEOUT)
    except subprocess.TimeoutExpired:
        # The call never returned, so there is no output or exit status to
        # report; bind placeholders so the warning below can still be built.
        stdout, stderr, returncode = '', '', None

    stdout = stdout.strip()
    mobj = re.match(r'v(\d+)\.(\d+)\.(\d+)', stdout)
    if returncode or not mobj:
        self.logger.warning(
            f'Failed to check node version. '
            f'Node returned {returncode} exit status. '
            f'Node stdout: {stdout}; Node stderr: {stderr}')
        return None

    node_vsn = tuple(map(int, mobj.groups()))
    if node_vsn >= self._MIN_NODE_VSN:
        return node_vsn

    min_vsn_str = 'v' + '.'.join(str(v) for v in self._MIN_NODE_VSN)
    self.logger.warning(
        f'Node version too low. '
        f'(got {stdout}, but at least {min_vsn_str} is required)')
    return None
def _real_request_pot(
    self,
    request: PoTokenRequest,
) -> PoTokenResponse:
    """Generate a PO token by running the generate_once.js script with node.

    The command line is built from ``request`` (proxy, content binding,
    cache bypass, source address, TLS verification) and run with a timeout
    of ``self._GETPOT_TIMEOUT``. The last line of stdout is parsed as JSON.

    Returns:
        A PoTokenResponse carrying the ``poToken`` value from the script.

    Raises:
        PoTokenProviderError: when the script times out or cannot be run,
            exits with a non-zero status, prints nothing or invalid JSON,
            or replies without a ``poToken`` key.
    """
    # used for CI check
    self.logger.trace(
        f'Generating POT via script: {self._script_path}')

    command_args = [self._node_path, self._script_path]
    if proxy := request.request_proxy:
        command_args.extend(['-p', proxy])
    command_args.extend(['-c', get_webpo_content_binding(request)[0]])
    if request.bypass_cache:
        command_args.append('--bypass-cache')
    if request.request_source_address:
        command_args.extend(
            ['--source-address', request.request_source_address])
    if request.request_verify_tls is False:
        command_args.append('--disable-tls-verification')

    self.logger.info(
        f'Generating a {request.context.value} PO Token for '
        f'{request.internal_client_name} client via bgutil script',
    )
    self.logger.debug(
        f'Executing command to get POT via script: {" ".join(command_args)}')

    try:
        stdout, stderr, returncode = Popen.run(
            command_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True,
            timeout=self._GETPOT_TIMEOUT)
    except subprocess.TimeoutExpired as e:
        raise PoTokenProviderError(
            f'_get_pot_via_script failed: Timeout expired when trying to run script (caused by {e!r})') from e
    except Exception as e:
        raise PoTokenProviderError(
            f'_get_pot_via_script failed: Unable to run script (caused by {e!r})') from e

    msg = f'stdout:\n{stdout.strip()}'
    if stderr.strip():  # Empty strings are falsy
        msg += f'\nstderr:\n{stderr.strip()}'
    self.logger.trace(msg)
    if returncode:
        raise PoTokenProviderError(
            f'_get_pot_via_script failed with returncode {returncode}')

    try:
        # The JSON response is always the last line
        script_data_resp = json.loads(stdout.splitlines()[-1])
    except (json.JSONDecodeError, IndexError) as e:
        # IndexError: the script exited with status 0 but printed nothing.
        raise PoTokenProviderError(
            f'Error parsing JSON response from _get_pot_via_script (caused by {e!r})') from e
    # A JSON value that is not an object cannot carry a token either.
    if not isinstance(script_data_resp, dict) or 'poToken' not in script_data_resp:
        raise PoTokenProviderError(
            'The script did not respond with a po_token')
    return PoTokenResponse(po_token=script_data_resp['poToken'])
@register_preference(BgUtilScriptPTP)
def bgutil_script_getpot_preference(provider, request):
    """Return the preference value for the script provider.

    The value is the constant 1; neither ``provider`` nor ``request``
    is inspected.
    """
    return 1
# Export list, built from the __name__ attributes of the provider class and its
# preference function.
__all__ = [BgUtilScriptPTP.__name__,
bgutil_script_getpot_preference.__name__]