Updates on minio, envoy ports on master, adding ytops_client
parent f151ffee86
commit 0ead029b85
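For orientation only, a hedged Python sketch of the kind of ytops-client get-info invocation the updated DAG code in this commit assembles; the host, port, profile, output path, and URL below are placeholder assumptions, while the subcommand and flags are the ones that appear in the diff.

# Hedged sketch: mirrors the cmd list the updated get_token task builds.
# 'envoy-thrift-lb', 9980, 'acct_example', the output path, and the URL are assumed placeholders.
import shlex
import subprocess

cmd = [
    'ytops-client', 'get-info',
    '--host', 'envoy-thrift-lb',      # management service host (DAG default)
    '--port', '9980',                 # new envoy port used on master in this commit
    '--profile', 'acct_example',      # hypothetical account id
    '--output', '/opt/airflow/downloadfiles/info_example.json',
    '--print-proxy', '--verbose', '--log-return',
    '--client', 'tv_simply',          # new default client list
    'https://www.youtube.com/watch?v=EXAMPLE',
]
print(' '.join(shlex.quote(a) for a in cmd))   # copy-paste friendly form, as the DAG logs it
proc = subprocess.run(cmd, capture_output=True, text=True, timeout=600)
print(proc.returncode, proc.stdout[:200])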
@@ -105,7 +105,9 @@ RUN mv /usr/local/bin/pip /usr/local/bin/pip.orig && \
 "gunicorn==20.1.0" \
 "python-ffmpeg==2.0.12" \
 "ffprobe3" \
-"python-dotenv" && \
+"python-dotenv" \
+"PyYAML" \
+"aria2p" && \
 mv /usr/local/bin/pip.orig /usr/local/bin/pip

 # --- Install the custom yt_ops_services package ---
@@ -117,6 +119,12 @@ COPY --chown=airflow:airflow yt_ops_services ./yt_ops_services/
 COPY --chown=airflow:airflow thrift_model ./thrift_model/
 COPY --chown=airflow:airflow pangramia ./pangramia/

+# Copy the ytops-client tool and its executable
+COPY --chown=airflow:airflow ytops_client ./ytops_client/
+COPY --chown=airflow:airflow bin/ytops-client /app/bin/ytops-client
+RUN chmod +x /app/bin/ytops-client
+ENV PATH="/app/bin:${PATH}"
+
 # Install the package in editable mode. This runs setup.py and installs all dependencies
 # listed in `install_requires`, making the `yt_ops_services` module available everywhere.
 # Bypass the pip root check again.
@@ -118,14 +118,14 @@ services:
 - "{{ service_role }}"

 # --- S3 Logging Parameters ---
-- "--s3-endpoint-url"
-- "${S3_ENDPOINT_URL}"
-- "--s3-access-key-id"
-- "${S3_ACCESS_KEY_ID}"
-- "--s3-secret-access-key"
-- "${S3_SECRET_ACCESS_KEY}"
-- "--s3-region-name"
-- "${S3_REGION_NAME}"
+#- "--s3-endpoint-url"
+#- "${S3_ENDPOINT_URL}"
+#- "--s3-access-key-id"
+#- "${S3_ACCESS_KEY_ID}"
+#- "--s3-secret-access-key"
+#- "${S3_SECRET_ACCESS_KEY}"
+#- "--s3-region-name"
+#- "${S3_REGION_NAME}"
 {% if service_role is defined and service_role != 'management' %}
 # --- Parameters for worker/all-in-one roles ONLY ---
 - "--script-dir"
@@ -4,11 +4,11 @@ events {

 http {
 upstream minio_servers {
-server minio:9000;
+server 172.17.0.1:9001;
 }

 upstream minio_console_servers {
-server minio:9001;
+server 172.17.0.1:9002;
 }

 server {
@@ -45,7 +45,7 @@ except ImportError as e:
 raise

 DEFAULT_MANAGEMENT_SERVICE_IP = Variable.get("MANAGEMENT_SERVICE_HOST", default_var="envoy-thrift-lb")
-DEFAULT_MANAGEMENT_SERVICE_PORT = Variable.get("MANAGEMENT_SERVICE_PORT", default_var=9080)
+DEFAULT_MANAGEMENT_SERVICE_PORT = Variable.get("MANAGEMENT_SERVICE_PORT", default_var=9980)
 DEFAULT_REDIS_CONN_ID = "redis_default"

 # Version tracking for debugging
@@ -55,9 +55,13 @@ def _get_predefined_url_lists():
 'urls.dh128.json',
 'urls.rt100.json',
 'urls.rt25.json',
+'urls.rt250.json',
+'urls.rt500.json',
+'urls.rt3000.json',
 'urls.sky28.json',
 'urls.sky3.json',
 'urls.tq46.json',
+'urls.topnews500.json',
 ]
 return ['None'] + sorted(predefined_files)

@@ -256,15 +260,15 @@ def clear_queue_callable(**context):
 redis_conn_id = params['redis_conn_id']

 queue_system = params.get('queue_system', 'v1_monolithic')
+queue_base_names_to_clear = []
 if queue_system == 'v1_monolithic':
-queue_base_name = params['queue_base_name']
-elif queue_system == 'v2_separated_auth':
-queue_base_name = 'queue2_auth'
-elif queue_system == 'v2_separated_dl':
-queue_base_name = 'queue2_dl'
+queue_base_names_to_clear.append(params['queue_base_name'])
+elif queue_system.startswith('v2_'):
+# For v2, clear both auth and dl queues for a complete clear.
+queue_base_names_to_clear.extend(['queue2_auth', 'queue2_dl'])
 else:
 raise ValueError(f"Invalid queue_system: {queue_system}")
-logger.info(f"Operating on queue system '{queue_system}' with base name '{queue_base_name}'.")
+logger.info(f"Operating on queue system '{queue_system}' with base names: {queue_base_names_to_clear}.")

 queues_to_clear_options = params.get('queues_to_clear_options', [])
 confirm_clear = params.get('confirm_clear', False)
@@ -290,14 +294,15 @@ def clear_queue_callable(**context):

 all_suffixes = ['_inbox', '_fail', '_result', '_progress']
 keys_to_delete = set()
-if '_all' in queues_to_clear_options:
-logger.info("'_all' option selected. Clearing all standard queues.")
-for suffix in all_suffixes:
-keys_to_delete.add(f"{queue_base_name}{suffix}")
-else:
-for suffix in queues_to_clear_options:
-if suffix in all_suffixes:
+for queue_base_name in queue_base_names_to_clear:
+if '_all' in queues_to_clear_options:
+logger.info(f"'_all' option selected. Clearing all standard queues for base '{queue_base_name}'.")
+for suffix in all_suffixes:
 keys_to_delete.add(f"{queue_base_name}{suffix}")
+else:
+for suffix in queues_to_clear_options:
+if suffix in all_suffixes:
+keys_to_delete.add(f"{queue_base_name}{suffix}")

 if not keys_to_delete:
 logger.warning("No valid queue suffixes were selected. Nothing to delete.")
@@ -37,7 +37,7 @@ logger = logging.getLogger(__name__)
 # Default settings from Airflow Variables or hardcoded fallbacks
 DEFAULT_REDIS_CONN_ID = 'redis_default'
 DEFAULT_YT_AUTH_SERVICE_IP = Variable.get("YT_AUTH_SERVICE_IP", default_var="172.17.0.1")
-DEFAULT_YT_AUTH_SERVICE_PORT = Variable.get("YT_AUTH_SERVICE_PORT", default_var=9080)
+DEFAULT_YT_AUTH_SERVICE_PORT = Variable.get("YT_AUTH_SERVICE_PORT", default_var=9980)

 DEFAULT_ARGS = {
 'owner': 'airflow',
@@ -75,10 +75,10 @@ DEFAULT_REQUEST_PARAMS_JSON = """{
 # Default settings
 DEFAULT_QUEUE_NAME = 'video_queue'
 DEFAULT_REDIS_CONN_ID = 'redis_default'
-DEFAULT_TOTAL_WORKERS = 3
+DEFAULT_TOTAL_WORKERS = 8
 DEFAULT_WORKERS_PER_BUNCH = 1
-DEFAULT_WORKER_DELAY_S = 5
-DEFAULT_BUNCH_DELAY_S = 20
+DEFAULT_WORKER_DELAY_S = 1
+DEFAULT_BUNCH_DELAY_S = 1

 DEFAULT_YT_AUTH_SERVICE_IP = Variable.get("YT_AUTH_SERVICE_IP", default_var="172.17.0.1")
 DEFAULT_YT_AUTH_SERVICE_PORT = Variable.get("YT_AUTH_SERVICE_PORT", default_var=9080)
@@ -323,7 +323,7 @@ with DAG(

 # --- Worker Passthrough Parameters ---
 'on_auth_failure': Param(
-'retry_with_new_account',
+'proceed_loop_under_manual_inspection',
 type="string",
 enum=['stop_loop', 'retry_with_new_account', 'retry_without_ban', 'proceed_loop_under_manual_inspection'],
 title="[Worker Param] On Authentication Failure Policy",
@@ -343,38 +343,17 @@ with DAG(
 "'proceed_loop': (Default) Mark URL as failed but continue the processing loop with a new URL. "
 "'retry_with_new_token': Attempt to get a new token with a new account and retry the download once. If it fails again, proceed loop."
 ),
-'request_params_json': Param(DEFAULT_REQUEST_PARAMS_JSON, type="string", title="[Worker Param] Request Params JSON", description="JSON string with request parameters for the token service.", render_kwargs={"rows": 20, "cols": 120}),
+'request_params_json': Param('{}', type="string", title="[Worker Param] Request Params JSON", description="JSON string with request parameters for the token service."),
 'queue_name': Param(DEFAULT_QUEUE_NAME, type="string", description="[Worker Param] Base name for Redis queues."),
 'redis_conn_id': Param(DEFAULT_REDIS_CONN_ID, type="string", description="[Worker Param] Airflow Redis connection ID."),
 'clients': Param(
-'mweb,web_camoufox,tv',
+'tv_simply',
 type="string",
 enum=[
-'mweb,web_camoufox,tv',
+'tv_simply',
 'mweb',
-'web_camoufox',
 'tv',
 'custom',
-'tv,web_safari,mweb,web_camoufox',
-'web_safari',
-'web',
-'web_embedded',
-'web_music',
-'web_creator',
-'web_safari_camoufox',
-'web_embedded_camoufox',
-'web_music_camoufox',
-'web_creator_camoufox',
-'mweb_camoufox',
-'android',
-'android_music',
-'android_creator',
-'android_vr',
-'ios',
-'ios_music',
-'ios_creator',
-'tv_simply',
-'tv_embedded',
 ],
 title="[Worker Param] Clients",
 description="[Worker Param] Comma-separated list of clients for token generation. Full list: web, web_safari, web_embedded, web_music, web_creator, mweb, web_camoufox, web_safari_camoufox, web_embedded_camoufox, web_music_camoufox, web_creator_camoufox, mweb_camoufox, android, android_music, android_creator, android_vr, ios, ios_music, ios_creator, tv, tv_simply, tv_embedded. See DAG documentation for details."
@@ -402,27 +381,24 @@ with DAG(
 type="string",
 enum=['best_audio', 'formats_0', 'formats_2', 'formats_3', 'custom'],
 title="[Worker Param] Download Format Preset",
-description="Select a predefined format string or choose 'custom' to use the value from 'Custom Download Format'.\nformats_0: 18,140\nformats_2: 18,140,299/298/137/136/135/134/133\nformats_3: 18,599,139,140,141,160/269,133/229,134/230,135/231,136/232,137/270,298/311,299/318"
+description="Select a predefined format string or choose 'custom' to use the value from 'Custom Download Format'.\nformats_0: 18,140\nformats_2: 18-dashy,140-dashy,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy\nformats_3: 18,599,139,140,141,160/269,133/229,134/230,135/231,136/232,137/270,298/311,299/318"
 ),
 'download_format_custom': Param(
-'18,140,299/298/137/136/135/134/133',
+'18-dashy,140-dashy,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy',
 type="string",
 title="[Worker Param] Custom Download Format",
 description="Custom yt-dlp format string. Used when preset is 'custom'. E.g., 'ba[ext=m4a]/bestaudio/best'."
 ),
 'downloader': Param(
-'default',
+'cli',
 type="string",
-enum=['default', 'aria2c'],
-title="[Worker Param] Downloader",
-description="Choose the downloader for yt-dlp."
-),
-'downloader_args_aria2c': Param(
-'aria2c:-x 4 -k 2M --max-download-limit=3M',
-type="string",
-title="[Worker Param] Aria2c Downloader Arguments",
-description="Arguments to pass to yt-dlp's --downloader-args. Used when downloader is 'aria2c'."
+enum=['py', 'aria-rpc', 'cli'],
+title="[Worker Param] Download Tool",
+description="Choose the download tool to use: 'py' (native python, recommended), 'aria-rpc' (send to aria2c daemon), 'cli' (legacy yt-dlp wrapper)."
 ),
+'aria_host': Param('172.17.0.1', type="string", title="[Worker Param] Aria2c Host", description="For 'aria-rpc' downloader: Host of the aria2c RPC server. Can be set via Airflow Variable 'YTDLP_ARIA_HOST'."),
+'aria_port': Param(6800, type="integer", title="[Worker Param] Aria2c Port", description="For 'aria-rpc' downloader: Port of the aria2c RPC server. Can be set via Airflow Variable 'YTDLP_ARIA_PORT'."),
+'aria_secret': Param('SQGCQPLVFQIASMPNPOJYLVGJYLMIDIXDXAIXOTX', type="string", title="[Worker Param] Aria2c Secret", description="For 'aria-rpc' downloader: Secret token. Can be set via Airflow Variable 'YTDLP_ARIA_SECRET'."),
 'yt_dlp_extra_args': Param(
 '--restrict-filenames',
 type=["string", "null"],
@@ -290,7 +290,10 @@ def get_url_and_assign_account(**context):

 @task
 def get_token(initial_data: dict, **context):
-"""Makes a single attempt to get a token from the Thrift service."""
+"""Makes a single attempt to get a token by calling the ytops-client get-info tool."""
+import subprocess
+import shlex
+
 ti = context['task_instance']
 params = context['params']

@@ -298,129 +301,85 @@ def get_token(initial_data: dict, **context):
 url = initial_data['url_to_process']
 info_json_dir = Variable.get('DOWNLOADS_TEMP', '/opt/airflow/downloadfiles')

-host, port, timeout = params['service_ip'], int(params['service_port']), int(params.get('timeout', DEFAULT_TIMEOUT))
+host, port = params['service_ip'], int(params['service_port'])
 machine_id = params.get('machine_id') or socket.gethostname()
 clients = params.get('clients')
 request_params_json = params.get('request_params_json', '{}')
 assigned_proxy_url = params.get('assigned_proxy_url')

-# Pretty-print the request parameters for debugging
-try:
-pretty_request_params = json.dumps(json.loads(request_params_json), indent=2)
-logger.info(f"\n--- Request Parameters ---\n{pretty_request_params}\n--- End of Request Parameters ---")
-except (json.JSONDecodeError, TypeError):
-logger.warning("Could not parse request_params_json. Using raw content.")
-logger.info(f"\n--- Raw Request Parameters ---\n{request_params_json}\n--- End of Raw Request Parameters ---")
+video_id = _extract_video_id(url)
+os.makedirs(info_json_dir, exist_ok=True)
+timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+info_json_path = os.path.join(info_json_dir, f"info_{video_id or 'unknown'}_{account_id}_{timestamp}.json")

-# Construct Airflow log context to pass to the service
-try:
-from airflow.configuration import conf
-remote_base = conf.get('logging', 'remote_base_log_folder')
-log_path = (
-f"{remote_base}/dag_id={ti.dag_id}/run_id={ti.run_id}/"
-f"task_id={ti.task_id}/attempt={ti.try_number}.log"
-)
-airflow_log_context = AirflowLogContext(
-logS3Path=log_path,
-dagId=ti.dag_id,
-runId=ti.run_id,
-taskId=ti.task_id,
-tryNumber=ti.try_number,
-workerHostname=socket.gethostname(),
-queue=ti.queue
-)
-logger.info(f"Constructed Airflow log context for yt-ops service: {airflow_log_context}")
-except Exception as e:
-logger.warning(f"Could not construct full Airflow log context: {e}. Creating a basic one.")
-airflow_log_context = AirflowLogContext(
-dagId=ti.dag_id,
-runId=ti.run_id,
-taskId=ti.task_id,
-tryNumber=ti.try_number,
-workerHostname=socket.gethostname(),
-queue=ti.queue
-)
+cmd = [
+'ytops-client', 'get-info',
+'--host', host,
+'--port', str(port),
+'--profile', account_id,
+'--output', info_json_path,
+'--print-proxy',
+'--verbose',
+'--log-return',
+]
+
+if clients:
+cmd.extend(['--client', clients])
+if machine_id:
+cmd.extend(['--machine-id', machine_id])
+if request_params_json and request_params_json != '{}':
+cmd.extend(['--request-params-json', request_params_json])
+if assigned_proxy_url:
+cmd.extend(['--assigned-proxy-url', assigned_proxy_url])
+
+cmd.append(url)

 logger.info(f"--- Attempting to get token for URL '{url}' with account '{account_id}' (Clients: {clients}) ---")
-client, transport = None, None
-try:
-client, transport = _get_thrift_client(host, port, timeout)
-token_data = client.getOrRefreshToken(
-accountId=account_id,
-updateType=TokenUpdateMode.AUTO,
-url=url,
-clients=clients,
-machineId=machine_id,
-airflowLogContext=airflow_log_context,
-requestParamsJson=request_params_json,
-assignedProxyUrl=assigned_proxy_url
-)
+copy_paste_cmd = ' '.join(shlex.quote(arg) for arg in cmd)
+logger.info(f"Executing command: {copy_paste_cmd}")

-# Log a compact summary of the Thrift response, omitting large/detailed fields.
-summary_token_data = copy(token_data)
-if hasattr(summary_token_data, 'infoJson') and summary_token_data.infoJson:
-summary_token_data.infoJson = f"... ({len(summary_token_data.infoJson)} bytes) ..."
-if hasattr(summary_token_data, 'cookiesBlob') and summary_token_data.cookiesBlob:
-summary_token_data.cookiesBlob = f"... ({len(summary_token_data.cookiesBlob)} bytes) ..."
-# These will be logged separately below.
-if hasattr(summary_token_data, 'requestSummary'):
-summary_token_data.requestSummary = "..."
-if hasattr(summary_token_data, 'communicationLogs'):
-summary_token_data.communicationLogs = "..."
-logger.info(f"Thrift service response summary: {summary_token_data}")
+process = subprocess.run(cmd, capture_output=True, text=True, timeout=int(params.get('timeout', DEFAULT_TIMEOUT)))

-request_summary = getattr(token_data, 'requestSummary', None)
-if request_summary:
-# Prepending a newline for better separation in logs.
-logger.info(f"\n--- Request Summary ---\n{request_summary}")
+if process.stdout:
+logger.info(f"ytops-client STDOUT:\n{process.stdout}")
+if process.stderr:
+logger.info(f"ytops-client STDERR:\n{process.stderr}")

-communication_logs = getattr(token_data, 'communicationLogs', None)
-if communication_logs:
-logger.info("--- Communication Logs from Token Service ---")
-logger.info(communication_logs)
-logger.info("--- End of Communication Logs ---")
+if process.returncode != 0:
+error_message = "ytops-client failed. See logs for details."
+for line in reversed(process.stderr.strip().split('\n')):
+if 'ERROR' in line or 'Thrift error' in line or 'Connection to server failed' in line:
+error_message = line.strip()
+break

-info_json = getattr(token_data, 'infoJson', None)
-if not (info_json and json.loads(info_json)):
-raise AirflowException("Service returned success but info.json was empty or invalid.")
+error_code = 'GET_INFO_CLIENT_FAIL'
+if "BOT_DETECTED" in process.stderr:
+error_code = "BOT_DETECTED"
+elif "BOT_DETECTION_SIGN_IN_REQUIRED" in process.stderr:
+error_code = "BOT_DETECTION_SIGN_IN_REQUIRED"
+elif "Connection to server failed" in process.stderr:
+error_code = "TRANSPORT_ERROR"

-video_id = _extract_video_id(url)
-os.makedirs(info_json_dir, exist_ok=True)
-# Use a readable timestamp for a unique filename on each attempt.
-timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-info_json_path = os.path.join(info_json_dir, f"info_{video_id or 'unknown'}_{account_id}_{timestamp}.json")
-with open(info_json_path, 'w', encoding='utf-8') as f:
-f.write(info_json)

-proxy_attr = next((attr for attr in ['socks5Proxy', 'socksProxy', 'socks'] if hasattr(token_data, attr)), None)
-ytdlp_command = getattr(token_data, 'ytdlpCommand', None)
-if ytdlp_command:
-logger.info(f"--- YTDLP Command from Token Service ---\n{ytdlp_command}\n--- End of YTDLP Command ---")

-return {
-'info_json_path': info_json_path,
-'socks_proxy': getattr(token_data, proxy_attr) if proxy_attr else None,
-'ytdlp_command': ytdlp_command,
-'successful_account_id': account_id,
-'original_url': url, # Include original URL for fallback
-}
-except (PBServiceException, PBUserException, TTransportException) as e:
-error_context = getattr(e, 'context', None)
-if isinstance(error_context, str):
-try: error_context = json.loads(error_context.replace("'", "\""))
-except: pass

 error_details = {
-'error_message': getattr(e, 'message', str(e)),
-'error_code': getattr(e, 'errorCode', 'TRANSPORT_ERROR'),
-'proxy_url': error_context.get('proxy_url') if isinstance(error_context, dict) else None
+'error_message': error_message,
+'error_code': error_code,
+'proxy_url': None
 }
-logger.error(f"Thrift call failed for account '{account_id}'. Details: {error_details}")
 ti.xcom_push(key='error_details', value=error_details)
-raise AirflowException(f"Thrift call failed: {error_details['error_message']}")
-finally:
-if transport and transport.isOpen():
-transport.close()
+raise AirflowException(f"ytops-client get-info failed: {error_message}")
+
+proxy = None
+proxy_match = re.search(r"Proxy used: (.*)", process.stderr)
+if proxy_match:
+proxy = proxy_match.group(1).strip()
+
+return {
+'info_json_path': info_json_path,
+'socks_proxy': proxy,
+'ytdlp_command': None,
+'successful_account_id': account_id,
+'original_url': url,
+}

 @task.branch
 def handle_bannable_error_branch(task_id_to_check: str, **context):
@@ -706,7 +665,7 @@ def download_and_probe(token_data: dict, available_formats: list[str], **context
 elif format_preset == 'formats_0':
 download_format = '18,140'
 elif format_preset == 'formats_2':
-download_format = '18,140,299/298/137/136/135/134/133'
+download_format = '18-dashy,140-dashy,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy'
 elif format_preset == 'formats_3':
 download_format = '18,599,139,140,141,160/269,133/229,134/230,135/231,136/232,137/270,298/311,299/318'
 else:
@@ -720,112 +679,102 @@ def download_and_probe(token_data: dict, available_formats: list[str], **context
 raise AirflowException(f"Error: info.json path is missing or file does not exist ({info_json_path}).")

 def run_yt_dlp_command(format_selector: str):
-"""Constructs and runs a yt-dlp command, returning a list of final filenames."""
-cmd = [
-'yt-dlp', '--verbose', '--print-traffic', '--load-info-json', info_json_path,
-'-f', format_selector, '-o', full_output_path,
-'--print', 'filename', '--continue', '--no-progress', '--no-simulate',
-'--no-write-info-json', '--ignore-errors', '--no-playlist',
-]
+"""Constructs and runs a yt-ops-client download command, returning a list of final filenames."""
+downloader = params.get('downloader', 'py')
+cmd = ['ytops-client', 'download', downloader, '--load-info-json', info_json_path, '-f', format_selector]

-if params.get('fragment_retries'):
-cmd.extend(['--fragment-retries', str(params['fragment_retries'])])
-if params.get('limit_rate'):
-cmd.extend(['--limit-rate', params['limit_rate']])
-if params.get('socket_timeout'):
-cmd.extend(['--socket-timeout', str(params['socket_timeout'])])
-if params.get('min_sleep_interval'):
-cmd.extend(['--min-sleep-interval', str(params['min_sleep_interval'])])
-if params.get('max_sleep_interval'):
-cmd.extend(['--max-sleep-interval', str(params['max_sleep_interval'])])
-if params.get('yt_dlp_test_mode'):
-cmd.append('--test')
-
-downloader = params.get('downloader', 'default')
-if proxy and not (downloader == 'aria2c' and proxy.startswith('socks5://')):
+if proxy:
 cmd.extend(['--proxy', proxy])

-gost_process = None
-try:
-if downloader == 'aria2c':
-cmd.extend(['--downloader', 'aria2c'])
-downloader_args = params.get('downloader_args_aria2c')
-if proxy and proxy.startswith('socks5://'):
-import socket
-from contextlib import closing
-def find_free_port():
-with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s:
-s.bind(('', 0))
-s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
-return s.getsockname()[1]
-local_port = find_free_port()
-http_proxy = f"http://127.0.0.1:{local_port}"
-logger.info(f"Starting gost for format '{format_selector}' to forward {proxy} to {http_proxy}")
-gost_cmd = ['gost', '-L', f'http://127.0.0.1:{local_port}', '-F', proxy]
-gost_process = subprocess.Popen(gost_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-time.sleep(1)
-if gost_process.poll() is not None:
-stdout, stderr = gost_process.communicate()
-logger.error(f"gost failed to start. Exit: {gost_process.returncode}. Stdout: {stdout.decode()}. Stderr: {stderr.decode()}")
-raise AirflowException("gost proxy tunnel failed to start.")
-user_args = downloader_args[len('aria2c:'):] if downloader_args and downloader_args.startswith('aria2c:') else (downloader_args or "")
-final_args_str = f'aria2c:{user_args.strip()} --http-proxy={http_proxy}'
-cmd.extend(['--downloader-args', final_args_str])
-elif downloader_args:
-cmd.extend(['--downloader-args', downloader_args])
+if downloader == 'py':
+cmd.extend(['--output-dir', download_dir])
+# The 'py' tool maps many yt-dlp flags via --extra-ytdlp-args
+py_extra_args = []
+if params.get('fragment_retries'):
+py_extra_args.extend(['--fragment-retries', str(params['fragment_retries'])])
+if params.get('limit_rate'):
+py_extra_args.extend(['--limit-rate', params['limit_rate']])
+if params.get('socket_timeout'):
+py_extra_args.extend(['--socket-timeout', str(params['socket_timeout'])])
+if params.get('min_sleep_interval'):
+py_extra_args.extend(['--sleep-interval', str(params['min_sleep_interval'])])
+if params.get('max_sleep_interval'):
+py_extra_args.extend(['--max-sleep-interval', str(params['max_sleep_interval'])])
+if params.get('yt_dlp_test_mode'):
+py_extra_args.append('--test')

-extra_args = params.get('yt_dlp_extra_args')
-if extra_args:
-cmd.extend(shlex.split(extra_args))
-if original_url:
-cmd.append(original_url)
+existing_extra = shlex.split(params.get('yt_dlp_extra_args') or '')
+final_extra_args = existing_extra + py_extra_args
+if final_extra_args:
+cmd.extend(['--extra-ytdlp-args', shlex.join(final_extra_args)])

-copy_paste_cmd = ' '.join(shlex.quote(arg) for arg in cmd)
-logger.info(f"Executing yt-dlp command for format '{format_selector}': {copy_paste_cmd}")
-process = subprocess.run(cmd, capture_output=True, text=True, timeout=3600)
+elif downloader == 'aria-rpc':
+cmd.extend([
+'--aria-host', params.get('aria_host', '172.17.0.1'),
+'--aria-port', str(params.get('aria_port', 6800)),
+'--aria-secret', params.get('aria_secret'),
+'--wait', '--auto-merge-fragments',
+'--fragments-dir', download_dir,
+'--output-dir', download_dir,
+])
+if params.get('yt_dlp_cleanup_mode'):
+cmd.append('--cleanup')

-if process.stdout:
-logger.info(f"yt-dlp STDOUT for format '{format_selector}':\n{process.stdout}")
-if process.stderr:
-# yt-dlp often prints progress and informational messages to stderr
-logger.info(f"yt-dlp STDERR for format '{format_selector}':\n{process.stderr}")
+elif downloader == 'cli':
+cmd.extend(['--output-dir', download_dir])
+# The 'cli' tool is the old yt-dlp wrapper, so it takes similar arguments.
+cli_extra_args = []
+if params.get('fragment_retries'):
+cli_extra_args.extend(['--fragment-retries', str(params['fragment_retries'])])
+if params.get('limit_rate'):
+cli_extra_args.extend(['--limit-rate', params['limit_rate']])
+if params.get('socket_timeout'):
+cli_extra_args.extend(['--socket-timeout', str(params['socket_timeout'])])
+if params.get('min_sleep_interval'):
+cli_extra_args.extend(['--sleep-interval', str(params['min_sleep_interval'])])
+if params.get('max_sleep_interval'):
+cli_extra_args.extend(['--max-sleep-interval', str(params['max_sleep_interval'])])
+if params.get('yt_dlp_test_mode'):
+cli_extra_args.append('--test')

-if process.returncode != 0:
-logger.error(f"yt-dlp failed for format '{format_selector}' with exit code {process.returncode}")
-# STDOUT and STDERR are already logged above.
-raise AirflowException(f"yt-dlp command failed for format '{format_selector}'. {process.stderr}")
+existing_extra = shlex.split(params.get('yt_dlp_extra_args') or '')
+final_extra_args = existing_extra + cli_extra_args
+if final_extra_args:
+cmd.extend(['--extra-ytdlp-args', shlex.join(final_extra_args)])

-# In test mode, files are not created, so we only check that yt-dlp returned filenames.
-# Otherwise, we verify that the files actually exist on disk.
-output_files = [f for f in process.stdout.strip().split('\n') if f]
-if not params.get('yt_dlp_test_mode'):
-output_files = [f for f in output_files if os.path.exists(f)]
+copy_paste_cmd = ' '.join(shlex.quote(arg) for arg in cmd)
+logger.info(f"Executing download command for format '{format_selector}': {copy_paste_cmd}")
+process = subprocess.run(cmd, capture_output=True, text=True, timeout=3600)

-if not output_files:
-log_msg = (f"Test run for format '{format_selector}' did not produce any filenames."
-if params.get('yt_dlp_test_mode') else
-f"Download for format '{format_selector}' finished but no output files exist.")
-exc_msg = (f"Test run for format '{format_selector}' did not produce any filenames."
-if params.get('yt_dlp_test_mode') else
-f"Download for format '{format_selector}' did not produce a file.")
+if process.stdout:
+logger.info(f"Download tool STDOUT for format '{format_selector}':\n{process.stdout}")
+if process.stderr:
+logger.info(f"Download tool STDERR for format '{format_selector}':\n{process.stderr}")

-logger.error(log_msg)
-logger.error(f"Full STDOUT:\n{process.stdout}")
-logger.error(f"Full STDERR:\n{process.stderr}")
-raise AirflowException(exc_msg)
+if process.returncode != 0:
+logger.error(f"Download tool failed for format '{format_selector}' with exit code {process.returncode}")
+raise AirflowException(f"Download command failed for format '{format_selector}'. See logs for details.")

-log_prefix = "SUCCESS (Test Mode):" if params.get('yt_dlp_test_mode') else "SUCCESS:"
-logger.info(f"{log_prefix} Command for format '{format_selector}' complete. Files: {output_files}")
-return output_files
-finally:
-if gost_process:
-logger.info(f"Terminating gost process (PID: {gost_process.pid}) for format '{format_selector}'.")
-gost_process.terminate()
-try:
-gost_process.wait(timeout=5)
-except subprocess.TimeoutExpired:
-gost_process.kill()
-gost_process.wait()
+output_files = []
+for line in process.stdout.strip().split('\n'):
+# For aria-rpc, parse "Download and merge successful: <path>" or "Download successful: <path>"
+match = re.search(r'successful: (.+)', line)
+if match:
+filepath = match.group(1).strip()
+if os.path.exists(filepath):
+output_files.append(filepath)
+else:
+logger.warning(f"File path from aria-rpc output does not exist locally: '{filepath}'")
+# For py/cli, it's just the path
+elif os.path.exists(line.strip()):
+output_files.append(line.strip())
+
+if not params.get('yt_dlp_test_mode') and not output_files:
+raise AirflowException(f"Download for format '{format_selector}' finished but no output files were found or exist.")
+
+log_prefix = "SUCCESS (Test Mode):" if params.get('yt_dlp_test_mode') else "SUCCESS:"
+logger.info(f"{log_prefix} Command for format '{format_selector}' complete. Files: {output_files}")
+return output_files

 def run_ffmpeg_probe(filename):
 """Probes a file with ffmpeg to check for corruption."""
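As a companion to the hunk above, a hedged Python sketch of the 'py' download invocation that the reworked run_yt_dlp_command assembles; the info.json path, output directory, and format selector are assumptions, and only flags that appear in the diff are used.

# Hedged sketch of the download command built for the 'py' tool; paths and format are placeholders.
import shlex
import subprocess

info_json_path = '/opt/airflow/downloadfiles/info_example.json'   # assumption
download_dir = '/opt/airflow/downloadfiles'                       # assumption
cmd = [
    'ytops-client', 'download', 'py',
    '--load-info-json', info_json_path,
    '-f', '18,140',
    '--output-dir', download_dir,
    '--extra-ytdlp-args', shlex.join(['--restrict-filenames', '--test']),
]
process = subprocess.run(cmd, capture_output=True, text=True, timeout=3600)
# For the py/cli tools, each stdout line is expected to be a produced file path.
files = [line.strip() for line in process.stdout.splitlines() if line.strip()]
print(process.returncode, files)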
@@ -1512,7 +1461,7 @@ with DAG(
 'prepend_client_to_account': Param(True, type="boolean", title="[Worker Param] Prepend Client to Account", description="If True, prepends client and timestamp to account names in prefix mode."),
 'machine_id': Param(None, type=["string", "null"]),
 'assigned_proxy_url': Param(None, type=["string", "null"], title="[Worker Param] Assigned Proxy URL", description="A specific proxy URL to use for the request, overriding the server's proxy pool logic."),
-'clients': Param('mweb,web_camoufox,tv', type="string", description="Comma-separated list of clients for token generation. e.g. mweb,tv,web_camoufox"),
+'clients': Param('tv_simply', type="string", description="Comma-separated list of clients for token generation. e.g. mweb,tv,web_camoufox"),
 'timeout': Param(DEFAULT_TIMEOUT, type="integer"),
 'output_path_template': Param("%(title)s [%(id)s].f%(format_id)s.%(ext)s", type="string", title="[Worker Param] Output Path Template", description="Output filename template for yt-dlp. It is highly recommended to include `%(format_id)s` to prevent filename collisions when downloading multiple formats."),
 'on_auth_failure': Param(
@@ -1542,11 +1491,11 @@ with DAG(
 'min_sleep_interval': Param(5, type="integer", title="[Worker Param] Min Sleep Interval", description="Minimum time to sleep between downloads (seconds)."),
 'max_sleep_interval': Param(10, type="integer", title="[Worker Param] Max Sleep Interval", description="Maximum time to sleep between downloads (seconds)."),
 'download_format_preset': Param(
-'custom',
+'formats_2',
 type="string",
 enum=['best_audio', 'formats_0', 'formats_2', 'formats_3', 'custom'],
 title="Download Format Preset",
-description="Select a predefined format string or choose 'custom'. To download multiple formats, this should be a comma-separated list of format IDs (e.g., '137,140').\nformats_0: 18,140\nformats_2: 18,140,299/298/137/136/135/134/133\nformats_3: 18,599,139,140,141,160/269,133/229,134/230,135/231,136/232,137/270,298/311,299/318"
+description="Select a predefined format string or choose 'custom'. To download multiple formats, this should be a comma-separated list of format IDs (e.g., '137,140').\nformats_0: 18,140\nformats_2: 18-dashy,140-dashy,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy\nformats_3: 18,599,139,140,141,160/269,133/229,134/230,135/231,136/232,137/270,298/311,299/318"
 ),
 'download_format_custom': Param(
 '18,140,299/298/137/136/135/134/133',
@@ -1555,18 +1504,15 @@ with DAG(
 description="Custom yt-dlp format string. Used when preset is 'custom'. To download multiple formats, provide a comma-separated list of format IDs (e.g., '137,140')."
 ),
 'downloader': Param(
-'default',
+'cli',
 type="string",
-enum=['default', 'aria2c'],
-title="Downloader",
-description="Choose the downloader for yt-dlp."
-),
-'downloader_args_aria2c': Param(
-'aria2c:-x 4 -k 2M --max-download-limit=3M',
-type="string",
-title="Aria2c Downloader Arguments",
-description="Arguments to pass to yt-dlp's --downloader-args. Used when downloader is 'aria2c'."
+enum=['py', 'aria-rpc', 'cli'],
+title="Download Tool",
+description="Choose the download tool to use: 'py' (native python, recommended), 'aria-rpc' (send to aria2c daemon), 'cli' (legacy yt-dlp wrapper)."
 ),
+'aria_host': Param('172.17.0.1', type="string", title="Aria2c Host", description="For 'aria-rpc' downloader: Host of the aria2c RPC server."),
+'aria_port': Param(6800, type="integer", title="Aria2c Port", description="For 'aria-rpc' downloader: Port of the aria2c RPC server."),
+'aria_secret': Param('SQGCQPLVFQIASMPNPOJYLVGJYLMIDIXDXAIXOTX', type="string", title="Aria2c Secret", description="For 'aria-rpc' downloader: Secret token."),
 'yt_dlp_extra_args': Param(
 '',
 type=["string", "null"],
@@ -72,10 +72,10 @@ DEFAULT_REQUEST_PARAMS_JSON = """{

 # Default settings
 DEFAULT_REDIS_CONN_ID = 'redis_default'
-DEFAULT_TOTAL_WORKERS = 3
+DEFAULT_TOTAL_WORKERS = 8
 DEFAULT_WORKERS_PER_BUNCH = 1
-DEFAULT_WORKER_DELAY_S = 5
-DEFAULT_BUNCH_DELAY_S = 20
+DEFAULT_WORKER_DELAY_S = 1
+DEFAULT_BUNCH_DELAY_S = 1

 DEFAULT_YT_AUTH_SERVICE_IP = Variable.get("YT_AUTH_SERVICE_IP", default_var="172.17.0.1")
 DEFAULT_YT_AUTH_SERVICE_PORT = Variable.get("YT_AUTH_SERVICE_PORT", default_var=9080)
@@ -283,7 +283,7 @@ with DAG(

 # --- Worker Passthrough Parameters ---
 'on_bannable_failure': Param(
-'stop_loop_on_auth_proceed_on_download_error',
+'proceed_loop_under_manual_inspection',
 type="string",
 enum=['stop_loop', 'retry_with_new_account', 'retry_without_ban', 'retry_and_ban_account_only', 'retry_on_connection_error', 'proceed_loop_under_manual_inspection', 'stop_loop_on_auth_proceed_on_download_error'],
 title="[Worker Param] On Bannable Failure Policy",
@@ -294,37 +294,16 @@ with DAG(
 "'proceed_loop_under_manual_inspection': **BEWARE: MANUAL SUPERVISION REQUIRED.** Marks the URL as failed but continues the processing loop. Use this only when you can manually intervene by pausing the dispatcher DAG or creating a lock file (`/opt/airflow/inputfiles/AIRFLOW.PREVENT_URL_PULL.lockfile`) to prevent a runaway failure loop."
 "'stop_loop_on_auth_proceed_on_download_error': **(Default)** Stops the loop on an authentication/token error (like 'stop_loop'), but continues the loop on a download/probe error (like 'proceed...')."
 ),
-'request_params_json': Param(DEFAULT_REQUEST_PARAMS_JSON, type="string", title="[Worker Param] Request Params JSON", description="JSON string with request parameters for the token service.", render_kwargs={"rows": 20, "cols": 120}),
+'request_params_json': Param('{}', type="string", title="[Worker Param] Request Params JSON", description="JSON string with request parameters for the token service."),
 'redis_conn_id': Param(DEFAULT_REDIS_CONN_ID, type="string", description="[Worker Param] Airflow Redis connection ID."),
 'clients': Param(
-'mweb,web_camoufox,tv',
+'tv_simply',
 type="string",
 enum=[
-'mweb,web_camoufox,tv',
+'tv_simply',
 'mweb',
-'web_camoufox',
 'tv',
 'custom',
-'tv,web_safari,mweb,web_camoufox',
-'web_safari',
-'web',
-'web_embedded',
-'web_music',
-'web_creator',
-'web_safari_camoufox',
-'web_embedded_camoufox',
-'web_music_camoufox',
-'web_creator_camoufox',
-'mweb_camoufox',
-'android',
-'android_music',
-'android_creator',
-'android_vr',
-'ios',
-'ios_music',
-'ios_creator',
-'tv_simply',
-'tv_embedded',
 ],
 title="[Worker Param] Clients",
 description="[Worker Param] Comma-separated list of clients for token generation. Full list: web, web_safari, web_embedded, web_music, web_creator, mweb, web_camoufox, web_safari_camoufox, web_embedded_camoufox, web_music_camoufox, web_creator_camoufox, mweb_camoufox, android, android_music, android_creator, android_vr, ios, ios_music, ios_creator, tv, tv_simply, tv_embedded. See DAG documentation for details."
@@ -37,10 +37,10 @@ logger = logging.getLogger(__name__)

 # Default settings
 DEFAULT_REDIS_CONN_ID = 'redis_default'
-DEFAULT_TOTAL_WORKERS = 3
+DEFAULT_TOTAL_WORKERS = 8
 DEFAULT_WORKERS_PER_BUNCH = 1
-DEFAULT_WORKER_DELAY_S = 5
-DEFAULT_BUNCH_DELAY_S = 20
+DEFAULT_WORKER_DELAY_S = 1
+DEFAULT_BUNCH_DELAY_S = 1

 # --- Helper Functions ---

@@ -260,27 +260,24 @@ with DAG(
 type="string",
 enum=['best_audio', 'formats_0', 'formats_2', 'formats_3', 'custom'],
 title="[Worker Param] Download Format Preset",
-description="Select a predefined format string or choose 'custom' to use the value from 'Custom Download Format'.\nformats_0: 18,140\nformats_2: 18,140,299/298/137/136/135/134/133\nformats_3: 18,599,139,140,141,160/269,133/229,134/230,135/231,136/232,137/270,298/311,299/318"
+description="Select a predefined format string or choose 'custom' to use the value from 'Custom Download Format'.\nformats_0: 18,140\nformats_2: 18-dashy,140-dashy,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy\nformats_3: 18,599,139,140,141,160/269,133/229,134/230,135/231,136/232,137/270,298/311,299/318"
 ),
 'download_format_custom': Param(
-'18,140,299/298/137/136/135/134/133',
+'18-dashy,140-dashy,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy',
 type="string",
 title="[Worker Param] Custom Download Format",
 description="Custom yt-dlp format string. Used when preset is 'custom'. E.g., 'ba[ext=m4a]/bestaudio/best'."
 ),
 'downloader': Param(
-'default',
+'cli',
 type="string",
-enum=['default', 'aria2c'],
-title="[Worker Param] Downloader",
-description="Choose the downloader for yt-dlp."
-),
-'downloader_args_aria2c': Param(
-'aria2c:-x 4 -k 2M --max-download-limit=3M',
-type="string",
-title="[Worker Param] Aria2c Downloader Arguments",
-description="Arguments to pass to yt-dlp's --downloader-args. Used when downloader is 'aria2c'."
+enum=['py', 'aria-rpc', 'cli'],
+title="[Worker Param] Download Tool",
+description="Choose the download tool to use: 'py' (native python, recommended), 'aria-rpc' (send to aria2c daemon), 'cli' (legacy yt-dlp wrapper)."
 ),
+'aria_host': Param('172.17.0.1', type="string", title="[Worker Param] Aria2c Host", description="For 'aria-rpc' downloader: Host of the aria2c RPC server. Can be set via Airflow Variable 'YTDLP_ARIA_HOST'."),
+'aria_port': Param(6800, type="integer", title="[Worker Param] Aria2c Port", description="For 'aria-rpc' downloader: Port of the aria2c RPC server. Can be set via Airflow Variable 'YTDLP_ARIA_PORT'."),
+'aria_secret': Param('SQGCQPLVFQIASMPNPOJYLVGJYLMIDIXDXAIXOTX', type="string", title="[Worker Param] Aria2c Secret", description="For 'aria-rpc' downloader: Secret token. Can be set via Airflow Variable 'YTDLP_ARIA_SECRET'."),
 'yt_dlp_extra_args': Param(
 '--restrict-filenames',
 type=["string", "null"],
@@ -380,7 +380,10 @@ def get_url_and_assign_account(**context):

 @task
 def get_token(initial_data: dict, **context):
-"""Makes a single attempt to get a token from the Thrift service."""
+"""Makes a single attempt to get a token by calling the ytops-client get-info tool."""
+import subprocess
+import shlex
+
 ti = context['task_instance']
 params = context['params']

@@ -388,131 +391,89 @@ def get_token(initial_data: dict, **context):
 url = initial_data['url_to_process']
 info_json_dir = Variable.get('DOWNLOADS_TEMP', '/opt/airflow/downloadfiles')

-host, port, timeout = params['service_ip'], int(params['service_port']), int(params.get('timeout', DEFAULT_TIMEOUT))
+host, port = params['service_ip'], int(params['service_port'])
 machine_id = params.get('machine_id') or socket.gethostname()
 clients = params.get('clients')
 request_params_json = params.get('request_params_json', '{}')
 assigned_proxy_url = params.get('assigned_proxy_url')

-# Pretty-print the request parameters for debugging
-try:
-pretty_request_params = json.dumps(json.loads(request_params_json), indent=2)
-logger.info(f"\n--- Request Parameters ---\n{pretty_request_params}\n--- End of Request Parameters ---")
-except (json.JSONDecodeError, TypeError):
-logger.warning("Could not parse request_params_json. Using raw content.")
-logger.info(f"\n--- Raw Request Parameters ---\n{request_params_json}\n--- End of Raw Request Parameters ---")
+video_id = _extract_video_id(url)
+timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+job_dir_name = f"{timestamp}-{video_id or 'unknown'}"
+job_dir_path = os.path.join(info_json_dir, job_dir_name)
+os.makedirs(job_dir_path, exist_ok=True)
+info_json_filename = f"info_{video_id or 'unknown'}_{account_id}_{timestamp}.json"
+info_json_path = os.path.join(job_dir_path, info_json_filename)

-# Construct Airflow log context to pass to the service
-try:
-from airflow.configuration import conf
-remote_base = conf.get('logging', 'remote_base_log_folder')
-log_path = (
-f"{remote_base}/dag_id={ti.dag_id}/run_id={ti.run_id}/"
-f"task_id={ti.task_id}/attempt={ti.try_number}.log"
-)
-airflow_log_context = AirflowLogContext(
-logS3Path=log_path,
-dagId=ti.dag_id,
-runId=ti.run_id,
-taskId=ti.task_id,
-tryNumber=ti.try_number,
-workerHostname=socket.gethostname(),
-queue=ti.queue
-)
-logger.info(f"Constructed Airflow log context for yt-ops service: {airflow_log_context}")
-except Exception as e:
-logger.warning(f"Could not construct full Airflow log context: {e}. Creating a basic one.")
-airflow_log_context = AirflowLogContext(
-dagId=ti.dag_id,
-runId=ti.run_id,
-taskId=ti.task_id,
-tryNumber=ti.try_number,
-workerHostname=socket.gethostname(),
-queue=ti.queue
-)
+cmd = [
+'ytops-client', 'get-info',
+'--host', host,
+'--port', str(port),
+'--profile', account_id,
+'--output', info_json_path,
+'--print-proxy',
+'--verbose',
+'--log-return',
+]
+
+if clients:
+cmd.extend(['--client', clients])
+if machine_id:
+cmd.extend(['--machine-id', machine_id])
+if request_params_json and request_params_json != '{}':
+cmd.extend(['--request-params-json', request_params_json])
+if assigned_proxy_url:
+cmd.extend(['--assigned-proxy-url', assigned_proxy_url])
+
+cmd.append(url)

 logger.info(f"--- Attempting to get token for URL '{url}' with account '{account_id}' (Clients: {clients}) ---")
-client, transport = None, None
-try:
-client, transport = _get_thrift_client(host, port, timeout)
-token_data = client.getOrRefreshToken(
-accountId=account_id,
-updateType=TokenUpdateMode.AUTO,
-url=url,
-clients=clients,
-machineId=machine_id,
-airflowLogContext=airflow_log_context,
-requestParamsJson=request_params_json,
-assignedProxyUrl=assigned_proxy_url
-)
+copy_paste_cmd = ' '.join(shlex.quote(arg) for arg in cmd)
+logger.info(f"Executing command: {copy_paste_cmd}")

-# Log a compact summary of the Thrift response, omitting large/detailed fields.
-summary_token_data = copy(token_data)
-if hasattr(summary_token_data, 'infoJson') and summary_token_data.infoJson:
-summary_token_data.infoJson = f"... ({len(summary_token_data.infoJson)} bytes) ..."
-if hasattr(summary_token_data, 'cookiesBlob') and summary_token_data.cookiesBlob:
-summary_token_data.cookiesBlob = f"... ({len(summary_token_data.cookiesBlob)} bytes) ..."
-# These will be logged separately below.
-if hasattr(summary_token_data, 'requestSummary'):
-summary_token_data.requestSummary = "..."
-if hasattr(summary_token_data, 'communicationLogPaths'):
-summary_token_data.communicationLogPaths = "..."
-logger.info(f"Thrift service response summary: {summary_token_data}")
+process = subprocess.run(cmd, capture_output=True, text=True, timeout=int(params.get('timeout', DEFAULT_TIMEOUT)))

-request_summary = getattr(token_data, 'requestSummary', None)
-if request_summary:
-# Prepending a newline for better separation in logs.
-logger.info(f"\n--- Request Summary ---\n{request_summary}")
+if process.stdout:
+logger.info(f"ytops-client STDOUT:\n{process.stdout}")
+if process.stderr:
+logger.info(f"ytops-client STDERR:\n{process.stderr}")

-communication_log_paths = getattr(token_data, 'communicationLogPaths', None)
-if communication_log_paths:
-logger.info("--- Communication Log Paths ---")
-for path in communication_log_paths:
-logger.info(f" - {path}")
+if process.returncode != 0:
+error_message = "ytops-client failed. See logs for details."
+for line in reversed(process.stderr.strip().split('\n')):
+if 'ERROR' in line or 'Thrift error' in line or 'Connection to server failed' in line:
+error_message = line.strip()
+break

-info_json = getattr(token_data, 'infoJson', None)
|
error_code = 'GET_INFO_CLIENT_FAIL'
|
||||||
if not (info_json and json.loads(info_json)):
|
if "BOT_DETECTED" in process.stderr:
|
||||||
raise AirflowException("Service returned success but info.json was empty or invalid.")
|
error_code = "BOT_DETECTED"
|
||||||
|
elif "BOT_DETECTION_SIGN_IN_REQUIRED" in process.stderr:
|
||||||
video_id = _extract_video_id(url)
|
error_code = "BOT_DETECTION_SIGN_IN_REQUIRED"
|
||||||
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
elif "Connection to server failed" in process.stderr:
|
||||||
|
error_code = "TRANSPORT_ERROR"
|
||||||
# Create a unique directory for this job's artifacts
|
|
||||||
job_dir_name = f"{timestamp}-{video_id or 'unknown'}"
|
|
||||||
job_dir_path = os.path.join(info_json_dir, job_dir_name)
|
|
||||||
os.makedirs(job_dir_path, exist_ok=True)
|
|
||||||
|
|
||||||
info_json_filename = f"info_{video_id or 'unknown'}_{account_id}_{timestamp}.json"
|
|
||||||
info_json_path = os.path.join(job_dir_path, info_json_filename)
|
|
||||||
with open(info_json_path, 'w', encoding='utf-8') as f:
|
|
||||||
f.write(info_json)
|
|
||||||
|
|
||||||
proxy_attr = next((attr for attr in ['socks5Proxy', 'socksProxy', 'socks'] if hasattr(token_data, attr)), None)
|
|
||||||
return {
|
|
||||||
'info_json_path': info_json_path,
|
|
||||||
'socks_proxy': getattr(token_data, proxy_attr) if proxy_attr else None,
|
|
||||||
'ytdlp_command': getattr(token_data, 'ytdlpCommand', None),
|
|
||||||
'successful_account_id': account_id,
|
|
||||||
'original_url': url, # Include original URL for fallback
|
|
||||||
'clients': clients, # Pass clients string for accurate stats
|
|
||||||
}
|
|
||||||
except (PBServiceException, PBUserException, TTransportException) as e:
|
|
||||||
error_context = getattr(e, 'context', None)
|
|
||||||
if isinstance(error_context, str):
|
|
||||||
try: error_context = json.loads(error_context.replace("'", "\""))
|
|
||||||
except: pass
|
|
||||||
|
|
||||||
error_details = {
|
error_details = {
|
||||||
'error_message': getattr(e, 'message', str(e)),
|
'error_message': error_message,
|
||||||
'error_code': getattr(e, 'errorCode', 'TRANSPORT_ERROR'),
|
'error_code': error_code,
|
||||||
'proxy_url': error_context.get('proxy_url') if isinstance(error_context, dict) else None
|
'proxy_url': None
|
||||||
}
|
}
|
||||||
logger.error(f"Thrift call failed for account '{account_id}'. Exception: {error_details['error_message']}")
|
|
||||||
ti.xcom_push(key='error_details', value=error_details)
|
ti.xcom_push(key='error_details', value=error_details)
|
||||||
raise AirflowException(f"Thrift call failed: {error_details['error_message']}")
|
raise AirflowException(f"ytops-client get-info failed: {error_message}")
|
||||||
finally:
|
|
||||||
if transport and transport.isOpen():
|
proxy = None
|
||||||
transport.close()
|
proxy_match = re.search(r"Proxy used: (.*)", process.stderr)
|
||||||
|
if proxy_match:
|
||||||
|
proxy = proxy_match.group(1).strip()
|
||||||
|
|
||||||
|
return {
|
||||||
|
'info_json_path': info_json_path,
|
||||||
|
'socks_proxy': proxy,
|
||||||
|
'ytdlp_command': None,
|
||||||
|
'successful_account_id': account_id,
|
||||||
|
'original_url': url,
|
||||||
|
'clients': clients,
|
||||||
|
}
|
||||||
|
|
||||||
@task.branch
|
@task.branch
|
||||||
def handle_bannable_error_branch(task_id_to_check: str, **context):
|
def handle_bannable_error_branch(task_id_to_check: str, **context):
|
||||||
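For reviewers, a minimal sketch of what the reworked task now shells out to, assuming illustrative values for the account, endpoint and paths (none of these values come from this commit); it mirrors the stderr-based error classification added above rather than reproducing the DAG's exact code.

import shlex
import subprocess

# Hypothetical values for illustration only.
cmd = [
    'ytops-client', 'get-info',
    '--host', 'envoy-thrift-lb', '--port', '9080',
    '--profile', 'account_001',
    '--output', '/opt/airflow/downloadfiles/20240101_000000-abc/info.json',
    '--print-proxy', '--verbose', '--log-return',
    'https://www.youtube.com/watch?v=dQw4w9WgXcQ',
]
print(' '.join(shlex.quote(a) for a in cmd))

proc = subprocess.run(cmd, capture_output=True, text=True, timeout=600)
error_code = 'GET_INFO_CLIENT_FAIL'
if "BOT_DETECTED" in proc.stderr:
    error_code = "BOT_DETECTED"
elif "BOT_DETECTION_SIGN_IN_REQUIRED" in proc.stderr:
    error_code = "BOT_DETECTION_SIGN_IN_REQUIRED"
elif "Connection to server failed" in proc.stderr:
    error_code = "TRANSPORT_ERROR"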
@ -1135,7 +1096,7 @@ with DAG(
'prepend_client_to_account': Param(True, type="boolean", title="[Worker Param] Prepend Client to Account", description="If True, prepends client and timestamp to account names in prefix mode."),
'machine_id': Param(None, type=["string", "null"]),
'assigned_proxy_url': Param(None, type=["string", "null"], title="[Worker Param] Assigned Proxy URL", description="If provided, forces the token service to use this specific proxy for the request."),
- 'clients': Param('mweb', type="string", description="Comma-separated list of clients for token generation. e.g. mweb,tv,web_camoufox"),
+ 'clients': Param('tv_simply', type="string", description="Comma-separated list of clients for token generation. e.g. mweb,tv,web_camoufox"),
'timeout': Param(DEFAULT_TIMEOUT, type="integer"),
'on_bannable_failure': Param('stop_loop_on_auth_proceed_on_download_error', type="string", enum=['stop_loop', 'retry_with_new_account', 'retry_without_ban', 'retry_and_ban_account_only', 'retry_on_connection_error', 'proceed_loop_under_manual_inspection', 'stop_loop_on_auth_proceed_on_download_error']),
'request_params_json': Param(json.dumps(DEFAULT_REQUEST_PARAMS), type="string", title="[Worker Param] Request Params JSON", description="JSON string with request parameters for the token service."),
@ -300,7 +300,7 @@ def download_and_probe(token_data: dict, available_formats: list[str], **context
elif format_preset == 'formats_0':
download_format = '18,140'
elif format_preset == 'formats_2':
- download_format = '18,140,299/298/137/136/135/134/133'
+ download_format = '18-dashy,140-dashy,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy'
elif format_preset == 'formats_3':
download_format = '18,599,139,140,141,160/269,133/229,134/230,135/231,136/232,137/270,298/311,299/318'
else:
@ -311,112 +311,102 @@ def download_and_probe(token_data: dict, available_formats: list[str], **context
retry_on_probe_failure = params.get('retry_on_probe_failure', False)

def run_yt_dlp_command(format_selector: str):
- """Constructs and runs a yt-dlp command, returning a list of final filenames."""
+ """Constructs and runs a yt-ops-client download command, returning a list of final filenames."""
- cmd = [
- 'yt-dlp', '--verbose', '--print-traffic', '--load-info-json', info_json_path,
- '-f', format_selector, '-o', full_output_path,
- '--print', 'filename', '--continue', '--no-progress', '--no-simulate',
- '--no-write-info-json', '--ignore-errors', '--no-playlist',
- ]
-
- if params.get('fragment_retries'):
- cmd.extend(['--fragment-retries', str(params['fragment_retries'])])
- if params.get('limit_rate'):
- cmd.extend(['--limit-rate', params['limit_rate']])
- if params.get('socket_timeout'):
- cmd.extend(['--socket-timeout', str(params['socket_timeout'])])
- if params.get('min_sleep_interval'):
- cmd.extend(['--min-sleep-interval', str(params['min_sleep_interval'])])
- if params.get('max_sleep_interval'):
- cmd.extend(['--max-sleep-interval', str(params['max_sleep_interval'])])
- if params.get('yt_dlp_test_mode'):
- cmd.append('--test')
-
- downloader = params.get('downloader', 'default')
- if proxy and not (downloader == 'aria2c' and proxy.startswith('socks5://')):
+ downloader = params.get('downloader', 'py')
+ cmd = ['ytops-client', 'download', downloader, '--load-info-json', info_json_path, '-f', format_selector]
+
+ if proxy:
cmd.extend(['--proxy', proxy])

- gost_process = None
- try:
- if downloader == 'aria2c':
- cmd.extend(['--downloader', 'aria2c'])
- downloader_args = params.get('downloader_args_aria2c')
- if proxy and proxy.startswith('socks5://'):
- import socket
- from contextlib import closing
- def find_free_port():
- with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s:
- s.bind(('', 0))
- s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
- return s.getsockname()[1]
- local_port = find_free_port()
- http_proxy = f"http://127.0.0.1:{local_port}"
- logger.info(f"Starting gost for format '{format_selector}' to forward {proxy} to {http_proxy}")
- gost_cmd = ['gost', '-L', f'http://127.0.0.1:{local_port}', '-F', proxy]
- gost_process = subprocess.Popen(gost_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
- time.sleep(1)
- if gost_process.poll() is not None:
- stdout, stderr = gost_process.communicate()
- logger.error(f"gost failed to start. Exit: {gost_process.returncode}. Stdout: {stdout.decode()}. Stderr: {stderr.decode()}")
- raise AirflowException("gost proxy tunnel failed to start.")
- user_args = downloader_args[len('aria2c:'):] if downloader_args and downloader_args.startswith('aria2c:') else (downloader_args or "")
- final_args_str = f'aria2c:{user_args.strip()} --http-proxy={http_proxy}'
- cmd.extend(['--downloader-args', final_args_str])
- elif downloader_args:
- cmd.extend(['--downloader-args', downloader_args])
-
- extra_args = params.get('yt_dlp_extra_args')
- if extra_args:
- cmd.extend(shlex.split(extra_args))
- if original_url:
- cmd.append(original_url)
-
- copy_paste_cmd = ' '.join(shlex.quote(arg) for arg in cmd)
- logger.info(f"Executing yt-dlp command for format '{format_selector}': {copy_paste_cmd}")
- process = subprocess.run(cmd, capture_output=True, text=True, timeout=3600)
-
- if process.stdout:
- logger.info(f"yt-dlp STDOUT for format '{format_selector}':\n{process.stdout}")
- if process.stderr:
- # yt-dlp often prints progress and informational messages to stderr
- logger.info(f"yt-dlp STDERR for format '{format_selector}':\n{process.stderr}")
-
- if process.returncode != 0:
- logger.error(f"yt-dlp failed for format '{format_selector}' with exit code {process.returncode}")
- # STDOUT and STDERR are already logged above.
- raise AirflowException(f"yt-dlp command failed for format '{format_selector}'.")
-
- # In test mode, files are not created, so we only check that yt-dlp returned filenames.
- # Otherwise, we verify that the files actually exist on disk.
- output_files = [f for f in process.stdout.strip().split('\n') if f]
- if not params.get('yt_dlp_test_mode'):
- output_files = [f for f in output_files if os.path.exists(f)]
-
- if not output_files:
- log_msg = (f"Test run for format '{format_selector}' did not produce any filenames."
- if params.get('yt_dlp_test_mode') else
- f"Download for format '{format_selector}' finished but no output files exist.")
- exc_msg = (f"Test run for format '{format_selector}' did not produce any filenames."
- if params.get('yt_dlp_test_mode') else
- f"Download for format '{format_selector}' did not produce a file.")
-
- logger.error(log_msg)
- logger.error(f"Full STDOUT:\n{process.stdout}")
- logger.error(f"Full STDERR:\n{process.stderr}")
- raise AirflowException(exc_msg)
-
- log_prefix = "SUCCESS (Test Mode):" if params.get('yt_dlp_test_mode') else "SUCCESS:"
- logger.info(f"{log_prefix} Command for format '{format_selector}' complete. Files: {output_files}")
- return output_files
- finally:
- if gost_process:
- logger.info(f"Terminating gost process (PID: {gost_process.pid}) for format '{format_selector}'.")
- gost_process.terminate()
- try:
- gost_process.wait(timeout=5)
- except subprocess.TimeoutExpired:
- gost_process.kill()
- gost_process.wait()
+ if downloader == 'py':
+ cmd.extend(['--output-dir', download_dir])
+ # The 'py' tool maps many yt-dlp flags via --extra-ytdlp-args
+ py_extra_args = []
+ if params.get('fragment_retries'):
+ py_extra_args.extend(['--fragment-retries', str(params['fragment_retries'])])
+ if params.get('limit_rate'):
+ py_extra_args.extend(['--limit-rate', params['limit_rate']])
+ if params.get('socket_timeout'):
+ py_extra_args.extend(['--socket-timeout', str(params['socket_timeout'])])
+ if params.get('min_sleep_interval'):
+ py_extra_args.extend(['--sleep-interval', str(params['min_sleep_interval'])])
+ if params.get('max_sleep_interval'):
+ py_extra_args.extend(['--max-sleep-interval', str(params['max_sleep_interval'])])
+ if params.get('yt_dlp_test_mode'):
+ py_extra_args.append('--test')
+
+ existing_extra = shlex.split(params.get('yt_dlp_extra_args') or '')
+ final_extra_args = existing_extra + py_extra_args
+ if final_extra_args:
+ cmd.extend(['--extra-ytdlp-args', shlex.join(final_extra_args)])
+
+ elif downloader == 'aria-rpc':
+ cmd.extend([
+ '--aria-host', params.get('aria_host', '172.17.0.1'),
+ '--aria-port', str(params.get('aria_port', 6800)),
+ '--aria-secret', params.get('aria_secret'),
+ '--wait', '--auto-merge-fragments',
+ '--fragments-dir', download_dir,
+ '--output-dir', download_dir,
+ ])
+ if params.get('yt_dlp_cleanup_mode'):
+ cmd.append('--cleanup')
+
+ elif downloader == 'cli':
+ cmd.extend(['--output-dir', download_dir])
+ # The 'cli' tool is the old yt-dlp wrapper, so it takes similar arguments.
+ cli_extra_args = []
+ if params.get('fragment_retries'):
+ cli_extra_args.extend(['--fragment-retries', str(params['fragment_retries'])])
+ if params.get('limit_rate'):
+ cli_extra_args.extend(['--limit-rate', params['limit_rate']])
+ if params.get('socket_timeout'):
+ cli_extra_args.extend(['--socket-timeout', str(params['socket_timeout'])])
+ if params.get('min_sleep_interval'):
+ cli_extra_args.extend(['--sleep-interval', str(params['min_sleep_interval'])])
+ if params.get('max_sleep_interval'):
+ cli_extra_args.extend(['--max-sleep-interval', str(params['max_sleep_interval'])])
+ if params.get('yt_dlp_test_mode'):
+ cli_extra_args.append('--test')
+
+ existing_extra = shlex.split(params.get('yt_dlp_extra_args') or '')
+ final_extra_args = existing_extra + cli_extra_args
+ if final_extra_args:
+ cmd.extend(['--extra-ytdlp-args', shlex.join(final_extra_args)])
+
+ copy_paste_cmd = ' '.join(shlex.quote(arg) for arg in cmd)
+ logger.info(f"Executing download command for format '{format_selector}': {copy_paste_cmd}")
+ process = subprocess.run(cmd, capture_output=True, text=True, timeout=3600)
+
+ if process.stdout:
+ logger.info(f"Download tool STDOUT for format '{format_selector}':\n{process.stdout}")
+ if process.stderr:
+ logger.info(f"Download tool STDERR for format '{format_selector}':\n{process.stderr}")
+
+ if process.returncode != 0:
+ logger.error(f"Download tool failed for format '{format_selector}' with exit code {process.returncode}")
+ raise AirflowException(f"Download command failed for format '{format_selector}'. See logs for details.")
+
+ output_files = []
+ for line in process.stdout.strip().split('\n'):
+ # For aria-rpc, parse "Download and merge successful: <path>" or "Download successful: <path>"
+ match = re.search(r'successful: (.+)', line)
+ if match:
+ filepath = match.group(1).strip()
+ if os.path.exists(filepath):
+ output_files.append(filepath)
+ else:
+ logger.warning(f"File path from aria-rpc output does not exist locally: '{filepath}'")
+ # For py/cli, it's just the path
+ elif os.path.exists(line.strip()):
+ output_files.append(line.strip())
+
+ if not params.get('yt_dlp_test_mode') and not output_files:
+ raise AirflowException(f"Download for format '{format_selector}' finished but no output files were found or exist.")
+
+ log_prefix = "SUCCESS (Test Mode):" if params.get('yt_dlp_test_mode') else "SUCCESS:"
+ logger.info(f"{log_prefix} Command for format '{format_selector}' complete. Files: {output_files}")
+ return output_files

def run_ffmpeg_probe(filename):
"""Probes a file with ffmpeg to check for corruption."""
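The copy_paste_cmd logged above makes the new path easy to reproduce by hand. Roughly, the three downloader choices produce commands of the following shape; the job directory, proxy and secret below are illustrative placeholders, not values from this commit:

ytops-client download py --load-info-json <job>/info.json -f 18,140 --proxy socks5://127.0.0.1:1080 \
  --output-dir <job> --extra-ytdlp-args '--no-part --restrict-filenames --limit-rate 2M'

ytops-client download aria-rpc --load-info-json <job>/info.json -f 299/137 --proxy socks5://127.0.0.1:1080 \
  --aria-host 172.17.0.1 --aria-port 6800 --aria-secret <secret> \
  --wait --auto-merge-fragments --fragments-dir <job> --output-dir <job>

ytops-client download cli --load-info-json <job>/info.json -f 18,140 --output-dir <job> \
  --extra-ytdlp-args '--no-part --restrict-filenames --test'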
@ -824,7 +814,7 @@ with DAG(
type="string",
enum=['best_audio', 'formats_0', 'formats_2', 'formats_3', 'custom'],
title="Download Format Preset",
- description="Select a predefined format string or choose 'custom'. To download multiple formats, this should be a comma-separated list of format IDs (e.g., '137,140').\nformats_0: 18,140\nformats_2: 18,140,299/298/137/136/135/134/133\nformats_3: 18,599,139,140,141,160/269,133/229,134/230,135/231,136/232,137/270,298/311,299/318"
+ description="Select a predefined format string or choose 'custom'. To download multiple formats, this should be a comma-separated list of format IDs (e.g., '137,140').\nformats_0: 18,140\nformats_2: 18-dashy,140-dashy,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy\nformats_3: 18,599,139,140,141,160/269,133/229,134/230,135/231,136/232,137/270,298/311,299/318"
),
'download_format_custom': Param(
'ba[ext=m4a]/bestaudio/best',
@ -833,18 +823,15 @@
description="Custom yt-dlp format string. Used when preset is 'custom'. To download multiple formats, provide a comma-separated list of format IDs (e.g., '137,140')."
),
'downloader': Param(
- 'default',
+ 'cli',
type="string",
- enum=['default', 'aria2c'],
+ enum=['py', 'aria-rpc', 'cli'],
- title="Downloader",
+ title="Download Tool",
- description="Choose the downloader for yt-dlp."
+ description="Choose the download tool to use: 'py' (native python, recommended), 'aria-rpc' (send to aria2c daemon), 'cli' (legacy yt-dlp wrapper)."
- ),
- 'downloader_args_aria2c': Param(
- 'aria2c:-x 4 -k 2M --max-download-limit=3M',
- type="string",
- title="Aria2c Downloader Arguments",
- description="Arguments to pass to yt-dlp's --downloader-args. Used when downloader is 'aria2c'."
),
+ 'aria_host': Param('172.17.0.1', type="string", title="Aria2c Host", description="For 'aria-rpc' downloader: Host of the aria2c RPC server."),
+ 'aria_port': Param(6800, type="integer", title="Aria2c Port", description="For 'aria-rpc' downloader: Port of the aria2c RPC server."),
+ 'aria_secret': Param('SQGCQPLVFQIASMPNPOJYLVGJYLMIDIXDXAIXOTX', type="string", title="Aria2c Secret", description="For 'aria-rpc' downloader: Secret token."),
'yt_dlp_extra_args': Param(
'--no-part --restrict-filenames',
type=["string", "null"],
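A hypothetical trigger configuration exercising the new parameters might look like the following; the values are illustrative, only the parameter names come from the diff:

dag_run_conf = {
    "downloader": "aria-rpc",          # one of 'py', 'aria-rpc', 'cli'
    "aria_host": "172.17.0.1",
    "aria_port": 6800,
    "aria_secret": "<redacted>",
    "yt_dlp_extra_args": "--no-part --restrict-filenames",
}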
@ -5,6 +5,9 @@
vars_files:
- "{{ inventory_dir }}/group_vars/all/generated_vars.yml"
- "{{ inventory_dir }}/group_vars/all/vault.yml"
+ vars:
+ envoy_port: 9980
+ envoy_admin_port: 9981
pre_tasks:
- name: Announce master deployment
debug:
ytops_client/__init__.py (new file, 1 line)
@ -0,0 +1 @@
# This file makes 'ytops_client' a Python package.

ytops_client/cli.py (new file, 88 lines)
@ -0,0 +1,88 @@
#!/usr/bin/env python3
import sys
import argparse

# Import the functions that define and execute the logic for each subcommand
from .list_formats_tool import add_list_formats_parser, main_list_formats
from .get_info_tool import add_get_info_parser, main_get_info
from .download_tool import add_download_parser, main_download
from .stress_policy_tool import add_stress_policy_parser, main_stress_policy
from .stress_formats_tool import add_stress_formats_parser, main_stress_formats
from .cookie_tool import add_cookie_tool_parser, main_cookie_tool
from .download_aria_tool import add_download_aria_parser, main_download_aria
from .download_native_py_tool import add_download_native_py_parser, main_download_native_py

def main():
"""
Main entry point for the yt-ops-client CLI.
Parses arguments and dispatches to the appropriate subcommand function.
"""
# Workaround for argparse behavior with positional arguments that start with a hyphen.
# If the command is 'get-info' and the last argument looks like a video ID
# starting with a '-', we insert '--' before it to tell argparse to treat it
# as a positional argument, not an option. This assumes the URL is the last argument.
if len(sys.argv) >= 3 and sys.argv[1] == 'get-info':
last_arg = sys.argv[-1]
# A YouTube video ID is 11 characters.
if last_arg.startswith('-') and len(last_arg) == 11:
import re
if re.fullmatch(r'-[a-zA-Z0-9_-]{10}', last_arg):
sys.argv.insert(len(sys.argv) - 1, '--')

parser = argparse.ArgumentParser(
description="YT Ops Client Tools",
formatter_class=argparse.RawTextHelpFormatter
)
subparsers = parser.add_subparsers(dest='command', help='Available sub-commands')

# Add subparsers from each tool module
add_list_formats_parser(subparsers)
add_get_info_parser(subparsers)

# Create a top-level 'download' command with its own subcommands
download_parser = subparsers.add_parser(
'download',
help='Download using different methods.',
description='Provides access to various download tools. Use "download <method> --help" for details.'
)
download_subparsers = download_parser.add_subparsers(dest='download_command', help='Available downloaders', required=True)
add_download_parser(download_subparsers) # Adds 'cli' subcommand
add_download_native_py_parser(download_subparsers) # Adds 'py' subcommand
add_download_aria_parser(download_subparsers) # Adds 'aria-rpc' subcommand

add_stress_policy_parser(subparsers)
add_stress_formats_parser(subparsers)
add_cookie_tool_parser(subparsers)

args = parser.parse_args()

# If no command is provided, print help and exit.
if not args.command:
parser.print_help()
return 1

# Dispatch to the correct main function based on the command
if args.command == 'list-formats':
return main_list_formats(args)
elif args.command == 'get-info':
return main_get_info(args)
elif args.command == 'download':
if args.download_command == 'cli':
return main_download(args)
elif args.download_command == 'py':
return main_download_native_py(args)
elif args.download_command == 'aria-rpc':
return main_download_aria(args)
elif args.command == 'stress-policy':
return main_stress_policy(args)
elif args.command == 'stress-formats':
return main_stress_formats(args)
elif args.command == 'convert-cookies':
return main_cookie_tool(args)

# This path should not be reachable if a command is required or handled above.
parser.print_help()
return 1

if __name__ == "__main__":
sys.exit(main())
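A quick illustration of the argv workaround described in the comments above, using a hypothetical 11-character video ID; after the rewrite, argparse treats the ID as a positional argument rather than an option:

import re

argv = ['ytops-client', 'get-info', '--profile', 'acct_01', '-AbCdEfGhIj']  # hypothetical ID
last = argv[-1]
if argv[1] == 'get-info' and last.startswith('-') and len(last) == 11 and re.fullmatch(r'-[a-zA-Z0-9_-]{10}', last):
    argv.insert(len(argv) - 1, '--')
print(argv)
# ['ytops-client', 'get-info', '--profile', 'acct_01', '--', '-AbCdEfGhIj']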
ytops_client/cookie_tool.py (new file, 139 lines)
@ -0,0 +1,139 @@
#!/usr/bin/env python3
"""
Tool to convert JSON cookies to the standard Netscape txt format.
"""

import argparse
import json
import sys
import logging

# Configure logging
logger = logging.getLogger('cookie_tool')

def convert_json_to_netscape(json_data):
"""
Converts a list of cookie dictionaries to a Netscape format string.
"""
netscape_cookies = []
# The header is optional but good practice for some tools.
netscape_cookies.append("# Netscape HTTP Cookie File")
netscape_cookies.append("# http://www.netscape.com/newsref/std/cookie_spec.html")
netscape_cookies.append("# This is a generated file! Do not edit.")
netscape_cookies.append("")

if not isinstance(json_data, list):
raise TypeError("Input JSON must be a list of cookie objects.")

for cookie in json_data:
if not isinstance(cookie, dict):
logger.warning(f"Skipping non-dictionary item in JSON list: {cookie}")
continue

domain = cookie.get('domain', '')
# The 'hostOnly' flag determines if the domain is accessible to subdomains.
# Netscape format's flag is TRUE if subdomains can access it.
# So, hostOnly=false means flag=TRUE.
# A leading dot in the domain also implies this for some implementations.
if domain.startswith('.'):
include_subdomains = 'TRUE'
else:
include_subdomains = 'FALSE' if cookie.get('hostOnly', True) else 'TRUE'

path = cookie.get('path', '/')
secure = 'TRUE' if cookie.get('secure', False) else 'FALSE'

# Expiration date. If session cookie or no expiration, use 0.
if cookie.get('session', False) or 'expirationDate' not in cookie or cookie['expirationDate'] is None:
expires = 0
else:
expires = int(cookie['expirationDate'])

name = cookie.get('name', '')
value = str(cookie.get('value', ''))

# Skip cookies without essential fields
if not domain or not name:
logger.warning(f"Skipping cookie with missing domain or name: {cookie}")
continue

netscape_cookies.append(
f"{domain}\t{include_subdomains}\t{path}\t{secure}\t{expires}\t{name}\t{value}"
)

return "\n".join(netscape_cookies)

def add_cookie_tool_parser(subparsers):
"""Add the parser for the 'convert-cookies' command."""
parser = subparsers.add_parser(
'convert-cookies',
description='Convert JSON cookies to Netscape format.',
formatter_class=argparse.RawTextHelpFormatter,
help='Convert JSON cookies to Netscape format.',
epilog="""
Reads a JSON array of cookie objects from stdin and prints the
Netscape cookie file format to stdout.

Example JSON input format (per cookie):
{
"domain": ".example.com",
"hostOnly": false,
"path": "/",
"secure": true,
"expirationDate": 1672531199,
"name": "my_cookie",
"value": "my_value"
}

Example usage:
cat cookies.json | yt-ops-client convert-cookies > cookies.txt
"""
)
parser.add_argument(
'input_file',
nargs='?',
type=argparse.FileType('r', encoding='utf-8'),
default=sys.stdin,
help="Path to the JSON cookie file. Reads from stdin if not provided."
)
parser.add_argument(
'-o', '--output',
type=argparse.FileType('w', encoding='utf-8'),
default=sys.stdout,
help="Output file path for the Netscape cookies. Defaults to stdout."
)
parser.add_argument('--verbose', action='store_true', help='Enable verbose logging.')
return parser

def main_cookie_tool(args):
"""Main logic for the 'convert-cookies' command."""
if args.verbose:
logging.getLogger().setLevel(logging.DEBUG)
logging.basicConfig(level=logging.DEBUG, format='%(levelname)s: %(message)s', stream=sys.stderr)
else:
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s', stream=sys.stderr)

try:
json_content = args.input_file.read()
if not json_content.strip():
logger.error("Input is empty.")
return 1

cookie_data = json.loads(json_content)
netscape_string = convert_json_to_netscape(cookie_data)

args.output.write(netscape_string + '\n')

if args.output is not sys.stdout:
logger.info(f"Successfully converted cookies to {args.output.name}")

return 0
except json.JSONDecodeError:
logger.error("Invalid JSON provided. Please check the input file.")
return 1
except TypeError as e:
logger.error(f"Error processing JSON: {e}")
return 1
except Exception as e:
logger.error(f"An unexpected error occurred: {e}", exc_info=args.verbose)
return 1
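As a sanity check of the conversion rules above, feeding the epilog's example cookie through convert_json_to_netscape should yield, after the three header comments and blank line, a single tab-separated row:

from ytops_client.cookie_tool import convert_json_to_netscape

cookies = [{
    "domain": ".example.com", "hostOnly": False, "path": "/", "secure": True,
    "expirationDate": 1672531199, "name": "my_cookie", "value": "my_value",
}]
print(convert_json_to_netscape(cookies).splitlines()[-1])
# .example.com<TAB>TRUE<TAB>/<TAB>TRUE<TAB>1672531199<TAB>my_cookie<TAB>my_value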
ytops_client/download_aria_tool.py (new file, 687 lines)
@ -0,0 +1,687 @@
#!/usr/bin/env python3
"""
Tool to send a download to an aria2c daemon via RPC.
"""

import argparse
import json
import logging
import sys
import os
import glob
import shutil
import re
import shlex
import time
from urllib.parse import urljoin

try:
import aria2p
from aria2p.utils import human_readable_bytes
except ImportError:
print("aria2p is not installed. Please install it with: pip install aria2p", file=sys.stderr)
sys.exit(1)

logger = logging.getLogger('download_aria_tool')

class TimeoutError(Exception):
pass

def add_download_aria_parser(subparsers):
"""Add the parser for the 'download aria-rpc' command."""
parser = subparsers.add_parser(
'aria-rpc',
description='Send a download to an aria2c daemon via RPC, using an info.json from stdin or a file.',
formatter_class=argparse.RawTextHelpFormatter,
help='Download a specific format using aria2c RPC.',
epilog="""
Usage Notes for Fragmented Downloads (e.g., DASH):

To download and automatically merge fragmented formats, you must:
1. Use '--wait' to make the operation synchronous.
2. Use '--auto-merge-fragments' to enable the merge logic.
3. Ensure this script has access to the directory where aria2c saves files.

Example for a remote aria2c daemon:
- The remote daemon saves files to '/srv/downloads' on its machine.
- This directory is mounted locally at '/mnt/remote_aria2_downloads'.

cat latest-info.json | yt-ops-client download aria-rpc -f "299/137" \\
--wait --auto-merge-fragments \\
--remote-dir /srv/downloads \\
--fragments-dir /mnt/remote_aria2_downloads
"""
)
parser.add_argument('--load-info-json', type=argparse.FileType('r', encoding='utf-8'), help="Path to the info.json file. If not provided, reads from stdin.")
parser.add_argument('-f', '--format', required=True, help='The format ID to download. Supports yt-dlp style format selectors (e.g., "137/136,140").')
parser.add_argument('--output-dir', help='Local directory to save the final merged file. Defaults to the current directory.')
parser.add_argument('--fragments-dir', help='The local path where this script should look for downloaded fragments. If the aria2c daemon is remote, this should be a local mount point corresponding to --remote-dir. Defaults to --output-dir.')
parser.add_argument('--remote-dir', help='The absolute path to the download directory on the remote aria2c host. This is passed via RPC.')
parser.add_argument('--aria-host', default='localhost', help='The host of the aria2c RPC server. Default: localhost.')
parser.add_argument('--aria-port', type=int, default=6800, help='The port of the aria2c RPC server. Default: 6800.')
parser.add_argument('--aria-secret', help='The secret token for the aria2c RPC server (often required, e.g., "SQGCQPLVFQIASMPNPOJYLVGJYLMIDIXDXAIXOTX").')
parser.add_argument('--proxy', help='Proxy to use for the download, e.g., "socks5://127.0.0.1:1080".')
parser.add_argument('--downloader-args', help='Arguments for aria2c, in yt-dlp format (e.g., "aria2c:[-x 8, -k 1M]").')
parser.add_argument('--wait', action='store_true', help='Wait for the download to complete and report its status. Note: This makes the operation synchronous and will block until the download finishes.')
parser.add_argument('--wait-timeout', help='Timeout in seconds for waiting on downloads. Use "auto" to calculate based on a minimum speed of 200KiB/s. Requires --wait. Default: no timeout.')
parser.add_argument('--auto-merge-fragments', action='store_true', help='Automatically merge fragments after download. Requires --wait and assumes the script has filesystem access to the aria2c host.')
parser.add_argument('--remove-fragments-after-merge', action='store_true', help='Delete individual fragment files after a successful merge. Requires --auto-merge-fragments.')
parser.add_argument('--cleanup', action='store_true', help='After a successful download, remove the final file(s) from the filesystem. For fragmented downloads, this implies --remove-fragments-after-merge.')
parser.add_argument('--remove-on-complete', action=argparse.BooleanOptionalAction, default=True, help='Remove the download from aria2c history on successful completion. Use --no-remove-on-complete to disable. May fail on older aria2c daemons.')
parser.add_argument('--purge-on-complete', action='store_true', help='Use aria2.purgeDownloadResult to clear ALL completed/failed downloads from history on success. Use as a workaround for older daemons.')
parser.add_argument('--verbose', action='store_true', help='Enable verbose output for this script.')
return parser

def cleanup_aria_download(api, downloads):
"""Pause and remove downloads from aria2c."""
if not downloads:
return
try:
logger.info(f"Attempting to clean up {len(downloads)} download(s) from aria2c...")
# Filter out downloads that might already be gone
valid_downloads = [d for d in downloads if hasattr(d, 'gid')]
if not valid_downloads:
logger.info("No valid downloads to clean up.")
return
api.pause(valid_downloads)
# Give aria2c a moment to process the pause command before removing
time.sleep(0.5)
api.remove(valid_downloads)
logger.info("Cleanup successful.")
except Exception as e:
logger.warning(f"An error occurred during aria2c cleanup: {e}")

def parse_aria_error(download):
"""Parses an aria2p Download object to get a detailed error message."""
error_code = download.error_code
error_message = download.error_message

if not error_message:
return f"Unknown aria2c error (Code: {error_code})"

# Check for common HTTP errors in the message
http_status_match = re.search(r'HTTP status (\d+)', error_message)
if http_status_match:
status_code = int(http_status_match.group(1))
if status_code == 403:
return f"HTTP Error 403: Forbidden. The URL may have expired or requires valid cookies/headers."
elif status_code == 404:
return f"HTTP Error 404: Not Found. The resource is unavailable."
else:
return f"HTTP Error {status_code}."

if "Timeout" in error_message or "timed out" in error_message.lower():
return "Download timed out."

# Fallback to the raw error message
return f"Aria2c error (Code: {error_code}): {error_message}"

def parse_aria_args_to_options(args_str):
"""
Parses yt-dlp style downloader args for aria2c.
Example: "aria2c:[-x 8, -k 1M]" or just "-x 8 -k 1M"
Returns a dictionary of options for aria2p.
"""
if not args_str or not args_str.strip():
return {}

inner_args_str = args_str.strip()
match = re.match(r'aria2c:\s*\[(.*)\]', inner_args_str)
if match:
# Handle yt-dlp's format
inner_args_str = match.group(1).replace(',', ' ')
else:
# If it doesn't match, assume the whole string is a set of arguments.
logger.debug(f"Downloader args '{args_str}' does not match 'aria2c:[...]' format. Parsing as a raw argument string.")

arg_list = shlex.split(inner_args_str)

# Use a mini-parser to handle CLI-style args
parser = argparse.ArgumentParser(add_help=False, prog="aria2c_args_parser")
parser.add_argument('-x', '--max-connection-per-server')
parser.add_argument('-k', '--min-split-size')
parser.add_argument('-s', '--split')
parser.add_argument('--all-proxy')

try:
# We only care about known arguments
known_args, unknown_args = parser.parse_known_args(arg_list)
if unknown_args:
logger.warning(f"Ignoring unknown arguments in --downloader-args: {unknown_args}")
# Convert to dict, removing None values
return {k: v for k, v in vars(known_args).items() if v is not None}
except Exception:
logger.warning(f"Failed to parse arguments inside --downloader-args: '{inner_args_str}'")
return {}
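Expected behaviour of the option parser above, shown on two representative inputs; the printed dictionaries assume the argparse dests defined in the code (hyphens become underscores):

from ytops_client.download_aria_tool import parse_aria_args_to_options

print(parse_aria_args_to_options('aria2c:[-x 8, -k 1M]'))
# {'max_connection_per_server': '8', 'min_split_size': '1M'}
print(parse_aria_args_to_options('-x 4 -s 4 --unknown-flag 1'))
# {'max_connection_per_server': '4', 'split': '4'}   (unknown args are warned about and ignored)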
def main_download_aria(args):
"""Main logic for the 'download-aria' command."""
log_level = logging.DEBUG if args.verbose else logging.INFO
logging.basicConfig(level=log_level, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', stream=sys.stderr)

if args.remove_fragments_after_merge and not args.auto_merge_fragments:
logger.error("--remove-fragments-after-merge requires --auto-merge-fragments.")
return 1
if args.auto_merge_fragments and not args.wait:
logger.error("--auto-merge-fragments requires --wait.")
return 1
if args.wait_timeout and not args.wait:
logger.error("--wait-timeout requires --wait.")
return 1

if args.wait:
logger.info("Will wait for download to complete and report status. This is a synchronous operation.")
else:
logger.info("Will submit download and exit immediately (asynchronous).")

info_json_content = ""
input_source_name = ""
if args.load_info_json:
info_json_content = args.load_info_json.read()
input_source_name = args.load_info_json.name
else:
info_json_content = sys.stdin.read()
input_source_name = "stdin"

if not info_json_content.strip():
logger.error(f"Failed to read info.json from {input_source_name}. Input is empty.")
return 1

try:
info_data = json.loads(info_json_content)
logger.info(f"Successfully loaded info.json from {input_source_name}.")
except json.JSONDecodeError:
logger.error(f"Failed to parse info.json from {input_source_name}. Is the input valid JSON?")
return 1

# Find the requested format, supporting yt-dlp style selectors
target_format = None
# A format selector can be a comma-separated list of preferences,
# where each preference can be a slash-separated list of format_ids.
# e.g., "299/137/136,140" means try 299, then 137, then 136, then 140.
format_preferences = [item.strip() for sublist in (i.split('/') for i in args.format.split(',')) for item in sublist if item.strip()]

available_formats_map = {f['format_id']: f for f in info_data.get('formats', []) if 'format_id' in f}

for format_id in format_preferences:
if format_id in available_formats_map:
target_format = available_formats_map[format_id]
logger.info(f"Selected format ID '{format_id}' from selector '{args.format}'.")
break

if not target_format:
logger.error(f"No suitable format found for selector '{args.format}' in info.json.")
return 1

# Get file size for auto-timeout and dynamic options
total_filesize = target_format.get('filesize') or target_format.get('filesize_approx')

# Construct filename
video_id = info_data.get('id', 'unknown_video_id')
title = info_data.get('title', 'unknown_title')
ext = target_format.get('ext', 'mp4')
# Sanitize title for filename
safe_title = "".join([c for c in title if c.isalpha() or c.isdigit() or c in (' ', '-', '_')]).rstrip()
filename = f"{safe_title} [{video_id}].f{target_format['format_id']}.{ext}"

# Prepare options for aria2
aria_options = {
# Options from yt-dlp's aria2c integration for performance and reliability
'max-connection-per-server': 16,
'split': 16,
'min-split-size': '1M',
'http-accept-gzip': 'true',
'file-allocation': 'none',
}

if args.proxy:
aria_options['all-proxy'] = args.proxy

custom_options = parse_aria_args_to_options(args.downloader_args)

# Dynamically set min-split-size if not overridden by user
if 'min_split_size' not in custom_options and total_filesize:
if total_filesize > 100 * 1024 * 1024: # 100 MiB
aria_options['min-split-size'] = '5M'
logger.info("File is > 100MiB, dynamically setting min-split-size to 5M.")

if custom_options:
aria_options.update(custom_options)
logger.info(f"Applied custom aria2c options from --downloader-args: {custom_options}")

aria_options['out'] = filename

# Add headers from info.json, mimicking yt-dlp's behavior for aria2c
headers = target_format.get('http_headers')
if headers:
header_list = [f'{key}: {value}' for key, value in headers.items()]
aria_options['header'] = header_list
logger.info(f"Adding {len(header_list)} HTTP headers to the download.")
if args.verbose:
for h in header_list:
if h.lower().startswith('cookie:'):
logger.debug(f" Header: Cookie: [REDACTED]")
else:
logger.debug(f" Header: {h}")

is_fragmented = 'fragments' in target_format
if not is_fragmented:
url = target_format.get('url')
if not url:
logger.error(f"Format ID '{args.format}' has neither a URL nor fragments.")
return 1

try:
logger.info(f"Connecting to aria2c RPC at http://{args.aria_host}:{args.aria_port}")
client = aria2p.Client(
host=f"http://{args.aria_host}",
port=args.aria_port,
secret=args.aria_secret or ""
)
api = aria2p.API(client)

timeout_seconds = None
if args.wait_timeout:
if args.wait_timeout.lower() == 'auto':
if total_filesize:
# Min speed: 200 KiB/s. Min timeout: 30s.
min_speed = 200 * 1024
calculated_timeout = int(total_filesize / min_speed)
timeout_seconds = max(30, calculated_timeout)
total_filesize_hr, _ = human_readable_bytes(total_filesize)
logger.info(f"Auto-calculated timeout: {timeout_seconds}s (based on {total_filesize_hr} at 200KiB/s).")
else:
logger.warning("Cannot use 'auto' timeout: file size not available in info.json. Timeout disabled.")
else:
try:
timeout_seconds = int(args.wait_timeout)
if timeout_seconds <= 0:
raise ValueError
except ValueError:
logger.error(f"Invalid --wait-timeout value: '{args.wait_timeout}'. Must be a positive integer or 'auto'.")
return 1

if is_fragmented:
return download_fragments_aria(args, api, target_format, filename, aria_options, timeout_seconds, remote_dir=args.remote_dir)
else:
return download_url_aria(args, api, url, filename, aria_options, timeout_seconds, remote_dir=args.remote_dir)

except Exception as e:
logger.error(f"An error occurred while communicating with aria2c: {e}", exc_info=args.verbose)
return 1
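A worked example of the 'auto' wait timeout computed in main_download_aria above (the file size is illustrative): a 120 MiB format at the assumed 200 KiB/s floor gives int(125829120 / 204800) = 614 seconds, which is above the 30-second minimum, so the wait loop would time out after 614 s.

total_filesize = 120 * 1024 * 1024      # 125829120 bytes (illustrative)
min_speed = 200 * 1024                  # 200 KiB/s, as in the code above
timeout_seconds = max(30, int(total_filesize / min_speed))
print(timeout_seconds)                  # 614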
|
def download_url_aria(args, api, url, filename, aria_options, timeout_seconds, remote_dir=None):
|
||||||
|
"""Handle downloading a single URL with aria2c."""
|
||||||
|
if remote_dir:
|
||||||
|
aria_options['dir'] = remote_dir
|
||||||
|
logger.info(f"Adding download for format '{args.format}' with URL: {url[:70]}...")
|
||||||
|
downloads = api.add_uris([url], options=aria_options)
|
||||||
|
|
||||||
|
if not downloads:
|
||||||
|
logger.error("Failed to add download to aria2c. The API returned an empty result.")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
# Handle older aria2p versions that return a single Download object instead of a list
|
||||||
|
download = downloads[0] if isinstance(downloads, list) else downloads
|
||||||
|
logger.info(f"Successfully added download to aria2c. GID: {download.gid}")
|
||||||
|
|
||||||
|
if args.wait:
|
||||||
|
logger.info(f"Waiting for download {download.gid} to complete...")
|
||||||
|
start_time = time.time()
|
||||||
|
try:
|
||||||
|
while True:
|
||||||
|
if timeout_seconds and (time.time() - start_time > timeout_seconds):
|
||||||
|
raise TimeoutError(f"Download did not complete within {timeout_seconds}s timeout.")
|
||||||
|
|
||||||
|
# Re-fetch the download object to get the latest status
|
||||||
|
download.update()
|
||||||
|
# A download is no longer active if it's complete, errored, paused, or removed.
|
||||||
|
if download.status not in ('active', 'waiting'):
|
||||||
|
break
|
||||||
|
|
||||||
|
progress_info = (
|
||||||
|
f"\rGID {download.gid}: {download.status} "
|
||||||
|
f"{download.progress_string()} "
|
||||||
|
f"({download.download_speed_string()}) "
|
||||||
|
f"ETA: {download.eta_string()}"
|
||||||
|
)
|
||||||
|
sys.stdout.write(progress_info)
|
||||||
|
sys.stdout.flush()
|
||||||
|
time.sleep(0.5)
|
||||||
|
except (KeyboardInterrupt, TimeoutError) as e:
|
||||||
|
sys.stdout.write('\n')
|
||||||
|
if isinstance(e, KeyboardInterrupt):
|
||||||
|
logger.warning("Wait interrupted by user. Cleaning up download...")
|
||||||
|
cleanup_aria_download(api, [download])
|
||||||
|
return 130
|
||||||
|
else: # TimeoutError
|
||||||
|
logger.error(f"Download timed out. Cleaning up... Error: {e}")
|
||||||
|
cleanup_aria_download(api, [download])
|
||||||
|
return 1
|
||||||
|
except aria2p.ClientException as e:
|
||||||
|
# This can happen if the download completes and is removed by aria2c
|
||||||
|
# before we can check its final status. Assume success in this case.
|
||||||
|
logger.warning(f"Could not get final status for GID {download.gid} (maybe removed on completion?): {e}. Assuming success.")
|
||||||
|
print(f"Download for GID {download.gid} presumed successful.")
|
||||||
|
return 0
|
||||||
|
|
||||||
|
sys.stdout.write('\n') # Newline after progress bar
|
||||||
|
|
||||||
|
# Final status check (no need to update again, we have the latest status)
|
||||||
|
if download.status == 'complete':
|
||||||
|
logger.info(f"Download {download.gid} completed successfully.")
|
||||||
|
|
||||||
|
downloaded_filepath_remote = None
|
||||||
|
if download.files:
|
||||||
|
downloaded_filepath_remote = download.files[0].path
|
||||||
|
print(f"Download successful: {downloaded_filepath_remote}")
|
||||||
|
else:
|
||||||
|
print("Download successful, but no file path reported by aria2c.")
|
||||||
|
|
||||||
|
if args.cleanup and downloaded_filepath_remote:
|
||||||
|
local_filepath = None
|
||||||
|
# To map remote path to local, we need remote_dir and a local equivalent.
|
||||||
|
# We'll use fragments_dir as the local equivalent, which defaults to output_dir.
|
||||||
|
local_base_dir = args.fragments_dir or args.output_dir or '.'
|
||||||
|
if remote_dir:
|
||||||
|
if downloaded_filepath_remote.startswith(remote_dir):
|
||||||
|
relative_path = os.path.relpath(downloaded_filepath_remote, remote_dir)
|
||||||
|
local_filepath = os.path.join(local_base_dir, relative_path)
|
||||||
|
else:
|
||||||
|
logger.warning(f"Cleanup: Downloaded file path '{downloaded_filepath_remote}' does not start with remote-dir '{remote_dir}'. Cannot map to local path.")
|
||||||
|
else:
|
||||||
|
logger.warning(f"Cleanup: --remote-dir not specified. Assuming download path is accessible locally as '{downloaded_filepath_remote}'.")
|
||||||
|
local_filepath = downloaded_filepath_remote
|
||||||
|
|
||||||
|
if local_filepath:
|
||||||
|
try:
|
||||||
|
if os.path.exists(local_filepath):
|
||||||
|
os.remove(local_filepath)
|
||||||
|
logger.info(f"Cleanup: Removed downloaded file '{local_filepath}'")
|
||||||
|
else:
|
||||||
|
logger.warning(f"Cleanup: File not found at expected local path '{local_filepath}'. Skipping removal.")
|
||||||
|
except OSError as e:
|
||||||
|
logger.error(f"Cleanup failed: Could not remove file '{local_filepath}': {e}")
|
||||||
|
elif args.cleanup:
|
||||||
|
logger.warning("Cleanup requested, but no downloaded file path was reported by aria2c.")
|
||||||
|
|
||||||
|
if args.purge_on_complete:
|
||||||
|
try:
|
||||||
|
api.purge_download_result()
|
||||||
|
logger.info("Purged all completed/failed downloads from aria2c history.")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to purge download history: {e}")
|
||||||
|
elif args.remove_on_complete:
|
||||||
|
try:
|
||||||
|
api.remove_download_result(download)
|
||||||
|
logger.info(f"Removed download {download.gid} from aria2c history.")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to remove download {download.gid} from history: {e}")
|
||||||
|
|
||||||
|
return 0
|
||||||
|
else:
|
||||||
|
detailed_error = parse_aria_error(download)
|
||||||
|
logger.error(f"Download {download.gid} failed. Error: {detailed_error}")
|
||||||
|
return 1
|
||||||
|
else:
|
||||||
|
print(f"Successfully added download. GID: {download.gid}")
|
||||||
|
return 0
|
||||||
|
|
||||||
|
def download_fragments_aria(args, api, target_format, filename, aria_options, timeout_seconds, remote_dir=None):
|
||||||
|
"""Handle downloading fragmented formats with aria2c."""
|
||||||
|
logger.info(f"Format '{args.format}' is fragmented. Adding all fragments to download queue.")
|
||||||
|
fragment_base_url = target_format.get('fragment_base_url')
|
||||||
|
fragments = target_format['fragments']
|
||||||
|
|
||||||
|
MAX_FRAGMENTS = 50000
|
||||||
|
if len(fragments) > MAX_FRAGMENTS:
|
||||||
|
logger.error(
|
||||||
|
f"The number of fragments ({len(fragments)}) exceeds the safety limit of {MAX_FRAGMENTS}. "
|
||||||
|
f"This is to prevent overwhelming the aria2c server. Aborting."
|
||||||
|
)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
# We need to set the 'dir' option for all fragments if specified.
|
||||||
|
# The 'out' option will be set per-fragment.
|
||||||
|
frag_aria_options = aria_options.copy()
|
||||||
|
frag_aria_options.pop('out', None) # Remove the main 'out' option
|
||||||
|
|
||||||
|
if remote_dir:
|
||||||
|
frag_aria_options['dir'] = remote_dir
|
||||||
|
logger.info(f"Instructing remote aria2c to save fragments to: {remote_dir}")
|
||||||
|
|
||||||
|
base_filename, file_ext = os.path.splitext(filename)
|
||||||
|
|
||||||
|
    calls = []
    for i, fragment in enumerate(fragments):
        frag_url = fragment.get('url')
        if not frag_url:
            if not fragment_base_url:
                logger.error(f"Fragment {i} has no URL and no fragment_base_url is available. Aborting.")
                return 1
            frag_url = urljoin(fragment_base_url, fragment['path'])

        # Use the base filename from the main file, but add fragment identifier
        fragment_filename = f"{base_filename}-Frag{i}{file_ext}"

        current_frag_options = frag_aria_options.copy()
        current_frag_options['out'] = os.path.basename(fragment_filename)

        # Prepare parameters for multicall in the format:
        # {"methodName": "aria2.addUri", "params": [["url"], {"out": "file.mp4"}]}
        # The secret token is automatically added by aria2p.
        params = [[frag_url], current_frag_options]
        call_struct = {
            "methodName": api.client.ADD_URI,
            "params": params
        }
        calls.append(call_struct)

    results = api.client.multicall(calls)
    if not results:
        logger.error("Failed to add fragments to aria2c. The API returned an empty result.")
        return 1

    # The result of a multicall of addUri is a list of lists, where each inner list
    # contains the GID of one download, e.g., [['gid1'], ['gid2']].
    # A failed call for a fragment may result in a fault struct dict instead of a list.
    # We extract GIDs from successful calls.
    gids = [result[0] for result in results if isinstance(result, list) and result]

    if len(gids) != len(fragments):
        failed_count = len(fragments) - len(gids)
        logger.warning(f"{failed_count} out of {len(fragments)} fragments failed to be added to aria2c.")

    if not gids:
        logger.error("Failed to add any fragments to aria2c. All submissions failed.")
        return 1
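    # Illustrative sketch only (not executed by this tool): the `calls` list built
    # above is equivalent to a hand-written aria2 system.multicall payload such as
    # the following, where the URLs and filenames are hypothetical placeholders.
    #
    #   calls = [
    #       {"methodName": "aria2.addUri",
    #        "params": [["https://example.com/frag0.mp4"], {"dir": "/downloads", "out": "video-Frag0.mp4"}]},
    #       {"methodName": "aria2.addUri",
    #        "params": [["https://example.com/frag1.mp4"], {"dir": "/downloads", "out": "video-Frag1.mp4"}]},
    #   ]
    #   gids = [r[0] for r in api.client.multicall(calls) if isinstance(r, list) and r]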
|
||||||
|
|
||||||
|
logger.info(f"Successfully added {len(gids)} fragments to aria2c.")
|
||||||
|
if args.verbose:
|
||||||
|
logger.debug(f"GIDs: {gids}")
|
||||||
|
|
||||||
|
if args.wait:
|
||||||
|
logger.info(f"Waiting for {len(gids)} fragments to complete...")
|
||||||
|
start_time = time.time()
|
||||||
|
downloads_to_cleanup = []
|
||||||
|
try:
|
||||||
|
while True:
|
||||||
|
if timeout_seconds and (time.time() - start_time > timeout_seconds):
|
||||||
|
raise TimeoutError(f"Fragment downloads did not complete within {timeout_seconds}s timeout.")
|
||||||
|
|
||||||
|
downloads = api.get_downloads(gids)
|
||||||
|
downloads_to_cleanup = downloads # Store for potential cleanup
|
||||||
|
# A download is considered "active" if it's currently downloading or waiting in the queue.
|
||||||
|
# It is "not active" if it is complete, errored, paused, or removed.
|
||||||
|
active_downloads = [d for d in downloads if d.status in ('active', 'waiting')]
|
||||||
|
if not active_downloads:
|
||||||
|
break # All downloads are complete or have stopped for other reasons
|
||||||
|
|
||||||
|
for d in active_downloads:
|
||||||
|
d.update()
|
||||||
|
|
||||||
|
completed_count = len(downloads) - len(active_downloads)
|
||||||
|
total_bytes = sum(d.total_length for d in downloads)
|
||||||
|
downloaded_bytes = sum(d.completed_length for d in downloads)
|
||||||
|
total_speed = sum(d.download_speed for d in downloads)
|
||||||
|
progress_percent = (downloaded_bytes / total_bytes * 100) if total_bytes > 0 else 0
|
||||||
|
|
||||||
|
progress_info = (
|
||||||
|
f"\rProgress: {completed_count}/{len(downloads)} fragments | "
|
||||||
|
f"{progress_percent:.1f}% "
|
||||||
|
f"({human_readable_bytes(downloaded_bytes)}/{human_readable_bytes(total_bytes)}) "
|
||||||
|
f"Speed: {human_readable_bytes(total_speed)}/s"
|
||||||
|
)
|
||||||
|
sys.stdout.write(progress_info)
|
||||||
|
sys.stdout.flush()
|
||||||
|
time.sleep(0.5)
|
||||||
|
except (KeyboardInterrupt, TimeoutError) as e:
|
||||||
|
sys.stdout.write('\n')
|
||||||
|
if isinstance(e, KeyboardInterrupt):
|
||||||
|
logger.warning("Wait interrupted by user. Cleaning up fragments...")
|
||||||
|
cleanup_aria_download(api, downloads_to_cleanup)
|
||||||
|
return 130
|
||||||
|
else: # TimeoutError
|
||||||
|
logger.error(f"Download timed out. Cleaning up fragments... Error: {e}")
|
||||||
|
cleanup_aria_download(api, downloads_to_cleanup)
|
||||||
|
return 1
|
||||||
|
except aria2p.ClientException as e:
|
||||||
|
# This can happen if downloads complete and are removed by aria2c
|
||||||
|
# before we can check their final status. Assume success in this case.
|
||||||
|
logger.warning(f"Could not get final status for some fragments (maybe removed on completion?): {e}. Assuming success.")
|
||||||
|
|
||||||
|
sys.stdout.write('\n')
|
||||||
|
|
||||||
|
# Final status check
|
||||||
|
failed_downloads = []
|
||||||
|
try:
|
||||||
|
downloads = api.get_downloads(gids)
|
||||||
|
failed_downloads = [d for d in downloads if d.status != 'complete']
|
||||||
|
except aria2p.ClientException as e:
|
||||||
|
logger.warning(f"Could not perform final status check for fragments (maybe removed on completion?): {e}. Assuming success.")
|
||||||
|
# If we can't check, we assume success based on the earlier wait loop not failing catastrophically.
|
||||||
|
failed_downloads = []
|
||||||
|
|
||||||
|
if failed_downloads:
|
||||||
|
logger.error(f"{len(failed_downloads)} fragments failed to download.")
|
||||||
|
for d in failed_downloads:
|
||||||
|
detailed_error = parse_aria_error(d)
|
||||||
|
logger.error(f" GID {d.gid}: {detailed_error}")
|
||||||
|
return 1
|
||||||
|
else:
|
||||||
|
logger.info("All fragments downloaded successfully.")
|
||||||
|
output_dir = args.output_dir or '.'
|
||||||
|
final_filepath = os.path.join(output_dir, filename)
|
||||||
|
fragments_lookup_dir = args.fragments_dir or output_dir
|
||||||
|
|
||||||
|
if args.auto_merge_fragments:
|
||||||
|
logger.info(f"Attempting to merge fragments into: {final_filepath}")
|
||||||
|
logger.info(f"Searching for fragments in local directory: {os.path.abspath(fragments_lookup_dir)}")
|
||||||
|
try:
|
||||||
|
# base_filename and file_ext are available from earlier in the function
|
||||||
|
# We must escape the base filename in case it contains glob special characters like [ or ].
|
||||||
|
escaped_base = glob.escape(base_filename)
|
||||||
|
search_path = os.path.join(fragments_lookup_dir, f"{escaped_base}-Frag*{file_ext}")
|
||||||
|
fragment_files = glob.glob(search_path)
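# Illustrative note (hypothetical filename): glob.escape() keeps titles with
# brackets from being treated as character classes, e.g.
#   glob.escape("My Video [1080p]")  ->  "My Video [[]1080p]"
# so the search pattern becomes "My Video [[]1080p]-Frag*.mp4" and matches files
# like "My Video [1080p]-Frag0.mp4", "My Video [1080p]-Frag1.mp4", ...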
|
||||||
|
|
||||||
|
if not fragment_files:
|
||||||
|
logger.error(f"No fragment files found with pattern: {search_path}")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
                def fragment_sort_key(f):
                    match = re.search(r'Frag(\d+)', os.path.basename(f))
                    return int(match.group(1)) if match else -1
                fragment_files.sort(key=fragment_sort_key)

                with open(final_filepath, 'wb') as dest_file:
                    for frag_path in fragment_files:
                        with open(frag_path, 'rb') as src_file:
                            shutil.copyfileobj(src_file, dest_file)

                logger.info(f"Successfully merged {len(fragment_files)} fragments into {final_filepath}")
|
||||||
|
|
||||||
|
if args.remove_fragments_after_merge or args.cleanup:
|
||||||
|
logger.info("Removing fragment files...")
|
||||||
|
for frag_path in fragment_files:
|
||||||
|
os.remove(frag_path)
|
||||||
|
logger.info("Fragment files removed.")
|
||||||
|
|
||||||
|
if args.cleanup:
|
||||||
|
try:
|
||||||
|
os.remove(final_filepath)
|
||||||
|
logger.info(f"Cleanup: Removed merged file '{final_filepath}'")
|
||||||
|
except OSError as e:
|
||||||
|
logger.error(f"Cleanup failed: Could not remove merged file '{final_filepath}': {e}")
|
||||||
|
|
||||||
|
print(f"Download and merge successful: {final_filepath}")
|
||||||
|
|
||||||
|
if args.purge_on_complete:
|
||||||
|
try:
|
||||||
|
api.purge_download_result()
|
||||||
|
logger.info("Purged all completed/failed downloads from aria2c history.")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to purge download history: {e}")
|
||||||
|
elif args.remove_on_complete:
|
||||||
|
try:
|
||||||
|
# The `downloads` variable from the last status check should be valid here.
|
||||||
|
api.remove_download_result(downloads)
|
||||||
|
logger.info(f"Removed {len(downloads)} fragment downloads from aria2c history.")
|
||||||
|
except aria2p.ClientException as e:
|
||||||
|
logger.warning(f"Could not remove fragment downloads from history (maybe already gone?): {e}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to remove fragment downloads from history: {e}")
|
||||||
|
|
||||||
|
return 0
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"An error occurred during merging: {e}", exc_info=args.verbose)
|
||||||
|
logger.error("Fragments were downloaded but not merged.")
|
||||||
|
return 1
|
||||||
|
else:
|
||||||
|
print("Download successful. Fragments now need to be merged manually.")
|
||||||
|
print(f"The final merged file should be named: {final_filepath}")
|
||||||
|
print("You can merge them with a command like:")
|
||||||
|
print(f" cat `ls -v '{os.path.join(fragments_lookup_dir, base_filename)}'-Frag*'{file_ext}'` > '{final_filepath}'")
|
||||||
|
|
||||||
|
if args.cleanup:
|
||||||
|
logger.info("Cleanup requested. Removing downloaded fragments...")
|
||||||
|
try:
|
||||||
|
# base_filename and file_ext are available from earlier in the function
|
||||||
|
escaped_base = glob.escape(base_filename)
|
||||||
|
search_path = os.path.join(fragments_lookup_dir, f"{escaped_base}-Frag*{file_ext}")
|
||||||
|
fragment_files = glob.glob(search_path)
|
||||||
|
|
||||||
|
if not fragment_files:
|
||||||
|
logger.warning(f"Cleanup: No fragment files found with pattern: {search_path}")
|
||||||
|
else:
|
||||||
|
for frag_path in fragment_files:
|
||||||
|
os.remove(frag_path)
|
||||||
|
logger.info(f"Removed {len(fragment_files)} fragment files.")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"An error occurred during fragment cleanup: {e}", exc_info=args.verbose)
|
||||||
|
|
||||||
|
if args.purge_on_complete:
|
||||||
|
try:
|
||||||
|
api.purge_download_result()
|
||||||
|
logger.info("Purged all completed/failed downloads from aria2c history.")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to purge download history: {e}")
|
||||||
|
elif args.remove_on_complete:
|
||||||
|
try:
|
||||||
|
# The `downloads` variable from the last status check should be valid here.
|
||||||
|
api.remove_download_result(downloads)
|
||||||
|
logger.info(f"Removed {len(downloads)} fragment downloads from aria2c history.")
|
||||||
|
except aria2p.ClientException as e:
|
||||||
|
logger.warning(f"Could not remove fragment downloads from history (maybe already gone?): {e}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to remove fragment downloads from history: {e}")
|
||||||
|
|
||||||
|
return 0
|
||||||
|
else:
|
||||||
|
print(f"Successfully added {len(gids)} fragments. GIDs: {gids}")
|
||||||
|
print("These fragments will need to be merged manually after download.")
|
||||||
|
return 0
|
||||||
297 ytops_client/download_native_py_tool.py (Normal file)
@ -0,0 +1,297 @@
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Tool to download a specified format using yt-dlp as a Python library.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import contextlib
|
||||||
|
import io
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import shlex
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
try:
|
||||||
|
import yt_dlp
|
||||||
|
except ImportError:
|
||||||
|
print("yt-dlp is not installed. Please install it with: pip install yt-dlp", file=sys.stderr)
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
logger = logging.getLogger('download_native_py_tool')
|
||||||
|
|
||||||
|
# A custom logger for yt-dlp to capture output and key events
class YTDLPLogger:
    def __init__(self):
        self.final_filename = None
        self.is_403 = False
        self.is_timeout = False

    def debug(self, msg):
        # yt-dlp logs the destination file path at the debug level.
        if msg.startswith('[download] Destination:'):
            self.final_filename = msg.split(':', 1)[1].strip()
        elif msg.startswith('[download]') and 'has already been downloaded' in msg:
            match = re.search(r'\[download\]\s+(.*)\s+has already been downloaded', msg)
            if match:
                self.final_filename = match.group(1).strip()
        logger.debug(msg)

    def info(self, msg):
        logger.info(msg)

    def warning(self, msg):
        logger.warning(msg)

    def error(self, msg):
        if "HTTP Error 403" in msg:
            self.is_403 = True
        if "Read timed out" in msg:
            self.is_timeout = True
        logger.error(msg)


def ytdlp_progress_hook(d, ytdlp_logger):
    """Progress hook to capture the final filename."""
    if d['status'] == 'finished':
        ytdlp_logger.final_filename = d.get('filename')
        logger.info(f"Download finished. Final file: {ytdlp_logger.final_filename}")
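
# Minimal usage sketch (an assumption for illustration, not part of this module's
# own flow): how YTDLPLogger and ytdlp_progress_hook are meant to plug into yt-dlp.
# The options shown are a subset of what main_download_native_py() builds below.
#
#   ytdlp_logger = YTDLPLogger()
#   ydl_opts = {
#       "logger": ytdlp_logger,
#       "progress_hooks": [lambda d: ytdlp_progress_hook(d, ytdlp_logger)],
#   }
#   with yt_dlp.YoutubeDL(ydl_opts) as ydl:
#       ydl.process_ie_result(info_data)   # info_data: a pre-fetched info.json dict
#   print(ytdlp_logger.final_filename)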
|
||||||
|
|
||||||
|
def add_download_native_py_parser(subparsers):
|
||||||
|
"""Add the parser for the 'download py' command."""
|
||||||
|
parser = subparsers.add_parser(
|
||||||
|
'py',
|
||||||
|
description='Download using yt-dlp as a Python library (recommended). This method calls yt-dlp functions directly.',
|
||||||
|
formatter_class=argparse.RawTextHelpFormatter,
|
||||||
|
help='Download using a direct Python call to yt-dlp (recommended).'
|
||||||
|
)
|
||||||
|
parser.add_argument('--load-info-json', type=argparse.FileType('r', encoding='utf-8'), help="Path to the info.json file. If not provided, reads from stdin.")
|
||||||
|
parser.add_argument('-f', '--format', required=True, help='The format selection string to download (e.g., "18", "299/137", "bestvideo+bestaudio").')
|
||||||
|
parser.add_argument('--output-dir', default='.', help='Directory to save the downloaded file. Defaults to current directory.')
|
||||||
|
parser.add_argument('--save-info-json-dir', help='If specified, save the info.json received from stdin to this directory with an auto-generated name.')
|
||||||
|
parser.add_argument('--proxy', help='Proxy to use for the download, e.g., "socks5://127.0.0.1:1080".')
|
||||||
|
parser.add_argument('--proxy-rename', help='Apply sed-style regex substitution to the proxy URL. Format: s/pattern/replacement/')
|
||||||
|
parser.add_argument('--temp-path', help='Directory for temporary files (e.g., fragments). Use a RAM disk for best performance.')
|
||||||
|
parser.add_argument('--pause', type=int, default=0, help='Seconds to wait before starting the download.')
|
||||||
|
parser.add_argument('--download-continue', action='store_true', help='Enable download continuation (--no-overwrites and --continue flags for yt-dlp).')
|
||||||
|
parser.add_argument('--verbose', action='store_true', help='Enable verbose output for this script and yt-dlp.')
|
||||||
|
parser.add_argument('--cli-config', help='Path to a yt-dlp configuration file to load.')
|
||||||
|
parser.add_argument('--downloader', help='Name of the external downloader backend for yt-dlp to use (e.g., "aria2c", "native").')
|
||||||
|
parser.add_argument('--downloader-args', help='Arguments to pass to the external downloader backend (e.g., "aria2c:-x 8").')
|
||||||
|
parser.add_argument('--extra-ytdlp-args', help='A string of extra command-line arguments to pass to yt-dlp.')
|
||||||
|
parser.add_argument('--output-buffer', action='store_true', help='Download to an in-memory buffer and print raw bytes to stdout. Final filename is printed to stderr.')
|
||||||
|
parser.add_argument('--cleanup', action='store_true', help='After download, rename the file to include a timestamp and truncate it to 0 bytes.')
|
||||||
|
parser.add_argument('--merge-output-format', help='Container format to merge to (e.g., "mp4", "mkv"). Overrides config file.')
|
||||||
|
return parser
|
||||||
|
|
||||||
|
def main_download_native_py(args):
|
||||||
|
"""Main logic for the 'download-native-py' command."""
|
||||||
|
# If outputting to buffer, all logging must go to stderr to keep stdout clean for binary data.
|
||||||
|
log_stream = sys.stderr if args.output_buffer else sys.stdout
|
||||||
|
log_level = logging.DEBUG if args.verbose else logging.INFO
|
||||||
|
# Reconfigure root logger
|
||||||
|
for handler in logging.root.handlers[:]:
|
||||||
|
logging.root.removeHandler(handler)
|
||||||
|
logging.basicConfig(level=log_level, stream=log_stream, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
|
||||||
|
|
||||||
|
if args.pause > 0:
|
||||||
|
logger.info(f"Pausing for {args.pause} seconds...")
|
||||||
|
time.sleep(args.pause)
|
||||||
|
|
||||||
|
info_json_content = ""
|
||||||
|
input_source_name = ""
|
||||||
|
if args.load_info_json:
|
||||||
|
info_json_content = args.load_info_json.read()
|
||||||
|
input_source_name = args.load_info_json.name
|
||||||
|
else:
|
||||||
|
info_json_content = sys.stdin.read()
|
||||||
|
input_source_name = "stdin"
|
||||||
|
|
||||||
|
if not info_json_content.strip():
|
||||||
|
logger.error(f"Failed to read info.json from {input_source_name}. Input is empty.")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
try:
|
||||||
|
info_data = json.loads(info_json_content)
|
||||||
|
logger.info(f"Successfully loaded info.json from {input_source_name}.")
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
logger.error(f"Failed to parse info.json from {input_source_name}. Is the input valid JSON?")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
if args.save_info_json_dir:
|
||||||
|
try:
|
||||||
|
video_id = info_data.get('id', 'unknown_video_id')
|
||||||
|
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
|
||||||
|
filename = f"{timestamp}-{video_id}-info.json"
|
||||||
|
output_path = os.path.join(args.save_info_json_dir, filename)
|
||||||
|
os.makedirs(args.save_info_json_dir, exist_ok=True)
|
||||||
|
with open(output_path, 'w', encoding='utf-8') as f:
|
||||||
|
json.dump(info_data, f, indent=2)
|
||||||
|
logger.info(f"Saved info.json to {output_path}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Failed to save info.json: {e}")
|
||||||
|
|
||||||
|
# Handle proxy and proxy rename
|
||||||
|
proxy_url = args.proxy
|
||||||
|
if not proxy_url:
|
||||||
|
proxy_url = info_data.get('_proxy_url')
|
||||||
|
if proxy_url:
|
||||||
|
logger.info(f"Using proxy from info.json: {proxy_url}")
|
||||||
|
|
||||||
|
if proxy_url and args.proxy_rename:
|
||||||
|
rename_rule = args.proxy_rename.strip("'\"")
|
||||||
|
if rename_rule.startswith('s/') and rename_rule.count('/') >= 2:
|
||||||
|
try:
|
||||||
|
parts = rename_rule.split('/')
|
||||||
|
pattern, replacement = parts[1], parts[2]
|
||||||
|
original_proxy = proxy_url
|
||||||
|
proxy_url = re.sub(pattern, replacement, proxy_url)
|
||||||
|
logger.info(f"Renamed proxy URL from '{original_proxy}' to '{proxy_url}' using rule '{rename_rule}'")
|
||||||
|
except re.error as e:
|
||||||
|
logger.error(f"Invalid regex in --proxy-rename: {e}")
|
||||||
|
return 1
|
||||||
|
else:
|
||||||
|
logger.error("Invalid --proxy-rename format. Expected: s/pattern/replacement/")
|
||||||
|
return 1
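# Worked example (hypothetical values): with --proxy-rename 's/localhost/127.0.0.1/'
# a proxy of "socks5://localhost:1080" is rewritten to "socks5://127.0.0.1:1080",
# i.e. re.sub("localhost", "127.0.0.1", "socks5://localhost:1080").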
|
||||||
|
|
||||||
|
# Build the yt-dlp options dictionary
|
||||||
|
# Start by parsing options from config file and extra args to establish a baseline.
|
||||||
|
base_opts_args = []
|
||||||
|
if args.cli_config and os.path.exists(args.cli_config):
|
||||||
|
try:
|
||||||
|
with open(args.cli_config, 'r', encoding='utf-8') as f:
|
||||||
|
config_content = f.read()
|
||||||
|
base_opts_args.extend(shlex.split(config_content))
|
||||||
|
logger.info(f"Loaded {len(base_opts_args)} arguments from config file: {args.cli_config}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Failed to read or parse config file {args.cli_config}: {e}")
|
||||||
|
return 1
|
||||||
|
elif args.cli_config:
|
||||||
|
logger.warning(f"Config file '{args.cli_config}' not found. Ignoring.")
|
||||||
|
|
||||||
|
if args.extra_ytdlp_args:
|
||||||
|
extra_args_list = shlex.split(args.extra_ytdlp_args)
|
||||||
|
logger.info(f"Adding {len(extra_args_list)} extra arguments from --extra-ytdlp-args.")
|
||||||
|
base_opts_args.extend(extra_args_list)
|
||||||
|
|
||||||
|
ydl_opts = {}
|
||||||
|
if base_opts_args:
|
||||||
|
try:
|
||||||
|
# This is an internal API, but it's the most accurate way to parse CLI args
|
||||||
|
# into the ydl_opts dictionary format.
|
||||||
|
ydl_opts, _, _ = yt_dlp.parse_options(base_opts_args)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Failed to parse options from config/extra_args: {e}")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
# Now, layer the script's explicit arguments on top, as they have higher precedence.
|
||||||
|
os.makedirs(args.output_dir, exist_ok=True)
|
||||||
|
output_template = os.path.join(args.output_dir, '%(title)s [%(id)s].f%(format_id)s.%(ext)s')
|
||||||
|
|
||||||
|
ytdlp_logger = YTDLPLogger()
|
||||||
|
|
||||||
|
# Use update to merge, so explicit args overwrite config/extra args.
|
||||||
|
ydl_opts.update({
|
||||||
|
'format': args.format,
|
||||||
|
'outtmpl': '-' if args.output_buffer else output_template,
|
||||||
|
'logger': ytdlp_logger,
|
||||||
|
'progress_hooks': [lambda d: ytdlp_progress_hook(d, ytdlp_logger)],
|
||||||
|
'verbose': args.verbose,
|
||||||
|
})
|
||||||
|
|
||||||
|
if args.temp_path:
|
||||||
|
ydl_opts['paths'] = {'temp': args.temp_path}
|
||||||
|
logger.info(f"Using temporary path: {args.temp_path}")
|
||||||
|
|
||||||
|
if args.download_continue:
|
||||||
|
ydl_opts['continuedl'] = True
|
||||||
|
ydl_opts['nooverwrites'] = True
|
||||||
|
|
||||||
|
if proxy_url:
|
||||||
|
ydl_opts['proxy'] = proxy_url
|
||||||
|
|
||||||
|
if args.downloader:
|
||||||
|
ydl_opts['downloader'] = {args.downloader: None}
|
||||||
|
if args.downloader_args:
|
||||||
|
# yt-dlp expects a dict for downloader_args
|
||||||
|
# e.g., {'aria2c': ['-x', '8']}
|
||||||
|
try:
|
||||||
|
downloader_name, args_str = args.downloader_args.split(':', 1)
|
||||||
|
ydl_opts.setdefault('downloader_args', {})[downloader_name] = shlex.split(args_str)
|
||||||
|
except ValueError:
|
||||||
|
logger.error(f"Invalid --downloader-args format. Expected 'downloader:args'. Got: '{args.downloader_args}'")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
if args.merge_output_format:
|
||||||
|
ydl_opts['merge_output_format'] = args.merge_output_format
|
||||||
|
|
||||||
|
try:
|
||||||
|
logger.info(f"Starting download for format '{args.format}' using yt-dlp library...")
|
||||||
|
|
||||||
|
download_buffer = None
|
||||||
|
if args.output_buffer:
|
||||||
|
# When downloading to buffer, we redirect stdout to capture the binary data.
|
||||||
|
download_buffer = io.BytesIO()
|
||||||
|
ctx_mgr = contextlib.redirect_stdout(download_buffer)
|
||||||
|
else:
|
||||||
|
# Otherwise, use a null context manager.
|
||||||
|
ctx_mgr = contextlib.nullcontext()
|
||||||
|
|
||||||
|
with ctx_mgr, yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
||||||
|
# The download() method is for URLs. For a pre-fetched info dict,
|
||||||
|
# we must use process_ie_result to bypass the info extraction step.
|
||||||
|
# It raises DownloadError on failure, which is caught by the outer try...except block.
|
||||||
|
ydl.process_ie_result(info_data)
|
||||||
|
# If process_ie_result completes without an exception, the download was successful.
|
||||||
|
retcode = 0
|
||||||
|
|
||||||
|
# The success path is now always taken if no exception was raised.
|
||||||
|
if retcode == 0:
|
||||||
|
logger.info("yt-dlp download completed successfully.")
|
||||||
|
|
||||||
|
if args.output_buffer:
|
||||||
|
# Write the captured binary data to the actual stdout.
|
||||||
|
sys.stdout.buffer.write(download_buffer.getvalue())
|
||||||
|
sys.stdout.buffer.flush()
|
||||||
|
# Print the filename to stderr for the orchestrator.
|
||||||
|
if ytdlp_logger.final_filename:
|
||||||
|
print(ytdlp_logger.final_filename, file=sys.stderr)
|
||||||
|
else:
|
||||||
|
# Print the filename to stdout as usual.
|
||||||
|
if ytdlp_logger.final_filename:
|
||||||
|
print(ytdlp_logger.final_filename, file=sys.stdout)
|
||||||
|
|
||||||
|
if args.cleanup:
|
||||||
|
downloaded_filepath = ytdlp_logger.final_filename
|
||||||
|
if downloaded_filepath and os.path.exists(downloaded_filepath):
|
||||||
|
try:
|
||||||
|
logger.info(f"Cleanup: Renaming and truncating '{downloaded_filepath}'")
|
||||||
|
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
|
||||||
|
directory, original_filename = os.path.split(downloaded_filepath)
|
||||||
|
filename_base, filename_ext = os.path.splitext(original_filename)
|
||||||
|
new_filename = f"{filename_base}_{timestamp}{filename_ext}.empty"
|
||||||
|
new_filepath = os.path.join(directory, new_filename)
|
||||||
|
os.rename(downloaded_filepath, new_filepath)
|
||||||
|
logger.info(f"Renamed to '{new_filepath}'")
|
||||||
|
with open(new_filepath, 'w') as f:
|
||||||
|
pass
|
||||||
|
logger.info(f"Truncated '{new_filepath}' to 0 bytes.")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Cleanup failed: {e}")
|
||||||
|
return 1 # Treat cleanup failure as a script failure
|
||||||
|
elif not args.output_buffer:
|
||||||
|
logger.warning("Cleanup requested, but no downloaded file was found. Skipping cleanup.")
|
||||||
|
return 0
|
||||||
|
else:
|
||||||
|
logger.error(f"yt-dlp download failed with internal exit code {retcode}.")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
except yt_dlp.utils.DownloadError as e:
|
||||||
|
# This catches download-specific errors from yt-dlp
|
||||||
|
logger.error(f"yt-dlp DownloadError: {e}")
|
||||||
|
return 1
|
||||||
|
except Exception as e:
|
||||||
|
logger.exception(f"An unexpected error occurred during yt-dlp execution: {e}")
|
||||||
|
return 1
|
||||||
285 ytops_client/download_tool.py (Normal file)
@ -0,0 +1,285 @@
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Tool to download a specified format using an info.json from stdin.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import shlex
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
import tempfile
|
||||||
|
import time
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
# Configure logging
|
||||||
|
logger = logging.getLogger('download_tool')
|
||||||
|
|
||||||
|
def add_download_parser(subparsers):
|
||||||
|
"""Add the parser for the 'download cli' command."""
|
||||||
|
parser = subparsers.add_parser(
|
||||||
|
'cli',
|
||||||
|
description='Download using the legacy yt-dlp CLI wrapper. This method invokes yt-dlp as a subprocess.',
|
||||||
|
formatter_class=argparse.RawTextHelpFormatter,
|
||||||
|
help='Download using the legacy yt-dlp CLI wrapper.'
|
||||||
|
)
|
||||||
|
parser.add_argument('--load-info-json', type=argparse.FileType('r', encoding='utf-8'), help="Path to the info.json file. If not provided, reads from stdin.")
|
||||||
|
parser.add_argument('-f', '--format', required=True, help='The format selection string to download (e.g., "18", "299/137", "bestvideo+bestaudio").')
|
||||||
|
parser.add_argument('--output-dir', default='.', help='Directory to save the downloaded file. Defaults to current directory.')
|
||||||
|
parser.add_argument('--save-info-json-dir', help='If specified, save the info.json received from stdin to this directory with an auto-generated name.')
|
||||||
|
parser.add_argument('--proxy', help='Proxy to use for the download, e.g., "socks5://127.0.0.1:1080". This option sets the proxy, overriding any value from the info.json.')
|
||||||
|
parser.add_argument('--proxy-rename', help='Apply sed-style regex substitution to the proxy URL. Format: s/pattern/replacement/')
|
||||||
|
parser.add_argument('--pause', type=int, default=0, help='Seconds to wait before starting the download.')
|
||||||
|
parser.add_argument('--print-traffic', action='store_true', help='Print traffic instead of a progress bar.')
|
||||||
|
parser.add_argument('--download-continue', action='store_true', help='Enable download continuation (--continue and --part flags for yt-dlp).')
|
||||||
|
parser.add_argument('--verbose', action='store_true', help='Enable verbose output for this script and yt-dlp.')
|
||||||
|
parser.add_argument('--cli-config', default='cli.config', help='Path to a yt-dlp configuration file. Defaults to "cli.config".')
|
||||||
|
parser.add_argument('--cleanup', action='store_true', help='After download, rename the file to include a timestamp and truncate it to 0 bytes.')
|
||||||
|
parser.add_argument('--log-file', help='Append full yt-dlp output to the specified log file.')
|
||||||
|
parser.add_argument('--yt-dlp-path', default='yt-dlp', help='Path to the yt-dlp executable. Defaults to "yt-dlp" in PATH.')
|
||||||
|
parser.add_argument('--extra-ytdlp-args', help='A string of extra command-line arguments to pass to yt-dlp.')
|
||||||
|
parser.add_argument('--downloader', help='Name of the external downloader to use (e.g., "aria2c", "native").')
|
||||||
|
parser.add_argument('--downloader-args', help='Arguments to pass to the external downloader (e.g., "aria2c:-x 8").')
|
||||||
|
parser.add_argument('--merge-output-format', help='Container format to merge to (e.g., "mp4", "mkv"). Overrides config file.')
|
||||||
|
return parser
|
||||||
|
|
||||||
|
def main_download(args):
|
||||||
|
"""Main logic for the 'download' command."""
|
||||||
|
if args.verbose:
|
||||||
|
logging.getLogger().setLevel(logging.DEBUG)
|
||||||
|
|
||||||
|
if args.pause > 0:
|
||||||
|
logger.info(f"Pausing for {args.pause} seconds...")
|
||||||
|
time.sleep(args.pause)
|
||||||
|
|
||||||
|
info_json_content = ""
|
||||||
|
input_source_name = ""
|
||||||
|
if args.load_info_json:
|
||||||
|
info_json_content = args.load_info_json.read()
|
||||||
|
input_source_name = args.load_info_json.name
|
||||||
|
else:
|
||||||
|
info_json_content = sys.stdin.read()
|
||||||
|
input_source_name = "stdin"
|
||||||
|
|
||||||
|
if not info_json_content.strip():
|
||||||
|
logger.error(f"Failed to read info.json from {input_source_name}. Input is empty.")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
try:
|
||||||
|
info_data = json.loads(info_json_content)
|
||||||
|
logger.info(f"Successfully loaded info.json from {input_source_name}.")
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
logger.error(f"Failed to parse info.json from {input_source_name}. Is the input valid JSON?")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
if args.save_info_json_dir:
|
||||||
|
try:
|
||||||
|
video_id = info_data.get('id', 'unknown_video_id')
|
||||||
|
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
|
||||||
|
filename = f"{timestamp}-{video_id}-info.json"
|
||||||
|
output_path = os.path.join(args.save_info_json_dir, filename)
|
||||||
|
os.makedirs(args.save_info_json_dir, exist_ok=True)
|
||||||
|
with open(output_path, 'w', encoding='utf-8') as f:
|
||||||
|
json.dump(info_data, f, indent=2)
|
||||||
|
logger.info(f"Saved info.json to {output_path}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Failed to save info.json: {e}")
|
||||||
|
|
||||||
|
# Determine proxy to use
|
||||||
|
proxy_url = args.proxy
|
||||||
|
if not proxy_url:
|
||||||
|
proxy_url = info_data.get('_proxy_url')
|
||||||
|
if proxy_url:
|
||||||
|
logger.info(f"Using proxy from info.json: {proxy_url}")
|
||||||
|
|
||||||
|
if proxy_url and args.proxy_rename:
|
||||||
|
rename_rule = args.proxy_rename
|
||||||
|
# The user's command line might include quotes that are preserved by shlex.
|
||||||
|
# Strip them to get the raw rule.
|
||||||
|
rename_rule = rename_rule.strip("'\"")
|
||||||
|
if rename_rule.startswith('s/') and rename_rule.count('/') >= 2:
|
||||||
|
try:
|
||||||
|
parts = rename_rule.split('/')
|
||||||
|
pattern = parts[1]
|
||||||
|
replacement = parts[2]
|
||||||
|
original_proxy = proxy_url
|
||||||
|
proxy_url = re.sub(pattern, replacement, proxy_url)
|
||||||
|
logger.info(f"Renamed proxy URL from '{original_proxy}' to '{proxy_url}' using rule '{rename_rule}'")
|
||||||
|
except re.error as e:
|
||||||
|
logger.error(f"Invalid regex in --proxy-rename: {e}")
|
||||||
|
return 1
|
||||||
|
except IndexError:
|
||||||
|
logger.error("Invalid --proxy-rename format. Expected: s/pattern/replacement/")
|
||||||
|
return 1
|
||||||
|
else:
|
||||||
|
logger.error("Invalid --proxy-rename format. Expected: s/pattern/replacement/")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
# yt-dlp needs to load the info.json from a file
|
||||||
|
with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.json', encoding='utf-8') as tmp:
|
||||||
|
json.dump(info_data, tmp)
|
||||||
|
info_json_path = tmp.name
|
||||||
|
|
||||||
|
logger.debug(f"Temporarily saved info.json to {info_json_path}")
|
||||||
|
|
||||||
|
downloaded_filepath = None
|
||||||
|
return_code = 1 # Default to error
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Create output directory if it doesn't exist
|
||||||
|
os.makedirs(args.output_dir, exist_ok=True)
|
||||||
|
output_template = os.path.join(args.output_dir, '%(title)s [%(id)s].f%(format_id)s.%(ext)s')
|
||||||
|
|
||||||
|
cmd = [
|
||||||
|
args.yt_dlp_path,
|
||||||
|
'--load-info-json', info_json_path,
|
||||||
|
'-f', args.format,
|
||||||
|
'-o', output_template,
|
||||||
|
'--print', 'filename',
|
||||||
|
]
|
||||||
|
|
||||||
|
if args.extra_ytdlp_args:
|
||||||
|
cmd.extend(shlex.split(args.extra_ytdlp_args))
|
||||||
|
|
||||||
|
if args.downloader:
|
||||||
|
cmd.extend(['--downloader', args.downloader])
|
||||||
|
if args.downloader_args:
|
||||||
|
cmd.extend(['--downloader-args', args.downloader_args])
|
||||||
|
if args.merge_output_format:
|
||||||
|
cmd.extend(['--merge-output-format', args.merge_output_format])
|
||||||
|
|
||||||
|
if args.download_continue:
|
||||||
|
cmd.extend(['--continue', '--part'])
|
||||||
|
|
||||||
|
if os.path.exists(args.cli_config):
|
||||||
|
logger.info(f"Using config file: {args.cli_config}")
|
||||||
|
cmd.extend(['--config-location', args.cli_config])
|
||||||
|
else:
|
||||||
|
logger.info(f"Config file '{args.cli_config}' not found. Using yt-dlp defaults.")
|
||||||
|
|
||||||
|
if args.print_traffic:
|
||||||
|
cmd.append('--print-traffic')
|
||||||
|
cmd.append('--no-progress')
|
||||||
|
else:
|
||||||
|
cmd.append('--progress')
|
||||||
|
|
||||||
|
if args.verbose:
|
||||||
|
cmd.append('--verbose')
|
||||||
|
|
||||||
|
if proxy_url:
|
||||||
|
cmd.extend(['--proxy', proxy_url])
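# Illustrative example (hypothetical paths and values): with -f 137, --download-continue
# and a proxy, the assembled command looks roughly like:
#   yt-dlp --load-info-json /tmp/tmpab12cd.json -f 137 \
#       -o '/data/%(title)s [%(id)s].f%(format_id)s.%(ext)s' --print filename \
#       --continue --part --config-location cli.config --progress \
#       --proxy socks5://127.0.0.1:1080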
|
||||||
|
|
||||||
|
# Determine if we need to capture output.
|
||||||
|
capture_output = args.cleanup or args.log_file or args.print_traffic
|
||||||
|
|
||||||
|
if capture_output and not args.print_traffic:
|
||||||
|
logger.info("Note: --cleanup or --log-file requires capturing output, which may affect progress bar display.")
|
||||||
|
|
||||||
|
logger.info(f"Executing yt-dlp command for format '{args.format}'")
|
||||||
|
|
||||||
|
# Construct a display version of the command for logging
|
||||||
|
display_cmd_str = ' '.join(f"'{arg}'" if ' ' in arg else arg for arg in cmd)
|
||||||
|
if os.path.exists(args.cli_config):
|
||||||
|
try:
|
||||||
|
with open(args.cli_config, 'r', encoding='utf-8') as f:
|
||||||
|
config_contents = ' '.join(f.read().split())
|
||||||
|
if config_contents:
|
||||||
|
logger.info(f"cli.config contents: {config_contents}")
|
||||||
|
except IOError as e:
|
||||||
|
logger.warning(f"Could not read config file {args.cli_config}: {e}")
|
||||||
|
|
||||||
|
logger.info(f"Full command: {display_cmd_str}")
|
||||||
|
|
||||||
|
if capture_output:
|
||||||
|
process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, encoding='utf-8')
|
||||||
|
|
||||||
|
log_f = None
|
||||||
|
if args.log_file:
|
||||||
|
try:
|
||||||
|
log_f = open(args.log_file, 'a', encoding='utf-8')
|
||||||
|
log_f.write(f"\n--- Log entry: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} ---\n")
|
||||||
|
log_f.write(f"Command: {' '.join(cmd)}\n\n")
|
||||||
|
except IOError as e:
|
||||||
|
logger.error(f"Failed to open log file {args.log_file}: {e}")
|
||||||
|
|
||||||
|
stdout_data, stderr_data = process.communicate()
|
||||||
|
return_code = process.returncode
|
||||||
|
|
||||||
|
# Write captured output to terminal and log file
|
||||||
|
if stdout_data:
|
||||||
|
sys.stdout.write(stdout_data)
|
||||||
|
sys.stdout.flush()
|
||||||
|
if log_f:
|
||||||
|
for line in stdout_data.splitlines(True):
|
||||||
|
log_f.write(f"[stdout] {line}")
|
||||||
|
|
||||||
|
if stderr_data:
|
||||||
|
sys.stderr.write(stderr_data)
|
||||||
|
sys.stderr.flush()
|
||||||
|
if log_f:
|
||||||
|
for line in stderr_data.splitlines(True):
|
||||||
|
log_f.write(f"[stderr] {line}")
|
||||||
|
|
||||||
|
stdout_lines = stdout_data.splitlines() if stdout_data else []
|
||||||
|
|
||||||
|
if log_f:
|
||||||
|
log_f.write(f"\n--- End log entry (yt-dlp exit code: {return_code}) ---\n")
|
||||||
|
log_f.close()
|
||||||
|
|
||||||
|
for line in reversed(stdout_lines):
|
||||||
|
if line and os.path.exists(line):
|
||||||
|
downloaded_filepath = line
|
||||||
|
logger.info(f"Detected downloaded file: {downloaded_filepath}")
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
# Original behavior: progress bar direct to terminal, no capture
|
||||||
|
process = subprocess.Popen(cmd)
|
||||||
|
process.wait()
|
||||||
|
return_code = process.returncode
|
||||||
|
|
||||||
|
if return_code != 0:
|
||||||
|
logger.error(f"yt-dlp exited with error code {return_code}")
|
||||||
|
else:
|
||||||
|
logger.info("yt-dlp command completed successfully.")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.exception(f"An unexpected error occurred: {e}")
|
||||||
|
return 1
|
||||||
|
finally:
|
||||||
|
# Clean up the temporary file
|
||||||
|
if os.path.exists(info_json_path):
|
||||||
|
os.unlink(info_json_path)
|
||||||
|
logger.debug(f"Removed temporary file {info_json_path}")
|
||||||
|
|
||||||
|
# Cleanup phase
|
||||||
|
if args.cleanup:
|
||||||
|
if downloaded_filepath and os.path.exists(downloaded_filepath):
|
||||||
|
try:
|
||||||
|
logger.info(f"Cleanup: Renaming and truncating '{downloaded_filepath}'")
|
||||||
|
|
||||||
|
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
|
||||||
|
|
||||||
|
directory, original_filename = os.path.split(downloaded_filepath)
|
||||||
|
filename_base, filename_ext = os.path.splitext(original_filename)
|
||||||
|
|
||||||
|
# New name format is [base]_[timestamp][ext].empty
|
||||||
|
new_filename = f"{filename_base}_{timestamp}{filename_ext}.empty"
|
||||||
|
new_filepath = os.path.join(directory, new_filename)
|
||||||
|
|
||||||
|
os.rename(downloaded_filepath, new_filepath)
|
||||||
|
logger.info(f"Renamed to '{new_filepath}'")
|
||||||
|
|
||||||
|
with open(new_filepath, 'w') as f:
|
||||||
|
pass
|
||||||
|
logger.info(f"Truncated '{new_filepath}' to 0 bytes.")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Cleanup failed: {e}")
|
||||||
|
return 1
|
||||||
|
else:
|
||||||
|
logger.warning("Cleanup requested, but no downloaded file was found. Skipping cleanup.")
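# Worked example of the cleanup naming scheme above (hypothetical file and timestamp):
#   "My Video [abc12345678].f137.mp4"
#     -> renamed to "My Video [abc12345678].f137_20240101_120000.mp4.empty", then truncated to 0 bytes.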
|
||||||
|
|
||||||
|
return return_code
|
||||||
473 ytops_client/get_info_tool.py (Normal file)
@ -0,0 +1,473 @@
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Tool to get info.json from the Thrift service.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
import logging
|
||||||
|
import codecs
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import Dict, Any, Optional
|
||||||
|
|
||||||
|
# Configure logging
|
||||||
|
logging.basicConfig(
|
||||||
|
level=logging.INFO,
|
||||||
|
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
||||||
|
)
|
||||||
|
# Note: The CLI entrypoint will configure the root logger.
|
||||||
|
# We get our own logger here for namespacing.
|
||||||
|
logger = logging.getLogger('get_info_tool')
|
||||||
|
|
||||||
|
# Import Thrift modules
|
||||||
|
# Add project's thrift gen_py path to allow importing 'pangramia'
|
||||||
|
script_dir = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
project_root = os.path.abspath(os.path.join(script_dir, '..'))
|
||||||
|
sys.path.insert(0, os.path.join(project_root, 'thrift_model', 'gen_py'))
|
||||||
|
from thrift.transport import TTransport
|
||||||
|
from pangramia.yt.common.ttypes import TokenUpdateMode
|
||||||
|
from pangramia.yt.exceptions.ttypes import PBServiceException, PBUserException
|
||||||
|
from yt_ops_services.client_utils import get_thrift_client
|
||||||
|
from ytops_client.request_params_help import REQUEST_PARAMS_HELP_STRING
|
||||||
|
|
||||||
|
|
||||||
|
def get_video_id(url: str) -> str:
    """Extracts a YouTube video ID from a URL."""
    # For URLs like https://www.youtube.com/watch?v=VIDEO_ID
    match = re.search(r"v=([0-9A-Za-z_-]{11})", url)
    if match:
        return match.group(1)
    # For URLs like https://youtu.be/VIDEO_ID
    match = re.search(r"youtu\.be\/([0-9A-Za-z_-]{11})", url)
    if match:
        return match.group(1)
    # For plain video IDs
    if re.fullmatch(r'[0-9A-Za-z_-]{11}', url):
        return url
    return "unknown_video_id"
|
||||||
|
|
||||||
|
|
||||||
|
def parse_key_value_params(params_str: str) -> Dict[str, Any]:
    """Parses a comma-separated string of key=value pairs into a nested dict."""
    params = {}
    if not params_str:
        return params
    for pair in params_str.split(','):
        if '=' not in pair:
            logger.warning(f"Skipping malformed parameter pair: {pair}")
            continue
        key, value_str = pair.split('=', 1)
        keys = key.strip().split('.')

        # Try to parse value as JSON primitive, otherwise treat as string
        try:
            # Don't parse if it's quoted, treat as string
            if (value_str.startswith('"') and value_str.endswith('"')) or \
               (value_str.startswith("'") and value_str.endswith("'")):
                value = value_str[1:-1]
            else:
                value = json.loads(value_str)
        except json.JSONDecodeError:
            value = value_str

        d = params
        for k in keys[:-1]:
            if k not in d or not isinstance(d[k], dict):
                d[k] = {}
            d = d[k]
        d[keys[-1]] = value
    return params
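
# Worked example (first key taken from the --request-params-json help text, second
# key hypothetical):
#   parse_key_value_params("caching_policy.mode=force_refresh,ytdlp_params.verbose=true")
#     -> {"caching_policy": {"mode": "force_refresh"}, "ytdlp_params": {"verbose": True}}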
|
||||||
|
|
||||||
|
|
||||||
|
def add_get_info_parser(subparsers):
|
||||||
|
"""Add the parser for the 'get-info' command."""
|
||||||
|
parser = subparsers.add_parser(
|
||||||
|
'get-info',
|
||||||
|
description='Get info.json from Thrift service',
|
||||||
|
formatter_class=argparse.RawTextHelpFormatter,
|
||||||
|
help='Get info.json from the Thrift service.'
|
||||||
|
)
|
||||||
|
parser.add_argument('url', help='YouTube URL or video ID')
|
||||||
|
parser.add_argument('--host', default='127.0.0.1', help="Thrift server host. Using 127.0.0.1 avoids harmless connection errors when the local Envoy proxy only listens on IPv4.")
|
||||||
|
parser.add_argument('--port', type=int, default=9080, help='Thrift server port')
|
||||||
|
parser.add_argument('--auth-host', help='Thrift server host (overrides --host).')
|
||||||
|
parser.add_argument('--auth-port', type=int, help='Thrift server port (overrides --port).')
|
||||||
|
parser.add_argument('--profile', default='default_profile', help='The profile name (accountId) to use for the request.')
|
||||||
|
parser.add_argument('--client', help='''Specific client to use. Overrides server default.
|
||||||
|
Available clients:
|
||||||
|
web, web_safari, web_embedded, web_music, web_creator, mweb
|
||||||
|
android, android_music, android_creator, android_vr
|
||||||
|
ios, ios_music, ios_creator
|
||||||
|
tv, tv_simply, tv_embedded
|
||||||
|
|
||||||
|
Append "_camoufox" to any client name (e.g., "web_camoufox") to force
|
||||||
|
the browser-based generation strategy.''')
|
||||||
|
parser.add_argument('--output', help='Output file path for the info.json. If not provided, prints to stdout.')
|
||||||
|
parser.add_argument('--output-auto', action='store_true', help='Automatically generate output filename for info.json and invocation data. Format: DATETIME-CLIENT-VIDEOID-info.json')
|
||||||
|
parser.add_argument('--output-auto-url-only', action='store_true', help='Automatically generate output filename for info.json (format: VIDEOID-info.json) and also save a copy to latest-info.json.')
|
||||||
|
parser.add_argument('--output-auto-suffix', help='Suffix to add to the filename before "-info.json" when using --output-auto or --output-auto-url-only. E.g., "-cycle1".')
|
||||||
|
parser.add_argument('--log-file-auto', action='store_true', help='Automatically generate a log filename and save all script logs to it. Format: VIDEOID-DATETIME.log')
|
||||||
|
parser.add_argument('--machine-id', help='Identifier for the client machine. Defaults to hostname.')
|
||||||
|
parser.add_argument('--worker-id', help='Identifier for a worker process. Used for naming files with --save-latest.')
|
||||||
|
parser.add_argument('--save-latest', action='store_true', help='Save a copy of the info.json to latest-info.json or [worker-id]-latest-info.json. This is implied by --output-auto-url-only.')
|
||||||
|
parser.add_argument('--assigned-proxy-url', help='A specific proxy URL to use for the request, overriding the server\'s proxy pool logic.')
|
||||||
|
parser.add_argument('--proxy-rename', help='Apply sed-style regex substitution to the assigned proxy URL. Format: s/pattern/replacement/')
|
||||||
|
parser.add_argument('--print-proxy', action='store_true', help='Print the proxy used for the request to stderr.')
|
||||||
|
parser.add_argument('--verbose', action='store_true', help='Enable verbose output')
|
||||||
|
parser.add_argument('--log-return', action='store_true', help='Log the full summary of the thrift response to stderr, including detailed logs.\nThis is a convenience flag that implies --show-prefetch-log, --show-nodejs-log, and --show-ytdlp-log.')
|
||||||
|
parser.add_argument('--show-prefetch-log', action='store_true', help='Print the curl pre-fetch log from the server response.')
|
||||||
|
parser.add_argument('--show-nodejs-log', action='store_true', help='Print the Node.js debug log from the server response.')
|
||||||
|
parser.add_argument('--show-ytdlp-log', action='store_true', help='Print the yt-dlp debug log from the server response.')
|
||||||
|
parser.add_argument('--direct', action='store_true', help='Use the direct yt-dlp info.json generation method, bypassing Node.js token generation.')
|
||||||
|
parser.add_argument('--print-info-out', action='store_true', help='Print the final info.json to stdout. By default, output is suppressed unless writing to a file.')
|
||||||
|
parser.add_argument('--request-params-json', help=REQUEST_PARAMS_HELP_STRING + '\nCan also be a comma-separated string of key=value pairs (e.g., "caching_policy.mode=force_refresh").')
|
||||||
|
parser.add_argument('--force-renew', help='Comma-separated list of items to force-renew: cookies, visitor_id, po_token, nsig_cache, all.')
|
||||||
|
return parser
|
||||||
|
|
||||||
|
def main_get_info(args):
|
||||||
|
"""Main logic for the 'get-info' command."""
|
||||||
|
exit_code = 0
|
||||||
|
|
||||||
|
# Set log level
|
||||||
|
if args.verbose:
|
||||||
|
logging.getLogger().setLevel(logging.DEBUG)
|
||||||
|
|
||||||
|
if args.log_file_auto:
|
||||||
|
video_id = get_video_id(args.url)
|
||||||
|
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
|
||||||
|
log_filename = f"{video_id}-{timestamp}.log"
|
||||||
|
|
||||||
|
# Get root logger to add file handler
|
||||||
|
root_logger = logging.getLogger()
|
||||||
|
file_handler = logging.FileHandler(log_filename)
|
||||||
|
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
|
||||||
|
file_handler.setFormatter(formatter)
|
||||||
|
root_logger.addHandler(file_handler)
|
||||||
|
|
||||||
|
logger.info(f"Logging to file: {log_filename}")
|
||||||
|
|
||||||
|
transport = None
|
||||||
|
try:
|
||||||
|
# Determine host and port, giving precedence to --auth-* args
|
||||||
|
host = args.auth_host or args.host
|
||||||
|
port = args.auth_port or args.port
|
||||||
|
|
||||||
|
# Create Thrift client
|
||||||
|
client, transport = get_thrift_client(host, port)
|
||||||
|
|
||||||
|
# Get token data, which includes the info.json
|
||||||
|
if args.direct:
|
||||||
|
logger.info(f"Requesting info.json for URL '{args.url}' using DIRECT method.")
|
||||||
|
if args.client:
|
||||||
|
logger.info(f"Requesting to use specific client(s): {args.client}")
|
||||||
|
else:
|
||||||
|
logger.info("No specific client requested, server will let yt-dlp decide.")
|
||||||
|
token_data = client.getInfoJsonDirect(url=args.url, clients=args.client)
|
||||||
|
else:
|
||||||
|
logger.info(f"Requesting info.json for URL '{args.url}' using profile '{args.profile}'")
|
||||||
|
|
||||||
|
# Prepare arguments for the Thrift call
|
||||||
|
machine_id = args.machine_id
|
||||||
|
if not machine_id:
|
||||||
|
import socket
|
||||||
|
machine_id = socket.gethostname()
|
||||||
|
logger.info(f"No machine ID provided, using hostname: {machine_id}")
|
||||||
|
|
||||||
|
request_params = {}
|
||||||
|
if args.request_params_json:
|
||||||
|
try:
|
||||||
|
request_params = json.loads(args.request_params_json)
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
logger.info("Could not parse --request-params-json as JSON, trying as key-value string.")
|
||||||
|
request_params = parse_key_value_params(args.request_params_json)
|
||||||
|
|
||||||
|
if args.force_renew:
|
||||||
|
items_to_renew = [item.strip() for item in args.force_renew.split(',')]
|
||||||
|
request_params['force_renew'] = items_to_renew
|
||||||
|
logger.info(f"Requesting force renew for: {items_to_renew}")
|
||||||
|
|
||||||
|
if args.verbose:
|
||||||
|
# Add verbose flag for yt-dlp on the server
|
||||||
|
ytdlp_params = request_params.setdefault('ytdlp_params', {})
|
||||||
|
ytdlp_params['verbose'] = True
|
||||||
|
logger.info("Verbose mode enabled, requesting verbose yt-dlp logs from server.")
|
||||||
|
|
||||||
|
thrift_args = {
|
||||||
|
'accountId': args.profile,
|
||||||
|
'updateType': TokenUpdateMode.AUTO,
|
||||||
|
'url': args.url,
|
||||||
|
'clients': args.client,
|
||||||
|
            'machineId': machine_id,
            'airflowLogContext': None,
            'requestParamsJson': json.dumps(request_params) if request_params else None,
            'assignedProxyUrl': args.assigned_proxy_url
        }

        # Handle proxy renaming
        assigned_proxy = args.assigned_proxy_url
        if assigned_proxy and args.proxy_rename:
            rename_rule = args.proxy_rename.strip("'\"")
            if rename_rule.startswith('s/') and rename_rule.count('/') >= 2:
                try:
                    parts = rename_rule.split('/')
                    pattern = parts[1]
                    replacement = parts[2]
                    original_proxy = assigned_proxy
                    assigned_proxy = re.sub(pattern, replacement, assigned_proxy)
                    logger.info(f"Renamed proxy URL from '{original_proxy}' to '{assigned_proxy}' using rule '{rename_rule}'")
                except re.error as e:
                    logger.error(f"Invalid regex in --proxy-rename: {e}")
                    return 1
                except IndexError:
                    logger.error("Invalid --proxy-rename format. Expected: s/pattern/replacement/")
                    return 1
            else:
                logger.error("Invalid --proxy-rename format. Expected: s/pattern/replacement/")
                return 1
            thrift_args['assignedProxyUrl'] = assigned_proxy

        if args.client:
            logger.info(f"Requesting to use specific client: {args.client}")
        else:
            logger.info("No specific client requested, server will use its default.")

        token_data = client.getOrRefreshToken(**thrift_args)

        if args.print_proxy:
            if hasattr(token_data, 'socks') and token_data.socks:
                print(f"Proxy used: {token_data.socks}", file=sys.stderr)
            else:
                print("Proxy information not available in response.", file=sys.stderr)

        if not token_data or not hasattr(token_data, 'infoJson') or not token_data.infoJson:
            logger.error("Server did not return valid info.json data.")
            print("Error: Server did not return valid info.json data.", file=sys.stderr)
            return 1

        info_json_str = token_data.infoJson

        # On success, print summary info to stderr for visibility.
        # This provides immediate feedback without interfering with piped stdout.
        if hasattr(token_data, 'serverVersionInfo') and token_data.serverVersionInfo:
            # Filter out the default params line as requested
            filtered_info = '\n'.join(
                line for line in token_data.serverVersionInfo.split('\n')
                if 'Default yt-dlp CLI params:' not in line
            )
            print(f"\n--- Server Version Info ---\n{filtered_info}", file=sys.stderr)
        if hasattr(token_data, 'requestSummary') and token_data.requestSummary:
            try:
                summary_data = json.loads(token_data.requestSummary)
                print(f"\n--- Request Summary ---\n{summary_data.get('summary', token_data.requestSummary)}", file=sys.stderr)
            except json.JSONDecodeError:
                # Fallback for old format or non-JSON summary
                print(f"\n--- Request Summary ---\n{token_data.requestSummary}", file=sys.stderr)

        # Print detailed logs only if explicitly requested
        if hasattr(token_data, 'requestSummary') and token_data.requestSummary:
            try:
                summary_data = json.loads(token_data.requestSummary)
                if args.show_prefetch_log or args.log_return:
                    print("\n--- Prefetch Log ---", file=sys.stderr)
                    print(summary_data.get('prefetch_log', 'Not available.'), file=sys.stderr)
                if args.show_nodejs_log or args.log_return:
                    print("\n--- Node.js Log ---", file=sys.stderr)
                    print(summary_data.get('nodejs_log', 'Not available.'), file=sys.stderr)
                if args.show_ytdlp_log or args.log_return:
                    print("\n--- yt-dlp Log ---", file=sys.stderr)
                    print(summary_data.get('ytdlp_log', 'Not available.'), file=sys.stderr)
            except json.JSONDecodeError:
                pass  # Fallback already handled above
        if hasattr(token_data, 'communicationLogPaths') and token_data.communicationLogPaths:
            logger.info("--- Communication Log Paths ---")
            for log_path in token_data.communicationLogPaths:
                logger.info(f"  - {log_path}")

        # Check if the returned info.json is an error report
        try:
            info_data = json.loads(info_json_str)
            if hasattr(token_data, 'socks') and token_data.socks:
                info_data['_proxy_url'] = token_data.socks
            if isinstance(info_data, dict) and 'error' in info_data:
                error_code = info_data.get('errorCode', 'N/A')
                error_message = info_data.get('message', info_data.get('error', 'Unknown error'))
                logger.error(f"Server returned an error in info.json (Code: {error_code}): {error_message}")
                print(f"Error from server (Code: {error_code}): {error_message}", file=sys.stderr)
                # Optionally print the full error JSON
                if args.verbose:
                    print(json.dumps(info_data, indent=2), file=sys.stderr)
                exit_code = 1
        except json.JSONDecodeError:
            logger.error(f"Failed to parse info.json from server: {info_json_str[:200]}...")
            print("Error: Failed to parse the info.json response from the server.", file=sys.stderr)
            return 1

        logger.info(f"Successfully retrieved info.json ({len(info_json_str)} bytes)")

        # Save to latest-info.json if requested, or if using --output-auto-url-only for convenience
        if args.save_latest or args.output_auto_url_only:
            base_latest_filename = f"{args.worker_id}-latest" if args.worker_id else "latest"
            latest_info_filename = f"{base_latest_filename}-info.json"
            latest_proxy_filename = f"{base_latest_filename}-proxy.txt"

            try:
                with open(latest_info_filename, 'w', encoding='utf-8') as f:
                    json.dump(info_data, f, indent=2)
                logger.info(f"Wrote info.json to {latest_info_filename}")
                print(f"Successfully saved info.json to {latest_info_filename}", file=sys.stderr)
            except IOError as e:
                logger.error(f"Failed to write to {latest_info_filename}: {e}")
                print(f"Error: Failed to write to {latest_info_filename}: {e}", file=sys.stderr)

            if hasattr(token_data, 'socks') and token_data.socks:
                try:
                    with open(latest_proxy_filename, 'w', encoding='utf-8') as f:
                        f.write(token_data.socks + '\n')
                    logger.info(f"Wrote proxy to {latest_proxy_filename}")
                    print(f"Successfully saved proxy to {latest_proxy_filename}", file=sys.stderr)
                except IOError as e:
                    logger.error(f"Failed to write to {latest_proxy_filename}: {e}")
                    print(f"Error: Failed to write to {latest_proxy_filename}: {e}", file=sys.stderr)

        # Determine output file path if auto-naming is used
        output_file = args.output
        if args.output_auto or args.output_auto_url_only:
            video_id = get_video_id(args.url)
            suffix = args.output_auto_suffix or ""
            if args.output_auto:
                timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
                client_id = args.client or args.profile
                base_filename = f"{timestamp}-{client_id}-{video_id}{suffix}"
                output_file = f"{base_filename}-info.json"

                # Save invocation data
                invocation_filename = f"{base_filename}-invocation.json"
                invocation_data = {}
                for attr in ['ytdlpCommand', 'socks', 'jobId', 'url', 'requestSummary', 'communicationLogPaths']:
                    if hasattr(token_data, attr):
                        value = getattr(token_data, attr)
                        if value:
                            invocation_data[attr] = value

                if hasattr(token_data, 'cookiesBlob') and token_data.cookiesBlob:
                    invocation_data['cookiesBlob'] = f"present, {len(token_data.cookiesBlob)} bytes"
                else:
                    invocation_data['cookiesBlob'] = "not present"

                try:
                    with open(invocation_filename, 'w', encoding='utf-8') as f:
                        json.dump(invocation_data, f, indent=2)
                    logger.info(f"Wrote invocation data to {invocation_filename}")
                except IOError as e:
                    logger.error(f"Failed to write invocation data to {invocation_filename}: {e}")

            else:  # args.output_auto_url_only
                output_file = f"{video_id}{suffix}-info.json"

        # Write to output file if specified
        if output_file:
            try:
                # Ensure the output directory exists before writing the file
                output_dir = os.path.dirname(output_file)
                if output_dir:
                    os.makedirs(output_dir, exist_ok=True)

                with open(output_file, 'w', encoding='utf-8') as f:
                    # Pretty-print the JSON to the file
                    json.dump(info_data, f, indent=2)
                logger.info(f"Wrote info.json to {output_file}")
                # Print success message to stderr to not interfere with stdout piping
                print(f"Successfully saved info.json to {output_file}", file=sys.stderr)

                # If --output-auto, save invocation data
                if args.output_auto:
                    pass  # The latest-info.json logic is now handled by --save-latest

            except IOError as e:
                logger.error(f"Failed to write to output file {output_file}: {e}")
                print(f"Error: Failed to write to output file {output_file}: {e}", file=sys.stderr)
                return 1

        # Print the JSON to stdout if requested, to allow for piping.
        if args.print_info_out:
            print(json.dumps(info_data, indent=2))

        return exit_code
    except (PBServiceException, PBUserException) as e:
        # Check for non-fatal age-gate errors. These are expected for certain videos
        # and should not cause the entire stress test to fail.
        is_age_gate_error = hasattr(e, 'errorCode') and e.errorCode == 'AGE_GATED_SIGN_IN'

        if is_age_gate_error:
            logger.warning(f"Age-gated content detected for URL '{args.url}'. Treating as a non-fatal warning.")
            print(f"Warning: Age-gated content detected for '{args.url}'.", file=sys.stderr)

            # To avoid breaking downstream parsers, output a valid JSON error object.
            # This allows stress testers to see a 'success' (exit 0) but still know it was an age gate issue.
            error_json = {
                "error": "Age-gated content",
                "errorCode": "AGE_GATE",
                "message": "Sign in to confirm your age."
            }
            print(json.dumps(error_json, indent=2))

            # We return success because this is not a system failure.
            return 0

        # Format message for better readability, ensuring newlines are handled.
        message = str(e.message or '')
        try:
            # Attempt to decode as if it has escaped newlines (e.g., '\\n' -> '\n')
            message = codecs.decode(message, 'unicode_escape')
        except Exception:
            # Fallback for safety, though unicode_escape is robust
            message = message.replace('\\n', '\n')

        # For known user-facing errors, suppress the full traceback unless verbose is explicitly on.
        # The goal is to provide a clean error message for common issues.
        user_facing_errors = [
            "BOT_DETECTED", "BOT_DETECTION_SIGN_IN_REQUIRED",
            "VIDEO_UNAVAILABLE", "PRIVATE_VIDEO", "VIDEO_REMOVED",
            "AGE_GATED_SIGN_IN", "MEMBERS_ONLY", "VIDEO_PROCESSING", "GEO_RESTRICTED"
        ]
        is_user_facing_error = hasattr(e, 'errorCode') and e.errorCode in user_facing_errors

        # Only show full traceback in verbose mode AND if it's NOT a common user-facing error.
        show_exc_info = args.verbose and not is_user_facing_error

        logger.error(f"A Thrift error occurred: {message}", exc_info=show_exc_info)
        print(f"\n--- ERROR ---", file=sys.stderr)
        print(f"{message}", file=sys.stderr)

        if hasattr(e, 'context') and e.context and (args.verbose or not is_user_facing_error):
            print(f"\n--- CONTEXT ---", file=sys.stderr)
            # The context is a dict from thrift. Pretty print it, handling newlines in values.
            if isinstance(e.context, dict):
                # Process each value to un-escape newlines for clean printing
                processed_context = {}
                for key, value in e.context.items():
                    try:
                        processed_context[key] = codecs.decode(str(value), 'unicode_escape')
                    except Exception:
                        processed_context[key] = str(value).replace('\\n', '\n')
                print(json.dumps(processed_context, indent=2), file=sys.stderr)
            else:
                # Fallback for non-dict context
                print(str(e.context), file=sys.stderr)
        print("\n", file=sys.stderr)
        return 1
    except TTransport.TTransportException as e:
        logger.error(f"Connection to server failed: {e}", exc_info=args.verbose)
        print(f"Error: Connection to server at {args.host}:{args.port} failed.", file=sys.stderr)
        return 1
    except Exception as e:
        logger.exception(f"An unexpected error occurred: {e}")
        print(f"An unexpected error occurred: {e}", file=sys.stderr)
        return 1
    finally:
        if transport and transport.isOpen():
            transport.close()
            logger.info("Thrift connection closed.")
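Note: the --proxy-rename option handled above takes a sed-style s/pattern/replacement/ rule. A minimal standalone sketch of the same parse-and-substitute step, for reference only (the example rule and proxy URL are illustrative, not taken from this commit):

import re

def apply_rename_rule(proxy_url, rule):
    """Apply a sed-style s/pattern/replacement/ rule to a proxy URL."""
    if not (rule.startswith('s/') and rule.count('/') >= 2):
        raise ValueError("Expected a rule of the form s/pattern/replacement/")
    _, pattern, replacement = rule.split('/')[:3]
    return re.sub(pattern, replacement, proxy_url)

# Illustrative: point a locally-defined proxy at the Docker bridge address.
print(apply_rename_rule("socks5://localhost:1080", "s/localhost/172.17.0.1/"))
# -> socks5://172.17.0.1:1080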
228 ytops_client/list_formats_tool.py Normal file
@ -0,0 +1,228 @@
"""
|
||||||
|
Tool to list available formats from a yt-dlp info.json file.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import json
|
||||||
|
import argparse
|
||||||
|
import re
|
||||||
|
from urllib.parse import urlparse, parse_qs
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
|
def format_size(b):
|
||||||
|
"""Format size in bytes to human-readable string."""
|
||||||
|
if b is None:
|
||||||
|
return 'N/A'
|
||||||
|
if b < 1024:
|
||||||
|
return f"{b}B"
|
||||||
|
elif b < 1024**2:
|
||||||
|
return f"{b/1024:.2f}KiB"
|
||||||
|
elif b < 1024**3:
|
||||||
|
return f"{b/1024**2:.2f}MiB"
|
||||||
|
else:
|
||||||
|
return f"{b/1024**3:.2f}GiB"
|
||||||
|
|
||||||
|
def list_formats(info_json, requested_formats_str=None, file=sys.stdout):
|
||||||
|
"""Prints a table of available formats from info.json data."""
|
||||||
|
formats = info_json.get('formats', [])
|
||||||
|
if not formats:
|
||||||
|
print("No formats found in the provided info.json.", file=file)
|
||||||
|
return
|
||||||
|
|
||||||
|
requested_formats = []
|
||||||
|
requested_order = {}
|
||||||
|
if requested_formats_str:
|
||||||
|
# Split by comma or slash, and filter out empty strings
|
||||||
|
requested_formats = [item for item in re.split(r'[,/]', requested_formats_str) if item]
|
||||||
|
requested_order = {fmt: i for i, fmt in enumerate(requested_formats)}
|
||||||
|
|
||||||
|
def sort_key(f):
|
||||||
|
fid = f.get('format_id', '')
|
||||||
|
is_requested = fid in requested_order
|
||||||
|
if is_requested:
|
||||||
|
# Sort requested formats by the order they were provided
|
||||||
|
return (False, requested_order[fid])
|
||||||
|
else:
|
||||||
|
# Sort other formats numerically by ID
|
||||||
|
return (True, int(fid) if fid.isdigit() else 999)
|
||||||
|
|
||||||
|
sorted_formats = sorted(formats, key=sort_key)
|
||||||
|
|
||||||
|
# Check if any requested formats were found
|
||||||
|
if requested_formats:
|
||||||
|
found_any = any(f.get('format_id') in requested_order for f in formats)
|
||||||
|
if not found_any:
|
||||||
|
print("WARNING: No format from list found.", file=sys.stderr)
|
||||||
|
|
||||||
|
# Header
|
||||||
|
header = "{:<6} {:<7} {:<12} {:<5} {:<18} {:<18} {:<12} {:<10} {:<20} {:<17} {:<15} {:<12} {:<12} {:<12} {:<5} {:<12} {:<12} {:<12} {:<12} {:<12}".format(
|
||||||
|
"ID", "EXT", "RESOLUTION", "FPS", "VCODEC", "ACODEC", "FILESIZE", "TBR", "URL (path)", "EXPIRE (UTC)", "IP", "ID_TOKEN", "SESS_TOKEN", "EI_TOKEN", "GIR", "BUI_TOKEN", "POT_TOKEN", "MT_TOKEN", "SIG", "LSIG"
|
||||||
|
)
|
||||||
|
print(header, file=file)
|
||||||
|
print("-" * len(header), file=file)
|
||||||
|
|
||||||
|
for f in sorted_formats:
|
||||||
|
format_id = f.get('format_id', 'N/A')
|
||||||
|
ext = f.get('ext', 'N/A')
|
||||||
|
|
||||||
|
resolution = f.get('resolution')
|
||||||
|
if not resolution:
|
||||||
|
if 'width' in f and f['width'] is not None:
|
||||||
|
resolution = f"{f['width']}x{f['height']}"
|
||||||
|
else:
|
||||||
|
resolution = 'audio only'
|
||||||
|
|
||||||
|
fps = f.get('fps', '')
|
||||||
|
vcodec = f.get('vcodec', 'none')
|
||||||
|
acodec = f.get('acodec', 'none')
|
||||||
|
filesize = f.get('filesize') or f.get('filesize_approx')
|
||||||
|
tbr = f.get('tbr')
|
||||||
|
|
||||||
|
display_id = f"*{format_id}" if format_id in requested_order else format_id
|
||||||
|
|
||||||
|
url = f.get('url', '')
|
||||||
|
partial_url, expire_date, ip, id_token_short, sess_token_short, ei_token_short, gir, bui_token_short, pot_token_short, mt_token_short, sig_short, lsig_short = ('N/A',) * 12
|
||||||
|
if url:
|
||||||
|
parsed = urlparse(url)
|
||||||
|
query_params = parse_qs(parsed.query)
|
||||||
|
|
||||||
|
path_and_query = parsed.path
|
||||||
|
if parsed.query:
|
||||||
|
path_and_query += '?' + parsed.query
|
||||||
|
|
||||||
|
if len(path_and_query) > 18:
|
||||||
|
partial_url = path_and_query[:8] + '...' + path_and_query[-7:]
|
||||||
|
else:
|
||||||
|
partial_url = path_and_query
|
||||||
|
|
||||||
|
expire_ts = query_params.get('expire', [None])[0]
|
||||||
|
if expire_ts:
|
||||||
|
try:
|
||||||
|
expire_date = datetime.fromtimestamp(int(expire_ts), timezone.utc).strftime('%m-%d %H:%M:%S')
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
expire_date = 'Invalid'
|
||||||
|
|
||||||
|
ip = query_params.get('ip', ['N/A'])[0]
|
||||||
|
|
||||||
|
id_token = query_params.get('id', [None])[0]
|
||||||
|
if id_token and len(id_token) > 12:
|
||||||
|
id_token_short = id_token[:6] + '..' + id_token[-4:]
|
||||||
|
elif id_token:
|
||||||
|
id_token_short = id_token
|
||||||
|
|
||||||
|
sess_token = query_params.get('n', [None])[0]
|
||||||
|
if sess_token and len(sess_token) > 12:
|
||||||
|
sess_token_short = sess_token[:6] + '..' + sess_token[-4:]
|
||||||
|
elif sess_token:
|
||||||
|
sess_token_short = sess_token
|
||||||
|
|
||||||
|
ei_token = query_params.get('ei', [None])[0]
|
||||||
|
if ei_token and len(ei_token) > 12:
|
||||||
|
ei_token_short = ei_token[:6] + '..' + ei_token[-4:]
|
||||||
|
elif ei_token:
|
||||||
|
ei_token_short = ei_token
|
||||||
|
|
||||||
|
gir = query_params.get('gir', ['N/A'])[0]
|
||||||
|
|
||||||
|
bui_token = query_params.get('bui', [None])[0]
|
||||||
|
if bui_token and len(bui_token) > 12:
|
||||||
|
bui_token_short = bui_token[:6] + '..' + bui_token[-4:]
|
||||||
|
elif bui_token:
|
||||||
|
bui_token_short = bui_token
|
||||||
|
|
||||||
|
pot_token = query_params.get('pot', [None])[0]
|
||||||
|
if pot_token and len(pot_token) > 12:
|
||||||
|
pot_token_short = pot_token[:6] + '..' + pot_token[-4:]
|
||||||
|
elif pot_token:
|
||||||
|
pot_token_short = pot_token
|
||||||
|
|
||||||
|
mt_token = query_params.get('mt', [None])[0]
|
||||||
|
# mt is often just a timestamp, don't shorten unless it's a long hash
|
||||||
|
if mt_token and len(mt_token) > 12:
|
||||||
|
mt_token_short = mt_token[:6] + '..' + mt_token[-4:]
|
||||||
|
elif mt_token:
|
||||||
|
mt_token_short = mt_token
|
||||||
|
|
||||||
|
sig = query_params.get('sig', [None])[0]
|
||||||
|
if sig and len(sig) > 12:
|
||||||
|
sig_short = sig[:6] + '..' + sig[-4:]
|
||||||
|
elif sig:
|
||||||
|
sig_short = sig
|
||||||
|
|
||||||
|
lsig = query_params.get('lsig', [None])[0]
|
||||||
|
if lsig and len(lsig) > 12:
|
||||||
|
lsig_short = lsig[:6] + '..' + lsig[-4:]
|
||||||
|
elif lsig:
|
||||||
|
lsig_short = lsig
|
||||||
|
|
||||||
|
print("{:<6} {:<7} {:<12} {:<5} {:<18} {:<18} {:<12} {:<10} {:<20} {:<17} {:<15} {:<12} {:<12} {:<12} {:<5} {:<12} {:<12} {:<12} {:<12} {:<12}".format(
|
||||||
|
str(display_id),
|
||||||
|
str(ext),
|
||||||
|
str(resolution),
|
||||||
|
str(fps) if fps else '',
|
||||||
|
str(vcodec)[:18],
|
||||||
|
str(acodec)[:18],
|
||||||
|
format_size(filesize),
|
||||||
|
f"{tbr:.0f}k" if tbr else 'N/A',
|
||||||
|
partial_url,
|
||||||
|
expire_date,
|
||||||
|
ip,
|
||||||
|
id_token_short,
|
||||||
|
sess_token_short,
|
||||||
|
ei_token_short,
|
||||||
|
gir,
|
||||||
|
bui_token_short,
|
||||||
|
pot_token_short,
|
||||||
|
mt_token_short,
|
||||||
|
sig_short,
|
||||||
|
lsig_short
|
||||||
|
), file=file)
|
||||||
|
|
||||||
|
def add_list_formats_parser(subparsers):
|
||||||
|
"""Add the parser for the 'list-formats' command."""
|
||||||
|
parser = subparsers.add_parser(
|
||||||
|
'list-formats',
|
||||||
|
description="List available formats from a yt-dlp info.json file.",
|
||||||
|
formatter_class=argparse.RawTextHelpFormatter,
|
||||||
|
help="List available formats from a yt-dlp info.json file."
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
'--load-info-json',
|
||||||
|
type=argparse.FileType('r', encoding='utf-8'),
|
||||||
|
default=sys.stdin,
|
||||||
|
help="Path to the info.json file. Reads from stdin if not provided."
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
'-f', '--formats',
|
||||||
|
help='Comma or slash-separated list of format IDs to highlight and prioritize (e.g., "18,140,299/298").'
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
'-p', '--pass-through',
|
||||||
|
action='store_true',
|
||||||
|
help='Pass the input JSON through to stdout, printing the format list to stderr.'
|
||||||
|
)
|
||||||
|
return parser
|
||||||
|
|
||||||
|
def main_list_formats(args):
|
||||||
|
"""Main logic for the 'list-formats' command."""
|
||||||
|
try:
|
||||||
|
# Read the whole content to allow passing it through
|
||||||
|
info_json_content = args.load_info_json.read()
|
||||||
|
info_data = json.loads(info_json_content)
|
||||||
|
|
||||||
|
# Determine output stream for the format list
|
||||||
|
output_stream = sys.stderr if args.pass_through else sys.stdout
|
||||||
|
list_formats(info_data, args.formats, file=output_stream)
|
||||||
|
|
||||||
|
# If pass-through is enabled, print the original JSON to stdout
|
||||||
|
if args.pass_through:
|
||||||
|
# Use end='' because the read content likely includes a trailing newline
|
||||||
|
print(info_json_content, end='')
|
||||||
|
|
||||||
|
return 0
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
print("Error: Invalid JSON provided.", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
except Exception as e:
|
||||||
|
print(f"An unexpected error occurred: {e}", file=sys.stderr)
|
||||||
|
return 1
|
||||||
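For reference, list_formats can also be driven directly from Python. A small illustrative example with a made-up single-format info.json fragment (the format values are placeholders, real data comes from yt-dlp or the get-info tool, and the import path assumes the package layout shown in this commit):

import sys
from ytops_client.list_formats_tool import list_formats

# Placeholder info.json fragment with a single format entry.
sample_info = {
    "formats": [
        {
            "format_id": "18",
            "ext": "mp4",
            "resolution": "640x360",
            "vcodec": "avc1.42001E",
            "acodec": "mp4a.40.2",
            "filesize": 3456789,
            "tbr": 500.0,
            "url": "https://example.com/videoplayback?expire=1700000000&ip=203.0.113.5",
        }
    ]
}

# Highlight format 18 and send the table to stderr, as the -p/--pass-through path does.
list_formats(sample_info, requested_formats_str="18", file=sys.stderr)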
48 ytops_client/request_params_help.py Normal file
@ -0,0 +1,48 @@
# Using a separate file for this long help message to keep the main script clean.
# It's imported by client tools that use the --request-params-json argument.

REQUEST_PARAMS_HELP_STRING = """JSON string with per-request parameters to override server defaults.
Example of a full configuration JSON showing default values (use single quotes to wrap it):
'{
  "_comment": "This JSON object allows overriding server-side defaults for a single request.",
  "cookies_file_path": "/path/to/your/cookies.txt",

  "context_reuse_policy": {
    "enabled": true,
    "max_age_seconds": 86400,
    "reuse_visitor_id": true,
    "reuse_cookies": true
  },
  "_comment_context_reuse_policy": "Controls how the server reuses session context (cookies, visitor ID) from the account's previous successful request.",
  "_comment_reuse_visitor_id": "If true, reuses the visitor ID from the last session to maintain a consistent identity to YouTube. This is automatically disabled for TV clients to avoid bot detection.",

  "ytdlp_params": {
    "use_curl_prefetch": false,
    "skip_cache": false,
    "visitor_id_override_enabled": true,
    "extractor_args": {
      "youtubepot-bgutilhttp": {
        "base_url": "http://172.17.0.1:4416"
      },
      "youtube": {
        "pot_trace": "true",
        "formats": "duplicate",
        "player_js_version": "actual"
      },
      "youtubepot-webpo": {
        "bind_to_visitor_id": "true"
      }
    }
  },
  "_comment_ytdlp_params": "Parameters passed directly to the yt-dlp wrapper for info.json generation.",
  "_comment_visitor_id_override_enabled": "If true (default), the server validates the visitor ID from the token generator and creates a new one if it is invalid. Set to false to force using the provided visitor ID without validation, which is useful for debugging.",
  "_comment_extractor_args": "Directly override yt-dlp extractor arguments. To use BGUtils in script mode, replace 'youtubepot-bgutilhttp' with 'youtubepot-bgutilscript'. The script path is '/opt/bgutil-ytdlp-pot-provider-server/build/generate_once.js'. To disable any explicit provider (like '--bgutils-mode none' on the server), remove both 'youtubepot-bgutilhttp' and 'youtubepot-bgutilscript' keys.",

  "session_params": {
    "lang": "en-US",
    "location": "US",
    "deviceCategory": "MOBILE",
    "user_agent": "Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)"
  },
  "_comment_session_params": "Parameters for the token generation session (primarily for Node.js)."
}'"""
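A sketch of how a client tool might consume this help string, assuming it attaches REQUEST_PARAMS_HELP_STRING to its own --request-params-json option and forwards the parsed JSON as request_params (the exact wiring in the real client tools may differ):

import argparse
import json

from ytops_client.request_params_help import REQUEST_PARAMS_HELP_STRING

parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
# Hypothetical wiring; only the help text itself comes from this module.
parser.add_argument('--request-params-json', help=REQUEST_PARAMS_HELP_STRING)

args = parser.parse_args(['--request-params-json', '{"ytdlp_params": {"skip_cache": true}}'])
request_params = json.loads(args.request_params_json) if args.request_params_json else None
print(request_params)  # {'ytdlp_params': {'skip_cache': True}}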
788 ytops_client/stress_formats_tool.py Normal file
@ -0,0 +1,788 @@
#!/usr/bin/env python3
"""
Tool to stress-test video format download URLs from an info.json.
"""

import argparse
import collections
import concurrent.futures
import json
import logging
import os
import random
import re
import shlex
import signal
import subprocess
import sys
import threading
import time
from datetime import datetime, timezone
from pathlib import Path
from urllib.parse import urlparse, parse_qs

# Configure logging
logger = logging.getLogger('stress_formats_tool')


def get_video_id(url: str) -> str:
    """Extracts a YouTube video ID from a URL."""
    # For URLs like https://www.youtube.com/watch?v=VIDEO_ID
    match = re.search(r"v=([0-9A-Za-z_-]{11})", url)
    if match:
        return match.group(1)
    # For URLs like https://youtu.be/VIDEO_ID
    match = re.search(r"youtu\.be\/([0-9A-Za-z_-]{11})", url)
    if match:
        return match.group(1)
    # For plain video IDs
    if re.fullmatch(r'[0-9A-Za-z_-]{11}', url):
        return url
    return "unknown_video_id"


def get_display_name(path_or_url):
    """Returns a clean name for logging, either a filename or a video ID."""
    if isinstance(path_or_url, Path):
        return path_or_url.name

    path_str = str(path_or_url)
    video_id = get_video_id(path_str)
    if video_id != "unknown_video_id":
        return video_id

    # Fallback for file paths as strings or weird URLs
    return Path(path_str).name


def format_size(b):
    """Format size in bytes to human-readable string."""
    if b is None:
        return 'N/A'
    if b < 1024:
        return f"{b}B"
    elif b < 1024**2:
        return f"{b/1024:.2f}KiB"
    elif b < 1024**3:
        return f"{b/1024**2:.2f}MiB"
    else:
        return f"{b/1024**3:.2f}GiB"

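get_video_id above accepts full watch URLs, youtu.be short links, or bare 11-character IDs. A quick illustrative check (the ID below is a placeholder, and the import path assumes the package layout in this commit):

from ytops_client.stress_formats_tool import get_video_id

assert get_video_id("https://www.youtube.com/watch?v=dQw4w9WgXcQ") == "dQw4w9WgXcQ"
assert get_video_id("https://youtu.be/dQw4w9WgXcQ") == "dQw4w9WgXcQ"
assert get_video_id("dQw4w9WgXcQ") == "dQw4w9WgXcQ"
assert get_video_id("not a video link") == "unknown_video_id"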
class StatsTracker:
|
||||||
|
"""Tracks and reports statistics for the stress test."""
|
||||||
|
def __init__(self, stats_file=None):
|
||||||
|
self.events = []
|
||||||
|
self.start_time = time.time()
|
||||||
|
self.lock = threading.Lock()
|
||||||
|
self.stats_file_path = stats_file
|
||||||
|
self.stats_file_handle = None
|
||||||
|
if self.stats_file_path:
|
||||||
|
try:
|
||||||
|
self.stats_file_handle = open(self.stats_file_path, 'a', encoding='utf-8')
|
||||||
|
except IOError as e:
|
||||||
|
logger.error(f"Could not open stats file {self.stats_file_path}: {e}")
|
||||||
|
|
||||||
|
def log_event(self, event_data):
|
||||||
|
"""Log a download attempt event."""
|
||||||
|
with self.lock:
|
||||||
|
event_data['timestamp'] = datetime.now().isoformat()
|
||||||
|
self.events.append(event_data)
|
||||||
|
if self.stats_file_handle:
|
||||||
|
self.stats_file_handle.write(json.dumps(event_data) + '\n')
|
||||||
|
self.stats_file_handle.flush()
|
||||||
|
|
||||||
|
def close(self):
|
||||||
|
"""Close the stats file."""
|
||||||
|
if self.stats_file_handle:
|
||||||
|
self.stats_file_handle.close()
|
||||||
|
|
||||||
|
def print_summary(self):
|
||||||
|
"""Print a summary of the test run."""
|
||||||
|
with self.lock:
|
||||||
|
if not self.events:
|
||||||
|
logger.info("No events were recorded.")
|
||||||
|
return
|
||||||
|
|
||||||
|
duration = time.time() - self.start_time
|
||||||
|
|
||||||
|
# Separate events by type
|
||||||
|
fetch_events = [e for e in self.events if e.get('type') == 'fetch']
|
||||||
|
download_events = [e for e in self.events if e.get('type') != 'fetch'] # Default to download for old events
|
||||||
|
|
||||||
|
logger.info("\n--- Test Summary ---")
|
||||||
|
logger.info(f"Total duration: {duration:.2f} seconds")
|
||||||
|
|
||||||
|
if fetch_events:
|
||||||
|
total_fetches = len(fetch_events)
|
||||||
|
successful_fetches = sum(1 for e in fetch_events if e['success'])
|
||||||
|
failed_fetches = total_fetches - successful_fetches
|
||||||
|
logger.info("\n--- Fetch Summary ---")
|
||||||
|
logger.info(f"Total info.json fetch attempts: {total_fetches}")
|
||||||
|
logger.info(f" - Successful: {successful_fetches}")
|
||||||
|
logger.info(f" - Failed: {failed_fetches}")
|
||||||
|
if total_fetches > 0:
|
||||||
|
success_rate = (successful_fetches / total_fetches) * 100
|
||||||
|
logger.info(f"Success rate: {success_rate:.2f}%")
|
||||||
|
if failed_fetches > 0:
|
||||||
|
error_counts = collections.Counter(e.get('error_type', 'Unknown') for e in fetch_events if not e['success'])
|
||||||
|
logger.info("Failure breakdown:")
|
||||||
|
for error_type, count in sorted(error_counts.items()):
|
||||||
|
logger.info(f" - {error_type}: {count}")
|
||||||
|
|
||||||
|
if download_events:
|
||||||
|
total_attempts = len(download_events)
|
||||||
|
successes = sum(1 for e in download_events if e['success'])
|
||||||
|
failures = total_attempts - successes
|
||||||
|
|
||||||
|
logger.info("\n--- Download Summary ---")
|
||||||
|
logger.info(f"Total download attempts: {total_attempts}")
|
||||||
|
logger.info(f" - Successful: {successes}")
|
||||||
|
logger.info(f" - Failed: {failures}")
|
||||||
|
|
||||||
|
if total_attempts > 0:
|
||||||
|
success_rate = (successes / total_attempts) * 100
|
||||||
|
logger.info(f"Success rate: {success_rate:.2f}%")
|
||||||
|
|
||||||
|
if duration > 1 and total_attempts > 0:
|
||||||
|
dpm = (total_attempts / duration) * 60
|
||||||
|
logger.info(f"Attempt rate: {dpm:.2f} attempts/minute")
|
||||||
|
|
||||||
|
# Download volume stats
|
||||||
|
total_bytes = sum(e.get('downloaded_bytes', 0) for e in download_events if e['success'])
|
||||||
|
if total_bytes > 0:
|
||||||
|
logger.info(f"Total data downloaded: {format_size(total_bytes)}")
|
||||||
|
if duration > 1:
|
||||||
|
bytes_per_second = total_bytes / duration
|
||||||
|
gb_per_hour = (bytes_per_second * 3600) / (1024**3)
|
||||||
|
gb_per_day = gb_per_hour * 24
|
||||||
|
logger.info(f"Download rate: {gb_per_hour:.3f} GB/hour ({gb_per_day:.3f} GB/day)")
|
||||||
|
|
||||||
|
if failures > 0:
|
||||||
|
error_counts = collections.Counter(e.get('error_type', 'Unknown') for e in download_events if not e['success'])
|
||||||
|
logger.info("Failure breakdown:")
|
||||||
|
for error_type, count in sorted(error_counts.items()):
|
||||||
|
logger.info(f" - {error_type}: {count}")
|
||||||
|
|
||||||
|
logger.info("--------------------")
|
||||||
|
|
||||||
|
def print_banner(args, info_jsons=None, urls=None):
|
||||||
|
"""Prints a summary of the test configuration."""
|
||||||
|
logger.info("--- Stress Test Configuration ---")
|
||||||
|
if args.urls_file:
|
||||||
|
if args.fetch_only:
|
||||||
|
logger.info(f"Mode: Fetch-only. Generating info.json files from URL list.")
|
||||||
|
else:
|
||||||
|
logger.info(f"Mode: Full-stack test from URL list.")
|
||||||
|
logger.info(f"URL file: {args.urls_file} ({len(urls)} URLs)")
|
||||||
|
logger.info(f"Workers: {args.workers}")
|
||||||
|
logger.info(f"Info.json command: {args.info_json_gen_cmd}")
|
||||||
|
if args.info_json_gen_cmd_alt and args.alt_cmd_every_n > 0:
|
||||||
|
logger.info(f"Alternate command (every {args.alt_cmd_every_n} URLs): {args.info_json_gen_cmd_alt}")
|
||||||
|
if args.profile_prefix:
|
||||||
|
if args.profile_pool:
|
||||||
|
logger.info(f"Profile mode: Pool of {args.profile_pool} (prefix: {args.profile_prefix})")
|
||||||
|
elif args.profile_per_request:
|
||||||
|
logger.info(f"Profile mode: New profile per request (prefix: {args.profile_prefix})")
|
||||||
|
else: # info-json-files
|
||||||
|
logger.info(f"Mode: Download-only from static info.json files.")
|
||||||
|
if info_jsons:
|
||||||
|
logger.info(f"Files: {', '.join(str(p.name) for p in info_jsons.keys())}")
|
||||||
|
logger.info(f"Workers: {args.workers}")
|
||||||
|
|
||||||
|
logger.info(f"Format selection: {args.format}")
|
||||||
|
logger.info(f"Sleep between cycles: {args.sleep}s")
|
||||||
|
if args.sleep_formats > 0:
|
||||||
|
logger.info(f"Sleep between formats: {args.sleep_formats}s")
|
||||||
|
if args.duration > 0:
|
||||||
|
logger.info(f"Test duration: {args.duration} minutes")
|
||||||
|
if args.max_attempts > 0:
|
||||||
|
logger.info(f"Max cycles: {args.max_attempts}")
|
||||||
|
logger.info(f"Stop on failure: {args.stop_on_failure}")
|
||||||
|
if args.stop_on_403:
|
||||||
|
logger.info(f"Stop on 403 error: True")
|
||||||
|
if args.stop_on_timeout:
|
||||||
|
logger.info(f"Stop on timeout: True")
|
||||||
|
logger.info(f"Stats file: {args.stats_file}")
|
||||||
|
if args.stats_interval > 0:
|
||||||
|
logger.info(f"Periodic stats interval: {args.stats_interval}s")
|
||||||
|
if args.format_download_args:
|
||||||
|
logger.info(f"Extra download args: {args.format_download_args}")
|
||||||
|
logger.info("Download volume: Tracking total data downloaded")
|
||||||
|
logger.info("---------------------------------")
|
||||||
|
|
||||||
|
def add_stress_formats_parser(subparsers):
|
||||||
|
"""Add the parser for the 'stress-formats' command."""
|
||||||
|
parser = subparsers.add_parser(
|
||||||
|
'stress-formats',
|
||||||
|
description="A simple, command-line driven stress-testing tool for basic scenarios.\nAll options are configured via flags. For more complex scenarios and advanced\nfeatures like rate limiting and client rotation, use the 'stress-policy' command.",
|
||||||
|
formatter_class=argparse.RawTextHelpFormatter,
|
||||||
|
help='Run simple, flag-driven stress tests.',
|
||||||
|
epilog="""
|
||||||
|
Usage examples:
|
||||||
|
|
||||||
|
# Test a format from a static info.json every 60 seconds
|
||||||
|
ytops-client stress-formats --info-json-files my_video.json -f 18 --sleep 60
|
||||||
|
|
||||||
|
# Test with multiple info.json files in parallel using 4 workers
|
||||||
|
ytops-client stress-formats --info-json-files "file1.json,file2.json,file3.json" -f 18 --sleep 60 --workers 4
|
||||||
|
|
||||||
|
# Fetch a new info.json for a URL and test a format every 5 minutes
|
||||||
|
ytops-client stress-formats --urls-file urls.txt --info-json-gen-cmd "bin/ytops-client get-info {url}" -f "18" --sleep 300
|
||||||
|
|
||||||
|
# Run the test for exactly 10 cycles, continuing on failure
|
||||||
|
ytops-client stress-formats --info-json-files my_video.json -f 18 --sleep 10 --max-attempts 10 --no-stop-on-failure
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
source_group = parser.add_mutually_exclusive_group(required=True)
|
||||||
|
source_group.add_argument('--info-json-files', help='Comma-separated paths to static info.json files to use for testing.')
|
||||||
|
source_group.add_argument('--urls-file', help='Path to a file with URLs/IDs to test. Can be a text file (one per line) or a JSON array of strings.')
|
||||||
|
|
||||||
|
parser.add_argument('-f', '--format', help='The format selection string. Can be a comma-separated list of IDs (e.g., "18,137"), "all", "random:X%%" (e.g., "random:10%%"), or "random_from:ID1,ID2,..." to pick one from a list. Required unless --fetch-only is used.')
|
||||||
|
parser.add_argument('--sleep', type=int, default=60, help='Seconds to wait between batches of download attempts. Default: 60.')
|
||||||
|
parser.add_argument('--sleep-formats', type=int, default=0, help='Seconds to wait between format downloads within a single file/cycle. Default: 0.')
|
||||||
|
parser.add_argument('--max-attempts', type=int, default=0, help='Maximum number of test cycles. 0 means run indefinitely. Default: 0.')
|
||||||
|
parser.add_argument('--duration', type=int, default=0, help='Total duration to run the test in minutes. 0 means run indefinitely (or until max-attempts is reached). Default: 0.')
|
||||||
|
parser.add_argument('--stop-on-failure', action='store_true', help='Stop the test immediately after the first download failure.')
|
||||||
|
parser.add_argument('--no-stop-on-failure', dest='stop_on_failure', action='store_false', help='Continue testing even after a download failure. (Default)')
|
||||||
|
parser.set_defaults(stop_on_failure=False)
|
||||||
|
parser.add_argument('--stop-on-403', action='store_true', help='Stop the test immediately after a 403 Forbidden error.')
|
||||||
|
parser.add_argument('--stop-on-timeout', action='store_true', help='Stop the test immediately after a read timeout error.')
|
||||||
|
|
||||||
|
parser.add_argument('--fetch-only', action='store_true', help='When used with --urls-file, only fetch and save info.json files without performing download tests.')
|
||||||
|
|
||||||
|
parser.add_argument('--workers', type=int, default=1, help='Number of parallel workers for multi-file mode. Default: 1.')
|
||||||
|
parser.add_argument('--stats-file', default='stress_test_stats.jsonl', help='File to log statistics for each attempt. Default: stress_test_stats.jsonl')
|
||||||
|
parser.add_argument('--stats-interval', type=int, default=0, help='Interval in seconds to print stats summary periodically. 0 disables. Default: 0.')
|
||||||
|
|
||||||
|
# Arguments for info.json generation
|
||||||
|
parser.add_argument('--info-json-gen-cmd', help='Command template to generate info.json. Use {url}, {worker_id}, {cycle}, and {profile} as placeholders. Required with --urls-file.')
|
||||||
|
parser.add_argument('--info-json-gen-cmd-alt', help='Alternate command template for info.json generation.')
|
||||||
|
parser.add_argument('--alt-cmd-every-n', type=int, default=0, help='Use the alternate command for every N-th URL (e.g., N=3 means URLs 3, 6, 9...). Requires --info-json-gen-cmd-alt.')
|
||||||
|
|
||||||
|
# Profile generation options
|
||||||
|
profile_group = parser.add_argument_group('Profile Generation Options (for --urls-file mode)')
|
||||||
|
profile_group.add_argument('--profile-prefix', help='Base name for generated profile IDs (e.g., "test_user"). Used with --profile-pool or --profile-per-request.')
|
||||||
|
profile_group.add_argument('--profile-pool', type=int, metavar='N', help='Use a pool of N profiles. Profile ID will be {prefix}_{worker_id %% N}. Requires --profile-prefix.')
|
||||||
|
profile_group.add_argument('--profile-per-request', action='store_true', help='Generate a new unique profile ID for each request. Profile ID will be {prefix}_{timestamp}_{worker_id}. Requires --profile-prefix.')
|
||||||
|
|
||||||
|
# Arguments to pass to format_download.py
|
||||||
|
parser.add_argument('--format-download-args', nargs='+', help='Additional arguments to pass to the download tool. E.g., --proxy-rename s/old/new/ --cleanup')
|
||||||
|
|
||||||
|
parser.add_argument('--verbose', action='store_true', help='Enable verbose output.')
|
||||||
|
return parser
|
||||||
|
|
||||||
|
def run_command(cmd, input_data=None):
|
||||||
|
"""Runs a command, captures its output, and returns status."""
|
||||||
|
logger.debug(f"Running command: {' '.join(cmd)}")
|
||||||
|
try:
|
||||||
|
process = subprocess.Popen(
|
||||||
|
cmd,
|
||||||
|
stdin=subprocess.PIPE if input_data else None,
|
||||||
|
stdout=subprocess.PIPE,
|
||||||
|
stderr=subprocess.PIPE,
|
||||||
|
text=True,
|
||||||
|
encoding='utf-8'
|
||||||
|
)
|
||||||
|
stdout, stderr = process.communicate(input=input_data)
|
||||||
|
return process.returncode, stdout, stderr
|
||||||
|
except FileNotFoundError:
|
||||||
|
logger.error(f"Command not found: {cmd[0]}. Make sure it's in your PATH.")
|
||||||
|
return -1, "", f"Command not found: {cmd[0]}"
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"An error occurred while running command: {' '.join(cmd)}. Error: {e}")
|
||||||
|
return -1, "", str(e)
|
||||||
|
|
||||||
|
def run_download_worker(info_json_path, info_json_content, format_to_download, args):
|
||||||
|
"""
|
||||||
|
Performs a single download attempt. Designed to be run in a worker thread.
|
||||||
|
"""
|
||||||
|
# 1. Attempt download
|
||||||
|
download_cmd = [
|
||||||
|
sys.executable, '-m', 'ytops_client.cli', 'download',
|
||||||
|
'-f', format_to_download
|
||||||
|
]
|
||||||
|
if args.format_download_args:
|
||||||
|
# with nargs='+', this is a list.
|
||||||
|
# If it's one item, it might be a single quoted string of args that needs splitting.
|
||||||
|
if len(args.format_download_args) == 1:
|
||||||
|
download_cmd.extend(shlex.split(args.format_download_args[0]))
|
||||||
|
else:
|
||||||
|
# multiple items, assume they are already split by shell
|
||||||
|
download_cmd.extend(args.format_download_args)
|
||||||
|
|
||||||
|
display_name = get_display_name(info_json_path)
|
||||||
|
logger.info(f"[{display_name} @ {format_to_download}] Kicking off download process...")
|
||||||
|
retcode, stdout, stderr = run_command(download_cmd, input_data=info_json_content)
|
||||||
|
|
||||||
|
# 2. Check result
|
||||||
|
is_403_error = "HTTP Error 403" in stderr
|
||||||
|
is_timeout_error = "Read timed out" in stderr
|
||||||
|
|
||||||
|
result = {
|
||||||
|
'type': 'download',
|
||||||
|
'path': str(info_json_path),
|
||||||
|
'format': format_to_download,
|
||||||
|
'success': retcode == 0,
|
||||||
|
'error_type': None,
|
||||||
|
'details': '',
|
||||||
|
'downloaded_bytes': 0
|
||||||
|
}
|
||||||
|
|
||||||
|
if retcode == 0:
|
||||||
|
# Success
|
||||||
|
downloaded_filepath = ''
|
||||||
|
# The filename is the last non-empty line of stdout that doesn't look like a progress bar
|
||||||
|
lines = stdout.splitlines()
|
||||||
|
for line in reversed(lines):
|
||||||
|
if line and not line.strip().startswith('['):
|
||||||
|
downloaded_filepath = line.strip()
|
||||||
|
break
|
||||||
|
|
||||||
|
details_str = "OK"
|
||||||
|
if downloaded_filepath:
|
||||||
|
details_str = f"Downloaded: {Path(downloaded_filepath).name}"
|
||||||
|
|
||||||
|
# Parse download size from stderr
|
||||||
|
size_in_bytes = 0
|
||||||
|
size_match = re.search(r'\[download\]\s+100%\s+of\s+~?([0-9.]+)(B|KiB|MiB|GiB)', stderr)
|
||||||
|
if size_match:
|
||||||
|
value = float(size_match.group(1))
|
||||||
|
unit = size_match.group(2)
|
||||||
|
multipliers = {"B": 1, "KiB": 1024, "MiB": 1024**2, "GiB": 1024**3}
|
||||||
|
size_in_bytes = int(value * multipliers.get(unit, 1))
|
||||||
|
result['downloaded_bytes'] = size_in_bytes
|
||||||
|
details_str += f" ({size_match.group(1)}{unit})"
|
||||||
|
|
||||||
|
result['details'] = details_str
|
||||||
|
else:
|
||||||
|
# Failure
|
||||||
|
# Try to get the most relevant error line
|
||||||
|
error_lines = [line for line in stderr.strip().split('\n') if 'ERROR:' in line]
|
||||||
|
if error_lines:
|
||||||
|
result['details'] = error_lines[-1]
|
||||||
|
else:
|
||||||
|
# If no "ERROR:" line, use the last few lines of stderr for context.
|
||||||
|
last_lines = stderr.strip().split('\n')[-3:] # Get up to last 3 lines
|
||||||
|
result['details'] = ' | '.join(line.strip() for line in last_lines if line.strip())
|
||||||
|
if not result['details']:
|
||||||
|
result['details'] = "Unknown error (stderr was empty)"
|
||||||
|
|
||||||
|
if is_403_error:
|
||||||
|
result['error_type'] = 'HTTP 403'
|
||||||
|
elif is_timeout_error:
|
||||||
|
result['error_type'] = 'Timeout'
|
||||||
|
else:
|
||||||
|
result['error_type'] = f'Exit Code {retcode}'
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def process_info_json_cycle(path, content, args, stats):
|
||||||
|
"""
|
||||||
|
Processes one info.json file for one cycle, downloading selected formats sequentially.
|
||||||
|
Logs events and returns a list of results.
|
||||||
|
"""
|
||||||
|
results = []
|
||||||
|
should_stop_file = False
|
||||||
|
display_name = get_display_name(path)
|
||||||
|
|
||||||
|
# Determine formats to test based on the info.json content
|
||||||
|
try:
|
||||||
|
info_data = json.loads(content)
|
||||||
|
available_formats = info_data.get('formats', [])
|
||||||
|
if not available_formats:
|
||||||
|
logger.warning(f"[{display_name}] No formats found in info.json. Skipping.")
|
||||||
|
return []
|
||||||
|
|
||||||
|
available_format_ids = [f['format_id'] for f in available_formats]
|
||||||
|
|
||||||
|
formats_to_test = []
|
||||||
|
format_selection_mode = args.format.lower()
|
||||||
|
|
||||||
|
if format_selection_mode == 'all':
|
||||||
|
formats_to_test = available_format_ids
|
||||||
|
logger.info(f"[{display_name}] Testing all {len(formats_to_test)} available formats.")
|
||||||
|
elif format_selection_mode.startswith('random:'):
|
||||||
|
try:
|
||||||
|
percent_str = format_selection_mode.split(':')[1].rstrip('%')
|
||||||
|
percent = float(percent_str)
|
||||||
|
if not (0 < percent <= 100):
|
||||||
|
raise ValueError("Percentage must be between 0 and 100.")
|
||||||
|
|
||||||
|
count = max(1, int(len(available_format_ids) * (percent / 100.0)))
|
||||||
|
formats_to_test = random.sample(available_format_ids, k=count)
|
||||||
|
logger.info(f"[{display_name}] Randomly selected {len(formats_to_test)} formats ({percent}%) from all available to test: {', '.join(formats_to_test)}")
|
||||||
|
except (ValueError, IndexError) as e:
|
||||||
|
logger.error(f"[{display_name}] Invalid random format selection '{args.format}': {e}. Skipping.")
|
||||||
|
return []
|
||||||
|
elif format_selection_mode.startswith('random_from:'):
|
||||||
|
try:
|
||||||
|
choices_str = format_selection_mode.split(':', 1)[1]
|
||||||
|
if not choices_str:
|
||||||
|
raise ValueError("No formats provided after 'random_from:'.")
|
||||||
|
|
||||||
|
format_choices = [f.strip() for f in choices_str.split(',') if f.strip()]
|
||||||
|
|
||||||
|
# Filter the choices to only those available in the current info.json
|
||||||
|
valid_choices = [f for f in format_choices if f in available_format_ids]
|
||||||
|
|
||||||
|
if not valid_choices:
|
||||||
|
logger.warning(f"[{display_name}] None of the requested formats for random selection ({', '.join(format_choices)}) are available. Skipping.")
|
||||||
|
return []
|
||||||
|
|
||||||
|
formats_to_test = [random.choice(valid_choices)]
|
||||||
|
logger.info(f"[{display_name}] Randomly selected 1 format from your list to test: {formats_to_test[0]}")
|
||||||
|
except (ValueError, IndexError) as e:
|
||||||
|
logger.error(f"[{display_name}] Invalid random_from format selection '{args.format}': {e}. Skipping.")
|
||||||
|
return []
|
||||||
|
else:
|
||||||
|
# Standard comma-separated list
|
||||||
|
requested_formats = [f.strip() for f in args.format.split(',') if f.strip()]
|
||||||
|
formats_to_test = []
|
||||||
|
for req_fmt in requested_formats:
|
||||||
|
# Check for exact match first
|
||||||
|
if req_fmt in available_format_ids:
|
||||||
|
formats_to_test.append(req_fmt)
|
||||||
|
continue
|
||||||
|
|
||||||
|
# If no exact match, check for formats that start with this ID + '-'
|
||||||
|
# e.g., req_fmt '140' should match '140-0'
|
||||||
|
prefix_match = f"{req_fmt}-"
|
||||||
|
first_match = next((af for af in available_format_ids if af.startswith(prefix_match)), None)
|
||||||
|
|
||||||
|
if first_match:
|
||||||
|
logger.info(f"[{display_name}] Requested format '{req_fmt}' not found. Using first available match: '{first_match}'.")
|
||||||
|
formats_to_test.append(first_match)
|
||||||
|
else:
|
||||||
|
# This could be a complex selector like 'bestvideo' or '299/298', so keep it.
|
||||||
|
if req_fmt not in available_format_ids:
|
||||||
|
logger.warning(f"[{display_name}] Requested format '{req_fmt}' not found in available formats.")
|
||||||
|
formats_to_test.append(req_fmt)
|
||||||
|
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
logger.error(f"[{display_name}] Failed to parse info.json. Skipping.")
|
||||||
|
return []
|
||||||
|
|
||||||
|
for i, format_id in enumerate(formats_to_test):
|
||||||
|
if should_stop_file:
|
||||||
|
break
|
||||||
|
|
||||||
|
# Check if the format URL is expired before attempting to download
|
||||||
|
format_details = next((f for f in available_formats if f.get('format_id') == format_id), None)
|
||||||
|
if format_details and 'url' in format_details:
|
||||||
|
parsed_url = urlparse(format_details['url'])
|
||||||
|
query_params = parse_qs(parsed_url.query)
|
||||||
|
expire_ts_str = query_params.get('expire', [None])[0]
|
||||||
|
if expire_ts_str and expire_ts_str.isdigit():
|
||||||
|
expire_ts = int(expire_ts_str)
|
||||||
|
if expire_ts < time.time():
|
||||||
|
logger.warning(f"[{display_name}] Skipping format '{format_id}' because its URL is expired.")
|
||||||
|
result = {
|
||||||
|
'type': 'download', 'path': str(path), 'format': format_id,
|
||||||
|
'success': True, 'error_type': 'Skipped',
|
||||||
|
'details': 'Download URL is expired', 'downloaded_bytes': 0
|
||||||
|
}
|
||||||
|
stats.log_event(result)
|
||||||
|
results.append(result)
|
||||||
|
continue # Move to the next format
|
||||||
|
|
||||||
|
result = run_download_worker(path, content, format_id, args)
|
||||||
|
stats.log_event(result)
|
||||||
|
results.append(result)
|
||||||
|
|
||||||
|
status = "SUCCESS" if result['success'] else f"FAILURE ({result['error_type']})"
|
||||||
|
logger.info(f"Result for {display_name} (format {format_id}): {status} - {result.get('details', 'OK')}")
|
||||||
|
|
||||||
|
if not result['success']:
|
||||||
|
# This flag stops processing more formats for THIS file in this cycle
|
||||||
|
# The main loop will decide if all cycles should stop.
|
||||||
|
if args.stop_on_failure or \
|
||||||
|
(args.stop_on_403 and result['error_type'] == 'HTTP 403') or \
|
||||||
|
(args.stop_on_timeout and result['error_type'] == 'Timeout'):
|
||||||
|
logger.info(f"Stopping further format tests for {display_name} in this cycle due to failure.")
|
||||||
|
should_stop_file = True
|
||||||
|
|
||||||
|
# Sleep between formats if needed
|
||||||
|
if args.sleep_formats > 0 and i < len(formats_to_test) - 1:
|
||||||
|
logger.info(f"Sleeping for {args.sleep_formats}s before next format for {display_name}...")
|
||||||
|
time.sleep(args.sleep_formats)
|
||||||
|
|
||||||
|
return results
|
||||||
|
|
||||||
|
|
||||||
|
def main_stress_formats(args):
|
||||||
|
"""Main logic for the 'stress-formats' command."""
|
||||||
|
# The --format argument is required unless we are only fetching info.json files.
|
||||||
|
if not args.fetch_only and not args.format:
|
||||||
|
logger.error("Error: argument -f/--format is required when not using --fetch-only.")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
if (args.profile_pool or args.profile_per_request) and not args.profile_prefix:
|
||||||
|
logger.error("--profile-prefix is required when using --profile-pool or --profile-per-request.")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
if args.urls_file and args.fetch_only and not args.info_json_gen_cmd:
|
||||||
|
logger.error("--info-json-gen-cmd is required when using --urls-file with --fetch-only.")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
if args.verbose:
|
||||||
|
logging.getLogger().setLevel(logging.DEBUG)
|
||||||
|
else:
|
||||||
|
# Make the default logger more concise for test output
|
||||||
|
for handler in logging.root.handlers:
|
||||||
|
handler.setFormatter(logging.Formatter('%(asctime)s - %(message)s', datefmt='%H:%M:%S'))
|
||||||
|
|
||||||
|
stats = StatsTracker(args.stats_file)
|
||||||
|
start_time = time.time()
|
||||||
|
duration_seconds = args.duration * 60 if args.duration > 0 else 0
|
||||||
|
|
||||||
|
# --- Load sources ---
|
||||||
|
info_jsons = {}
|
||||||
|
urls = []
|
||||||
|
if args.info_json_files:
|
||||||
|
info_json_files = [Path(p.strip()) for p in args.info_json_files.split(',')]
|
||||||
|
for file_path in info_json_files:
|
||||||
|
if not file_path.is_file():
|
||||||
|
logger.error(f"Info.json file not found: {file_path}")
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
with open(file_path, 'r', encoding='utf-8') as f:
|
||||||
|
info_jsons[file_path] = f.read()
|
||||||
|
except (IOError, json.JSONDecodeError) as e:
|
||||||
|
logger.error(f"Failed to read or parse {file_path}: {e}")
|
||||||
|
|
||||||
|
if not info_jsons:
|
||||||
|
logger.error("No valid info.json files to process. Exiting.")
|
||||||
|
return 1
|
||||||
|
logger.info(f"Loaded {len(info_jsons)} info.json file(s).")
|
||||||
|
print_banner(args, info_jsons=info_jsons)
|
||||||
|
|
||||||
|
elif args.urls_file:
|
||||||
|
if not args.info_json_gen_cmd:
|
||||||
|
logger.error("--info-json-gen-cmd is required when using --urls-file.")
|
||||||
|
return 1
|
||||||
|
try:
|
||||||
|
with open(args.urls_file, 'r', encoding='utf-8') as f:
|
||||||
|
content = f.read()
|
||||||
|
# Try parsing as JSON array first
|
||||||
|
try:
|
||||||
|
data = json.loads(content)
|
||||||
|
if isinstance(data, list) and all(isinstance(item, str) for item in data):
|
||||||
|
urls = data
|
||||||
|
logger.info(f"Loaded {len(urls)} URLs/IDs from JSON array in {args.urls_file}.")
|
||||||
|
else:
|
||||||
|
# Valid JSON, but not a list of strings. Treat as error to avoid confusion.
|
||||||
|
logger.error(f"URL file '{args.urls_file}' is valid JSON but not an array of strings.")
|
||||||
|
return 1
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
# Fallback to line-by-line parsing for plain text files
|
||||||
|
urls = [line.strip() for line in content.splitlines() if line.strip()]
|
||||||
|
logger.info(f"Loaded {len(urls)} URLs/IDs from text file {args.urls_file}.")
|
||||||
|
|
||||||
|
if not urls:
|
||||||
|
logger.error(f"URL file '{args.urls_file}' is empty or contains no valid URLs/IDs.")
|
||||||
|
return 1
|
||||||
|
except IOError as e:
|
||||||
|
logger.error(f"Failed to read URL file {args.urls_file}: {e}")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
# Clean up URLs/IDs which might have extra quotes, commas, or brackets from copy-pasting
|
||||||
|
cleaned_urls = []
|
||||||
|
for url in urls:
|
||||||
|
# Strip whitespace, then trailing comma, then surrounding junk, then whitespace again
|
||||||
|
cleaned_url = url.strip().rstrip(',').strip().strip('\'"[]').strip()
|
||||||
|
if cleaned_url:
|
||||||
|
cleaned_urls.append(cleaned_url)
|
||||||
|
|
||||||
|
if len(cleaned_urls) != len(urls):
|
||||||
|
logger.info(f"Cleaned URL list, removed {len(urls) - len(cleaned_urls)} empty or invalid entries.")
|
||||||
|
|
||||||
|
urls = cleaned_urls
|
||||||
|
if not urls:
|
||||||
|
logger.error("URL list is empty after cleaning. Exiting.")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
print_banner(args, urls=urls)
|
||||||
|
|
||||||
|
# --- Main test loop ---
|
||||||
|
cycles = 0
|
||||||
|
last_stats_print_time = time.time()
|
||||||
|
try:
|
||||||
|
# --- Worker function for URL mode ---
|
||||||
|
def process_url_task(url, url_index, cycle_num):
|
||||||
|
"""Worker to generate info.json for a URL and then test formats."""
|
||||||
|
# 1. Generate profile name if configured
|
||||||
|
profile_name = None
|
||||||
|
if args.profile_prefix:
|
||||||
|
if args.profile_pool:
|
||||||
|
profile_name = f"{args.profile_prefix}_{url_index % args.profile_pool}"
|
||||||
|
elif args.profile_per_request:
|
||||||
|
timestamp = datetime.now().strftime('%Y%m%d%H%M%S')
|
||||||
|
profile_name = f"{args.profile_prefix}_{timestamp}_{url_index}"
|
||||||
|
|
||||||
|
# 2. Select and format the generation command
|
||||||
|
gen_cmd_template = args.info_json_gen_cmd
|
||||||
|
if args.alt_cmd_every_n > 0 and args.info_json_gen_cmd_alt and (url_index + 1) % args.alt_cmd_every_n == 0:
|
||||||
|
gen_cmd_template = args.info_json_gen_cmd_alt
|
||||||
|
logger.info(f"Using alternate command for URL #{url_index + 1}: {url}")
|
||||||
|
|
||||||
|
try:
|
||||||
|
# shlex.split handles quoted arguments in the template
|
||||||
|
video_id = get_video_id(url)
|
||||||
|
gen_cmd = []
|
||||||
|
template_args = shlex.split(gen_cmd_template)
|
||||||
|
|
||||||
|
# If the video ID could be mistaken for an option, and it appears to be
|
||||||
|
# a positional argument, insert '--' to prevent misinterpretation.
|
||||||
|
if video_id.startswith('-'):
|
||||||
|
try:
|
||||||
|
# Heuristic: if {url} is the last token, it's likely positional.
|
||||||
|
if template_args and template_args[-1] == '{url}':
|
||||||
|
template_args.insert(-1, '--')
|
||||||
|
except (ValueError, IndexError):
|
||||||
|
pass # {url} not found or list is empty.
|
||||||
|
|
||||||
|
for arg in template_args:
|
||||||
|
# Replace placeholders
|
||||||
|
formatted_arg = arg.replace('{url}', video_id) \
|
||||||
|
.replace('{worker_id}', str(url_index)) \
|
||||||
|
.replace('{cycle}', str(cycle_num))
|
||||||
|
if profile_name:
|
||||||
|
formatted_arg = formatted_arg.replace('{profile}', profile_name)
|
||||||
|
gen_cmd.append(formatted_arg)
|
||||||
|
|
||||||
|
# Pass verbose flag through if set
|
||||||
|
if args.verbose and 'get_info_json_client.py' in gen_cmd_template and '--verbose' not in gen_cmd_template:
|
||||||
|
gen_cmd.append('--verbose')
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Failed to format --info-json-gen-cmd: {e}")
|
||||||
|
stats.log_event({'path': url, 'success': False, 'error_type': 'BadGenCmd', 'details': 'Cmd format error'})
|
||||||
|
return []

            # 3. Run command to get info.json
            log_msg = f"[{url}] Generating info.json"
            if profile_name:
                log_msg += f" with profile '{profile_name}'"
            log_msg += "..."
            logger.info(log_msg)

            retcode, stdout, stderr = run_command(gen_cmd)
            if retcode != 0:
                error_msg = stderr.strip().split('\n')[-1]
                logger.error(f"[{url}] Failed to generate info.json: {error_msg}")
                event = {'type': 'fetch', 'path': url, 'success': False, 'error_type': 'GetInfoJsonFail', 'details': error_msg}
                stats.log_event(event)
                return []  # Return empty list, as no formats were tested

            # Handle --fetch-only
            if args.fetch_only:
                logger.info(f"[{url}] Successfully fetched info.json. Skipping download due to --fetch-only.")
                event = {'type': 'fetch', 'path': url, 'success': True, 'details': 'OK'}
                stats.log_event(event)
                return []  # Return empty list, indicating no downloads to check for failure

            # 4. Pass to the format processing function
            return process_info_json_cycle(url, stdout, args, stats)
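            # Note added for clarity (not in the original diff): the list returned here is expected to
            # hold one result dict per tested format; the cycle loop below reads each entry's
            # 'success', 'format' and 'error_type' keys to decide whether to stop the run early.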

        while True:
            if duration_seconds and (time.time() - start_time) > duration_seconds:
                logger.info(f"Reached duration limit of {args.duration} minutes. Stopping.")
                break
            cycles += 1
            if args.max_attempts > 0 and cycles > args.max_attempts:
                logger.info(f"Reached max cycles ({args.max_attempts}). Stopping.")
                break

            logger.info(f"--- Cycle #{cycles} ---")

            with concurrent.futures.ThreadPoolExecutor(max_workers=args.workers) as executor:
                future_to_identifier = {}
                if args.info_json_files:
                    future_to_identifier = {
                        executor.submit(process_info_json_cycle, path, content, args, stats): path
                        for path, content in info_jsons.items()
                    }
                elif args.urls_file:
                    future_to_identifier = {
                        executor.submit(process_url_task, url, i, cycles): url
                        for i, url in enumerate(urls)
                    }
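                # Clarifying comment (not in the original diff): this dict maps each submitted future
                # back to the info.json path or URL it was created for, so completed futures can be
                # reported under a human-readable identifier in the result loop below.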

                should_stop = False

                # Use a set of futures that we can modify while iterating
                futures = set(future_to_identifier.keys())

                while futures and not should_stop:
                    # Wait for the next future to complete
                    done, futures = concurrent.futures.wait(futures, return_when=concurrent.futures.FIRST_COMPLETED)
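                    # Clarifying comment (not in the original diff): concurrent.futures.wait() returns a
                    # (done, not_done) pair, so reassigning `futures` here keeps only the still-pending
                    # tasks for the next iteration.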

                    for future in done:
                        identifier = future_to_identifier[future]
                        identifier_name = get_display_name(identifier)
                        try:
                            results = future.result()
                            # Check if any result from this file triggers a global stop
                            for result in results:
                                if not result['success']:
                                    if args.stop_on_failure:
                                        logger.info(f"Failure on {identifier_name} (format {result['format']}). Shutting down all workers due to --stop-on-failure.")
                                        should_stop = True
                                    elif args.stop_on_403 and result['error_type'] == 'HTTP 403':
                                        logger.info(f"403 error on {identifier_name} (format {result['format']}). Shutting down all workers due to --stop-on-403.")
                                        should_stop = True
                                    elif args.stop_on_timeout and result['error_type'] == 'Timeout':
                                        logger.info(f"Timeout on {identifier_name} (format {result['format']}). Shutting down all workers due to --stop-on-timeout.")
                                        should_stop = True
                        except Exception as exc:
                            logger.error(f'{identifier_name} generated an exception: {exc}')
                            stats.log_event({'path': str(identifier), 'success': False, 'error_type': 'Exception', 'details': str(exc)})

                        if should_stop:
                            break  # Stop processing results from 'done' set

                    # Check for duration limit after each batch of tasks completes
                    if duration_seconds and (time.time() - start_time) > duration_seconds:
                        logger.info(f"Reached duration limit of {args.duration} minutes. Cancelling remaining tasks.")
                        should_stop = True

                # If the loop was exited, cancel any remaining tasks
                if should_stop and futures:
                    logger.info(f"Cancelling {len(futures)} outstanding task(s).")
                    for future in futures:
                        future.cancel()
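                    # Clarifying comment (not in the original diff): Future.cancel() only prevents tasks
                    # that have not started yet; tasks already running in the thread pool will finish
                    # their current URL/format before the executor shuts down.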

            if should_stop:
                break

            if args.stats_interval > 0 and (time.time() - last_stats_print_time) >= args.stats_interval:
                stats.print_summary()
                last_stats_print_time = time.time()

            if args.max_attempts > 0 and cycles >= args.max_attempts:
                break

            logger.info(f"Cycle complete. Sleeping for {args.sleep} seconds...")

            # Interruptible sleep that respects the total test duration
            sleep_end_time = time.time() + args.sleep
            should_stop_after_sleep = False
            while time.time() < sleep_end_time:
                if duration_seconds and (time.time() - start_time) >= duration_seconds:
                    logger.info(f"Reached duration limit of {args.duration} minutes during sleep. Stopping.")
                    should_stop_after_sleep = True
                    break
                time.sleep(1)  # Check every second

            if should_stop_after_sleep:
                break

    except KeyboardInterrupt:
        logger.info("\nCtrl+C received, shutting down...")
    finally:
        stats.print_summary()
        stats.close()

    return 0 if not any(not e['success'] for e in stats.events) else 1
2420  ytops_client/stress_policy_tool.py  Normal file
File diff suppressed because it is too large. Load Diff