Updates on minio, envoy ports on master, adding ytops_client
parent f151ffee86
commit 0ead029b85
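For orientation only, a hedged Python sketch of the kind of ytops-client get-info invocation the updated DAG code in this commit assembles; the host, port, profile, output path, and URL below are placeholder assumptions, while the subcommand and flags are the ones that appear in the diff.

# Hedged sketch: mirrors the cmd list the updated get_token task builds.
# 'envoy-thrift-lb', 9980, 'acct_example', the output path, and the URL are assumed placeholders.
import shlex
import subprocess

cmd = [
    'ytops-client', 'get-info',
    '--host', 'envoy-thrift-lb',      # management service host (DAG default)
    '--port', '9980',                 # new envoy port used on master in this commit
    '--profile', 'acct_example',      # hypothetical account id
    '--output', '/opt/airflow/downloadfiles/info_example.json',
    '--print-proxy', '--verbose', '--log-return',
    '--client', 'tv_simply',          # new default client list
    'https://www.youtube.com/watch?v=EXAMPLE',
]
print(' '.join(shlex.quote(a) for a in cmd))   # copy-paste friendly form, as the DAG logs it
proc = subprocess.run(cmd, capture_output=True, text=True, timeout=600)
print(proc.returncode, proc.stdout[:200])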
@@ -105,7 +105,9 @@ RUN mv /usr/local/bin/pip /usr/local/bin/pip.orig && \
 "gunicorn==20.1.0" \
 "python-ffmpeg==2.0.12" \
 "ffprobe3" \
-"python-dotenv" && \
+"python-dotenv" \
+"PyYAML" \
+"aria2p" && \
 mv /usr/local/bin/pip.orig /usr/local/bin/pip

 # --- Install the custom yt_ops_services package ---
@@ -117,6 +119,12 @@ COPY --chown=airflow:airflow yt_ops_services ./yt_ops_services/
 COPY --chown=airflow:airflow thrift_model ./thrift_model/
 COPY --chown=airflow:airflow pangramia ./pangramia/

+# Copy the ytops-client tool and its executable
+COPY --chown=airflow:airflow ytops_client ./ytops_client/
+COPY --chown=airflow:airflow bin/ytops-client /app/bin/ytops-client
+RUN chmod +x /app/bin/ytops-client
+ENV PATH="/app/bin:${PATH}"
+
 # Install the package in editable mode. This runs setup.py and installs all dependencies
 # listed in `install_requires`, making the `yt_ops_services` module available everywhere.
 # Bypass the pip root check again.
@@ -118,14 +118,14 @@ services:
 - "{{ service_role }}"

 # --- S3 Logging Parameters ---
-- "--s3-endpoint-url"
-- "${S3_ENDPOINT_URL}"
-- "--s3-access-key-id"
-- "${S3_ACCESS_KEY_ID}"
-- "--s3-secret-access-key"
-- "${S3_SECRET_ACCESS_KEY}"
-- "--s3-region-name"
-- "${S3_REGION_NAME}"
+#- "--s3-endpoint-url"
+#- "${S3_ENDPOINT_URL}"
+#- "--s3-access-key-id"
+#- "${S3_ACCESS_KEY_ID}"
+#- "--s3-secret-access-key"
+#- "${S3_SECRET_ACCESS_KEY}"
+#- "--s3-region-name"
+#- "${S3_REGION_NAME}"
 {% if service_role is defined and service_role != 'management' %}
 # --- Parameters for worker/all-in-one roles ONLY ---
 - "--script-dir"
@@ -4,11 +4,11 @@ events {

 http {
 upstream minio_servers {
-server minio:9000;
+server 172.17.0.1:9001;
 }

 upstream minio_console_servers {
-server minio:9001;
+server 172.17.0.1:9002;
 }

 server {
@@ -45,7 +45,7 @@ except ImportError as e:
 raise

 DEFAULT_MANAGEMENT_SERVICE_IP = Variable.get("MANAGEMENT_SERVICE_HOST", default_var="envoy-thrift-lb")
-DEFAULT_MANAGEMENT_SERVICE_PORT = Variable.get("MANAGEMENT_SERVICE_PORT", default_var=9080)
+DEFAULT_MANAGEMENT_SERVICE_PORT = Variable.get("MANAGEMENT_SERVICE_PORT", default_var=9980)
 DEFAULT_REDIS_CONN_ID = "redis_default"

 # Version tracking for debugging
@@ -55,9 +55,13 @@ def _get_predefined_url_lists():
 'urls.dh128.json',
 'urls.rt100.json',
 'urls.rt25.json',
+'urls.rt250.json',
+'urls.rt500.json',
+'urls.rt3000.json',
 'urls.sky28.json',
 'urls.sky3.json',
 'urls.tq46.json',
+'urls.topnews500.json',
 ]
 return ['None'] + sorted(predefined_files)

@@ -256,15 +260,15 @@ def clear_queue_callable(**context):
 redis_conn_id = params['redis_conn_id']

 queue_system = params.get('queue_system', 'v1_monolithic')
+queue_base_names_to_clear = []
 if queue_system == 'v1_monolithic':
-queue_base_name = params['queue_base_name']
-elif queue_system == 'v2_separated_auth':
-queue_base_name = 'queue2_auth'
-elif queue_system == 'v2_separated_dl':
-queue_base_name = 'queue2_dl'
+queue_base_names_to_clear.append(params['queue_base_name'])
+elif queue_system.startswith('v2_'):
+# For v2, clear both auth and dl queues for a complete clear.
+queue_base_names_to_clear.extend(['queue2_auth', 'queue2_dl'])
 else:
 raise ValueError(f"Invalid queue_system: {queue_system}")
-logger.info(f"Operating on queue system '{queue_system}' with base name '{queue_base_name}'.")
+logger.info(f"Operating on queue system '{queue_system}' with base names: {queue_base_names_to_clear}.")

 queues_to_clear_options = params.get('queues_to_clear_options', [])
 confirm_clear = params.get('confirm_clear', False)
@@ -290,14 +294,15 @@ def clear_queue_callable(**context):

 all_suffixes = ['_inbox', '_fail', '_result', '_progress']
 keys_to_delete = set()
-if '_all' in queues_to_clear_options:
-logger.info("'_all' option selected. Clearing all standard queues.")
-for suffix in all_suffixes:
-keys_to_delete.add(f"{queue_base_name}{suffix}")
-else:
-for suffix in queues_to_clear_options:
-if suffix in all_suffixes:
+for queue_base_name in queue_base_names_to_clear:
+if '_all' in queues_to_clear_options:
+logger.info(f"'_all' option selected. Clearing all standard queues for base '{queue_base_name}'.")
+for suffix in all_suffixes:
 keys_to_delete.add(f"{queue_base_name}{suffix}")
+else:
+for suffix in queues_to_clear_options:
+if suffix in all_suffixes:
+keys_to_delete.add(f"{queue_base_name}{suffix}")

 if not keys_to_delete:
 logger.warning("No valid queue suffixes were selected. Nothing to delete.")
@@ -37,7 +37,7 @@ logger = logging.getLogger(__name__)
 # Default settings from Airflow Variables or hardcoded fallbacks
 DEFAULT_REDIS_CONN_ID = 'redis_default'
 DEFAULT_YT_AUTH_SERVICE_IP = Variable.get("YT_AUTH_SERVICE_IP", default_var="172.17.0.1")
-DEFAULT_YT_AUTH_SERVICE_PORT = Variable.get("YT_AUTH_SERVICE_PORT", default_var=9080)
+DEFAULT_YT_AUTH_SERVICE_PORT = Variable.get("YT_AUTH_SERVICE_PORT", default_var=9980)

 DEFAULT_ARGS = {
 'owner': 'airflow',
@@ -75,10 +75,10 @@ DEFAULT_REQUEST_PARAMS_JSON = """{
 # Default settings
 DEFAULT_QUEUE_NAME = 'video_queue'
 DEFAULT_REDIS_CONN_ID = 'redis_default'
-DEFAULT_TOTAL_WORKERS = 3
+DEFAULT_TOTAL_WORKERS = 8
 DEFAULT_WORKERS_PER_BUNCH = 1
-DEFAULT_WORKER_DELAY_S = 5
-DEFAULT_BUNCH_DELAY_S = 20
+DEFAULT_WORKER_DELAY_S = 1
+DEFAULT_BUNCH_DELAY_S = 1

 DEFAULT_YT_AUTH_SERVICE_IP = Variable.get("YT_AUTH_SERVICE_IP", default_var="172.17.0.1")
 DEFAULT_YT_AUTH_SERVICE_PORT = Variable.get("YT_AUTH_SERVICE_PORT", default_var=9080)
@@ -323,7 +323,7 @@ with DAG(

 # --- Worker Passthrough Parameters ---
 'on_auth_failure': Param(
-'retry_with_new_account',
+'proceed_loop_under_manual_inspection',
 type="string",
 enum=['stop_loop', 'retry_with_new_account', 'retry_without_ban', 'proceed_loop_under_manual_inspection'],
 title="[Worker Param] On Authentication Failure Policy",
@@ -343,38 +343,17 @@ with DAG(
 "'proceed_loop': (Default) Mark URL as failed but continue the processing loop with a new URL. "
 "'retry_with_new_token': Attempt to get a new token with a new account and retry the download once. If it fails again, proceed loop."
 ),
-'request_params_json': Param(DEFAULT_REQUEST_PARAMS_JSON, type="string", title="[Worker Param] Request Params JSON", description="JSON string with request parameters for the token service.", render_kwargs={"rows": 20, "cols": 120}),
+'request_params_json': Param('{}', type="string", title="[Worker Param] Request Params JSON", description="JSON string with request parameters for the token service."),
 'queue_name': Param(DEFAULT_QUEUE_NAME, type="string", description="[Worker Param] Base name for Redis queues."),
 'redis_conn_id': Param(DEFAULT_REDIS_CONN_ID, type="string", description="[Worker Param] Airflow Redis connection ID."),
 'clients': Param(
-'mweb,web_camoufox,tv',
+'tv_simply',
 type="string",
 enum=[
-'mweb,web_camoufox,tv',
+'tv_simply',
 'mweb',
-'web_camoufox',
 'tv',
 'custom',
-'tv,web_safari,mweb,web_camoufox',
-'web_safari',
-'web',
-'web_embedded',
-'web_music',
-'web_creator',
-'web_safari_camoufox',
-'web_embedded_camoufox',
-'web_music_camoufox',
-'web_creator_camoufox',
-'mweb_camoufox',
-'android',
-'android_music',
-'android_creator',
-'android_vr',
-'ios',
-'ios_music',
-'ios_creator',
-'tv_simply',
-'tv_embedded',
 ],
 title="[Worker Param] Clients",
 description="[Worker Param] Comma-separated list of clients for token generation. Full list: web, web_safari, web_embedded, web_music, web_creator, mweb, web_camoufox, web_safari_camoufox, web_embedded_camoufox, web_music_camoufox, web_creator_camoufox, mweb_camoufox, android, android_music, android_creator, android_vr, ios, ios_music, ios_creator, tv, tv_simply, tv_embedded. See DAG documentation for details."
@@ -402,27 +381,24 @@ with DAG(
 type="string",
 enum=['best_audio', 'formats_0', 'formats_2', 'formats_3', 'custom'],
 title="[Worker Param] Download Format Preset",
-description="Select a predefined format string or choose 'custom' to use the value from 'Custom Download Format'.\nformats_0: 18,140\nformats_2: 18,140,299/298/137/136/135/134/133\nformats_3: 18,599,139,140,141,160/269,133/229,134/230,135/231,136/232,137/270,298/311,299/318"
+description="Select a predefined format string or choose 'custom' to use the value from 'Custom Download Format'.\nformats_0: 18,140\nformats_2: 18-dashy,140-dashy,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy\nformats_3: 18,599,139,140,141,160/269,133/229,134/230,135/231,136/232,137/270,298/311,299/318"
 ),
 'download_format_custom': Param(
-'18,140,299/298/137/136/135/134/133',
+'18-dashy,140-dashy,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy',
 type="string",
 title="[Worker Param] Custom Download Format",
 description="Custom yt-dlp format string. Used when preset is 'custom'. E.g., 'ba[ext=m4a]/bestaudio/best'."
 ),
 'downloader': Param(
-'default',
+'cli',
 type="string",
-enum=['default', 'aria2c'],
-title="[Worker Param] Downloader",
-description="Choose the downloader for yt-dlp."
-),
-'downloader_args_aria2c': Param(
-'aria2c:-x 4 -k 2M --max-download-limit=3M',
-type="string",
-title="[Worker Param] Aria2c Downloader Arguments",
-description="Arguments to pass to yt-dlp's --downloader-args. Used when downloader is 'aria2c'."
+enum=['py', 'aria-rpc', 'cli'],
+title="[Worker Param] Download Tool",
+description="Choose the download tool to use: 'py' (native python, recommended), 'aria-rpc' (send to aria2c daemon), 'cli' (legacy yt-dlp wrapper)."
 ),
+'aria_host': Param('172.17.0.1', type="string", title="[Worker Param] Aria2c Host", description="For 'aria-rpc' downloader: Host of the aria2c RPC server. Can be set via Airflow Variable 'YTDLP_ARIA_HOST'."),
+'aria_port': Param(6800, type="integer", title="[Worker Param] Aria2c Port", description="For 'aria-rpc' downloader: Port of the aria2c RPC server. Can be set via Airflow Variable 'YTDLP_ARIA_PORT'."),
+'aria_secret': Param('SQGCQPLVFQIASMPNPOJYLVGJYLMIDIXDXAIXOTX', type="string", title="[Worker Param] Aria2c Secret", description="For 'aria-rpc' downloader: Secret token. Can be set via Airflow Variable 'YTDLP_ARIA_SECRET'."),
 'yt_dlp_extra_args': Param(
 '--restrict-filenames',
 type=["string", "null"],
@@ -290,7 +290,10 @@ def get_url_and_assign_account(**context):

 @task
 def get_token(initial_data: dict, **context):
-"""Makes a single attempt to get a token from the Thrift service."""
+"""Makes a single attempt to get a token by calling the ytops-client get-info tool."""
+import subprocess
+import shlex
+
 ti = context['task_instance']
 params = context['params']

@@ -298,129 +301,85 @@ def get_token(initial_data: dict, **context):
 url = initial_data['url_to_process']
 info_json_dir = Variable.get('DOWNLOADS_TEMP', '/opt/airflow/downloadfiles')

-host, port, timeout = params['service_ip'], int(params['service_port']), int(params.get('timeout', DEFAULT_TIMEOUT))
+host, port = params['service_ip'], int(params['service_port'])
 machine_id = params.get('machine_id') or socket.gethostname()
 clients = params.get('clients')
 request_params_json = params.get('request_params_json', '{}')
 assigned_proxy_url = params.get('assigned_proxy_url')

-# Pretty-print the request parameters for debugging
-try:
-pretty_request_params = json.dumps(json.loads(request_params_json), indent=2)
-logger.info(f"\n--- Request Parameters ---\n{pretty_request_params}\n--- End of Request Parameters ---")
-except (json.JSONDecodeError, TypeError):
-logger.warning("Could not parse request_params_json. Using raw content.")
-logger.info(f"\n--- Raw Request Parameters ---\n{request_params_json}\n--- End of Raw Request Parameters ---")
+video_id = _extract_video_id(url)
+os.makedirs(info_json_dir, exist_ok=True)
+timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+info_json_path = os.path.join(info_json_dir, f"info_{video_id or 'unknown'}_{account_id}_{timestamp}.json")

-# Construct Airflow log context to pass to the service
-try:
-from airflow.configuration import conf
-remote_base = conf.get('logging', 'remote_base_log_folder')
-log_path = (
-f"{remote_base}/dag_id={ti.dag_id}/run_id={ti.run_id}/"
-f"task_id={ti.task_id}/attempt={ti.try_number}.log"
-)
-airflow_log_context = AirflowLogContext(
-logS3Path=log_path,
-dagId=ti.dag_id,
-runId=ti.run_id,
-taskId=ti.task_id,
-tryNumber=ti.try_number,
-workerHostname=socket.gethostname(),
-queue=ti.queue
-)
-logger.info(f"Constructed Airflow log context for yt-ops service: {airflow_log_context}")
-except Exception as e:
-logger.warning(f"Could not construct full Airflow log context: {e}. Creating a basic one.")
-airflow_log_context = AirflowLogContext(
-dagId=ti.dag_id,
-runId=ti.run_id,
-taskId=ti.task_id,
-tryNumber=ti.try_number,
-workerHostname=socket.gethostname(),
-queue=ti.queue
-)
+cmd = [
+'ytops-client', 'get-info',
+'--host', host,
+'--port', str(port),
+'--profile', account_id,
+'--output', info_json_path,
+'--print-proxy',
+'--verbose',
+'--log-return',
+]
+
+if clients:
+cmd.extend(['--client', clients])
+if machine_id:
+cmd.extend(['--machine-id', machine_id])
+if request_params_json and request_params_json != '{}':
+cmd.extend(['--request-params-json', request_params_json])
+if assigned_proxy_url:
+cmd.extend(['--assigned-proxy-url', assigned_proxy_url])
+
+cmd.append(url)

 logger.info(f"--- Attempting to get token for URL '{url}' with account '{account_id}' (Clients: {clients}) ---")
-client, transport = None, None
-try:
-client, transport = _get_thrift_client(host, port, timeout)
-token_data = client.getOrRefreshToken(
-accountId=account_id,
-updateType=TokenUpdateMode.AUTO,
-url=url,
-clients=clients,
-machineId=machine_id,
-airflowLogContext=airflow_log_context,
-requestParamsJson=request_params_json,
-assignedProxyUrl=assigned_proxy_url
-)
+copy_paste_cmd = ' '.join(shlex.quote(arg) for arg in cmd)
+logger.info(f"Executing command: {copy_paste_cmd}")

-# Log a compact summary of the Thrift response, omitting large/detailed fields.
-summary_token_data = copy(token_data)
-if hasattr(summary_token_data, 'infoJson') and summary_token_data.infoJson:
-summary_token_data.infoJson = f"... ({len(summary_token_data.infoJson)} bytes) ..."
-if hasattr(summary_token_data, 'cookiesBlob') and summary_token_data.cookiesBlob:
-summary_token_data.cookiesBlob = f"... ({len(summary_token_data.cookiesBlob)} bytes) ..."
-# These will be logged separately below.
-if hasattr(summary_token_data, 'requestSummary'):
-summary_token_data.requestSummary = "..."
-if hasattr(summary_token_data, 'communicationLogs'):
-summary_token_data.communicationLogs = "..."
-logger.info(f"Thrift service response summary: {summary_token_data}")
+process = subprocess.run(cmd, capture_output=True, text=True, timeout=int(params.get('timeout', DEFAULT_TIMEOUT)))

-request_summary = getattr(token_data, 'requestSummary', None)
-if request_summary:
-# Prepending a newline for better separation in logs.
-logger.info(f"\n--- Request Summary ---\n{request_summary}")
+if process.stdout:
+logger.info(f"ytops-client STDOUT:\n{process.stdout}")
+if process.stderr:
+logger.info(f"ytops-client STDERR:\n{process.stderr}")

-communication_logs = getattr(token_data, 'communicationLogs', None)
-if communication_logs:
-logger.info("--- Communication Logs from Token Service ---")
-logger.info(communication_logs)
-logger.info("--- End of Communication Logs ---")
+if process.returncode != 0:
+error_message = "ytops-client failed. See logs for details."
+for line in reversed(process.stderr.strip().split('\n')):
+if 'ERROR' in line or 'Thrift error' in line or 'Connection to server failed' in line:
+error_message = line.strip()
+break

-info_json = getattr(token_data, 'infoJson', None)
-if not (info_json and json.loads(info_json)):
-raise AirflowException("Service returned success but info.json was empty or invalid.")
+error_code = 'GET_INFO_CLIENT_FAIL'
+if "BOT_DETECTED" in process.stderr:
+error_code = "BOT_DETECTED"
+elif "BOT_DETECTION_SIGN_IN_REQUIRED" in process.stderr:
+error_code = "BOT_DETECTION_SIGN_IN_REQUIRED"
+elif "Connection to server failed" in process.stderr:
+error_code = "TRANSPORT_ERROR"

-video_id = _extract_video_id(url)
-os.makedirs(info_json_dir, exist_ok=True)
-# Use a readable timestamp for a unique filename on each attempt.
-timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-info_json_path = os.path.join(info_json_dir, f"info_{video_id or 'unknown'}_{account_id}_{timestamp}.json")
-with open(info_json_path, 'w', encoding='utf-8') as f:
-f.write(info_json)

-proxy_attr = next((attr for attr in ['socks5Proxy', 'socksProxy', 'socks'] if hasattr(token_data, attr)), None)
-ytdlp_command = getattr(token_data, 'ytdlpCommand', None)
-if ytdlp_command:
-logger.info(f"--- YTDLP Command from Token Service ---\n{ytdlp_command}\n--- End of YTDLP Command ---")

-return {
-'info_json_path': info_json_path,
-'socks_proxy': getattr(token_data, proxy_attr) if proxy_attr else None,
-'ytdlp_command': ytdlp_command,
-'successful_account_id': account_id,
-'original_url': url, # Include original URL for fallback
-}
-except (PBServiceException, PBUserException, TTransportException) as e:
-error_context = getattr(e, 'context', None)
-if isinstance(error_context, str):
-try: error_context = json.loads(error_context.replace("'", "\""))
-except: pass

 error_details = {
-'error_message': getattr(e, 'message', str(e)),
-'error_code': getattr(e, 'errorCode', 'TRANSPORT_ERROR'),
-'proxy_url': error_context.get('proxy_url') if isinstance(error_context, dict) else None
+'error_message': error_message,
+'error_code': error_code,
+'proxy_url': None
 }
-logger.error(f"Thrift call failed for account '{account_id}'. Details: {error_details}")
 ti.xcom_push(key='error_details', value=error_details)
-raise AirflowException(f"Thrift call failed: {error_details['error_message']}")
-finally:
-if transport and transport.isOpen():
-transport.close()
+raise AirflowException(f"ytops-client get-info failed: {error_message}")
+
+proxy = None
+proxy_match = re.search(r"Proxy used: (.*)", process.stderr)
+if proxy_match:
+proxy = proxy_match.group(1).strip()
+
+return {
+'info_json_path': info_json_path,
+'socks_proxy': proxy,
+'ytdlp_command': None,
+'successful_account_id': account_id,
+'original_url': url,
+}

 @task.branch
 def handle_bannable_error_branch(task_id_to_check: str, **context):
@@ -706,7 +665,7 @@ def download_and_probe(token_data: dict, available_formats: list[str], **context
 elif format_preset == 'formats_0':
 download_format = '18,140'
 elif format_preset == 'formats_2':
-download_format = '18,140,299/298/137/136/135/134/133'
+download_format = '18-dashy,140-dashy,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy'
 elif format_preset == 'formats_3':
 download_format = '18,599,139,140,141,160/269,133/229,134/230,135/231,136/232,137/270,298/311,299/318'
 else:
@@ -720,112 +679,102 @@ def download_and_probe(token_data: dict, available_formats: list[str], **context
 raise AirflowException(f"Error: info.json path is missing or file does not exist ({info_json_path}).")

 def run_yt_dlp_command(format_selector: str):
-"""Constructs and runs a yt-dlp command, returning a list of final filenames."""
-cmd = [
-'yt-dlp', '--verbose', '--print-traffic', '--load-info-json', info_json_path,
-'-f', format_selector, '-o', full_output_path,
-'--print', 'filename', '--continue', '--no-progress', '--no-simulate',
-'--no-write-info-json', '--ignore-errors', '--no-playlist',
-]
+"""Constructs and runs a yt-ops-client download command, returning a list of final filenames."""
+downloader = params.get('downloader', 'py')
+cmd = ['ytops-client', 'download', downloader, '--load-info-json', info_json_path, '-f', format_selector]

-if params.get('fragment_retries'):
-cmd.extend(['--fragment-retries', str(params['fragment_retries'])])
-if params.get('limit_rate'):
-cmd.extend(['--limit-rate', params['limit_rate']])
-if params.get('socket_timeout'):
-cmd.extend(['--socket-timeout', str(params['socket_timeout'])])
-if params.get('min_sleep_interval'):
-cmd.extend(['--min-sleep-interval', str(params['min_sleep_interval'])])
-if params.get('max_sleep_interval'):
-cmd.extend(['--max-sleep-interval', str(params['max_sleep_interval'])])
-if params.get('yt_dlp_test_mode'):
-cmd.append('--test')
-
-downloader = params.get('downloader', 'default')
-if proxy and not (downloader == 'aria2c' and proxy.startswith('socks5://')):
+if proxy:
 cmd.extend(['--proxy', proxy])

-gost_process = None
-try:
-if downloader == 'aria2c':
-cmd.extend(['--downloader', 'aria2c'])
-downloader_args = params.get('downloader_args_aria2c')
-if proxy and proxy.startswith('socks5://'):
-import socket
-from contextlib import closing
-def find_free_port():
-with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s:
-s.bind(('', 0))
-s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
-return s.getsockname()[1]
-local_port = find_free_port()
-http_proxy = f"http://127.0.0.1:{local_port}"
-logger.info(f"Starting gost for format '{format_selector}' to forward {proxy} to {http_proxy}")
-gost_cmd = ['gost', '-L', f'http://127.0.0.1:{local_port}', '-F', proxy]
-gost_process = subprocess.Popen(gost_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-time.sleep(1)
-if gost_process.poll() is not None:
-stdout, stderr = gost_process.communicate()
-logger.error(f"gost failed to start. Exit: {gost_process.returncode}. Stdout: {stdout.decode()}. Stderr: {stderr.decode()}")
-raise AirflowException("gost proxy tunnel failed to start.")
-user_args = downloader_args[len('aria2c:'):] if downloader_args and downloader_args.startswith('aria2c:') else (downloader_args or "")
-final_args_str = f'aria2c:{user_args.strip()} --http-proxy={http_proxy}'
-cmd.extend(['--downloader-args', final_args_str])
-elif downloader_args:
-cmd.extend(['--downloader-args', downloader_args])
+if downloader == 'py':
+cmd.extend(['--output-dir', download_dir])
+# The 'py' tool maps many yt-dlp flags via --extra-ytdlp-args
+py_extra_args = []
+if params.get('fragment_retries'):
+py_extra_args.extend(['--fragment-retries', str(params['fragment_retries'])])
+if params.get('limit_rate'):
+py_extra_args.extend(['--limit-rate', params['limit_rate']])
+if params.get('socket_timeout'):
+py_extra_args.extend(['--socket-timeout', str(params['socket_timeout'])])
+if params.get('min_sleep_interval'):
+py_extra_args.extend(['--sleep-interval', str(params['min_sleep_interval'])])
+if params.get('max_sleep_interval'):
+py_extra_args.extend(['--max-sleep-interval', str(params['max_sleep_interval'])])
+if params.get('yt_dlp_test_mode'):
+py_extra_args.append('--test')

-extra_args = params.get('yt_dlp_extra_args')
-if extra_args:
-cmd.extend(shlex.split(extra_args))
-if original_url:
-cmd.append(original_url)
+existing_extra = shlex.split(params.get('yt_dlp_extra_args') or '')
+final_extra_args = existing_extra + py_extra_args
+if final_extra_args:
+cmd.extend(['--extra-ytdlp-args', shlex.join(final_extra_args)])

-copy_paste_cmd = ' '.join(shlex.quote(arg) for arg in cmd)
-logger.info(f"Executing yt-dlp command for format '{format_selector}': {copy_paste_cmd}")
-process = subprocess.run(cmd, capture_output=True, text=True, timeout=3600)
+elif downloader == 'aria-rpc':
+cmd.extend([
+'--aria-host', params.get('aria_host', '172.17.0.1'),
+'--aria-port', str(params.get('aria_port', 6800)),
+'--aria-secret', params.get('aria_secret'),
+'--wait', '--auto-merge-fragments',
+'--fragments-dir', download_dir,
+'--output-dir', download_dir,
+])
+if params.get('yt_dlp_cleanup_mode'):
+cmd.append('--cleanup')

-if process.stdout:
-logger.info(f"yt-dlp STDOUT for format '{format_selector}':\n{process.stdout}")
-if process.stderr:
-# yt-dlp often prints progress and informational messages to stderr
-logger.info(f"yt-dlp STDERR for format '{format_selector}':\n{process.stderr}")
+elif downloader == 'cli':
+cmd.extend(['--output-dir', download_dir])
+# The 'cli' tool is the old yt-dlp wrapper, so it takes similar arguments.
+cli_extra_args = []
+if params.get('fragment_retries'):
+cli_extra_args.extend(['--fragment-retries', str(params['fragment_retries'])])
+if params.get('limit_rate'):
+cli_extra_args.extend(['--limit-rate', params['limit_rate']])
+if params.get('socket_timeout'):
+cli_extra_args.extend(['--socket-timeout', str(params['socket_timeout'])])
+if params.get('min_sleep_interval'):
+cli_extra_args.extend(['--sleep-interval', str(params['min_sleep_interval'])])
+if params.get('max_sleep_interval'):
+cli_extra_args.extend(['--max-sleep-interval', str(params['max_sleep_interval'])])
+if params.get('yt_dlp_test_mode'):
+cli_extra_args.append('--test')

-if process.returncode != 0:
-logger.error(f"yt-dlp failed for format '{format_selector}' with exit code {process.returncode}")
-# STDOUT and STDERR are already logged above.
-raise AirflowException(f"yt-dlp command failed for format '{format_selector}'. {process.stderr}")
+existing_extra = shlex.split(params.get('yt_dlp_extra_args') or '')
+final_extra_args = existing_extra + cli_extra_args
+if final_extra_args:
+cmd.extend(['--extra-ytdlp-args', shlex.join(final_extra_args)])

-# In test mode, files are not created, so we only check that yt-dlp returned filenames.
-# Otherwise, we verify that the files actually exist on disk.
-output_files = [f for f in process.stdout.strip().split('\n') if f]
-if not params.get('yt_dlp_test_mode'):
-output_files = [f for f in output_files if os.path.exists(f)]
+copy_paste_cmd = ' '.join(shlex.quote(arg) for arg in cmd)
+logger.info(f"Executing download command for format '{format_selector}': {copy_paste_cmd}")
+process = subprocess.run(cmd, capture_output=True, text=True, timeout=3600)

-if not output_files:
-log_msg = (f"Test run for format '{format_selector}' did not produce any filenames."
-if params.get('yt_dlp_test_mode') else
-f"Download for format '{format_selector}' finished but no output files exist.")
-exc_msg = (f"Test run for format '{format_selector}' did not produce any filenames."
-if params.get('yt_dlp_test_mode') else
-f"Download for format '{format_selector}' did not produce a file.")
+if process.stdout:
+logger.info(f"Download tool STDOUT for format '{format_selector}':\n{process.stdout}")
+if process.stderr:
+logger.info(f"Download tool STDERR for format '{format_selector}':\n{process.stderr}")

-logger.error(log_msg)
-logger.error(f"Full STDOUT:\n{process.stdout}")
-logger.error(f"Full STDERR:\n{process.stderr}")
-raise AirflowException(exc_msg)
+if process.returncode != 0:
+logger.error(f"Download tool failed for format '{format_selector}' with exit code {process.returncode}")
+raise AirflowException(f"Download command failed for format '{format_selector}'. See logs for details.")

-log_prefix = "SUCCESS (Test Mode):" if params.get('yt_dlp_test_mode') else "SUCCESS:"
-logger.info(f"{log_prefix} Command for format '{format_selector}' complete. Files: {output_files}")
-return output_files
-finally:
-if gost_process:
-logger.info(f"Terminating gost process (PID: {gost_process.pid}) for format '{format_selector}'.")
-gost_process.terminate()
-try:
-gost_process.wait(timeout=5)
-except subprocess.TimeoutExpired:
-gost_process.kill()
-gost_process.wait()
+output_files = []
+for line in process.stdout.strip().split('\n'):
+# For aria-rpc, parse "Download and merge successful: <path>" or "Download successful: <path>"
+match = re.search(r'successful: (.+)', line)
+if match:
+filepath = match.group(1).strip()
+if os.path.exists(filepath):
+output_files.append(filepath)
+else:
+logger.warning(f"File path from aria-rpc output does not exist locally: '{filepath}'")
+# For py/cli, it's just the path
+elif os.path.exists(line.strip()):
+output_files.append(line.strip())
+
+if not params.get('yt_dlp_test_mode') and not output_files:
+raise AirflowException(f"Download for format '{format_selector}' finished but no output files were found or exist.")
+
+log_prefix = "SUCCESS (Test Mode):" if params.get('yt_dlp_test_mode') else "SUCCESS:"
+logger.info(f"{log_prefix} Command for format '{format_selector}' complete. Files: {output_files}")
+return output_files

 def run_ffmpeg_probe(filename):
 """Probes a file with ffmpeg to check for corruption."""
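As a companion to the hunk above, a hedged Python sketch of the 'py' download invocation that the reworked run_yt_dlp_command assembles; the info.json path, output directory, and format selector are assumptions, and only flags that appear in the diff are used.

# Hedged sketch of the download command built for the 'py' tool; paths and format are placeholders.
import shlex
import subprocess

info_json_path = '/opt/airflow/downloadfiles/info_example.json'   # assumption
download_dir = '/opt/airflow/downloadfiles'                       # assumption
cmd = [
    'ytops-client', 'download', 'py',
    '--load-info-json', info_json_path,
    '-f', '18,140',
    '--output-dir', download_dir,
    '--extra-ytdlp-args', shlex.join(['--restrict-filenames', '--test']),
]
process = subprocess.run(cmd, capture_output=True, text=True, timeout=3600)
# For the py/cli tools, each stdout line is expected to be a produced file path.
files = [line.strip() for line in process.stdout.splitlines() if line.strip()]
print(process.returncode, files)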
@@ -1512,7 +1461,7 @@ with DAG(
 'prepend_client_to_account': Param(True, type="boolean", title="[Worker Param] Prepend Client to Account", description="If True, prepends client and timestamp to account names in prefix mode."),
 'machine_id': Param(None, type=["string", "null"]),
 'assigned_proxy_url': Param(None, type=["string", "null"], title="[Worker Param] Assigned Proxy URL", description="A specific proxy URL to use for the request, overriding the server's proxy pool logic."),
-'clients': Param('mweb,web_camoufox,tv', type="string", description="Comma-separated list of clients for token generation. e.g. mweb,tv,web_camoufox"),
+'clients': Param('tv_simply', type="string", description="Comma-separated list of clients for token generation. e.g. mweb,tv,web_camoufox"),
 'timeout': Param(DEFAULT_TIMEOUT, type="integer"),
 'output_path_template': Param("%(title)s [%(id)s].f%(format_id)s.%(ext)s", type="string", title="[Worker Param] Output Path Template", description="Output filename template for yt-dlp. It is highly recommended to include `%(format_id)s` to prevent filename collisions when downloading multiple formats."),
 'on_auth_failure': Param(
@@ -1542,11 +1491,11 @@ with DAG(
 'min_sleep_interval': Param(5, type="integer", title="[Worker Param] Min Sleep Interval", description="Minimum time to sleep between downloads (seconds)."),
 'max_sleep_interval': Param(10, type="integer", title="[Worker Param] Max Sleep Interval", description="Maximum time to sleep between downloads (seconds)."),
 'download_format_preset': Param(
-'custom',
+'formats_2',
 type="string",
 enum=['best_audio', 'formats_0', 'formats_2', 'formats_3', 'custom'],
 title="Download Format Preset",
-description="Select a predefined format string or choose 'custom'. To download multiple formats, this should be a comma-separated list of format IDs (e.g., '137,140').\nformats_0: 18,140\nformats_2: 18,140,299/298/137/136/135/134/133\nformats_3: 18,599,139,140,141,160/269,133/229,134/230,135/231,136/232,137/270,298/311,299/318"
+description="Select a predefined format string or choose 'custom'. To download multiple formats, this should be a comma-separated list of format IDs (e.g., '137,140').\nformats_0: 18,140\nformats_2: 18-dashy,140-dashy,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy\nformats_3: 18,599,139,140,141,160/269,133/229,134/230,135/231,136/232,137/270,298/311,299/318"
 ),
 'download_format_custom': Param(
 '18,140,299/298/137/136/135/134/133',
@@ -1555,18 +1504,15 @@ with DAG(
 description="Custom yt-dlp format string. Used when preset is 'custom'. To download multiple formats, provide a comma-separated list of format IDs (e.g., '137,140')."
 ),
 'downloader': Param(
-'default',
+'cli',
 type="string",
-enum=['default', 'aria2c'],
-title="Downloader",
-description="Choose the downloader for yt-dlp."
-),
-'downloader_args_aria2c': Param(
-'aria2c:-x 4 -k 2M --max-download-limit=3M',
-type="string",
-title="Aria2c Downloader Arguments",
-description="Arguments to pass to yt-dlp's --downloader-args. Used when downloader is 'aria2c'."
+enum=['py', 'aria-rpc', 'cli'],
+title="Download Tool",
+description="Choose the download tool to use: 'py' (native python, recommended), 'aria-rpc' (send to aria2c daemon), 'cli' (legacy yt-dlp wrapper)."
 ),
+'aria_host': Param('172.17.0.1', type="string", title="Aria2c Host", description="For 'aria-rpc' downloader: Host of the aria2c RPC server."),
+'aria_port': Param(6800, type="integer", title="Aria2c Port", description="For 'aria-rpc' downloader: Port of the aria2c RPC server."),
+'aria_secret': Param('SQGCQPLVFQIASMPNPOJYLVGJYLMIDIXDXAIXOTX', type="string", title="Aria2c Secret", description="For 'aria-rpc' downloader: Secret token."),
 'yt_dlp_extra_args': Param(
 '',
 type=["string", "null"],
@@ -72,10 +72,10 @@ DEFAULT_REQUEST_PARAMS_JSON = """{

 # Default settings
 DEFAULT_REDIS_CONN_ID = 'redis_default'
-DEFAULT_TOTAL_WORKERS = 3
+DEFAULT_TOTAL_WORKERS = 8
 DEFAULT_WORKERS_PER_BUNCH = 1
-DEFAULT_WORKER_DELAY_S = 5
-DEFAULT_BUNCH_DELAY_S = 20
+DEFAULT_WORKER_DELAY_S = 1
+DEFAULT_BUNCH_DELAY_S = 1

 DEFAULT_YT_AUTH_SERVICE_IP = Variable.get("YT_AUTH_SERVICE_IP", default_var="172.17.0.1")
 DEFAULT_YT_AUTH_SERVICE_PORT = Variable.get("YT_AUTH_SERVICE_PORT", default_var=9080)
@@ -283,7 +283,7 @@ with DAG(

 # --- Worker Passthrough Parameters ---
 'on_bannable_failure': Param(
-'stop_loop_on_auth_proceed_on_download_error',
+'proceed_loop_under_manual_inspection',
 type="string",
 enum=['stop_loop', 'retry_with_new_account', 'retry_without_ban', 'retry_and_ban_account_only', 'retry_on_connection_error', 'proceed_loop_under_manual_inspection', 'stop_loop_on_auth_proceed_on_download_error'],
 title="[Worker Param] On Bannable Failure Policy",
@@ -294,37 +294,16 @@ with DAG(
 "'proceed_loop_under_manual_inspection': **BEWARE: MANUAL SUPERVISION REQUIRED.** Marks the URL as failed but continues the processing loop. Use this only when you can manually intervene by pausing the dispatcher DAG or creating a lock file (`/opt/airflow/inputfiles/AIRFLOW.PREVENT_URL_PULL.lockfile`) to prevent a runaway failure loop."
 "'stop_loop_on_auth_proceed_on_download_error': **(Default)** Stops the loop on an authentication/token error (like 'stop_loop'), but continues the loop on a download/probe error (like 'proceed...')."
 ),
-'request_params_json': Param(DEFAULT_REQUEST_PARAMS_JSON, type="string", title="[Worker Param] Request Params JSON", description="JSON string with request parameters for the token service.", render_kwargs={"rows": 20, "cols": 120}),
+'request_params_json': Param('{}', type="string", title="[Worker Param] Request Params JSON", description="JSON string with request parameters for the token service."),
 'redis_conn_id': Param(DEFAULT_REDIS_CONN_ID, type="string", description="[Worker Param] Airflow Redis connection ID."),
 'clients': Param(
-'mweb,web_camoufox,tv',
+'tv_simply',
 type="string",
 enum=[
-'mweb,web_camoufox,tv',
+'tv_simply',
 'mweb',
-'web_camoufox',
 'tv',
 'custom',
-'tv,web_safari,mweb,web_camoufox',
-'web_safari',
-'web',
-'web_embedded',
-'web_music',
-'web_creator',
-'web_safari_camoufox',
-'web_embedded_camoufox',
-'web_music_camoufox',
-'web_creator_camoufox',
-'mweb_camoufox',
-'android',
-'android_music',
-'android_creator',
-'android_vr',
-'ios',
-'ios_music',
-'ios_creator',
-'tv_simply',
-'tv_embedded',
 ],
 title="[Worker Param] Clients",
 description="[Worker Param] Comma-separated list of clients for token generation. Full list: web, web_safari, web_embedded, web_music, web_creator, mweb, web_camoufox, web_safari_camoufox, web_embedded_camoufox, web_music_camoufox, web_creator_camoufox, mweb_camoufox, android, android_music, android_creator, android_vr, ios, ios_music, ios_creator, tv, tv_simply, tv_embedded. See DAG documentation for details."
@@ -37,10 +37,10 @@ logger = logging.getLogger(__name__)

 # Default settings
 DEFAULT_REDIS_CONN_ID = 'redis_default'
-DEFAULT_TOTAL_WORKERS = 3
+DEFAULT_TOTAL_WORKERS = 8
 DEFAULT_WORKERS_PER_BUNCH = 1
-DEFAULT_WORKER_DELAY_S = 5
-DEFAULT_BUNCH_DELAY_S = 20
+DEFAULT_WORKER_DELAY_S = 1
+DEFAULT_BUNCH_DELAY_S = 1

 # --- Helper Functions ---

@@ -260,27 +260,24 @@ with DAG(
 type="string",
 enum=['best_audio', 'formats_0', 'formats_2', 'formats_3', 'custom'],
 title="[Worker Param] Download Format Preset",
-description="Select a predefined format string or choose 'custom' to use the value from 'Custom Download Format'.\nformats_0: 18,140\nformats_2: 18,140,299/298/137/136/135/134/133\nformats_3: 18,599,139,140,141,160/269,133/229,134/230,135/231,136/232,137/270,298/311,299/318"
+description="Select a predefined format string or choose 'custom' to use the value from 'Custom Download Format'.\nformats_0: 18,140\nformats_2: 18-dashy,140-dashy,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy\nformats_3: 18,599,139,140,141,160/269,133/229,134/230,135/231,136/232,137/270,298/311,299/318"
 ),
 'download_format_custom': Param(
-'18,140,299/298/137/136/135/134/133',
+'18-dashy,140-dashy,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy',
 type="string",
 title="[Worker Param] Custom Download Format",
 description="Custom yt-dlp format string. Used when preset is 'custom'. E.g., 'ba[ext=m4a]/bestaudio/best'."
 ),
 'downloader': Param(
-'default',
+'cli',
 type="string",
-enum=['default', 'aria2c'],
-title="[Worker Param] Downloader",
-description="Choose the downloader for yt-dlp."
-),
-'downloader_args_aria2c': Param(
-'aria2c:-x 4 -k 2M --max-download-limit=3M',
-type="string",
-title="[Worker Param] Aria2c Downloader Arguments",
-description="Arguments to pass to yt-dlp's --downloader-args. Used when downloader is 'aria2c'."
+enum=['py', 'aria-rpc', 'cli'],
+title="[Worker Param] Download Tool",
+description="Choose the download tool to use: 'py' (native python, recommended), 'aria-rpc' (send to aria2c daemon), 'cli' (legacy yt-dlp wrapper)."
 ),
+'aria_host': Param('172.17.0.1', type="string", title="[Worker Param] Aria2c Host", description="For 'aria-rpc' downloader: Host of the aria2c RPC server. Can be set via Airflow Variable 'YTDLP_ARIA_HOST'."),
+'aria_port': Param(6800, type="integer", title="[Worker Param] Aria2c Port", description="For 'aria-rpc' downloader: Port of the aria2c RPC server. Can be set via Airflow Variable 'YTDLP_ARIA_PORT'."),
+'aria_secret': Param('SQGCQPLVFQIASMPNPOJYLVGJYLMIDIXDXAIXOTX', type="string", title="[Worker Param] Aria2c Secret", description="For 'aria-rpc' downloader: Secret token. Can be set via Airflow Variable 'YTDLP_ARIA_SECRET'."),
 'yt_dlp_extra_args': Param(
 '--restrict-filenames',
 type=["string", "null"],
@@ -380,7 +380,10 @@ def get_url_and_assign_account(**context):

 @task
 def get_token(initial_data: dict, **context):
-"""Makes a single attempt to get a token from the Thrift service."""
+"""Makes a single attempt to get a token by calling the ytops-client get-info tool."""
+import subprocess
+import shlex
+
 ti = context['task_instance']
 params = context['params']

@@ -388,131 +391,89 @@ def get_token(initial_data: dict, **context):
 url = initial_data['url_to_process']
 info_json_dir = Variable.get('DOWNLOADS_TEMP', '/opt/airflow/downloadfiles')

-host, port, timeout = params['service_ip'], int(params['service_port']), int(params.get('timeout', DEFAULT_TIMEOUT))
+host, port = params['service_ip'], int(params['service_port'])
 machine_id = params.get('machine_id') or socket.gethostname()
 clients = params.get('clients')
 request_params_json = params.get('request_params_json', '{}')
 assigned_proxy_url = params.get('assigned_proxy_url')

-# Pretty-print the request parameters for debugging
-try:
-pretty_request_params = json.dumps(json.loads(request_params_json), indent=2)
-logger.info(f"\n--- Request Parameters ---\n{pretty_request_params}\n--- End of Request Parameters ---")
-except (json.JSONDecodeError, TypeError):
-logger.warning("Could not parse request_params_json. Using raw content.")
-logger.info(f"\n--- Raw Request Parameters ---\n{request_params_json}\n--- End of Raw Request Parameters ---")
+video_id = _extract_video_id(url)
+timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+job_dir_name = f"{timestamp}-{video_id or 'unknown'}"
+job_dir_path = os.path.join(info_json_dir, job_dir_name)
+os.makedirs(job_dir_path, exist_ok=True)
+info_json_filename = f"info_{video_id or 'unknown'}_{account_id}_{timestamp}.json"
+info_json_path = os.path.join(job_dir_path, info_json_filename)

-# Construct Airflow log context to pass to the service
-try:
-from airflow.configuration import conf
-remote_base = conf.get('logging', 'remote_base_log_folder')
-log_path = (
-f"{remote_base}/dag_id={ti.dag_id}/run_id={ti.run_id}/"
-f"task_id={ti.task_id}/attempt={ti.try_number}.log"
-)
-airflow_log_context = AirflowLogContext(
-logS3Path=log_path,
-dagId=ti.dag_id,
-runId=ti.run_id,
-taskId=ti.task_id,
-tryNumber=ti.try_number,
-workerHostname=socket.gethostname(),
-queue=ti.queue
-)
-logger.info(f"Constructed Airflow log context for yt-ops service: {airflow_log_context}")
-except Exception as e:
-logger.warning(f"Could not construct full Airflow log context: {e}. Creating a basic one.")
-airflow_log_context = AirflowLogContext(
-dagId=ti.dag_id,
-runId=ti.run_id,
-taskId=ti.task_id,
-tryNumber=ti.try_number,
-workerHostname=socket.gethostname(),
-queue=ti.queue
-)
+cmd = [
+'ytops-client', 'get-info',
+'--host', host,
+'--port', str(port),
+'--profile', account_id,
+'--output', info_json_path,
+'--print-proxy',
+'--verbose',
+'--log-return',
+]
+
+if clients:
+cmd.extend(['--client', clients])
+if machine_id:
+cmd.extend(['--machine-id', machine_id])
+if request_params_json and request_params_json != '{}':
+cmd.extend(['--request-params-json', request_params_json])
+if assigned_proxy_url:
+cmd.extend(['--assigned-proxy-url', assigned_proxy_url])
+
+cmd.append(url)

 logger.info(f"--- Attempting to get token for URL '{url}' with account '{account_id}' (Clients: {clients}) ---")
-client, transport = None, None
-try:
-client, transport = _get_thrift_client(host, port, timeout)
-token_data = client.getOrRefreshToken(
-accountId=account_id,
-updateType=TokenUpdateMode.AUTO,
-url=url,
-clients=clients,
-machineId=machine_id,
-airflowLogContext=airflow_log_context,
-requestParamsJson=request_params_json,
-assignedProxyUrl=assigned_proxy_url
-)
+copy_paste_cmd = ' '.join(shlex.quote(arg) for arg in cmd)
+logger.info(f"Executing command: {copy_paste_cmd}")

-# Log a compact summary of the Thrift response, omitting large/detailed fields.
-summary_token_data = copy(token_data)
-if hasattr(summary_token_data, 'infoJson') and summary_token_data.infoJson:
-summary_token_data.infoJson = f"... ({len(summary_token_data.infoJson)} bytes) ..."
-if hasattr(summary_token_data, 'cookiesBlob') and summary_token_data.cookiesBlob:
-summary_token_data.cookiesBlob = f"... ({len(summary_token_data.cookiesBlob)} bytes) ..."
-# These will be logged separately below.
-if hasattr(summary_token_data, 'requestSummary'):
-summary_token_data.requestSummary = "..."
-if hasattr(summary_token_data, 'communicationLogPaths'):
-summary_token_data.communicationLogPaths = "..."
-logger.info(f"Thrift service response summary: {summary_token_data}")
+process = subprocess.run(cmd, capture_output=True, text=True, timeout=int(params.get('timeout', DEFAULT_TIMEOUT)))

-request_summary = getattr(token_data, 'requestSummary', None)
-if request_summary:
-# Prepending a newline for better separation in logs.
-logger.info(f"\n--- Request Summary ---\n{request_summary}")
+if process.stdout:
+logger.info(f"ytops-client STDOUT:\n{process.stdout}")
+if process.stderr:
+logger.info(f"ytops-client STDERR:\n{process.stderr}")

-communication_log_paths = getattr(token_data, 'communicationLogPaths', None)
-if communication_log_paths:
-logger.info("--- Communication Log Paths ---")
-for path in communication_log_paths:
-logger.info(f" - {path}")
+if process.returncode != 0:
+error_message = "ytops-client failed. See logs for details."
+for line in reversed(process.stderr.strip().split('\n')):
+if 'ERROR' in line or 'Thrift error' in line or 'Connection to server failed' in line:
+error_message = line.strip()
+break

-info_json = getattr(token_data, 'infoJson', None)
|
error_code = 'GET_INFO_CLIENT_FAIL'
|
||||||
if not (info_json and json.loads(info_json)):
|
if "BOT_DETECTED" in process.stderr:
|
||||||
raise AirflowException("Service returned success but info.json was empty or invalid.")
|
error_code = "BOT_DETECTED"
|
||||||
|
elif "BOT_DETECTION_SIGN_IN_REQUIRED" in process.stderr:
|
||||||
video_id = _extract_video_id(url)
|
error_code = "BOT_DETECTION_SIGN_IN_REQUIRED"
|
||||||
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
elif "Connection to server failed" in process.stderr:
|
||||||
|
error_code = "TRANSPORT_ERROR"
|
||||||
# Create a unique directory for this job's artifacts
|
|
||||||
job_dir_name = f"{timestamp}-{video_id or 'unknown'}"
|
|
||||||
job_dir_path = os.path.join(info_json_dir, job_dir_name)
|
|
||||||
os.makedirs(job_dir_path, exist_ok=True)
|
|
||||||
|
|
||||||
info_json_filename = f"info_{video_id or 'unknown'}_{account_id}_{timestamp}.json"
|
|
||||||
info_json_path = os.path.join(job_dir_path, info_json_filename)
|
|
||||||
with open(info_json_path, 'w', encoding='utf-8') as f:
|
|
||||||
f.write(info_json)
|
|
||||||
|
|
||||||
proxy_attr = next((attr for attr in ['socks5Proxy', 'socksProxy', 'socks'] if hasattr(token_data, attr)), None)
|
|
||||||
return {
|
|
||||||
'info_json_path': info_json_path,
|
|
||||||
'socks_proxy': getattr(token_data, proxy_attr) if proxy_attr else None,
|
|
||||||
'ytdlp_command': getattr(token_data, 'ytdlpCommand', None),
|
|
||||||
'successful_account_id': account_id,
|
|
||||||
'original_url': url, # Include original URL for fallback
|
|
||||||
'clients': clients, # Pass clients string for accurate stats
|
|
||||||
}
|
|
||||||
except (PBServiceException, PBUserException, TTransportException) as e:
|
|
||||||
error_context = getattr(e, 'context', None)
|
|
||||||
if isinstance(error_context, str):
|
|
||||||
try: error_context = json.loads(error_context.replace("'", "\""))
|
|
||||||
except: pass
|
|
||||||
|
|
||||||
error_details = {
|
error_details = {
|
||||||
'error_message': getattr(e, 'message', str(e)),
|
'error_message': error_message,
|
||||||
'error_code': getattr(e, 'errorCode', 'TRANSPORT_ERROR'),
|
'error_code': error_code,
|
||||||
'proxy_url': error_context.get('proxy_url') if isinstance(error_context, dict) else None
|
'proxy_url': None
|
||||||
}
|
}
|
||||||
logger.error(f"Thrift call failed for account '{account_id}'. Exception: {error_details['error_message']}")
|
|
||||||
ti.xcom_push(key='error_details', value=error_details)
|
ti.xcom_push(key='error_details', value=error_details)
|
||||||
raise AirflowException(f"Thrift call failed: {error_details['error_message']}")
|
raise AirflowException(f"ytops-client get-info failed: {error_message}")
|
||||||
finally:
|
|
||||||
if transport and transport.isOpen():
|
proxy = None
|
||||||
transport.close()
|
proxy_match = re.search(r"Proxy used: (.*)", process.stderr)
|
||||||
|
if proxy_match:
|
||||||
|
proxy = proxy_match.group(1).strip()
|
||||||
|
|
||||||
|
return {
|
||||||
|
'info_json_path': info_json_path,
|
||||||
|
'socks_proxy': proxy,
|
||||||
|
'ytdlp_command': None,
|
||||||
|
'successful_account_id': account_id,
|
||||||
|
'original_url': url,
|
||||||
|
'clients': clients,
|
||||||
|
}
|
||||||
|
|
||||||
@task.branch
|
@task.branch
|
||||||
def handle_bannable_error_branch(task_id_to_check: str, **context):
|
def handle_bannable_error_branch(task_id_to_check: str, **context):
|
||||||
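For reviewers, a minimal sketch of what the reworked task now shells out to, assuming illustrative values for the account, endpoint and paths (none of these values come from this commit); it mirrors the stderr-based error classification added above rather than reproducing the DAG's exact code.

import shlex
import subprocess

# Hypothetical values for illustration only.
cmd = [
    'ytops-client', 'get-info',
    '--host', 'envoy-thrift-lb', '--port', '9080',
    '--profile', 'account_001',
    '--output', '/opt/airflow/downloadfiles/20240101_000000-abc/info.json',
    '--print-proxy', '--verbose', '--log-return',
    'https://www.youtube.com/watch?v=dQw4w9WgXcQ',
]
print(' '.join(shlex.quote(a) for a in cmd))

proc = subprocess.run(cmd, capture_output=True, text=True, timeout=600)
error_code = 'GET_INFO_CLIENT_FAIL'
if "BOT_DETECTED" in proc.stderr:
    error_code = "BOT_DETECTED"
elif "BOT_DETECTION_SIGN_IN_REQUIRED" in proc.stderr:
    error_code = "BOT_DETECTION_SIGN_IN_REQUIRED"
elif "Connection to server failed" in proc.stderr:
    error_code = "TRANSPORT_ERROR"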
@ -1135,7 +1096,7 @@ with DAG(
'prepend_client_to_account': Param(True, type="boolean", title="[Worker Param] Prepend Client to Account", description="If True, prepends client and timestamp to account names in prefix mode."),
'machine_id': Param(None, type=["string", "null"]),
'assigned_proxy_url': Param(None, type=["string", "null"], title="[Worker Param] Assigned Proxy URL", description="If provided, forces the token service to use this specific proxy for the request."),
- 'clients': Param('mweb', type="string", description="Comma-separated list of clients for token generation. e.g. mweb,tv,web_camoufox"),
+ 'clients': Param('tv_simply', type="string", description="Comma-separated list of clients for token generation. e.g. mweb,tv,web_camoufox"),
'timeout': Param(DEFAULT_TIMEOUT, type="integer"),
'on_bannable_failure': Param('stop_loop_on_auth_proceed_on_download_error', type="string", enum=['stop_loop', 'retry_with_new_account', 'retry_without_ban', 'retry_and_ban_account_only', 'retry_on_connection_error', 'proceed_loop_under_manual_inspection', 'stop_loop_on_auth_proceed_on_download_error']),
'request_params_json': Param(json.dumps(DEFAULT_REQUEST_PARAMS), type="string", title="[Worker Param] Request Params JSON", description="JSON string with request parameters for the token service."),
@ -300,7 +300,7 @@ def download_and_probe(token_data: dict, available_formats: list[str], **context
elif format_preset == 'formats_0':
download_format = '18,140'
elif format_preset == 'formats_2':
- download_format = '18,140,299/298/137/136/135/134/133'
+ download_format = '18-dashy,140-dashy,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy'
elif format_preset == 'formats_3':
download_format = '18,599,139,140,141,160/269,133/229,134/230,135/231,136/232,137/270,298/311,299/318'
else:
@ -311,112 +311,102 @@ def download_and_probe(token_data: dict, available_formats: list[str], **context
retry_on_probe_failure = params.get('retry_on_probe_failure', False)

def run_yt_dlp_command(format_selector: str):
- """Constructs and runs a yt-dlp command, returning a list of final filenames."""
+ """Constructs and runs a yt-ops-client download command, returning a list of final filenames."""
- cmd = [
- 'yt-dlp', '--verbose', '--print-traffic', '--load-info-json', info_json_path,
- '-f', format_selector, '-o', full_output_path,
- '--print', 'filename', '--continue', '--no-progress', '--no-simulate',
- '--no-write-info-json', '--ignore-errors', '--no-playlist',
- ]
-
- if params.get('fragment_retries'):
- cmd.extend(['--fragment-retries', str(params['fragment_retries'])])
- if params.get('limit_rate'):
- cmd.extend(['--limit-rate', params['limit_rate']])
- if params.get('socket_timeout'):
- cmd.extend(['--socket-timeout', str(params['socket_timeout'])])
- if params.get('min_sleep_interval'):
- cmd.extend(['--min-sleep-interval', str(params['min_sleep_interval'])])
- if params.get('max_sleep_interval'):
- cmd.extend(['--max-sleep-interval', str(params['max_sleep_interval'])])
- if params.get('yt_dlp_test_mode'):
- cmd.append('--test')
-
- downloader = params.get('downloader', 'default')
- if proxy and not (downloader == 'aria2c' and proxy.startswith('socks5://')):
+ downloader = params.get('downloader', 'py')
+ cmd = ['ytops-client', 'download', downloader, '--load-info-json', info_json_path, '-f', format_selector]
+
+ if proxy:
cmd.extend(['--proxy', proxy])

- gost_process = None
- try:
- if downloader == 'aria2c':
- cmd.extend(['--downloader', 'aria2c'])
- downloader_args = params.get('downloader_args_aria2c')
- if proxy and proxy.startswith('socks5://'):
- import socket
- from contextlib import closing
- def find_free_port():
- with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s:
- s.bind(('', 0))
- s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
- return s.getsockname()[1]
- local_port = find_free_port()
- http_proxy = f"http://127.0.0.1:{local_port}"
- logger.info(f"Starting gost for format '{format_selector}' to forward {proxy} to {http_proxy}")
- gost_cmd = ['gost', '-L', f'http://127.0.0.1:{local_port}', '-F', proxy]
- gost_process = subprocess.Popen(gost_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
- time.sleep(1)
- if gost_process.poll() is not None:
- stdout, stderr = gost_process.communicate()
- logger.error(f"gost failed to start. Exit: {gost_process.returncode}. Stdout: {stdout.decode()}. Stderr: {stderr.decode()}")
- raise AirflowException("gost proxy tunnel failed to start.")
- user_args = downloader_args[len('aria2c:'):] if downloader_args and downloader_args.startswith('aria2c:') else (downloader_args or "")
- final_args_str = f'aria2c:{user_args.strip()} --http-proxy={http_proxy}'
- cmd.extend(['--downloader-args', final_args_str])
- elif downloader_args:
- cmd.extend(['--downloader-args', downloader_args])
-
- extra_args = params.get('yt_dlp_extra_args')
- if extra_args:
- cmd.extend(shlex.split(extra_args))
- if original_url:
- cmd.append(original_url)
-
- copy_paste_cmd = ' '.join(shlex.quote(arg) for arg in cmd)
- logger.info(f"Executing yt-dlp command for format '{format_selector}': {copy_paste_cmd}")
- process = subprocess.run(cmd, capture_output=True, text=True, timeout=3600)
-
- if process.stdout:
- logger.info(f"yt-dlp STDOUT for format '{format_selector}':\n{process.stdout}")
- if process.stderr:
- # yt-dlp often prints progress and informational messages to stderr
- logger.info(f"yt-dlp STDERR for format '{format_selector}':\n{process.stderr}")
-
- if process.returncode != 0:
- logger.error(f"yt-dlp failed for format '{format_selector}' with exit code {process.returncode}")
- # STDOUT and STDERR are already logged above.
- raise AirflowException(f"yt-dlp command failed for format '{format_selector}'.")
-
- # In test mode, files are not created, so we only check that yt-dlp returned filenames.
- # Otherwise, we verify that the files actually exist on disk.
- output_files = [f for f in process.stdout.strip().split('\n') if f]
- if not params.get('yt_dlp_test_mode'):
- output_files = [f for f in output_files if os.path.exists(f)]
-
- if not output_files:
- log_msg = (f"Test run for format '{format_selector}' did not produce any filenames."
- if params.get('yt_dlp_test_mode') else
- f"Download for format '{format_selector}' finished but no output files exist.")
- exc_msg = (f"Test run for format '{format_selector}' did not produce any filenames."
- if params.get('yt_dlp_test_mode') else
- f"Download for format '{format_selector}' did not produce a file.")
-
- logger.error(log_msg)
- logger.error(f"Full STDOUT:\n{process.stdout}")
- logger.error(f"Full STDERR:\n{process.stderr}")
- raise AirflowException(exc_msg)
-
- log_prefix = "SUCCESS (Test Mode):" if params.get('yt_dlp_test_mode') else "SUCCESS:"
- logger.info(f"{log_prefix} Command for format '{format_selector}' complete. Files: {output_files}")
- return output_files
- finally:
- if gost_process:
- logger.info(f"Terminating gost process (PID: {gost_process.pid}) for format '{format_selector}'.")
- gost_process.terminate()
- try:
- gost_process.wait(timeout=5)
- except subprocess.TimeoutExpired:
- gost_process.kill()
- gost_process.wait()
+ if downloader == 'py':
+ cmd.extend(['--output-dir', download_dir])
+ # The 'py' tool maps many yt-dlp flags via --extra-ytdlp-args
+ py_extra_args = []
+ if params.get('fragment_retries'):
+ py_extra_args.extend(['--fragment-retries', str(params['fragment_retries'])])
+ if params.get('limit_rate'):
+ py_extra_args.extend(['--limit-rate', params['limit_rate']])
+ if params.get('socket_timeout'):
+ py_extra_args.extend(['--socket-timeout', str(params['socket_timeout'])])
+ if params.get('min_sleep_interval'):
+ py_extra_args.extend(['--sleep-interval', str(params['min_sleep_interval'])])
+ if params.get('max_sleep_interval'):
+ py_extra_args.extend(['--max-sleep-interval', str(params['max_sleep_interval'])])
+ if params.get('yt_dlp_test_mode'):
+ py_extra_args.append('--test')
+
+ existing_extra = shlex.split(params.get('yt_dlp_extra_args') or '')
+ final_extra_args = existing_extra + py_extra_args
+ if final_extra_args:
+ cmd.extend(['--extra-ytdlp-args', shlex.join(final_extra_args)])
+
+ elif downloader == 'aria-rpc':
+ cmd.extend([
+ '--aria-host', params.get('aria_host', '172.17.0.1'),
+ '--aria-port', str(params.get('aria_port', 6800)),
+ '--aria-secret', params.get('aria_secret'),
+ '--wait', '--auto-merge-fragments',
+ '--fragments-dir', download_dir,
+ '--output-dir', download_dir,
+ ])
+ if params.get('yt_dlp_cleanup_mode'):
+ cmd.append('--cleanup')
+
+ elif downloader == 'cli':
+ cmd.extend(['--output-dir', download_dir])
+ # The 'cli' tool is the old yt-dlp wrapper, so it takes similar arguments.
+ cli_extra_args = []
+ if params.get('fragment_retries'):
+ cli_extra_args.extend(['--fragment-retries', str(params['fragment_retries'])])
+ if params.get('limit_rate'):
+ cli_extra_args.extend(['--limit-rate', params['limit_rate']])
+ if params.get('socket_timeout'):
+ cli_extra_args.extend(['--socket-timeout', str(params['socket_timeout'])])
+ if params.get('min_sleep_interval'):
+ cli_extra_args.extend(['--sleep-interval', str(params['min_sleep_interval'])])
+ if params.get('max_sleep_interval'):
+ cli_extra_args.extend(['--max-sleep-interval', str(params['max_sleep_interval'])])
+ if params.get('yt_dlp_test_mode'):
+ cli_extra_args.append('--test')
+
+ existing_extra = shlex.split(params.get('yt_dlp_extra_args') or '')
+ final_extra_args = existing_extra + cli_extra_args
+ if final_extra_args:
+ cmd.extend(['--extra-ytdlp-args', shlex.join(final_extra_args)])
+
+ copy_paste_cmd = ' '.join(shlex.quote(arg) for arg in cmd)
+ logger.info(f"Executing download command for format '{format_selector}': {copy_paste_cmd}")
+ process = subprocess.run(cmd, capture_output=True, text=True, timeout=3600)
+
+ if process.stdout:
+ logger.info(f"Download tool STDOUT for format '{format_selector}':\n{process.stdout}")
+ if process.stderr:
+ logger.info(f"Download tool STDERR for format '{format_selector}':\n{process.stderr}")
+
+ if process.returncode != 0:
+ logger.error(f"Download tool failed for format '{format_selector}' with exit code {process.returncode}")
+ raise AirflowException(f"Download command failed for format '{format_selector}'. See logs for details.")
+
+ output_files = []
+ for line in process.stdout.strip().split('\n'):
+ # For aria-rpc, parse "Download and merge successful: <path>" or "Download successful: <path>"
+ match = re.search(r'successful: (.+)', line)
+ if match:
+ filepath = match.group(1).strip()
+ if os.path.exists(filepath):
+ output_files.append(filepath)
+ else:
+ logger.warning(f"File path from aria-rpc output does not exist locally: '{filepath}'")
+ # For py/cli, it's just the path
+ elif os.path.exists(line.strip()):
+ output_files.append(line.strip())
+
+ if not params.get('yt_dlp_test_mode') and not output_files:
+ raise AirflowException(f"Download for format '{format_selector}' finished but no output files were found or exist.")
+
+ log_prefix = "SUCCESS (Test Mode):" if params.get('yt_dlp_test_mode') else "SUCCESS:"
+ logger.info(f"{log_prefix} Command for format '{format_selector}' complete. Files: {output_files}")
+ return output_files

def run_ffmpeg_probe(filename):
"""Probes a file with ffmpeg to check for corruption."""
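The copy_paste_cmd logged above makes the new path easy to reproduce by hand. Roughly, the three downloader choices produce commands of the following shape; the job directory, proxy and secret below are illustrative placeholders, not values from this commit:

ytops-client download py --load-info-json <job>/info.json -f 18,140 --proxy socks5://127.0.0.1:1080 \
  --output-dir <job> --extra-ytdlp-args '--no-part --restrict-filenames --limit-rate 2M'

ytops-client download aria-rpc --load-info-json <job>/info.json -f 299/137 --proxy socks5://127.0.0.1:1080 \
  --aria-host 172.17.0.1 --aria-port 6800 --aria-secret <secret> \
  --wait --auto-merge-fragments --fragments-dir <job> --output-dir <job>

ytops-client download cli --load-info-json <job>/info.json -f 18,140 --output-dir <job> \
  --extra-ytdlp-args '--no-part --restrict-filenames --test'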
@ -824,7 +814,7 @@ with DAG(
type="string",
enum=['best_audio', 'formats_0', 'formats_2', 'formats_3', 'custom'],
title="Download Format Preset",
- description="Select a predefined format string or choose 'custom'. To download multiple formats, this should be a comma-separated list of format IDs (e.g., '137,140').\nformats_0: 18,140\nformats_2: 18,140,299/298/137/136/135/134/133\nformats_3: 18,599,139,140,141,160/269,133/229,134/230,135/231,136/232,137/270,298/311,299/318"
+ description="Select a predefined format string or choose 'custom'. To download multiple formats, this should be a comma-separated list of format IDs (e.g., '137,140').\nformats_0: 18,140\nformats_2: 18-dashy,140-dashy,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy\nformats_3: 18,599,139,140,141,160/269,133/229,134/230,135/231,136/232,137/270,298/311,299/318"
),
'download_format_custom': Param(
'ba[ext=m4a]/bestaudio/best',
@ -833,18 +823,15 @@
description="Custom yt-dlp format string. Used when preset is 'custom'. To download multiple formats, provide a comma-separated list of format IDs (e.g., '137,140')."
),
'downloader': Param(
- 'default',
+ 'cli',
type="string",
- enum=['default', 'aria2c'],
+ enum=['py', 'aria-rpc', 'cli'],
- title="Downloader",
+ title="Download Tool",
- description="Choose the downloader for yt-dlp."
+ description="Choose the download tool to use: 'py' (native python, recommended), 'aria-rpc' (send to aria2c daemon), 'cli' (legacy yt-dlp wrapper)."
- ),
- 'downloader_args_aria2c': Param(
- 'aria2c:-x 4 -k 2M --max-download-limit=3M',
- type="string",
- title="Aria2c Downloader Arguments",
- description="Arguments to pass to yt-dlp's --downloader-args. Used when downloader is 'aria2c'."
),
+ 'aria_host': Param('172.17.0.1', type="string", title="Aria2c Host", description="For 'aria-rpc' downloader: Host of the aria2c RPC server."),
+ 'aria_port': Param(6800, type="integer", title="Aria2c Port", description="For 'aria-rpc' downloader: Port of the aria2c RPC server."),
+ 'aria_secret': Param('SQGCQPLVFQIASMPNPOJYLVGJYLMIDIXDXAIXOTX', type="string", title="Aria2c Secret", description="For 'aria-rpc' downloader: Secret token."),
'yt_dlp_extra_args': Param(
'--no-part --restrict-filenames',
type=["string", "null"],
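A hypothetical trigger configuration exercising the new parameters might look like the following; the values are illustrative, only the parameter names come from the diff:

dag_run_conf = {
    "downloader": "aria-rpc",          # one of 'py', 'aria-rpc', 'cli'
    "aria_host": "172.17.0.1",
    "aria_port": 6800,
    "aria_secret": "<redacted>",
    "yt_dlp_extra_args": "--no-part --restrict-filenames",
}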
@ -5,6 +5,9 @@
vars_files:
- "{{ inventory_dir }}/group_vars/all/generated_vars.yml"
- "{{ inventory_dir }}/group_vars/all/vault.yml"
+ vars:
+ envoy_port: 9980
+ envoy_admin_port: 9981
pre_tasks:
- name: Announce master deployment
debug:
ytops_client/__init__.py (new file, 1 line)
@ -0,0 +1 @@
# This file makes 'ytops_client' a Python package.

ytops_client/cli.py (new file, 88 lines)
@ -0,0 +1,88 @@
#!/usr/bin/env python3
import sys
import argparse

# Import the functions that define and execute the logic for each subcommand
from .list_formats_tool import add_list_formats_parser, main_list_formats
from .get_info_tool import add_get_info_parser, main_get_info
from .download_tool import add_download_parser, main_download
from .stress_policy_tool import add_stress_policy_parser, main_stress_policy
from .stress_formats_tool import add_stress_formats_parser, main_stress_formats
from .cookie_tool import add_cookie_tool_parser, main_cookie_tool
from .download_aria_tool import add_download_aria_parser, main_download_aria
from .download_native_py_tool import add_download_native_py_parser, main_download_native_py

def main():
"""
Main entry point for the yt-ops-client CLI.
Parses arguments and dispatches to the appropriate subcommand function.
"""
# Workaround for argparse behavior with positional arguments that start with a hyphen.
# If the command is 'get-info' and the last argument looks like a video ID
# starting with a '-', we insert '--' before it to tell argparse to treat it
# as a positional argument, not an option. This assumes the URL is the last argument.
if len(sys.argv) >= 3 and sys.argv[1] == 'get-info':
last_arg = sys.argv[-1]
# A YouTube video ID is 11 characters.
if last_arg.startswith('-') and len(last_arg) == 11:
import re
if re.fullmatch(r'-[a-zA-Z0-9_-]{10}', last_arg):
sys.argv.insert(len(sys.argv) - 1, '--')

parser = argparse.ArgumentParser(
description="YT Ops Client Tools",
formatter_class=argparse.RawTextHelpFormatter
)
subparsers = parser.add_subparsers(dest='command', help='Available sub-commands')

# Add subparsers from each tool module
add_list_formats_parser(subparsers)
add_get_info_parser(subparsers)

# Create a top-level 'download' command with its own subcommands
download_parser = subparsers.add_parser(
'download',
help='Download using different methods.',
description='Provides access to various download tools. Use "download <method> --help" for details.'
)
download_subparsers = download_parser.add_subparsers(dest='download_command', help='Available downloaders', required=True)
add_download_parser(download_subparsers) # Adds 'cli' subcommand
add_download_native_py_parser(download_subparsers) # Adds 'py' subcommand
add_download_aria_parser(download_subparsers) # Adds 'aria-rpc' subcommand

add_stress_policy_parser(subparsers)
add_stress_formats_parser(subparsers)
add_cookie_tool_parser(subparsers)

args = parser.parse_args()

# If no command is provided, print help and exit.
if not args.command:
parser.print_help()
return 1

# Dispatch to the correct main function based on the command
if args.command == 'list-formats':
return main_list_formats(args)
elif args.command == 'get-info':
return main_get_info(args)
elif args.command == 'download':
if args.download_command == 'cli':
return main_download(args)
elif args.download_command == 'py':
return main_download_native_py(args)
elif args.download_command == 'aria-rpc':
return main_download_aria(args)
elif args.command == 'stress-policy':
return main_stress_policy(args)
elif args.command == 'stress-formats':
return main_stress_formats(args)
elif args.command == 'convert-cookies':
return main_cookie_tool(args)

# This path should not be reachable if a command is required or handled above.
parser.print_help()
return 1

if __name__ == "__main__":
sys.exit(main())
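A quick illustration of the argv workaround described in the comments above, using a hypothetical 11-character video ID; after the rewrite, argparse treats the ID as a positional argument rather than an option:

import re

argv = ['ytops-client', 'get-info', '--profile', 'acct_01', '-AbCdEfGhIj']  # hypothetical ID
last = argv[-1]
if argv[1] == 'get-info' and last.startswith('-') and len(last) == 11 and re.fullmatch(r'-[a-zA-Z0-9_-]{10}', last):
    argv.insert(len(argv) - 1, '--')
print(argv)
# ['ytops-client', 'get-info', '--profile', 'acct_01', '--', '-AbCdEfGhIj']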
ytops_client/cookie_tool.py (new file, 139 lines)
@ -0,0 +1,139 @@
#!/usr/bin/env python3
"""
Tool to convert JSON cookies to the standard Netscape txt format.
"""

import argparse
import json
import sys
import logging

# Configure logging
logger = logging.getLogger('cookie_tool')

def convert_json_to_netscape(json_data):
"""
Converts a list of cookie dictionaries to a Netscape format string.
"""
netscape_cookies = []
# The header is optional but good practice for some tools.
netscape_cookies.append("# Netscape HTTP Cookie File")
netscape_cookies.append("# http://www.netscape.com/newsref/std/cookie_spec.html")
netscape_cookies.append("# This is a generated file! Do not edit.")
netscape_cookies.append("")

if not isinstance(json_data, list):
raise TypeError("Input JSON must be a list of cookie objects.")

for cookie in json_data:
if not isinstance(cookie, dict):
logger.warning(f"Skipping non-dictionary item in JSON list: {cookie}")
continue

domain = cookie.get('domain', '')
# The 'hostOnly' flag determines if the domain is accessible to subdomains.
# Netscape format's flag is TRUE if subdomains can access it.
# So, hostOnly=false means flag=TRUE.
# A leading dot in the domain also implies this for some implementations.
if domain.startswith('.'):
include_subdomains = 'TRUE'
else:
include_subdomains = 'FALSE' if cookie.get('hostOnly', True) else 'TRUE'

path = cookie.get('path', '/')
secure = 'TRUE' if cookie.get('secure', False) else 'FALSE'

# Expiration date. If session cookie or no expiration, use 0.
if cookie.get('session', False) or 'expirationDate' not in cookie or cookie['expirationDate'] is None:
expires = 0
else:
expires = int(cookie['expirationDate'])

name = cookie.get('name', '')
value = str(cookie.get('value', ''))

# Skip cookies without essential fields
if not domain or not name:
logger.warning(f"Skipping cookie with missing domain or name: {cookie}")
continue

netscape_cookies.append(
f"{domain}\t{include_subdomains}\t{path}\t{secure}\t{expires}\t{name}\t{value}"
)

return "\n".join(netscape_cookies)

def add_cookie_tool_parser(subparsers):
"""Add the parser for the 'convert-cookies' command."""
parser = subparsers.add_parser(
'convert-cookies',
description='Convert JSON cookies to Netscape format.',
formatter_class=argparse.RawTextHelpFormatter,
help='Convert JSON cookies to Netscape format.',
epilog="""
Reads a JSON array of cookie objects from stdin and prints the
Netscape cookie file format to stdout.

Example JSON input format (per cookie):
{
"domain": ".example.com",
"hostOnly": false,
"path": "/",
"secure": true,
"expirationDate": 1672531199,
"name": "my_cookie",
"value": "my_value"
}

Example usage:
cat cookies.json | yt-ops-client convert-cookies > cookies.txt
"""
)
parser.add_argument(
'input_file',
nargs='?',
type=argparse.FileType('r', encoding='utf-8'),
default=sys.stdin,
help="Path to the JSON cookie file. Reads from stdin if not provided."
)
parser.add_argument(
'-o', '--output',
type=argparse.FileType('w', encoding='utf-8'),
default=sys.stdout,
help="Output file path for the Netscape cookies. Defaults to stdout."
)
parser.add_argument('--verbose', action='store_true', help='Enable verbose logging.')
return parser

def main_cookie_tool(args):
"""Main logic for the 'convert-cookies' command."""
if args.verbose:
logging.getLogger().setLevel(logging.DEBUG)
logging.basicConfig(level=logging.DEBUG, format='%(levelname)s: %(message)s', stream=sys.stderr)
else:
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s', stream=sys.stderr)

try:
json_content = args.input_file.read()
if not json_content.strip():
logger.error("Input is empty.")
return 1

cookie_data = json.loads(json_content)
netscape_string = convert_json_to_netscape(cookie_data)

args.output.write(netscape_string + '\n')

if args.output is not sys.stdout:
logger.info(f"Successfully converted cookies to {args.output.name}")

return 0
except json.JSONDecodeError:
logger.error("Invalid JSON provided. Please check the input file.")
return 1
except TypeError as e:
logger.error(f"Error processing JSON: {e}")
return 1
except Exception as e:
logger.error(f"An unexpected error occurred: {e}", exc_info=args.verbose)
return 1
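As a sanity check of the conversion rules above, feeding the epilog's example cookie through convert_json_to_netscape should yield, after the three header comments and blank line, a single tab-separated row:

from ytops_client.cookie_tool import convert_json_to_netscape

cookies = [{
    "domain": ".example.com", "hostOnly": False, "path": "/", "secure": True,
    "expirationDate": 1672531199, "name": "my_cookie", "value": "my_value",
}]
print(convert_json_to_netscape(cookies).splitlines()[-1])
# .example.com<TAB>TRUE<TAB>/<TAB>TRUE<TAB>1672531199<TAB>my_cookie<TAB>my_value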
ytops_client/download_aria_tool.py (new file, 687 lines)
@ -0,0 +1,687 @@
#!/usr/bin/env python3
"""
Tool to send a download to an aria2c daemon via RPC.
"""

import argparse
import json
import logging
import sys
import os
import glob
import shutil
import re
import shlex
import time
from urllib.parse import urljoin

try:
import aria2p
from aria2p.utils import human_readable_bytes
except ImportError:
print("aria2p is not installed. Please install it with: pip install aria2p", file=sys.stderr)
sys.exit(1)

logger = logging.getLogger('download_aria_tool')

class TimeoutError(Exception):
pass

def add_download_aria_parser(subparsers):
"""Add the parser for the 'download aria-rpc' command."""
parser = subparsers.add_parser(
'aria-rpc',
description='Send a download to an aria2c daemon via RPC, using an info.json from stdin or a file.',
formatter_class=argparse.RawTextHelpFormatter,
help='Download a specific format using aria2c RPC.',
epilog="""
Usage Notes for Fragmented Downloads (e.g., DASH):

To download and automatically merge fragmented formats, you must:
1. Use '--wait' to make the operation synchronous.
2. Use '--auto-merge-fragments' to enable the merge logic.
3. Ensure this script has access to the directory where aria2c saves files.

Example for a remote aria2c daemon:
- The remote daemon saves files to '/srv/downloads' on its machine.
- This directory is mounted locally at '/mnt/remote_aria2_downloads'.

cat latest-info.json | yt-ops-client download aria-rpc -f "299/137" \\
--wait --auto-merge-fragments \\
--remote-dir /srv/downloads \\
--fragments-dir /mnt/remote_aria2_downloads
"""
)
parser.add_argument('--load-info-json', type=argparse.FileType('r', encoding='utf-8'), help="Path to the info.json file. If not provided, reads from stdin.")
parser.add_argument('-f', '--format', required=True, help='The format ID to download. Supports yt-dlp style format selectors (e.g., "137/136,140").')
parser.add_argument('--output-dir', help='Local directory to save the final merged file. Defaults to the current directory.')
parser.add_argument('--fragments-dir', help='The local path where this script should look for downloaded fragments. If the aria2c daemon is remote, this should be a local mount point corresponding to --remote-dir. Defaults to --output-dir.')
parser.add_argument('--remote-dir', help='The absolute path to the download directory on the remote aria2c host. This is passed via RPC.')
parser.add_argument('--aria-host', default='localhost', help='The host of the aria2c RPC server. Default: localhost.')
parser.add_argument('--aria-port', type=int, default=6800, help='The port of the aria2c RPC server. Default: 6800.')
parser.add_argument('--aria-secret', help='The secret token for the aria2c RPC server (often required, e.g., "SQGCQPLVFQIASMPNPOJYLVGJYLMIDIXDXAIXOTX").')
parser.add_argument('--proxy', help='Proxy to use for the download, e.g., "socks5://127.0.0.1:1080".')
parser.add_argument('--downloader-args', help='Arguments for aria2c, in yt-dlp format (e.g., "aria2c:[-x 8, -k 1M]").')
parser.add_argument('--wait', action='store_true', help='Wait for the download to complete and report its status. Note: This makes the operation synchronous and will block until the download finishes.')
parser.add_argument('--wait-timeout', help='Timeout in seconds for waiting on downloads. Use "auto" to calculate based on a minimum speed of 200KiB/s. Requires --wait. Default: no timeout.')
parser.add_argument('--auto-merge-fragments', action='store_true', help='Automatically merge fragments after download. Requires --wait and assumes the script has filesystem access to the aria2c host.')
parser.add_argument('--remove-fragments-after-merge', action='store_true', help='Delete individual fragment files after a successful merge. Requires --auto-merge-fragments.')
parser.add_argument('--cleanup', action='store_true', help='After a successful download, remove the final file(s) from the filesystem. For fragmented downloads, this implies --remove-fragments-after-merge.')
parser.add_argument('--remove-on-complete', action=argparse.BooleanOptionalAction, default=True, help='Remove the download from aria2c history on successful completion. Use --no-remove-on-complete to disable. May fail on older aria2c daemons.')
parser.add_argument('--purge-on-complete', action='store_true', help='Use aria2.purgeDownloadResult to clear ALL completed/failed downloads from history on success. Use as a workaround for older daemons.')
parser.add_argument('--verbose', action='store_true', help='Enable verbose output for this script.')
return parser

def cleanup_aria_download(api, downloads):
"""Pause and remove downloads from aria2c."""
if not downloads:
return
try:
logger.info(f"Attempting to clean up {len(downloads)} download(s) from aria2c...")
# Filter out downloads that might already be gone
valid_downloads = [d for d in downloads if hasattr(d, 'gid')]
if not valid_downloads:
logger.info("No valid downloads to clean up.")
return
api.pause(valid_downloads)
# Give aria2c a moment to process the pause command before removing
time.sleep(0.5)
api.remove(valid_downloads)
logger.info("Cleanup successful.")
except Exception as e:
logger.warning(f"An error occurred during aria2c cleanup: {e}")

def parse_aria_error(download):
"""Parses an aria2p Download object to get a detailed error message."""
error_code = download.error_code
error_message = download.error_message

if not error_message:
return f"Unknown aria2c error (Code: {error_code})"

# Check for common HTTP errors in the message
http_status_match = re.search(r'HTTP status (\d+)', error_message)
if http_status_match:
status_code = int(http_status_match.group(1))
if status_code == 403:
return f"HTTP Error 403: Forbidden. The URL may have expired or requires valid cookies/headers."
elif status_code == 404:
return f"HTTP Error 404: Not Found. The resource is unavailable."
else:
return f"HTTP Error {status_code}."

if "Timeout" in error_message or "timed out" in error_message.lower():
return "Download timed out."

# Fallback to the raw error message
return f"Aria2c error (Code: {error_code}): {error_message}"

def parse_aria_args_to_options(args_str):
"""
Parses yt-dlp style downloader args for aria2c.
Example: "aria2c:[-x 8, -k 1M]" or just "-x 8 -k 1M"
Returns a dictionary of options for aria2p.
"""
if not args_str or not args_str.strip():
return {}

inner_args_str = args_str.strip()
match = re.match(r'aria2c:\s*\[(.*)\]', inner_args_str)
if match:
# Handle yt-dlp's format
inner_args_str = match.group(1).replace(',', ' ')
else:
# If it doesn't match, assume the whole string is a set of arguments.
logger.debug(f"Downloader args '{args_str}' does not match 'aria2c:[...]' format. Parsing as a raw argument string.")

arg_list = shlex.split(inner_args_str)

# Use a mini-parser to handle CLI-style args
parser = argparse.ArgumentParser(add_help=False, prog="aria2c_args_parser")
parser.add_argument('-x', '--max-connection-per-server')
parser.add_argument('-k', '--min-split-size')
parser.add_argument('-s', '--split')
parser.add_argument('--all-proxy')

try:
# We only care about known arguments
known_args, unknown_args = parser.parse_known_args(arg_list)
if unknown_args:
logger.warning(f"Ignoring unknown arguments in --downloader-args: {unknown_args}")
# Convert to dict, removing None values
return {k: v for k, v in vars(known_args).items() if v is not None}
except Exception:
logger.warning(f"Failed to parse arguments inside --downloader-args: '{inner_args_str}'")
return {}
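Expected behaviour of the option parser above, shown on two representative inputs; the printed dictionaries assume the argparse dests defined in the code (hyphens become underscores):

from ytops_client.download_aria_tool import parse_aria_args_to_options

print(parse_aria_args_to_options('aria2c:[-x 8, -k 1M]'))
# {'max_connection_per_server': '8', 'min_split_size': '1M'}
print(parse_aria_args_to_options('-x 4 -s 4 --unknown-flag 1'))
# {'max_connection_per_server': '4', 'split': '4'}   (unknown args are warned about and ignored)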
def main_download_aria(args):
"""Main logic for the 'download-aria' command."""
log_level = logging.DEBUG if args.verbose else logging.INFO
logging.basicConfig(level=log_level, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', stream=sys.stderr)

if args.remove_fragments_after_merge and not args.auto_merge_fragments:
logger.error("--remove-fragments-after-merge requires --auto-merge-fragments.")
return 1
if args.auto_merge_fragments and not args.wait:
logger.error("--auto-merge-fragments requires --wait.")
return 1
if args.wait_timeout and not args.wait:
logger.error("--wait-timeout requires --wait.")
return 1

if args.wait:
logger.info("Will wait for download to complete and report status. This is a synchronous operation.")
else:
logger.info("Will submit download and exit immediately (asynchronous).")

info_json_content = ""
input_source_name = ""
if args.load_info_json:
info_json_content = args.load_info_json.read()
input_source_name = args.load_info_json.name
else:
info_json_content = sys.stdin.read()
input_source_name = "stdin"

if not info_json_content.strip():
logger.error(f"Failed to read info.json from {input_source_name}. Input is empty.")
return 1

try:
info_data = json.loads(info_json_content)
logger.info(f"Successfully loaded info.json from {input_source_name}.")
except json.JSONDecodeError:
logger.error(f"Failed to parse info.json from {input_source_name}. Is the input valid JSON?")
return 1

# Find the requested format, supporting yt-dlp style selectors
target_format = None
# A format selector can be a comma-separated list of preferences,
# where each preference can be a slash-separated list of format_ids.
# e.g., "299/137/136,140" means try 299, then 137, then 136, then 140.
format_preferences = [item.strip() for sublist in (i.split('/') for i in args.format.split(',')) for item in sublist if item.strip()]

available_formats_map = {f['format_id']: f for f in info_data.get('formats', []) if 'format_id' in f}

for format_id in format_preferences:
if format_id in available_formats_map:
target_format = available_formats_map[format_id]
logger.info(f"Selected format ID '{format_id}' from selector '{args.format}'.")
break

if not target_format:
logger.error(f"No suitable format found for selector '{args.format}' in info.json.")
return 1

# Get file size for auto-timeout and dynamic options
total_filesize = target_format.get('filesize') or target_format.get('filesize_approx')

# Construct filename
video_id = info_data.get('id', 'unknown_video_id')
title = info_data.get('title', 'unknown_title')
ext = target_format.get('ext', 'mp4')
# Sanitize title for filename
safe_title = "".join([c for c in title if c.isalpha() or c.isdigit() or c in (' ', '-', '_')]).rstrip()
filename = f"{safe_title} [{video_id}].f{target_format['format_id']}.{ext}"

# Prepare options for aria2
aria_options = {
# Options from yt-dlp's aria2c integration for performance and reliability
'max-connection-per-server': 16,
'split': 16,
'min-split-size': '1M',
'http-accept-gzip': 'true',
'file-allocation': 'none',
}

if args.proxy:
aria_options['all-proxy'] = args.proxy

custom_options = parse_aria_args_to_options(args.downloader_args)

# Dynamically set min-split-size if not overridden by user
if 'min_split_size' not in custom_options and total_filesize:
if total_filesize > 100 * 1024 * 1024: # 100 MiB
aria_options['min-split-size'] = '5M'
logger.info("File is > 100MiB, dynamically setting min-split-size to 5M.")

if custom_options:
aria_options.update(custom_options)
logger.info(f"Applied custom aria2c options from --downloader-args: {custom_options}")

aria_options['out'] = filename

# Add headers from info.json, mimicking yt-dlp's behavior for aria2c
headers = target_format.get('http_headers')
if headers:
header_list = [f'{key}: {value}' for key, value in headers.items()]
aria_options['header'] = header_list
logger.info(f"Adding {len(header_list)} HTTP headers to the download.")
if args.verbose:
for h in header_list:
if h.lower().startswith('cookie:'):
logger.debug(f" Header: Cookie: [REDACTED]")
else:
logger.debug(f" Header: {h}")

is_fragmented = 'fragments' in target_format
if not is_fragmented:
url = target_format.get('url')
if not url:
logger.error(f"Format ID '{args.format}' has neither a URL nor fragments.")
return 1

try:
logger.info(f"Connecting to aria2c RPC at http://{args.aria_host}:{args.aria_port}")
client = aria2p.Client(
host=f"http://{args.aria_host}",
port=args.aria_port,
secret=args.aria_secret or ""
)
api = aria2p.API(client)

timeout_seconds = None
if args.wait_timeout:
if args.wait_timeout.lower() == 'auto':
if total_filesize:
# Min speed: 200 KiB/s. Min timeout: 30s.
min_speed = 200 * 1024
calculated_timeout = int(total_filesize / min_speed)
timeout_seconds = max(30, calculated_timeout)
total_filesize_hr, _ = human_readable_bytes(total_filesize)
logger.info(f"Auto-calculated timeout: {timeout_seconds}s (based on {total_filesize_hr} at 200KiB/s).")
else:
logger.warning("Cannot use 'auto' timeout: file size not available in info.json. Timeout disabled.")
else:
try:
timeout_seconds = int(args.wait_timeout)
if timeout_seconds <= 0:
raise ValueError
except ValueError:
logger.error(f"Invalid --wait-timeout value: '{args.wait_timeout}'. Must be a positive integer or 'auto'.")
return 1

if is_fragmented:
return download_fragments_aria(args, api, target_format, filename, aria_options, timeout_seconds, remote_dir=args.remote_dir)
else:
return download_url_aria(args, api, url, filename, aria_options, timeout_seconds, remote_dir=args.remote_dir)

except Exception as e:
logger.error(f"An error occurred while communicating with aria2c: {e}", exc_info=args.verbose)
return 1
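A worked example of the 'auto' wait timeout computed in main_download_aria above (the file size is illustrative): a 120 MiB format at the assumed 200 KiB/s floor gives int(125829120 / 204800) = 614 seconds, which is above the 30-second minimum, so the wait loop would time out after 614 s.

total_filesize = 120 * 1024 * 1024      # 125829120 bytes (illustrative)
min_speed = 200 * 1024                  # 200 KiB/s, as in the code above
timeout_seconds = max(30, int(total_filesize / min_speed))
print(timeout_seconds)                  # 614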
|
def download_url_aria(args, api, url, filename, aria_options, timeout_seconds, remote_dir=None):
|
||||||
|
"""Handle downloading a single URL with aria2c."""
|
||||||
|
if remote_dir:
|
||||||
|
aria_options['dir'] = remote_dir
|
||||||
|
logger.info(f"Adding download for format '{args.format}' with URL: {url[:70]}...")
|
||||||
|
downloads = api.add_uris([url], options=aria_options)
|
||||||
|
|
||||||
|
if not downloads:
|
||||||
|
logger.error("Failed to add download to aria2c. The API returned an empty result.")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
# Handle older aria2p versions that return a single Download object instead of a list
|
||||||
|
download = downloads[0] if isinstance(downloads, list) else downloads
|
||||||
|
logger.info(f"Successfully added download to aria2c. GID: {download.gid}")
|
||||||
|
|
||||||
|
if args.wait:
|
||||||
|
logger.info(f"Waiting for download {download.gid} to complete...")
|
||||||
|
start_time = time.time()
|
||||||
|
try:
|
||||||
|
while True:
|
||||||
|
if timeout_seconds and (time.time() - start_time > timeout_seconds):
|
||||||
|
raise TimeoutError(f"Download did not complete within {timeout_seconds}s timeout.")
|
||||||
|
|
||||||
|
# Re-fetch the download object to get the latest status
|
||||||
|
download.update()
|
||||||
|
# A download is no longer active if it's complete, errored, paused, or removed.
|
||||||
|
if download.status not in ('active', 'waiting'):
|
||||||
|
break
|
||||||
|
|
||||||
|
progress_info = (
|
||||||
|
f"\rGID {download.gid}: {download.status} "
|
||||||
|
f"{download.progress_string()} "
|
||||||
|
f"({download.download_speed_string()}) "
|
||||||
|
f"ETA: {download.eta_string()}"
|
||||||
|
)
|
||||||
|
sys.stdout.write(progress_info)
|
||||||
|
sys.stdout.flush()
|
||||||
|
time.sleep(0.5)
|
||||||
|
except (KeyboardInterrupt, TimeoutError) as e:
|
||||||
|
sys.stdout.write('\n')
|
||||||
|
if isinstance(e, KeyboardInterrupt):
|
||||||
|
logger.warning("Wait interrupted by user. Cleaning up download...")
|
||||||
|
cleanup_aria_download(api, [download])
|
||||||
|
return 130
|
||||||
|
else: # TimeoutError
|
||||||
|
logger.error(f"Download timed out. Cleaning up... Error: {e}")
|
||||||
|
cleanup_aria_download(api, [download])
|
||||||
|
return 1
|
||||||
|
except aria2p.ClientException as e:
|
||||||
|
# This can happen if the download completes and is removed by aria2c
|
||||||
|
# before we can check its final status. Assume success in this case.
|
||||||
|
logger.warning(f"Could not get final status for GID {download.gid} (maybe removed on completion?): {e}. Assuming success.")
|
||||||
|
print(f"Download for GID {download.gid} presumed successful.")
|
||||||
|
return 0
|
||||||
|
|
||||||
|
sys.stdout.write('\n') # Newline after progress bar
|
||||||
|
|
||||||
|
# Final status check (no need to update again, we have the latest status)
|
||||||
|
if download.status == 'complete':
|
||||||
|
logger.info(f"Download {download.gid} completed successfully.")
|
||||||
|
|
||||||
|
downloaded_filepath_remote = None
|
||||||
|
if download.files:
|
||||||
|
downloaded_filepath_remote = download.files[0].path
|
||||||
|
print(f"Download successful: {downloaded_filepath_remote}")
|
||||||
|
else:
|
||||||
|
print("Download successful, but no file path reported by aria2c.")
|
||||||
|
|
||||||
|
if args.cleanup and downloaded_filepath_remote:
|
||||||
|
local_filepath = None
|
||||||
|
# To map remote path to local, we need remote_dir and a local equivalent.
|
||||||
|
# We'll use fragments_dir as the local equivalent, which defaults to output_dir.
|
||||||
|
local_base_dir = args.fragments_dir or args.output_dir or '.'
|
||||||
|
if remote_dir:
|
||||||
|
if downloaded_filepath_remote.startswith(remote_dir):
|
||||||
|
relative_path = os.path.relpath(downloaded_filepath_remote, remote_dir)
|
||||||
|
local_filepath = os.path.join(local_base_dir, relative_path)
|
||||||
|
else:
|
||||||
|
logger.warning(f"Cleanup: Downloaded file path '{downloaded_filepath_remote}' does not start with remote-dir '{remote_dir}'. Cannot map to local path.")
|
||||||
|
else:
|
||||||
|
logger.warning(f"Cleanup: --remote-dir not specified. Assuming download path is accessible locally as '{downloaded_filepath_remote}'.")
|
||||||
|
local_filepath = downloaded_filepath_remote
|
||||||
|
|
||||||
|
if local_filepath:
|
||||||
|
try:
|
||||||
|
if os.path.exists(local_filepath):
|
||||||
|
os.remove(local_filepath)
|
||||||
|
logger.info(f"Cleanup: Removed downloaded file '{local_filepath}'")
|
||||||
|
else:
|
||||||
|
logger.warning(f"Cleanup: File not found at expected local path '{local_filepath}'. Skipping removal.")
|
||||||
|
except OSError as e:
|
||||||
|
logger.error(f"Cleanup failed: Could not remove file '{local_filepath}': {e}")
|
||||||
|
elif args.cleanup:
|
||||||
|
logger.warning("Cleanup requested, but no downloaded file path was reported by aria2c.")
|
||||||
|
|
||||||
|
if args.purge_on_complete:
|
||||||
|
try:
|
||||||
|
api.purge_download_result()
|
||||||
|
logger.info("Purged all completed/failed downloads from aria2c history.")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to purge download history: {e}")
|
||||||
|
elif args.remove_on_complete:
|
||||||
|
try:
|
||||||
|
api.remove_download_result(download)
|
||||||
|
logger.info(f"Removed download {download.gid} from aria2c history.")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to remove download {download.gid} from history: {e}")
|
||||||
|
|
||||||
|
return 0
|
||||||
|
else:
|
||||||
|
detailed_error = parse_aria_error(download)
|
||||||
|
logger.error(f"Download {download.gid} failed. Error: {detailed_error}")
|
||||||
|
return 1
|
||||||
|
else:
|
||||||
|
print(f"Successfully added download. GID: {download.gid}")
|
||||||
|
return 0
|
||||||
|
|
||||||
|
def download_fragments_aria(args, api, target_format, filename, aria_options, timeout_seconds, remote_dir=None):
|
||||||
|
"""Handle downloading fragmented formats with aria2c."""
|
||||||
|
logger.info(f"Format '{args.format}' is fragmented. Adding all fragments to download queue.")
|
||||||
|
fragment_base_url = target_format.get('fragment_base_url')
|
||||||
|
fragments = target_format['fragments']
|
||||||
|
|
||||||
|
MAX_FRAGMENTS = 50000
|
||||||
|
if len(fragments) > MAX_FRAGMENTS:
|
||||||
|
logger.error(
|
||||||
|
f"The number of fragments ({len(fragments)}) exceeds the safety limit of {MAX_FRAGMENTS}. "
|
||||||
|
f"This is to prevent overwhelming the aria2c server. Aborting."
|
||||||
|
)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
# We need to set the 'dir' option for all fragments if specified.
|
||||||
|
# The 'out' option will be set per-fragment.
|
||||||
|
frag_aria_options = aria_options.copy()
|
||||||
|
frag_aria_options.pop('out', None) # Remove the main 'out' option
|
||||||
|
|
||||||
|
if remote_dir:
|
||||||
|
frag_aria_options['dir'] = remote_dir
|
||||||
|
logger.info(f"Instructing remote aria2c to save fragments to: {remote_dir}")
|
||||||
|
|
||||||
|
base_filename, file_ext = os.path.splitext(filename)
|
||||||
|
|
||||||
|
    calls = []
    for i, fragment in enumerate(fragments):
        frag_url = fragment.get('url')
        if not frag_url:
            if not fragment_base_url:
                logger.error(f"Fragment {i} has no URL and no fragment_base_url is available. Aborting.")
                return 1
            frag_url = urljoin(fragment_base_url, fragment['path'])

        # Use the base filename from the main file, but add fragment identifier
        fragment_filename = f"{base_filename}-Frag{i}{file_ext}"

        current_frag_options = frag_aria_options.copy()
        current_frag_options['out'] = os.path.basename(fragment_filename)

        # Prepare parameters for multicall in the format:
        # {"methodName": "aria2.addUri", "params": [["url"], {"out": "file.mp4"}]}
        # The secret token is automatically added by aria2p.
        params = [[frag_url], current_frag_options]
        call_struct = {
            "methodName": api.client.ADD_URI,
            "params": params
        }
        calls.append(call_struct)

    results = api.client.multicall(calls)
    if not results:
        logger.error("Failed to add fragments to aria2c. The API returned an empty result.")
        return 1

    # The result of a multicall of addUri is a list of lists, where each inner list
    # contains the GID of one download, e.g., [['gid1'], ['gid2']].
    # A failed call for a fragment may result in a fault struct dict instead of a list.
    # We extract GIDs from successful calls.
    gids = [result[0] for result in results if isinstance(result, list) and result]

    if len(gids) != len(fragments):
        failed_count = len(fragments) - len(gids)
        logger.warning(f"{failed_count} out of {len(fragments)} fragments failed to be added to aria2c.")

    if not gids:
        logger.error("Failed to add any fragments to aria2c. All submissions failed.")
        return 1
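    # Illustrative sketch only (not executed by this tool): the `calls` list built
    # above is equivalent to a hand-written aria2 system.multicall payload such as
    # the following, where the URLs and filenames are hypothetical placeholders.
    #
    #   calls = [
    #       {"methodName": "aria2.addUri",
    #        "params": [["https://example.com/frag0.mp4"], {"dir": "/downloads", "out": "video-Frag0.mp4"}]},
    #       {"methodName": "aria2.addUri",
    #        "params": [["https://example.com/frag1.mp4"], {"dir": "/downloads", "out": "video-Frag1.mp4"}]},
    #   ]
    #   gids = [r[0] for r in api.client.multicall(calls) if isinstance(r, list) and r]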
|
||||||
|
|
||||||
|
logger.info(f"Successfully added {len(gids)} fragments to aria2c.")
|
||||||
|
if args.verbose:
|
||||||
|
logger.debug(f"GIDs: {gids}")
|
||||||
|
|
||||||
|
if args.wait:
|
||||||
|
logger.info(f"Waiting for {len(gids)} fragments to complete...")
|
||||||
|
start_time = time.time()
|
||||||
|
downloads_to_cleanup = []
|
||||||
|
try:
|
||||||
|
while True:
|
||||||
|
if timeout_seconds and (time.time() - start_time > timeout_seconds):
|
||||||
|
raise TimeoutError(f"Fragment downloads did not complete within {timeout_seconds}s timeout.")
|
||||||
|
|
||||||
|
downloads = api.get_downloads(gids)
|
||||||
|
downloads_to_cleanup = downloads # Store for potential cleanup
|
||||||
|
# A download is considered "active" if it's currently downloading or waiting in the queue.
|
||||||
|
# It is "not active" if it is complete, errored, paused, or removed.
|
||||||
|
active_downloads = [d for d in downloads if d.status in ('active', 'waiting')]
|
||||||
|
if not active_downloads:
|
||||||
|
break # All downloads are complete or have stopped for other reasons
|
||||||
|
|
||||||
|
for d in active_downloads:
|
||||||
|
d.update()
|
||||||
|
|
||||||
|
completed_count = len(downloads) - len(active_downloads)
|
||||||
|
total_bytes = sum(d.total_length for d in downloads)
|
||||||
|
downloaded_bytes = sum(d.completed_length for d in downloads)
|
||||||
|
total_speed = sum(d.download_speed for d in downloads)
|
||||||
|
progress_percent = (downloaded_bytes / total_bytes * 100) if total_bytes > 0 else 0
|
||||||
|
|
||||||
|
progress_info = (
|
||||||
|
f"\rProgress: {completed_count}/{len(downloads)} fragments | "
|
||||||
|
f"{progress_percent:.1f}% "
|
||||||
|
f"({human_readable_bytes(downloaded_bytes)}/{human_readable_bytes(total_bytes)}) "
|
||||||
|
f"Speed: {human_readable_bytes(total_speed)}/s"
|
||||||
|
)
|
||||||
|
sys.stdout.write(progress_info)
|
||||||
|
sys.stdout.flush()
|
||||||
|
time.sleep(0.5)
|
||||||
|
except (KeyboardInterrupt, TimeoutError) as e:
|
||||||
|
sys.stdout.write('\n')
|
||||||
|
if isinstance(e, KeyboardInterrupt):
|
||||||
|
logger.warning("Wait interrupted by user. Cleaning up fragments...")
|
||||||
|
cleanup_aria_download(api, downloads_to_cleanup)
|
||||||
|
return 130
|
||||||
|
else: # TimeoutError
|
||||||
|
logger.error(f"Download timed out. Cleaning up fragments... Error: {e}")
|
||||||
|
cleanup_aria_download(api, downloads_to_cleanup)
|
||||||
|
return 1
|
||||||
|
except aria2p.ClientException as e:
|
||||||
|
# This can happen if downloads complete and are removed by aria2c
|
||||||
|
# before we can check their final status. Assume success in this case.
|
||||||
|
logger.warning(f"Could not get final status for some fragments (maybe removed on completion?): {e}. Assuming success.")
|
||||||
|
|
||||||
|
sys.stdout.write('\n')
|
||||||
|
|
||||||
|
# Final status check
|
||||||
|
failed_downloads = []
|
||||||
|
try:
|
||||||
|
downloads = api.get_downloads(gids)
|
||||||
|
failed_downloads = [d for d in downloads if d.status != 'complete']
|
||||||
|
except aria2p.ClientException as e:
|
||||||
|
logger.warning(f"Could not perform final status check for fragments (maybe removed on completion?): {e}. Assuming success.")
|
||||||
|
# If we can't check, we assume success based on the earlier wait loop not failing catastrophically.
|
||||||
|
failed_downloads = []
|
||||||
|
|
||||||
|
if failed_downloads:
|
||||||
|
logger.error(f"{len(failed_downloads)} fragments failed to download.")
|
||||||
|
for d in failed_downloads:
|
||||||
|
detailed_error = parse_aria_error(d)
|
||||||
|
logger.error(f" GID {d.gid}: {detailed_error}")
|
||||||
|
return 1
|
||||||
|
else:
|
||||||
|
logger.info("All fragments downloaded successfully.")
|
||||||
|
output_dir = args.output_dir or '.'
|
||||||
|
final_filepath = os.path.join(output_dir, filename)
|
||||||
|
fragments_lookup_dir = args.fragments_dir or output_dir
|
||||||
|
|
||||||
|
if args.auto_merge_fragments:
|
||||||
|
logger.info(f"Attempting to merge fragments into: {final_filepath}")
|
||||||
|
logger.info(f"Searching for fragments in local directory: {os.path.abspath(fragments_lookup_dir)}")
|
||||||
|
try:
|
||||||
|
# base_filename and file_ext are available from earlier in the function
|
||||||
|
# We must escape the base filename in case it contains glob special characters like [ or ].
|
||||||
|
escaped_base = glob.escape(base_filename)
|
||||||
|
search_path = os.path.join(fragments_lookup_dir, f"{escaped_base}-Frag*{file_ext}")
|
||||||
|
fragment_files = glob.glob(search_path)
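# Illustrative note (hypothetical filename): glob.escape() keeps titles with
# brackets from being treated as character classes, e.g.
#   glob.escape("My Video [1080p]")  ->  "My Video [[]1080p]"
# so the search pattern becomes "My Video [[]1080p]-Frag*.mp4" and matches files
# like "My Video [1080p]-Frag0.mp4", "My Video [1080p]-Frag1.mp4", ...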
|
||||||
|
|
||||||
|
if not fragment_files:
|
||||||
|
logger.error(f"No fragment files found with pattern: {search_path}")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
                def fragment_sort_key(f):
                    match = re.search(r'Frag(\d+)', os.path.basename(f))
                    return int(match.group(1)) if match else -1
                fragment_files.sort(key=fragment_sort_key)

                with open(final_filepath, 'wb') as dest_file:
                    for frag_path in fragment_files:
                        with open(frag_path, 'rb') as src_file:
                            shutil.copyfileobj(src_file, dest_file)

                logger.info(f"Successfully merged {len(fragment_files)} fragments into {final_filepath}")
|
||||||
|
|
||||||
|
if args.remove_fragments_after_merge or args.cleanup:
|
||||||
|
logger.info("Removing fragment files...")
|
||||||
|
for frag_path in fragment_files:
|
||||||
|
os.remove(frag_path)
|
||||||
|
logger.info("Fragment files removed.")
|
||||||
|
|
||||||
|
if args.cleanup:
|
||||||
|
try:
|
||||||
|
os.remove(final_filepath)
|
||||||
|
logger.info(f"Cleanup: Removed merged file '{final_filepath}'")
|
||||||
|
except OSError as e:
|
||||||
|
logger.error(f"Cleanup failed: Could not remove merged file '{final_filepath}': {e}")
|
||||||
|
|
||||||
|
print(f"Download and merge successful: {final_filepath}")
|
||||||
|
|
||||||
|
if args.purge_on_complete:
|
||||||
|
try:
|
||||||
|
api.purge_download_result()
|
||||||
|
logger.info("Purged all completed/failed downloads from aria2c history.")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to purge download history: {e}")
|
||||||
|
elif args.remove_on_complete:
|
||||||
|
try:
|
||||||
|
# The `downloads` variable from the last status check should be valid here.
|
||||||
|
api.remove_download_result(downloads)
|
||||||
|
logger.info(f"Removed {len(downloads)} fragment downloads from aria2c history.")
|
||||||
|
except aria2p.ClientException as e:
|
||||||
|
logger.warning(f"Could not remove fragment downloads from history (maybe already gone?): {e}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to remove fragment downloads from history: {e}")
|
||||||
|
|
||||||
|
return 0
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"An error occurred during merging: {e}", exc_info=args.verbose)
|
||||||
|
logger.error("Fragments were downloaded but not merged.")
|
||||||
|
return 1
|
||||||
|
else:
|
||||||
|
print("Download successful. Fragments now need to be merged manually.")
|
||||||
|
print(f"The final merged file should be named: {final_filepath}")
|
||||||
|
print("You can merge them with a command like:")
|
||||||
|
print(f" cat `ls -v '{os.path.join(fragments_lookup_dir, base_filename)}'-Frag*'{file_ext}'` > '{final_filepath}'")
|
||||||
|
|
||||||
|
if args.cleanup:
|
||||||
|
logger.info("Cleanup requested. Removing downloaded fragments...")
|
||||||
|
try:
|
||||||
|
# base_filename and file_ext are available from earlier in the function
|
||||||
|
escaped_base = glob.escape(base_filename)
|
||||||
|
search_path = os.path.join(fragments_lookup_dir, f"{escaped_base}-Frag*{file_ext}")
|
||||||
|
fragment_files = glob.glob(search_path)
|
||||||
|
|
||||||
|
if not fragment_files:
|
||||||
|
logger.warning(f"Cleanup: No fragment files found with pattern: {search_path}")
|
||||||
|
else:
|
||||||
|
for frag_path in fragment_files:
|
||||||
|
os.remove(frag_path)
|
||||||
|
logger.info(f"Removed {len(fragment_files)} fragment files.")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"An error occurred during fragment cleanup: {e}", exc_info=args.verbose)
|
||||||
|
|
||||||
|
if args.purge_on_complete:
|
||||||
|
try:
|
||||||
|
api.purge_download_result()
|
||||||
|
logger.info("Purged all completed/failed downloads from aria2c history.")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to purge download history: {e}")
|
||||||
|
elif args.remove_on_complete:
|
||||||
|
try:
|
||||||
|
# The `downloads` variable from the last status check should be valid here.
|
||||||
|
api.remove_download_result(downloads)
|
||||||
|
logger.info(f"Removed {len(downloads)} fragment downloads from aria2c history.")
|
||||||
|
except aria2p.ClientException as e:
|
||||||
|
logger.warning(f"Could not remove fragment downloads from history (maybe already gone?): {e}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to remove fragment downloads from history: {e}")
|
||||||
|
|
||||||
|
return 0
|
||||||
|
else:
|
||||||
|
print(f"Successfully added {len(gids)} fragments. GIDs: {gids}")
|
||||||
|
print("These fragments will need to be merged manually after download.")
|
||||||
|
return 0
|
||||||
297 ytops_client/download_native_py_tool.py (Normal file)
@ -0,0 +1,297 @@
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Tool to download a specified format using yt-dlp as a Python library.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import contextlib
|
||||||
|
import io
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import shlex
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
try:
|
||||||
|
import yt_dlp
|
||||||
|
except ImportError:
|
||||||
|
print("yt-dlp is not installed. Please install it with: pip install yt-dlp", file=sys.stderr)
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
logger = logging.getLogger('download_native_py_tool')
|
||||||
|
|
||||||
|
# A custom logger for yt-dlp to capture output and key events
class YTDLPLogger:
    def __init__(self):
        self.final_filename = None
        self.is_403 = False
        self.is_timeout = False

    def debug(self, msg):
        # yt-dlp logs the destination file path at the debug level.
        if msg.startswith('[download] Destination:'):
            self.final_filename = msg.split(':', 1)[1].strip()
        elif msg.startswith('[download]') and 'has already been downloaded' in msg:
            match = re.search(r'\[download\]\s+(.*)\s+has already been downloaded', msg)
            if match:
                self.final_filename = match.group(1).strip()
        logger.debug(msg)

    def info(self, msg):
        logger.info(msg)

    def warning(self, msg):
        logger.warning(msg)

    def error(self, msg):
        if "HTTP Error 403" in msg:
            self.is_403 = True
        if "Read timed out" in msg:
            self.is_timeout = True
        logger.error(msg)


def ytdlp_progress_hook(d, ytdlp_logger):
    """Progress hook to capture the final filename."""
    if d['status'] == 'finished':
        ytdlp_logger.final_filename = d.get('filename')
        logger.info(f"Download finished. Final file: {ytdlp_logger.final_filename}")
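
# Minimal usage sketch (an assumption for illustration, not part of this module's
# own flow): how YTDLPLogger and ytdlp_progress_hook are meant to plug into yt-dlp.
# The options shown are a subset of what main_download_native_py() builds below.
#
#   ytdlp_logger = YTDLPLogger()
#   ydl_opts = {
#       "logger": ytdlp_logger,
#       "progress_hooks": [lambda d: ytdlp_progress_hook(d, ytdlp_logger)],
#   }
#   with yt_dlp.YoutubeDL(ydl_opts) as ydl:
#       ydl.process_ie_result(info_data)   # info_data: a pre-fetched info.json dict
#   print(ytdlp_logger.final_filename)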
|
||||||
|
|
||||||
|
def add_download_native_py_parser(subparsers):
|
||||||
|
"""Add the parser for the 'download py' command."""
|
||||||
|
parser = subparsers.add_parser(
|
||||||
|
'py',
|
||||||
|
description='Download using yt-dlp as a Python library (recommended). This method calls yt-dlp functions directly.',
|
||||||
|
formatter_class=argparse.RawTextHelpFormatter,
|
||||||
|
help='Download using a direct Python call to yt-dlp (recommended).'
|
||||||
|
)
|
||||||
|
parser.add_argument('--load-info-json', type=argparse.FileType('r', encoding='utf-8'), help="Path to the info.json file. If not provided, reads from stdin.")
|
||||||
|
parser.add_argument('-f', '--format', required=True, help='The format selection string to download (e.g., "18", "299/137", "bestvideo+bestaudio").')
|
||||||
|
parser.add_argument('--output-dir', default='.', help='Directory to save the downloaded file. Defaults to current directory.')
|
||||||
|
parser.add_argument('--save-info-json-dir', help='If specified, save the info.json received from stdin to this directory with an auto-generated name.')
|
||||||
|
parser.add_argument('--proxy', help='Proxy to use for the download, e.g., "socks5://127.0.0.1:1080".')
|
||||||
|
parser.add_argument('--proxy-rename', help='Apply sed-style regex substitution to the proxy URL. Format: s/pattern/replacement/')
|
||||||
|
parser.add_argument('--temp-path', help='Directory for temporary files (e.g., fragments). Use a RAM disk for best performance.')
|
||||||
|
parser.add_argument('--pause', type=int, default=0, help='Seconds to wait before starting the download.')
|
||||||
|
parser.add_argument('--download-continue', action='store_true', help='Enable download continuation (--no-overwrites and --continue flags for yt-dlp).')
|
||||||
|
parser.add_argument('--verbose', action='store_true', help='Enable verbose output for this script and yt-dlp.')
|
||||||
|
parser.add_argument('--cli-config', help='Path to a yt-dlp configuration file to load.')
|
||||||
|
parser.add_argument('--downloader', help='Name of the external downloader backend for yt-dlp to use (e.g., "aria2c", "native").')
|
||||||
|
parser.add_argument('--downloader-args', help='Arguments to pass to the external downloader backend (e.g., "aria2c:-x 8").')
|
||||||
|
parser.add_argument('--extra-ytdlp-args', help='A string of extra command-line arguments to pass to yt-dlp.')
|
||||||
|
parser.add_argument('--output-buffer', action='store_true', help='Download to an in-memory buffer and print raw bytes to stdout. Final filename is printed to stderr.')
|
||||||
|
parser.add_argument('--cleanup', action='store_true', help='After download, rename the file to include a timestamp and truncate it to 0 bytes.')
|
||||||
|
parser.add_argument('--merge-output-format', help='Container format to merge to (e.g., "mp4", "mkv"). Overrides config file.')
|
||||||
|
return parser
|
||||||
|
|
||||||
|
def main_download_native_py(args):
|
||||||
|
"""Main logic for the 'download-native-py' command."""
|
||||||
|
# If outputting to buffer, all logging must go to stderr to keep stdout clean for binary data.
|
||||||
|
log_stream = sys.stderr if args.output_buffer else sys.stdout
|
||||||
|
log_level = logging.DEBUG if args.verbose else logging.INFO
|
||||||
|
# Reconfigure root logger
|
||||||
|
for handler in logging.root.handlers[:]:
|
||||||
|
logging.root.removeHandler(handler)
|
||||||
|
logging.basicConfig(level=log_level, stream=log_stream, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
|
||||||
|
|
||||||
|
if args.pause > 0:
|
||||||
|
logger.info(f"Pausing for {args.pause} seconds...")
|
||||||
|
time.sleep(args.pause)
|
||||||
|
|
||||||
|
info_json_content = ""
|
||||||
|
input_source_name = ""
|
||||||
|
if args.load_info_json:
|
||||||
|
info_json_content = args.load_info_json.read()
|
||||||
|
input_source_name = args.load_info_json.name
|
||||||
|
else:
|
||||||
|
info_json_content = sys.stdin.read()
|
||||||
|
input_source_name = "stdin"
|
||||||
|
|
||||||
|
if not info_json_content.strip():
|
||||||
|
logger.error(f"Failed to read info.json from {input_source_name}. Input is empty.")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
try:
|
||||||
|
info_data = json.loads(info_json_content)
|
||||||
|
logger.info(f"Successfully loaded info.json from {input_source_name}.")
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
logger.error(f"Failed to parse info.json from {input_source_name}. Is the input valid JSON?")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
if args.save_info_json_dir:
|
||||||
|
try:
|
||||||
|
video_id = info_data.get('id', 'unknown_video_id')
|
||||||
|
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
|
||||||
|
filename = f"{timestamp}-{video_id}-info.json"
|
||||||
|
output_path = os.path.join(args.save_info_json_dir, filename)
|
||||||
|
os.makedirs(args.save_info_json_dir, exist_ok=True)
|
||||||
|
with open(output_path, 'w', encoding='utf-8') as f:
|
||||||
|
json.dump(info_data, f, indent=2)
|
||||||
|
logger.info(f"Saved info.json to {output_path}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Failed to save info.json: {e}")
|
||||||
|
|
||||||
|
# Handle proxy and proxy rename
|
||||||
|
proxy_url = args.proxy
|
||||||
|
if not proxy_url:
|
||||||
|
proxy_url = info_data.get('_proxy_url')
|
||||||
|
if proxy_url:
|
||||||
|
logger.info(f"Using proxy from info.json: {proxy_url}")
|
||||||
|
|
||||||
|
if proxy_url and args.proxy_rename:
|
||||||
|
rename_rule = args.proxy_rename.strip("'\"")
|
||||||
|
if rename_rule.startswith('s/') and rename_rule.count('/') >= 2:
|
||||||
|
try:
|
||||||
|
parts = rename_rule.split('/')
|
||||||
|
pattern, replacement = parts[1], parts[2]
|
||||||
|
original_proxy = proxy_url
|
||||||
|
proxy_url = re.sub(pattern, replacement, proxy_url)
|
||||||
|
logger.info(f"Renamed proxy URL from '{original_proxy}' to '{proxy_url}' using rule '{rename_rule}'")
|
||||||
|
except re.error as e:
|
||||||
|
logger.error(f"Invalid regex in --proxy-rename: {e}")
|
||||||
|
return 1
|
||||||
|
else:
|
||||||
|
logger.error("Invalid --proxy-rename format. Expected: s/pattern/replacement/")
|
||||||
|
return 1
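# Worked example (hypothetical values): with --proxy-rename 's/localhost/127.0.0.1/'
# a proxy of "socks5://localhost:1080" is rewritten to "socks5://127.0.0.1:1080",
# i.e. re.sub("localhost", "127.0.0.1", "socks5://localhost:1080").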
|
||||||
|
|
||||||
|
# Build the yt-dlp options dictionary
|
||||||
|
# Start by parsing options from config file and extra args to establish a baseline.
|
||||||
|
base_opts_args = []
|
||||||
|
if args.cli_config and os.path.exists(args.cli_config):
|
||||||
|
try:
|
||||||
|
with open(args.cli_config, 'r', encoding='utf-8') as f:
|
||||||
|
config_content = f.read()
|
||||||
|
base_opts_args.extend(shlex.split(config_content))
|
||||||
|
logger.info(f"Loaded {len(base_opts_args)} arguments from config file: {args.cli_config}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Failed to read or parse config file {args.cli_config}: {e}")
|
||||||
|
return 1
|
||||||
|
elif args.cli_config:
|
||||||
|
logger.warning(f"Config file '{args.cli_config}' not found. Ignoring.")
|
||||||
|
|
||||||
|
if args.extra_ytdlp_args:
|
||||||
|
extra_args_list = shlex.split(args.extra_ytdlp_args)
|
||||||
|
logger.info(f"Adding {len(extra_args_list)} extra arguments from --extra-ytdlp-args.")
|
||||||
|
base_opts_args.extend(extra_args_list)
|
||||||
|
|
||||||
|
ydl_opts = {}
|
||||||
|
if base_opts_args:
|
||||||
|
try:
|
||||||
|
# This is an internal API, but it's the most accurate way to parse CLI args
|
||||||
|
# into the ydl_opts dictionary format.
|
||||||
|
ydl_opts, _, _ = yt_dlp.parse_options(base_opts_args)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Failed to parse options from config/extra_args: {e}")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
# Now, layer the script's explicit arguments on top, as they have higher precedence.
|
||||||
|
os.makedirs(args.output_dir, exist_ok=True)
|
||||||
|
output_template = os.path.join(args.output_dir, '%(title)s [%(id)s].f%(format_id)s.%(ext)s')
|
||||||
|
|
||||||
|
ytdlp_logger = YTDLPLogger()
|
||||||
|
|
||||||
|
# Use update to merge, so explicit args overwrite config/extra args.
|
||||||
|
ydl_opts.update({
|
||||||
|
'format': args.format,
|
||||||
|
'outtmpl': '-' if args.output_buffer else output_template,
|
||||||
|
'logger': ytdlp_logger,
|
||||||
|
'progress_hooks': [lambda d: ytdlp_progress_hook(d, ytdlp_logger)],
|
||||||
|
'verbose': args.verbose,
|
||||||
|
})
|
||||||
|
|
||||||
|
if args.temp_path:
|
||||||
|
ydl_opts['paths'] = {'temp': args.temp_path}
|
||||||
|
logger.info(f"Using temporary path: {args.temp_path}")
|
||||||
|
|
||||||
|
if args.download_continue:
|
||||||
|
ydl_opts['continuedl'] = True
|
||||||
|
ydl_opts['nooverwrites'] = True
|
||||||
|
|
||||||
|
if proxy_url:
|
||||||
|
ydl_opts['proxy'] = proxy_url
|
||||||
|
|
||||||
|
if args.downloader:
|
||||||
|
ydl_opts['downloader'] = {args.downloader: None}
|
||||||
|
if args.downloader_args:
|
||||||
|
# yt-dlp expects a dict for downloader_args
|
||||||
|
# e.g., {'aria2c': ['-x', '8']}
|
||||||
|
try:
|
||||||
|
downloader_name, args_str = args.downloader_args.split(':', 1)
|
||||||
|
ydl_opts.setdefault('downloader_args', {})[downloader_name] = shlex.split(args_str)
|
||||||
|
except ValueError:
|
||||||
|
logger.error(f"Invalid --downloader-args format. Expected 'downloader:args'. Got: '{args.downloader_args}'")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
if args.merge_output_format:
|
||||||
|
ydl_opts['merge_output_format'] = args.merge_output_format
|
||||||
|
|
||||||
|
try:
|
||||||
|
logger.info(f"Starting download for format '{args.format}' using yt-dlp library...")
|
||||||
|
|
||||||
|
download_buffer = None
|
||||||
|
if args.output_buffer:
|
||||||
|
# When downloading to buffer, we redirect stdout to capture the binary data.
|
||||||
|
download_buffer = io.BytesIO()
|
||||||
|
ctx_mgr = contextlib.redirect_stdout(download_buffer)
|
||||||
|
else:
|
||||||
|
# Otherwise, use a null context manager.
|
||||||
|
ctx_mgr = contextlib.nullcontext()
|
||||||
|
|
||||||
|
with ctx_mgr, yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
||||||
|
# The download() method is for URLs. For a pre-fetched info dict,
|
||||||
|
# we must use process_ie_result to bypass the info extraction step.
|
||||||
|
# It raises DownloadError on failure, which is caught by the outer try...except block.
|
||||||
|
ydl.process_ie_result(info_data)
|
||||||
|
# If process_ie_result completes without an exception, the download was successful.
|
||||||
|
retcode = 0
|
||||||
|
|
||||||
|
# The success path is now always taken if no exception was raised.
|
||||||
|
if retcode == 0:
|
||||||
|
logger.info("yt-dlp download completed successfully.")
|
||||||
|
|
||||||
|
if args.output_buffer:
|
||||||
|
# Write the captured binary data to the actual stdout.
|
||||||
|
sys.stdout.buffer.write(download_buffer.getvalue())
|
||||||
|
sys.stdout.buffer.flush()
|
||||||
|
# Print the filename to stderr for the orchestrator.
|
||||||
|
if ytdlp_logger.final_filename:
|
||||||
|
print(ytdlp_logger.final_filename, file=sys.stderr)
|
||||||
|
else:
|
||||||
|
# Print the filename to stdout as usual.
|
||||||
|
if ytdlp_logger.final_filename:
|
||||||
|
print(ytdlp_logger.final_filename, file=sys.stdout)
|
||||||
|
|
||||||
|
if args.cleanup:
|
||||||
|
downloaded_filepath = ytdlp_logger.final_filename
|
||||||
|
if downloaded_filepath and os.path.exists(downloaded_filepath):
|
||||||
|
try:
|
||||||
|
logger.info(f"Cleanup: Renaming and truncating '{downloaded_filepath}'")
|
||||||
|
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
|
||||||
|
directory, original_filename = os.path.split(downloaded_filepath)
|
||||||
|
filename_base, filename_ext = os.path.splitext(original_filename)
|
||||||
|
new_filename = f"{filename_base}_{timestamp}{filename_ext}.empty"
|
||||||
|
new_filepath = os.path.join(directory, new_filename)
|
||||||
|
os.rename(downloaded_filepath, new_filepath)
|
||||||
|
logger.info(f"Renamed to '{new_filepath}'")
|
||||||
|
with open(new_filepath, 'w') as f:
|
||||||
|
pass
|
||||||
|
logger.info(f"Truncated '{new_filepath}' to 0 bytes.")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Cleanup failed: {e}")
|
||||||
|
return 1 # Treat cleanup failure as a script failure
|
||||||
|
elif not args.output_buffer:
|
||||||
|
logger.warning("Cleanup requested, but no downloaded file was found. Skipping cleanup.")
|
||||||
|
return 0
|
||||||
|
else:
|
||||||
|
logger.error(f"yt-dlp download failed with internal exit code {retcode}.")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
except yt_dlp.utils.DownloadError as e:
|
||||||
|
# This catches download-specific errors from yt-dlp
|
||||||
|
logger.error(f"yt-dlp DownloadError: {e}")
|
||||||
|
return 1
|
||||||
|
except Exception as e:
|
||||||
|
logger.exception(f"An unexpected error occurred during yt-dlp execution: {e}")
|
||||||
|
return 1
|
||||||
285 ytops_client/download_tool.py (Normal file)
@ -0,0 +1,285 @@
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Tool to download a specified format using an info.json from stdin.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import shlex
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
import tempfile
|
||||||
|
import time
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
# Configure logging
|
||||||
|
logger = logging.getLogger('download_tool')
|
||||||
|
|
||||||
|
def add_download_parser(subparsers):
|
||||||
|
"""Add the parser for the 'download cli' command."""
|
||||||
|
parser = subparsers.add_parser(
|
||||||
|
'cli',
|
||||||
|
description='Download using the legacy yt-dlp CLI wrapper. This method invokes yt-dlp as a subprocess.',
|
||||||
|
formatter_class=argparse.RawTextHelpFormatter,
|
||||||
|
help='Download using the legacy yt-dlp CLI wrapper.'
|
||||||
|
)
|
||||||
|
parser.add_argument('--load-info-json', type=argparse.FileType('r', encoding='utf-8'), help="Path to the info.json file. If not provided, reads from stdin.")
|
||||||
|
parser.add_argument('-f', '--format', required=True, help='The format selection string to download (e.g., "18", "299/137", "bestvideo+bestaudio").')
|
||||||
|
parser.add_argument('--output-dir', default='.', help='Directory to save the downloaded file. Defaults to current directory.')
|
||||||
|
parser.add_argument('--save-info-json-dir', help='If specified, save the info.json received from stdin to this directory with an auto-generated name.')
|
||||||
|
parser.add_argument('--proxy', help='Proxy to use for the download, e.g., "socks5://127.0.0.1:1080". This option sets the proxy, overriding any value from the info.json.')
|
||||||
|
parser.add_argument('--proxy-rename', help='Apply sed-style regex substitution to the proxy URL. Format: s/pattern/replacement/')
|
||||||
|
parser.add_argument('--pause', type=int, default=0, help='Seconds to wait before starting the download.')
|
||||||
|
parser.add_argument('--print-traffic', action='store_true', help='Print traffic instead of a progress bar.')
|
||||||
|
parser.add_argument('--download-continue', action='store_true', help='Enable download continuation (--continue and --part flags for yt-dlp).')
|
||||||
|
parser.add_argument('--verbose', action='store_true', help='Enable verbose output for this script and yt-dlp.')
|
||||||
|
parser.add_argument('--cli-config', default='cli.config', help='Path to a yt-dlp configuration file. Defaults to "cli.config".')
|
||||||
|
parser.add_argument('--cleanup', action='store_true', help='After download, rename the file to include a timestamp and truncate it to 0 bytes.')
|
||||||
|
parser.add_argument('--log-file', help='Append full yt-dlp output to the specified log file.')
|
||||||
|
parser.add_argument('--yt-dlp-path', default='yt-dlp', help='Path to the yt-dlp executable. Defaults to "yt-dlp" in PATH.')
|
||||||
|
parser.add_argument('--extra-ytdlp-args', help='A string of extra command-line arguments to pass to yt-dlp.')
|
||||||
|
parser.add_argument('--downloader', help='Name of the external downloader to use (e.g., "aria2c", "native").')
|
||||||
|
parser.add_argument('--downloader-args', help='Arguments to pass to the external downloader (e.g., "aria2c:-x 8").')
|
||||||
|
parser.add_argument('--merge-output-format', help='Container format to merge to (e.g., "mp4", "mkv"). Overrides config file.')
|
||||||
|
return parser
|
||||||
|
|
||||||
|
def main_download(args):
|
||||||
|
"""Main logic for the 'download' command."""
|
||||||
|
if args.verbose:
|
||||||
|
logging.getLogger().setLevel(logging.DEBUG)
|
||||||
|
|
||||||
|
if args.pause > 0:
|
||||||
|
logger.info(f"Pausing for {args.pause} seconds...")
|
||||||
|
time.sleep(args.pause)
|
||||||
|
|
||||||
|
info_json_content = ""
|
||||||
|
input_source_name = ""
|
||||||
|
if args.load_info_json:
|
||||||
|
info_json_content = args.load_info_json.read()
|
||||||
|
input_source_name = args.load_info_json.name
|
||||||
|
else:
|
||||||
|
info_json_content = sys.stdin.read()
|
||||||
|
input_source_name = "stdin"
|
||||||
|
|
||||||
|
if not info_json_content.strip():
|
||||||
|
logger.error(f"Failed to read info.json from {input_source_name}. Input is empty.")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
try:
|
||||||
|
info_data = json.loads(info_json_content)
|
||||||
|
logger.info(f"Successfully loaded info.json from {input_source_name}.")
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
logger.error(f"Failed to parse info.json from {input_source_name}. Is the input valid JSON?")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
if args.save_info_json_dir:
|
||||||
|
try:
|
||||||
|
video_id = info_data.get('id', 'unknown_video_id')
|
||||||
|
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
|
||||||
|
filename = f"{timestamp}-{video_id}-info.json"
|
||||||
|
output_path = os.path.join(args.save_info_json_dir, filename)
|
||||||
|
os.makedirs(args.save_info_json_dir, exist_ok=True)
|
||||||
|
with open(output_path, 'w', encoding='utf-8') as f:
|
||||||
|
json.dump(info_data, f, indent=2)
|
||||||
|
logger.info(f"Saved info.json to {output_path}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Failed to save info.json: {e}")
|
||||||
|
|
||||||
|
# Determine proxy to use
|
||||||
|
proxy_url = args.proxy
|
||||||
|
if not proxy_url:
|
||||||
|
proxy_url = info_data.get('_proxy_url')
|
||||||
|
if proxy_url:
|
||||||
|
logger.info(f"Using proxy from info.json: {proxy_url}")
|
||||||
|
|
||||||
|
if proxy_url and args.proxy_rename:
|
||||||
|
rename_rule = args.proxy_rename
|
||||||
|
# The user's command line might include quotes that are preserved by shlex.
|
||||||
|
# Strip them to get the raw rule.
|
||||||
|
rename_rule = rename_rule.strip("'\"")
|
||||||
|
if rename_rule.startswith('s/') and rename_rule.count('/') >= 2:
|
||||||
|
try:
|
||||||
|
parts = rename_rule.split('/')
|
||||||
|
pattern = parts[1]
|
||||||
|
replacement = parts[2]
|
||||||
|
original_proxy = proxy_url
|
||||||
|
proxy_url = re.sub(pattern, replacement, proxy_url)
|
||||||
|
logger.info(f"Renamed proxy URL from '{original_proxy}' to '{proxy_url}' using rule '{rename_rule}'")
|
||||||
|
except re.error as e:
|
||||||
|
logger.error(f"Invalid regex in --proxy-rename: {e}")
|
||||||
|
return 1
|
||||||
|
except IndexError:
|
||||||
|
logger.error("Invalid --proxy-rename format. Expected: s/pattern/replacement/")
|
||||||
|
return 1
|
||||||
|
else:
|
||||||
|
logger.error("Invalid --proxy-rename format. Expected: s/pattern/replacement/")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
# yt-dlp needs to load the info.json from a file
|
||||||
|
with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.json', encoding='utf-8') as tmp:
|
||||||
|
json.dump(info_data, tmp)
|
||||||
|
info_json_path = tmp.name
|
||||||
|
|
||||||
|
logger.debug(f"Temporarily saved info.json to {info_json_path}")
|
||||||
|
|
||||||
|
downloaded_filepath = None
|
||||||
|
return_code = 1 # Default to error
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Create output directory if it doesn't exist
|
||||||
|
os.makedirs(args.output_dir, exist_ok=True)
|
||||||
|
output_template = os.path.join(args.output_dir, '%(title)s [%(id)s].f%(format_id)s.%(ext)s')
|
||||||
|
|
||||||
|
cmd = [
|
||||||
|
args.yt_dlp_path,
|
||||||
|
'--load-info-json', info_json_path,
|
||||||
|
'-f', args.format,
|
||||||
|
'-o', output_template,
|
||||||
|
'--print', 'filename',
|
||||||
|
]
|
||||||
|
|
||||||
|
if args.extra_ytdlp_args:
|
||||||
|
cmd.extend(shlex.split(args.extra_ytdlp_args))
|
||||||
|
|
||||||
|
if args.downloader:
|
||||||
|
cmd.extend(['--downloader', args.downloader])
|
||||||
|
if args.downloader_args:
|
||||||
|
cmd.extend(['--downloader-args', args.downloader_args])
|
||||||
|
if args.merge_output_format:
|
||||||
|
cmd.extend(['--merge-output-format', args.merge_output_format])
|
||||||
|
|
||||||
|
if args.download_continue:
|
||||||
|
cmd.extend(['--continue', '--part'])
|
||||||
|
|
||||||
|
if os.path.exists(args.cli_config):
|
||||||
|
logger.info(f"Using config file: {args.cli_config}")
|
||||||
|
cmd.extend(['--config-location', args.cli_config])
|
||||||
|
else:
|
||||||
|
logger.info(f"Config file '{args.cli_config}' not found. Using yt-dlp defaults.")
|
||||||
|
|
||||||
|
if args.print_traffic:
|
||||||
|
cmd.append('--print-traffic')
|
||||||
|
cmd.append('--no-progress')
|
||||||
|
else:
|
||||||
|
cmd.append('--progress')
|
||||||
|
|
||||||
|
if args.verbose:
|
||||||
|
cmd.append('--verbose')
|
||||||
|
|
||||||
|
if proxy_url:
|
||||||
|
cmd.extend(['--proxy', proxy_url])
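# Illustrative example (hypothetical paths and values): with -f 137, --download-continue
# and a proxy, the assembled command looks roughly like:
#   yt-dlp --load-info-json /tmp/tmpab12cd.json -f 137 \
#       -o '/data/%(title)s [%(id)s].f%(format_id)s.%(ext)s' --print filename \
#       --continue --part --config-location cli.config --progress \
#       --proxy socks5://127.0.0.1:1080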
|
||||||
|
|
||||||
|
# Determine if we need to capture output.
|
||||||
|
capture_output = args.cleanup or args.log_file or args.print_traffic
|
||||||
|
|
||||||
|
if capture_output and not args.print_traffic:
|
||||||
|
logger.info("Note: --cleanup or --log-file requires capturing output, which may affect progress bar display.")
|
||||||
|
|
||||||
|
logger.info(f"Executing yt-dlp command for format '{args.format}'")
|
||||||
|
|
||||||
|
# Construct a display version of the command for logging
|
||||||
|
display_cmd_str = ' '.join(f"'{arg}'" if ' ' in arg else arg for arg in cmd)
|
||||||
|
if os.path.exists(args.cli_config):
|
||||||
|
try:
|
||||||
|
with open(args.cli_config, 'r', encoding='utf-8') as f:
|
||||||
|
config_contents = ' '.join(f.read().split())
|
||||||
|
if config_contents:
|
||||||
|
logger.info(f"cli.config contents: {config_contents}")
|
||||||
|
except IOError as e:
|
||||||
|
logger.warning(f"Could not read config file {args.cli_config}: {e}")
|
||||||
|
|
||||||
|
logger.info(f"Full command: {display_cmd_str}")
|
||||||
|
|
||||||
|
if capture_output:
|
||||||
|
process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, encoding='utf-8')
|
||||||
|
|
||||||
|
log_f = None
|
||||||
|
if args.log_file:
|
||||||
|
try:
|
||||||
|
log_f = open(args.log_file, 'a', encoding='utf-8')
|
||||||
|
log_f.write(f"\n--- Log entry: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} ---\n")
|
||||||
|
log_f.write(f"Command: {' '.join(cmd)}\n\n")
|
||||||
|
except IOError as e:
|
||||||
|
logger.error(f"Failed to open log file {args.log_file}: {e}")
|
||||||
|
|
||||||
|
stdout_data, stderr_data = process.communicate()
|
||||||
|
return_code = process.returncode
|
||||||
|
|
||||||
|
# Write captured output to terminal and log file
|
||||||
|
if stdout_data:
|
||||||
|
sys.stdout.write(stdout_data)
|
||||||
|
sys.stdout.flush()
|
||||||
|
if log_f:
|
||||||
|
for line in stdout_data.splitlines(True):
|
||||||
|
log_f.write(f"[stdout] {line}")
|
||||||
|
|
||||||
|
if stderr_data:
|
||||||
|
sys.stderr.write(stderr_data)
|
||||||
|
sys.stderr.flush()
|
||||||
|
if log_f:
|
||||||
|
for line in stderr_data.splitlines(True):
|
||||||
|
log_f.write(f"[stderr] {line}")
|
||||||
|
|
||||||
|
stdout_lines = stdout_data.splitlines() if stdout_data else []
|
||||||
|
|
||||||
|
if log_f:
|
||||||
|
log_f.write(f"\n--- End log entry (yt-dlp exit code: {return_code}) ---\n")
|
||||||
|
log_f.close()
|
||||||
|
|
||||||
|
for line in reversed(stdout_lines):
|
||||||
|
if line and os.path.exists(line):
|
||||||
|
downloaded_filepath = line
|
||||||
|
logger.info(f"Detected downloaded file: {downloaded_filepath}")
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
# Original behavior: progress bar direct to terminal, no capture
|
||||||
|
process = subprocess.Popen(cmd)
|
||||||
|
process.wait()
|
||||||
|
return_code = process.returncode
|
||||||
|
|
||||||
|
if return_code != 0:
|
||||||
|
logger.error(f"yt-dlp exited with error code {return_code}")
|
||||||
|
else:
|
||||||
|
logger.info("yt-dlp command completed successfully.")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.exception(f"An unexpected error occurred: {e}")
|
||||||
|
return 1
|
||||||
|
finally:
|
||||||
|
# Clean up the temporary file
|
||||||
|
if os.path.exists(info_json_path):
|
||||||
|
os.unlink(info_json_path)
|
||||||
|
logger.debug(f"Removed temporary file {info_json_path}")
|
||||||
|
|
||||||
|
# Cleanup phase
|
||||||
|
if args.cleanup:
|
||||||
|
if downloaded_filepath and os.path.exists(downloaded_filepath):
|
||||||
|
try:
|
||||||
|
logger.info(f"Cleanup: Renaming and truncating '{downloaded_filepath}'")
|
||||||
|
|
||||||
|
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
|
||||||
|
|
||||||
|
directory, original_filename = os.path.split(downloaded_filepath)
|
||||||
|
filename_base, filename_ext = os.path.splitext(original_filename)
|
||||||
|
|
||||||
|
# New name format is [base]_[timestamp][ext].empty
|
||||||
|
new_filename = f"{filename_base}_{timestamp}{filename_ext}.empty"
|
||||||
|
new_filepath = os.path.join(directory, new_filename)
|
||||||
|
|
||||||
|
os.rename(downloaded_filepath, new_filepath)
|
||||||
|
logger.info(f"Renamed to '{new_filepath}'")
|
||||||
|
|
||||||
|
with open(new_filepath, 'w') as f:
|
||||||
|
pass
|
||||||
|
logger.info(f"Truncated '{new_filepath}' to 0 bytes.")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Cleanup failed: {e}")
|
||||||
|
return 1
|
||||||
|
else:
|
||||||
|
logger.warning("Cleanup requested, but no downloaded file was found. Skipping cleanup.")
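# Worked example of the cleanup naming scheme above (hypothetical file and timestamp):
#   "My Video [abc12345678].f137.mp4"
#     -> renamed to "My Video [abc12345678].f137_20240101_120000.mp4.empty", then truncated to 0 bytes.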
|
||||||
|
|
||||||
|
return return_code
|
||||||
473 ytops_client/get_info_tool.py (Normal file)
@ -0,0 +1,473 @@
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Tool to get info.json from the Thrift service.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
import logging
|
||||||
|
import codecs
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import Dict, Any, Optional
|
||||||
|
|
||||||
|
# Configure logging
|
||||||
|
logging.basicConfig(
|
||||||
|
level=logging.INFO,
|
||||||
|
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
||||||
|
)
|
||||||
|
# Note: The CLI entrypoint will configure the root logger.
|
||||||
|
# We get our own logger here for namespacing.
|
||||||
|
logger = logging.getLogger('get_info_tool')
|
||||||
|
|
||||||
|
# Import Thrift modules
|
||||||
|
# Add project's thrift gen_py path to allow importing 'pangramia'
|
||||||
|
script_dir = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
project_root = os.path.abspath(os.path.join(script_dir, '..'))
|
||||||
|
sys.path.insert(0, os.path.join(project_root, 'thrift_model', 'gen_py'))
|
||||||
|
from thrift.transport import TTransport
|
||||||
|
from pangramia.yt.common.ttypes import TokenUpdateMode
|
||||||
|
from pangramia.yt.exceptions.ttypes import PBServiceException, PBUserException
|
||||||
|
from yt_ops_services.client_utils import get_thrift_client
|
||||||
|
from ytops_client.request_params_help import REQUEST_PARAMS_HELP_STRING
|
||||||
|
|
||||||
|
|
||||||
|
def get_video_id(url: str) -> str:
    """Extracts a YouTube video ID from a URL."""
    # For URLs like https://www.youtube.com/watch?v=VIDEO_ID
    match = re.search(r"v=([0-9A-Za-z_-]{11})", url)
    if match:
        return match.group(1)
    # For URLs like https://youtu.be/VIDEO_ID
    match = re.search(r"youtu\.be\/([0-9A-Za-z_-]{11})", url)
    if match:
        return match.group(1)
    # For plain video IDs
    if re.fullmatch(r'[0-9A-Za-z_-]{11}', url):
        return url
    return "unknown_video_id"
|
||||||
|
|
||||||
|
|
||||||
|
def parse_key_value_params(params_str: str) -> Dict[str, Any]:
    """Parses a comma-separated string of key=value pairs into a nested dict."""
    params = {}
    if not params_str:
        return params
    for pair in params_str.split(','):
        if '=' not in pair:
            logger.warning(f"Skipping malformed parameter pair: {pair}")
            continue
        key, value_str = pair.split('=', 1)
        keys = key.strip().split('.')

        # Try to parse value as JSON primitive, otherwise treat as string
        try:
            # Don't parse if it's quoted, treat as string
            if (value_str.startswith('"') and value_str.endswith('"')) or \
               (value_str.startswith("'") and value_str.endswith("'")):
                value = value_str[1:-1]
            else:
                value = json.loads(value_str)
        except json.JSONDecodeError:
            value = value_str

        d = params
        for k in keys[:-1]:
            if k not in d or not isinstance(d[k], dict):
                d[k] = {}
            d = d[k]
        d[keys[-1]] = value
    return params
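
# Worked example (first key taken from the --request-params-json help text, second
# key hypothetical):
#   parse_key_value_params("caching_policy.mode=force_refresh,ytdlp_params.verbose=true")
#     -> {"caching_policy": {"mode": "force_refresh"}, "ytdlp_params": {"verbose": True}}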
|
||||||
|
|
||||||
|
|
||||||
|
def add_get_info_parser(subparsers):
|
||||||
|
"""Add the parser for the 'get-info' command."""
|
||||||
|
parser = subparsers.add_parser(
|
||||||
|
'get-info',
|
||||||
|
description='Get info.json from Thrift service',
|
||||||
|
formatter_class=argparse.RawTextHelpFormatter,
|
||||||
|
help='Get info.json from the Thrift service.'
|
||||||
|
)
|
||||||
|
parser.add_argument('url', help='YouTube URL or video ID')
|
||||||
|
parser.add_argument('--host', default='127.0.0.1', help="Thrift server host. Using 127.0.0.1 avoids harmless connection errors when the local Envoy proxy only listens on IPv4.")
|
||||||
|
parser.add_argument('--port', type=int, default=9080, help='Thrift server port')
|
||||||
|
parser.add_argument('--auth-host', help='Thrift server host (overrides --host).')
|
||||||
|
parser.add_argument('--auth-port', type=int, help='Thrift server port (overrides --port).')
|
||||||
|
parser.add_argument('--profile', default='default_profile', help='The profile name (accountId) to use for the request.')
|
||||||
|
parser.add_argument('--client', help='''Specific client to use. Overrides server default.
|
||||||
|
Available clients:
|
||||||
|
web, web_safari, web_embedded, web_music, web_creator, mweb
|
||||||
|
android, android_music, android_creator, android_vr
|
||||||
|
ios, ios_music, ios_creator
|
||||||
|
tv, tv_simply, tv_embedded
|
||||||
|
|
||||||
|
Append "_camoufox" to any client name (e.g., "web_camoufox") to force
|
||||||
|
the browser-based generation strategy.''')
|
||||||
|
parser.add_argument('--output', help='Output file path for the info.json. If not provided, prints to stdout.')
|
||||||
|
parser.add_argument('--output-auto', action='store_true', help='Automatically generate output filename for info.json and invocation data. Format: DATETIME-CLIENT-VIDEOID-info.json')
|
||||||
|
parser.add_argument('--output-auto-url-only', action='store_true', help='Automatically generate output filename for info.json (format: VIDEOID-info.json) and also save a copy to latest-info.json.')
|
||||||
|
parser.add_argument('--output-auto-suffix', help='Suffix to add to the filename before "-info.json" when using --output-auto or --output-auto-url-only. E.g., "-cycle1".')
|
||||||
|
parser.add_argument('--log-file-auto', action='store_true', help='Automatically generate a log filename and save all script logs to it. Format: VIDEOID-DATETIME.log')
|
||||||
|
parser.add_argument('--machine-id', help='Identifier for the client machine. Defaults to hostname.')
|
||||||
|
parser.add_argument('--worker-id', help='Identifier for a worker process. Used for naming files with --save-latest.')
|
||||||
|
parser.add_argument('--save-latest', action='store_true', help='Save a copy of the info.json to latest-info.json or [worker-id]-latest-info.json. This is implied by --output-auto-url-only.')
|
||||||
|
parser.add_argument('--assigned-proxy-url', help='A specific proxy URL to use for the request, overriding the server\'s proxy pool logic.')
|
||||||
|
parser.add_argument('--proxy-rename', help='Apply sed-style regex substitution to the assigned proxy URL. Format: s/pattern/replacement/')
|
||||||
|
parser.add_argument('--print-proxy', action='store_true', help='Print the proxy used for the request to stderr.')
|
||||||
|
parser.add_argument('--verbose', action='store_true', help='Enable verbose output')
|
||||||
|
parser.add_argument('--log-return', action='store_true', help='Log the full summary of the thrift response to stderr, including detailed logs.\nThis is a convenience flag that implies --show-prefetch-log, --show-nodejs-log, and --show-ytdlp-log.')
|
||||||
|
parser.add_argument('--show-prefetch-log', action='store_true', help='Print the curl pre-fetch log from the server response.')
|
||||||
|
parser.add_argument('--show-nodejs-log', action='store_true', help='Print the Node.js debug log from the server response.')
|
||||||
|
parser.add_argument('--show-ytdlp-log', action='store_true', help='Print the yt-dlp debug log from the server response.')
|
||||||
|
parser.add_argument('--direct', action='store_true', help='Use the direct yt-dlp info.json generation method, bypassing Node.js token generation.')
|
||||||
|
parser.add_argument('--print-info-out', action='store_true', help='Print the final info.json to stdout. By default, output is suppressed unless writing to a file.')
|
||||||
|
parser.add_argument('--request-params-json', help=REQUEST_PARAMS_HELP_STRING + '\nCan also be a comma-separated string of key=value pairs (e.g., "caching_policy.mode=force_refresh").')
|
||||||
|
parser.add_argument('--force-renew', help='Comma-separated list of items to force-renew: cookies, visitor_id, po_token, nsig_cache, all.')
|
||||||
|
return parser
|
||||||
|
|
||||||
|
def main_get_info(args):
|
||||||
|
"""Main logic for the 'get-info' command."""
|
||||||
|
exit_code = 0
|
||||||
|
|
||||||
|
# Set log level
|
||||||
|
if args.verbose:
|
||||||
|
logging.getLogger().setLevel(logging.DEBUG)
|
||||||
|
|
||||||
|
if args.log_file_auto:
|
||||||
|
video_id = get_video_id(args.url)
|
||||||
|
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
|
||||||
|
log_filename = f"{video_id}-{timestamp}.log"
|
||||||
|
|
||||||
|
# Get root logger to add file handler
|
||||||
|
root_logger = logging.getLogger()
|
||||||
|
file_handler = logging.FileHandler(log_filename)
|
||||||
|
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
|
||||||
|
file_handler.setFormatter(formatter)
|
||||||
|
root_logger.addHandler(file_handler)
|
||||||
|
|
||||||
|
logger.info(f"Logging to file: {log_filename}")
|
||||||
|
|
||||||
|
transport = None
|
||||||
|
try:
|
||||||
|
# Determine host and port, giving precedence to --auth-* args
|
||||||
|
host = args.auth_host or args.host
|
||||||
|
port = args.auth_port or args.port
|
||||||
|
|
||||||
|
# Create Thrift client
|
||||||
|
client, transport = get_thrift_client(host, port)
|
||||||
|
|
||||||
|
# Get token data, which includes the info.json
|
||||||
|
if args.direct:
|
||||||
|
logger.info(f"Requesting info.json for URL '{args.url}' using DIRECT method.")
|
||||||
|
if args.client:
|
||||||
|
logger.info(f"Requesting to use specific client(s): {args.client}")
|
||||||
|
else:
|
||||||
|
logger.info("No specific client requested, server will let yt-dlp decide.")
|
||||||
|
token_data = client.getInfoJsonDirect(url=args.url, clients=args.client)
|
||||||
|
else:
|
||||||
|
logger.info(f"Requesting info.json for URL '{args.url}' using profile '{args.profile}'")
|
||||||
|
|
||||||
|
# Prepare arguments for the Thrift call
|
||||||
|
machine_id = args.machine_id
|
||||||
|
if not machine_id:
|
||||||
|
import socket
|
||||||
|
machine_id = socket.gethostname()
|
||||||
|
logger.info(f"No machine ID provided, using hostname: {machine_id}")
|
||||||
|
|
||||||
|
request_params = {}
|
||||||
|
if args.request_params_json:
|
||||||
|
try:
|
||||||
|
request_params = json.loads(args.request_params_json)
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
logger.info("Could not parse --request-params-json as JSON, trying as key-value string.")
|
||||||
|
request_params = parse_key_value_params(args.request_params_json)
|
||||||
|
|
||||||
|
if args.force_renew:
|
||||||
|
items_to_renew = [item.strip() for item in args.force_renew.split(',')]
|
||||||
|
request_params['force_renew'] = items_to_renew
|
||||||
|
logger.info(f"Requesting force renew for: {items_to_renew}")
|
||||||
|
|
||||||
|
if args.verbose:
|
||||||
|
# Add verbose flag for yt-dlp on the server
|
||||||
|
ytdlp_params = request_params.setdefault('ytdlp_params', {})
|
||||||
|
ytdlp_params['verbose'] = True
|
||||||
|
logger.info("Verbose mode enabled, requesting verbose yt-dlp logs from server.")
|
||||||
|
|
||||||
|
thrift_args = {
|
||||||
|
'accountId': args.profile,
|
||||||
|
'updateType': TokenUpdateMode.AUTO,
|
||||||
|
'url': args.url,
|
||||||
|
'clients': args.client,
|
||||||
|
            'machineId': machine_id,
            'airflowLogContext': None,
            'requestParamsJson': json.dumps(request_params) if request_params else None,
            'assignedProxyUrl': args.assigned_proxy_url
        }

        # Handle proxy renaming
        assigned_proxy = args.assigned_proxy_url
        if assigned_proxy and args.proxy_rename:
            rename_rule = args.proxy_rename.strip("'\"")
            if rename_rule.startswith('s/') and rename_rule.count('/') >= 2:
                try:
                    parts = rename_rule.split('/')
                    pattern = parts[1]
                    replacement = parts[2]
                    original_proxy = assigned_proxy
                    assigned_proxy = re.sub(pattern, replacement, assigned_proxy)
                    logger.info(f"Renamed proxy URL from '{original_proxy}' to '{assigned_proxy}' using rule '{rename_rule}'")
                except re.error as e:
                    logger.error(f"Invalid regex in --proxy-rename: {e}")
                    return 1
                except IndexError:
                    logger.error("Invalid --proxy-rename format. Expected: s/pattern/replacement/")
                    return 1
            else:
                logger.error("Invalid --proxy-rename format. Expected: s/pattern/replacement/")
                return 1
            thrift_args['assignedProxyUrl'] = assigned_proxy

        if args.client:
            logger.info(f"Requesting to use specific client: {args.client}")
        else:
            logger.info("No specific client requested, server will use its default.")

        token_data = client.getOrRefreshToken(**thrift_args)

        if args.print_proxy:
            if hasattr(token_data, 'socks') and token_data.socks:
                print(f"Proxy used: {token_data.socks}", file=sys.stderr)
            else:
                print("Proxy information not available in response.", file=sys.stderr)

        if not token_data or not hasattr(token_data, 'infoJson') or not token_data.infoJson:
            logger.error("Server did not return valid info.json data.")
            print("Error: Server did not return valid info.json data.", file=sys.stderr)
            return 1

        info_json_str = token_data.infoJson

        # On success, print summary info to stderr for visibility.
        # This provides immediate feedback without interfering with piped stdout.
        if hasattr(token_data, 'serverVersionInfo') and token_data.serverVersionInfo:
            # Filter out the default params line as requested
            filtered_info = '\n'.join(
                line for line in token_data.serverVersionInfo.split('\n')
                if 'Default yt-dlp CLI params:' not in line
            )
            print(f"\n--- Server Version Info ---\n{filtered_info}", file=sys.stderr)
        if hasattr(token_data, 'requestSummary') and token_data.requestSummary:
            try:
                summary_data = json.loads(token_data.requestSummary)
                print(f"\n--- Request Summary ---\n{summary_data.get('summary', token_data.requestSummary)}", file=sys.stderr)
            except json.JSONDecodeError:
                # Fallback for old format or non-JSON summary
                print(f"\n--- Request Summary ---\n{token_data.requestSummary}", file=sys.stderr)

        # Print detailed logs only if explicitly requested
        if hasattr(token_data, 'requestSummary') and token_data.requestSummary:
            try:
                summary_data = json.loads(token_data.requestSummary)
                if args.show_prefetch_log or args.log_return:
                    print("\n--- Prefetch Log ---", file=sys.stderr)
                    print(summary_data.get('prefetch_log', 'Not available.'), file=sys.stderr)
                if args.show_nodejs_log or args.log_return:
                    print("\n--- Node.js Log ---", file=sys.stderr)
                    print(summary_data.get('nodejs_log', 'Not available.'), file=sys.stderr)
                if args.show_ytdlp_log or args.log_return:
                    print("\n--- yt-dlp Log ---", file=sys.stderr)
                    print(summary_data.get('ytdlp_log', 'Not available.'), file=sys.stderr)
            except json.JSONDecodeError:
                pass  # Fallback already handled above
        if hasattr(token_data, 'communicationLogPaths') and token_data.communicationLogPaths:
            logger.info("--- Communication Log Paths ---")
            for log_path in token_data.communicationLogPaths:
                logger.info(f"  - {log_path}")

        # Check if the returned info.json is an error report
        try:
            info_data = json.loads(info_json_str)
            if hasattr(token_data, 'socks') and token_data.socks:
                info_data['_proxy_url'] = token_data.socks
            if isinstance(info_data, dict) and 'error' in info_data:
                error_code = info_data.get('errorCode', 'N/A')
                error_message = info_data.get('message', info_data.get('error', 'Unknown error'))
                logger.error(f"Server returned an error in info.json (Code: {error_code}): {error_message}")
                print(f"Error from server (Code: {error_code}): {error_message}", file=sys.stderr)
                # Optionally print the full error JSON
                if args.verbose:
                    print(json.dumps(info_data, indent=2), file=sys.stderr)
                exit_code = 1
        except json.JSONDecodeError:
            logger.error(f"Failed to parse info.json from server: {info_json_str[:200]}...")
            print("Error: Failed to parse the info.json response from the server.", file=sys.stderr)
            return 1

        logger.info(f"Successfully retrieved info.json ({len(info_json_str)} bytes)")

        # Save to latest-info.json if requested, or if using --output-auto-url-only for convenience
        if args.save_latest or args.output_auto_url_only:
            base_latest_filename = f"{args.worker_id}-latest" if args.worker_id else "latest"
            latest_info_filename = f"{base_latest_filename}-info.json"
            latest_proxy_filename = f"{base_latest_filename}-proxy.txt"

            try:
                with open(latest_info_filename, 'w', encoding='utf-8') as f:
                    json.dump(info_data, f, indent=2)
                logger.info(f"Wrote info.json to {latest_info_filename}")
                print(f"Successfully saved info.json to {latest_info_filename}", file=sys.stderr)
            except IOError as e:
                logger.error(f"Failed to write to {latest_info_filename}: {e}")
                print(f"Error: Failed to write to {latest_info_filename}: {e}", file=sys.stderr)

            if hasattr(token_data, 'socks') and token_data.socks:
                try:
                    with open(latest_proxy_filename, 'w', encoding='utf-8') as f:
                        f.write(token_data.socks + '\n')
                    logger.info(f"Wrote proxy to {latest_proxy_filename}")
                    print(f"Successfully saved proxy to {latest_proxy_filename}", file=sys.stderr)
                except IOError as e:
                    logger.error(f"Failed to write to {latest_proxy_filename}: {e}")
                    print(f"Error: Failed to write to {latest_proxy_filename}: {e}", file=sys.stderr)

        # Determine output file path if auto-naming is used
        output_file = args.output
        if args.output_auto or args.output_auto_url_only:
            video_id = get_video_id(args.url)
            suffix = args.output_auto_suffix or ""
            if args.output_auto:
                timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
                client_id = args.client or args.profile
                base_filename = f"{timestamp}-{client_id}-{video_id}{suffix}"
                output_file = f"{base_filename}-info.json"

                # Save invocation data
                invocation_filename = f"{base_filename}-invocation.json"
                invocation_data = {}
                for attr in ['ytdlpCommand', 'socks', 'jobId', 'url', 'requestSummary', 'communicationLogPaths']:
                    if hasattr(token_data, attr):
                        value = getattr(token_data, attr)
                        if value:
                            invocation_data[attr] = value

                if hasattr(token_data, 'cookiesBlob') and token_data.cookiesBlob:
                    invocation_data['cookiesBlob'] = f"present, {len(token_data.cookiesBlob)} bytes"
                else:
                    invocation_data['cookiesBlob'] = "not present"

                try:
                    with open(invocation_filename, 'w', encoding='utf-8') as f:
                        json.dump(invocation_data, f, indent=2)
                    logger.info(f"Wrote invocation data to {invocation_filename}")
                except IOError as e:
                    logger.error(f"Failed to write invocation data to {invocation_filename}: {e}")

            else:  # args.output_auto_url_only
                output_file = f"{video_id}{suffix}-info.json"

        # Write to output file if specified
        if output_file:
            try:
                # Ensure the output directory exists before writing the file
                output_dir = os.path.dirname(output_file)
                if output_dir:
                    os.makedirs(output_dir, exist_ok=True)

                with open(output_file, 'w', encoding='utf-8') as f:
                    # Pretty-print the JSON to the file
                    json.dump(info_data, f, indent=2)
                logger.info(f"Wrote info.json to {output_file}")
                # Print success message to stderr to not interfere with stdout piping
                print(f"Successfully saved info.json to {output_file}", file=sys.stderr)

                # If --output-auto, save invocation data
                if args.output_auto:
                    pass  # The latest-info.json logic is now handled by --save-latest

            except IOError as e:
                logger.error(f"Failed to write to output file {output_file}: {e}")
                print(f"Error: Failed to write to output file {output_file}: {e}", file=sys.stderr)
                return 1

        # Print the JSON to stdout if requested, to allow for piping.
        if args.print_info_out:
            print(json.dumps(info_data, indent=2))

        return exit_code
    except (PBServiceException, PBUserException) as e:
        # Check for non-fatal age-gate errors. These are expected for certain videos
        # and should not cause the entire stress test to fail.
        is_age_gate_error = hasattr(e, 'errorCode') and e.errorCode == 'AGE_GATED_SIGN_IN'

        if is_age_gate_error:
            logger.warning(f"Age-gated content detected for URL '{args.url}'. Treating as a non-fatal warning.")
            print(f"Warning: Age-gated content detected for '{args.url}'.", file=sys.stderr)

            # To avoid breaking downstream parsers, output a valid JSON error object.
            # This allows stress testers to see a 'success' (exit 0) but still know it was an age gate issue.
            error_json = {
                "error": "Age-gated content",
                "errorCode": "AGE_GATE",
                "message": "Sign in to confirm your age."
            }
            print(json.dumps(error_json, indent=2))

            # We return success because this is not a system failure.
            return 0

        # Format message for better readability, ensuring newlines are handled.
        message = str(e.message or '')
        try:
            # Attempt to decode as if it has escaped newlines (e.g., '\\n' -> '\n')
            message = codecs.decode(message, 'unicode_escape')
        except Exception:
            # Fallback for safety, though unicode_escape is robust
            message = message.replace('\\n', '\n')

        # For known user-facing errors, suppress the full traceback unless verbose is explicitly on.
        # The goal is to provide a clean error message for common issues.
        user_facing_errors = [
            "BOT_DETECTED", "BOT_DETECTION_SIGN_IN_REQUIRED",
            "VIDEO_UNAVAILABLE", "PRIVATE_VIDEO", "VIDEO_REMOVED",
            "AGE_GATED_SIGN_IN", "MEMBERS_ONLY", "VIDEO_PROCESSING", "GEO_RESTRICTED"
        ]
        is_user_facing_error = hasattr(e, 'errorCode') and e.errorCode in user_facing_errors

        # Only show full traceback in verbose mode AND if it's NOT a common user-facing error.
        show_exc_info = args.verbose and not is_user_facing_error

        logger.error(f"A Thrift error occurred: {message}", exc_info=show_exc_info)
        print(f"\n--- ERROR ---", file=sys.stderr)
        print(f"{message}", file=sys.stderr)

        if hasattr(e, 'context') and e.context and (args.verbose or not is_user_facing_error):
            print(f"\n--- CONTEXT ---", file=sys.stderr)
            # The context is a dict from thrift. Pretty print it, handling newlines in values.
            if isinstance(e.context, dict):
                # Process each value to un-escape newlines for clean printing
                processed_context = {}
                for key, value in e.context.items():
                    try:
                        processed_context[key] = codecs.decode(str(value), 'unicode_escape')
                    except Exception:
                        processed_context[key] = str(value).replace('\\n', '\n')
                print(json.dumps(processed_context, indent=2), file=sys.stderr)
            else:
                # Fallback for non-dict context
                print(str(e.context), file=sys.stderr)
        print("\n", file=sys.stderr)
        return 1
    except TTransport.TTransportException as e:
        logger.error(f"Connection to server failed: {e}", exc_info=args.verbose)
        print(f"Error: Connection to server at {args.host}:{args.port} failed.", file=sys.stderr)
        return 1
    except Exception as e:
        logger.exception(f"An unexpected error occurred: {e}")
        print(f"An unexpected error occurred: {e}", file=sys.stderr)
        return 1
    finally:
        if transport and transport.isOpen():
            transport.close()
            logger.info("Thrift connection closed.")
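Note: the --proxy-rename option handled above takes a sed-style s/pattern/replacement/ rule. A minimal standalone sketch of the same parse-and-substitute step, for reference only (the example rule and proxy URL are illustrative, not taken from this commit):

import re

def apply_rename_rule(proxy_url, rule):
    """Apply a sed-style s/pattern/replacement/ rule to a proxy URL."""
    if not (rule.startswith('s/') and rule.count('/') >= 2):
        raise ValueError("Expected a rule of the form s/pattern/replacement/")
    _, pattern, replacement = rule.split('/')[:3]
    return re.sub(pattern, replacement, proxy_url)

# Illustrative: point a locally-defined proxy at the Docker bridge address.
print(apply_rename_rule("socks5://localhost:1080", "s/localhost/172.17.0.1/"))
# -> socks5://172.17.0.1:1080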
228 ytops_client/list_formats_tool.py Normal file
@ -0,0 +1,228 @@
"""
|
||||||
|
Tool to list available formats from a yt-dlp info.json file.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import json
|
||||||
|
import argparse
|
||||||
|
import re
|
||||||
|
from urllib.parse import urlparse, parse_qs
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
|
def format_size(b):
|
||||||
|
"""Format size in bytes to human-readable string."""
|
||||||
|
if b is None:
|
||||||
|
return 'N/A'
|
||||||
|
if b < 1024:
|
||||||
|
return f"{b}B"
|
||||||
|
elif b < 1024**2:
|
||||||
|
return f"{b/1024:.2f}KiB"
|
||||||
|
elif b < 1024**3:
|
||||||
|
return f"{b/1024**2:.2f}MiB"
|
||||||
|
else:
|
||||||
|
return f"{b/1024**3:.2f}GiB"
|
||||||
|
|
||||||
|
def list_formats(info_json, requested_formats_str=None, file=sys.stdout):
|
||||||
|
"""Prints a table of available formats from info.json data."""
|
||||||
|
formats = info_json.get('formats', [])
|
||||||
|
if not formats:
|
||||||
|
print("No formats found in the provided info.json.", file=file)
|
||||||
|
return
|
||||||
|
|
||||||
|
requested_formats = []
|
||||||
|
requested_order = {}
|
||||||
|
if requested_formats_str:
|
||||||
|
# Split by comma or slash, and filter out empty strings
|
||||||
|
requested_formats = [item for item in re.split(r'[,/]', requested_formats_str) if item]
|
||||||
|
requested_order = {fmt: i for i, fmt in enumerate(requested_formats)}
|
||||||
|
|
||||||
|
def sort_key(f):
|
||||||
|
fid = f.get('format_id', '')
|
||||||
|
is_requested = fid in requested_order
|
||||||
|
if is_requested:
|
||||||
|
# Sort requested formats by the order they were provided
|
||||||
|
return (False, requested_order[fid])
|
||||||
|
else:
|
||||||
|
# Sort other formats numerically by ID
|
||||||
|
return (True, int(fid) if fid.isdigit() else 999)
|
||||||
|
|
||||||
|
sorted_formats = sorted(formats, key=sort_key)
|
||||||
|
|
||||||
|
# Check if any requested formats were found
|
||||||
|
if requested_formats:
|
||||||
|
found_any = any(f.get('format_id') in requested_order for f in formats)
|
||||||
|
if not found_any:
|
||||||
|
print("WARNING: No format from list found.", file=sys.stderr)
|
||||||
|
|
||||||
|
# Header
|
||||||
|
header = "{:<6} {:<7} {:<12} {:<5} {:<18} {:<18} {:<12} {:<10} {:<20} {:<17} {:<15} {:<12} {:<12} {:<12} {:<5} {:<12} {:<12} {:<12} {:<12} {:<12}".format(
|
||||||
|
"ID", "EXT", "RESOLUTION", "FPS", "VCODEC", "ACODEC", "FILESIZE", "TBR", "URL (path)", "EXPIRE (UTC)", "IP", "ID_TOKEN", "SESS_TOKEN", "EI_TOKEN", "GIR", "BUI_TOKEN", "POT_TOKEN", "MT_TOKEN", "SIG", "LSIG"
|
||||||
|
)
|
||||||
|
print(header, file=file)
|
||||||
|
print("-" * len(header), file=file)
|
||||||
|
|
||||||
|
for f in sorted_formats:
|
||||||
|
format_id = f.get('format_id', 'N/A')
|
||||||
|
ext = f.get('ext', 'N/A')
|
||||||
|
|
||||||
|
resolution = f.get('resolution')
|
||||||
|
if not resolution:
|
||||||
|
if 'width' in f and f['width'] is not None:
|
||||||
|
resolution = f"{f['width']}x{f['height']}"
|
||||||
|
else:
|
||||||
|
resolution = 'audio only'
|
||||||
|
|
||||||
|
fps = f.get('fps', '')
|
||||||
|
vcodec = f.get('vcodec', 'none')
|
||||||
|
acodec = f.get('acodec', 'none')
|
||||||
|
filesize = f.get('filesize') or f.get('filesize_approx')
|
||||||
|
tbr = f.get('tbr')
|
||||||
|
|
||||||
|
display_id = f"*{format_id}" if format_id in requested_order else format_id
|
||||||
|
|
||||||
|
url = f.get('url', '')
|
||||||
|
partial_url, expire_date, ip, id_token_short, sess_token_short, ei_token_short, gir, bui_token_short, pot_token_short, mt_token_short, sig_short, lsig_short = ('N/A',) * 12
|
||||||
|
if url:
|
||||||
|
parsed = urlparse(url)
|
||||||
|
query_params = parse_qs(parsed.query)
|
||||||
|
|
||||||
|
path_and_query = parsed.path
|
||||||
|
if parsed.query:
|
||||||
|
path_and_query += '?' + parsed.query
|
||||||
|
|
||||||
|
if len(path_and_query) > 18:
|
||||||
|
partial_url = path_and_query[:8] + '...' + path_and_query[-7:]
|
||||||
|
else:
|
||||||
|
partial_url = path_and_query
|
||||||
|
|
||||||
|
expire_ts = query_params.get('expire', [None])[0]
|
||||||
|
if expire_ts:
|
||||||
|
try:
|
||||||
|
expire_date = datetime.fromtimestamp(int(expire_ts), timezone.utc).strftime('%m-%d %H:%M:%S')
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
expire_date = 'Invalid'
|
||||||
|
|
||||||
|
ip = query_params.get('ip', ['N/A'])[0]
|
||||||
|
|
||||||
|
id_token = query_params.get('id', [None])[0]
|
||||||
|
if id_token and len(id_token) > 12:
|
||||||
|
id_token_short = id_token[:6] + '..' + id_token[-4:]
|
||||||
|
elif id_token:
|
||||||
|
id_token_short = id_token
|
||||||
|
|
||||||
|
sess_token = query_params.get('n', [None])[0]
|
||||||
|
if sess_token and len(sess_token) > 12:
|
||||||
|
sess_token_short = sess_token[:6] + '..' + sess_token[-4:]
|
||||||
|
elif sess_token:
|
||||||
|
sess_token_short = sess_token
|
||||||
|
|
||||||
|
ei_token = query_params.get('ei', [None])[0]
|
||||||
|
if ei_token and len(ei_token) > 12:
|
||||||
|
ei_token_short = ei_token[:6] + '..' + ei_token[-4:]
|
||||||
|
elif ei_token:
|
||||||
|
ei_token_short = ei_token
|
||||||
|
|
||||||
|
gir = query_params.get('gir', ['N/A'])[0]
|
||||||
|
|
||||||
|
bui_token = query_params.get('bui', [None])[0]
|
||||||
|
if bui_token and len(bui_token) > 12:
|
||||||
|
bui_token_short = bui_token[:6] + '..' + bui_token[-4:]
|
||||||
|
elif bui_token:
|
||||||
|
bui_token_short = bui_token
|
||||||
|
|
||||||
|
pot_token = query_params.get('pot', [None])[0]
|
||||||
|
if pot_token and len(pot_token) > 12:
|
||||||
|
pot_token_short = pot_token[:6] + '..' + pot_token[-4:]
|
||||||
|
elif pot_token:
|
||||||
|
pot_token_short = pot_token
|
||||||
|
|
||||||
|
mt_token = query_params.get('mt', [None])[0]
|
||||||
|
# mt is often just a timestamp, don't shorten unless it's a long hash
|
||||||
|
if mt_token and len(mt_token) > 12:
|
||||||
|
mt_token_short = mt_token[:6] + '..' + mt_token[-4:]
|
||||||
|
elif mt_token:
|
||||||
|
mt_token_short = mt_token
|
||||||
|
|
||||||
|
sig = query_params.get('sig', [None])[0]
|
||||||
|
if sig and len(sig) > 12:
|
||||||
|
sig_short = sig[:6] + '..' + sig[-4:]
|
||||||
|
elif sig:
|
||||||
|
sig_short = sig
|
||||||
|
|
||||||
|
lsig = query_params.get('lsig', [None])[0]
|
||||||
|
if lsig and len(lsig) > 12:
|
||||||
|
lsig_short = lsig[:6] + '..' + lsig[-4:]
|
||||||
|
elif lsig:
|
||||||
|
lsig_short = lsig
|
||||||
|
|
||||||
|
print("{:<6} {:<7} {:<12} {:<5} {:<18} {:<18} {:<12} {:<10} {:<20} {:<17} {:<15} {:<12} {:<12} {:<12} {:<5} {:<12} {:<12} {:<12} {:<12} {:<12}".format(
|
||||||
|
str(display_id),
|
||||||
|
str(ext),
|
||||||
|
str(resolution),
|
||||||
|
str(fps) if fps else '',
|
||||||
|
str(vcodec)[:18],
|
||||||
|
str(acodec)[:18],
|
||||||
|
format_size(filesize),
|
||||||
|
f"{tbr:.0f}k" if tbr else 'N/A',
|
||||||
|
partial_url,
|
||||||
|
expire_date,
|
||||||
|
ip,
|
||||||
|
id_token_short,
|
||||||
|
sess_token_short,
|
||||||
|
ei_token_short,
|
||||||
|
gir,
|
||||||
|
bui_token_short,
|
||||||
|
pot_token_short,
|
||||||
|
mt_token_short,
|
||||||
|
sig_short,
|
||||||
|
lsig_short
|
||||||
|
), file=file)
|
||||||
|
|
||||||
|
def add_list_formats_parser(subparsers):
|
||||||
|
"""Add the parser for the 'list-formats' command."""
|
||||||
|
parser = subparsers.add_parser(
|
||||||
|
'list-formats',
|
||||||
|
description="List available formats from a yt-dlp info.json file.",
|
||||||
|
formatter_class=argparse.RawTextHelpFormatter,
|
||||||
|
help="List available formats from a yt-dlp info.json file."
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
'--load-info-json',
|
||||||
|
type=argparse.FileType('r', encoding='utf-8'),
|
||||||
|
default=sys.stdin,
|
||||||
|
help="Path to the info.json file. Reads from stdin if not provided."
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
'-f', '--formats',
|
||||||
|
help='Comma or slash-separated list of format IDs to highlight and prioritize (e.g., "18,140,299/298").'
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
'-p', '--pass-through',
|
||||||
|
action='store_true',
|
||||||
|
help='Pass the input JSON through to stdout, printing the format list to stderr.'
|
||||||
|
)
|
||||||
|
return parser
|
||||||
|
|
||||||
|
def main_list_formats(args):
|
||||||
|
"""Main logic for the 'list-formats' command."""
|
||||||
|
try:
|
||||||
|
# Read the whole content to allow passing it through
|
||||||
|
info_json_content = args.load_info_json.read()
|
||||||
|
info_data = json.loads(info_json_content)
|
||||||
|
|
||||||
|
# Determine output stream for the format list
|
||||||
|
output_stream = sys.stderr if args.pass_through else sys.stdout
|
||||||
|
list_formats(info_data, args.formats, file=output_stream)
|
||||||
|
|
||||||
|
# If pass-through is enabled, print the original JSON to stdout
|
||||||
|
if args.pass_through:
|
||||||
|
# Use end='' because the read content likely includes a trailing newline
|
||||||
|
print(info_json_content, end='')
|
||||||
|
|
||||||
|
return 0
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
print("Error: Invalid JSON provided.", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
except Exception as e:
|
||||||
|
print(f"An unexpected error occurred: {e}", file=sys.stderr)
|
||||||
|
return 1
|
||||||
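For reference, list_formats can also be driven directly from Python. A small illustrative example with a made-up single-format info.json fragment (the format values are placeholders, real data comes from yt-dlp or the get-info tool, and the import path assumes the package layout shown in this commit):

import sys
from ytops_client.list_formats_tool import list_formats

# Placeholder info.json fragment with a single format entry.
sample_info = {
    "formats": [
        {
            "format_id": "18",
            "ext": "mp4",
            "resolution": "640x360",
            "vcodec": "avc1.42001E",
            "acodec": "mp4a.40.2",
            "filesize": 3456789,
            "tbr": 500.0,
            "url": "https://example.com/videoplayback?expire=1700000000&ip=203.0.113.5",
        }
    ]
}

# Highlight format 18 and send the table to stderr, as the -p/--pass-through path does.
list_formats(sample_info, requested_formats_str="18", file=sys.stderr)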
48 ytops_client/request_params_help.py Normal file
@ -0,0 +1,48 @@
# Using a separate file for this long help message to keep the main script clean.
# It's imported by client tools that use the --request-params-json argument.

REQUEST_PARAMS_HELP_STRING = """JSON string with per-request parameters to override server defaults.
Example of a full configuration JSON showing default values (use single quotes to wrap it):
'{
  "_comment": "This JSON object allows overriding server-side defaults for a single request.",
  "cookies_file_path": "/path/to/your/cookies.txt",

  "context_reuse_policy": {
    "enabled": true,
    "max_age_seconds": 86400,
    "reuse_visitor_id": true,
    "reuse_cookies": true
  },
  "_comment_context_reuse_policy": "Controls how the server reuses session context (cookies, visitor ID) from the account's previous successful request.",
  "_comment_reuse_visitor_id": "If true, reuses the visitor ID from the last session to maintain a consistent identity to YouTube. This is automatically disabled for TV clients to avoid bot detection.",

  "ytdlp_params": {
    "use_curl_prefetch": false,
    "skip_cache": false,
    "visitor_id_override_enabled": true,
    "extractor_args": {
      "youtubepot-bgutilhttp": {
        "base_url": "http://172.17.0.1:4416"
      },
      "youtube": {
        "pot_trace": "true",
        "formats": "duplicate",
        "player_js_version": "actual"
      },
      "youtubepot-webpo": {
        "bind_to_visitor_id": "true"
      }
    }
  },
  "_comment_ytdlp_params": "Parameters passed directly to the yt-dlp wrapper for info.json generation.",
  "_comment_visitor_id_override_enabled": "If true (default), the server validates the visitor ID from the token generator and creates a new one if it is invalid. Set to false to force using the provided visitor ID without validation, which is useful for debugging.",
  "_comment_extractor_args": "Directly override yt-dlp extractor arguments. To use BGUtils in script mode, replace 'youtubepot-bgutilhttp' with 'youtubepot-bgutilscript'. The script path is '/opt/bgutil-ytdlp-pot-provider-server/build/generate_once.js'. To disable any explicit provider (like '--bgutils-mode none' on the server), remove both 'youtubepot-bgutilhttp' and 'youtubepot-bgutilscript' keys.",

  "session_params": {
    "lang": "en-US",
    "location": "US",
    "deviceCategory": "MOBILE",
    "user_agent": "Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)"
  },
  "_comment_session_params": "Parameters for the token generation session (primarily for Node.js)."
}'"""
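A sketch of how a client tool might consume this help string, assuming it attaches REQUEST_PARAMS_HELP_STRING to its own --request-params-json option and forwards the parsed JSON as request_params (the exact wiring in the real client tools may differ):

import argparse
import json

from ytops_client.request_params_help import REQUEST_PARAMS_HELP_STRING

parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
# Hypothetical wiring; only the help text itself comes from this module.
parser.add_argument('--request-params-json', help=REQUEST_PARAMS_HELP_STRING)

args = parser.parse_args(['--request-params-json', '{"ytdlp_params": {"skip_cache": true}}'])
request_params = json.loads(args.request_params_json) if args.request_params_json else None
print(request_params)  # {'ytdlp_params': {'skip_cache': True}}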
788 ytops_client/stress_formats_tool.py Normal file
@ -0,0 +1,788 @@
#!/usr/bin/env python3
"""
Tool to stress-test video format download URLs from an info.json.
"""

import argparse
import collections
import concurrent.futures
import json
import logging
import os
import random
import re
import shlex
import signal
import subprocess
import sys
import threading
import time
from datetime import datetime, timezone
from pathlib import Path
from urllib.parse import urlparse, parse_qs

# Configure logging
logger = logging.getLogger('stress_formats_tool')


def get_video_id(url: str) -> str:
    """Extracts a YouTube video ID from a URL."""
    # For URLs like https://www.youtube.com/watch?v=VIDEO_ID
    match = re.search(r"v=([0-9A-Za-z_-]{11})", url)
    if match:
        return match.group(1)
    # For URLs like https://youtu.be/VIDEO_ID
    match = re.search(r"youtu\.be\/([0-9A-Za-z_-]{11})", url)
    if match:
        return match.group(1)
    # For plain video IDs
    if re.fullmatch(r'[0-9A-Za-z_-]{11}', url):
        return url
    return "unknown_video_id"


def get_display_name(path_or_url):
    """Returns a clean name for logging, either a filename or a video ID."""
    if isinstance(path_or_url, Path):
        return path_or_url.name

    path_str = str(path_or_url)
    video_id = get_video_id(path_str)
    if video_id != "unknown_video_id":
        return video_id

    # Fallback for file paths as strings or weird URLs
    return Path(path_str).name


def format_size(b):
    """Format size in bytes to human-readable string."""
    if b is None:
        return 'N/A'
    if b < 1024:
        return f"{b}B"
    elif b < 1024**2:
        return f"{b/1024:.2f}KiB"
    elif b < 1024**3:
        return f"{b/1024**2:.2f}MiB"
    else:
        return f"{b/1024**3:.2f}GiB"

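get_video_id above accepts full watch URLs, youtu.be short links, or bare 11-character IDs. A quick illustrative check (the ID below is a placeholder, and the import path assumes the package layout in this commit):

from ytops_client.stress_formats_tool import get_video_id

assert get_video_id("https://www.youtube.com/watch?v=dQw4w9WgXcQ") == "dQw4w9WgXcQ"
assert get_video_id("https://youtu.be/dQw4w9WgXcQ") == "dQw4w9WgXcQ"
assert get_video_id("dQw4w9WgXcQ") == "dQw4w9WgXcQ"
assert get_video_id("not a video link") == "unknown_video_id"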
class StatsTracker:
|
||||||
|
"""Tracks and reports statistics for the stress test."""
|
||||||
|
def __init__(self, stats_file=None):
|
||||||
|
self.events = []
|
||||||
|
self.start_time = time.time()
|
||||||
|
self.lock = threading.Lock()
|
||||||
|
self.stats_file_path = stats_file
|
||||||
|
self.stats_file_handle = None
|
||||||
|
if self.stats_file_path:
|
||||||
|
try:
|
||||||
|
self.stats_file_handle = open(self.stats_file_path, 'a', encoding='utf-8')
|
||||||
|
except IOError as e:
|
||||||
|
logger.error(f"Could not open stats file {self.stats_file_path}: {e}")
|
||||||
|
|
||||||
|
def log_event(self, event_data):
|
||||||
|
"""Log a download attempt event."""
|
||||||
|
with self.lock:
|
||||||
|
event_data['timestamp'] = datetime.now().isoformat()
|
||||||
|
self.events.append(event_data)
|
||||||
|
if self.stats_file_handle:
|
||||||
|
self.stats_file_handle.write(json.dumps(event_data) + '\n')
|
||||||
|
self.stats_file_handle.flush()
|
||||||
|
|
||||||
|
def close(self):
|
||||||
|
"""Close the stats file."""
|
||||||
|
if self.stats_file_handle:
|
||||||
|
self.stats_file_handle.close()
|
||||||
|
|
||||||
|
def print_summary(self):
|
||||||
|
"""Print a summary of the test run."""
|
||||||
|
with self.lock:
|
||||||
|
if not self.events:
|
||||||
|
logger.info("No events were recorded.")
|
||||||
|
return
|
||||||
|
|
||||||
|
duration = time.time() - self.start_time
|
||||||
|
|
||||||
|
# Separate events by type
|
||||||
|
fetch_events = [e for e in self.events if e.get('type') == 'fetch']
|
||||||
|
download_events = [e for e in self.events if e.get('type') != 'fetch'] # Default to download for old events
|
||||||
|
|
||||||
|
logger.info("\n--- Test Summary ---")
|
||||||
|
logger.info(f"Total duration: {duration:.2f} seconds")
|
||||||
|
|
||||||
|
if fetch_events:
|
||||||
|
total_fetches = len(fetch_events)
|
||||||
|
successful_fetches = sum(1 for e in fetch_events if e['success'])
|
||||||
|
failed_fetches = total_fetches - successful_fetches
|
||||||
|
logger.info("\n--- Fetch Summary ---")
|
||||||
|
logger.info(f"Total info.json fetch attempts: {total_fetches}")
|
||||||
|
logger.info(f" - Successful: {successful_fetches}")
|
||||||
|
logger.info(f" - Failed: {failed_fetches}")
|
||||||
|
if total_fetches > 0:
|
||||||
|
success_rate = (successful_fetches / total_fetches) * 100
|
||||||
|
logger.info(f"Success rate: {success_rate:.2f}%")
|
||||||
|
if failed_fetches > 0:
|
||||||
|
error_counts = collections.Counter(e.get('error_type', 'Unknown') for e in fetch_events if not e['success'])
|
||||||
|
logger.info("Failure breakdown:")
|
||||||
|
for error_type, count in sorted(error_counts.items()):
|
||||||
|
logger.info(f" - {error_type}: {count}")
|
||||||
|
|
||||||
|
if download_events:
|
||||||
|
total_attempts = len(download_events)
|
||||||
|
successes = sum(1 for e in download_events if e['success'])
|
||||||
|
failures = total_attempts - successes
|
||||||
|
|
||||||
|
logger.info("\n--- Download Summary ---")
|
||||||
|
logger.info(f"Total download attempts: {total_attempts}")
|
||||||
|
logger.info(f" - Successful: {successes}")
|
||||||
|
logger.info(f" - Failed: {failures}")
|
||||||
|
|
||||||
|
if total_attempts > 0:
|
||||||
|
success_rate = (successes / total_attempts) * 100
|
||||||
|
logger.info(f"Success rate: {success_rate:.2f}%")
|
||||||
|
|
||||||
|
if duration > 1 and total_attempts > 0:
|
||||||
|
dpm = (total_attempts / duration) * 60
|
||||||
|
logger.info(f"Attempt rate: {dpm:.2f} attempts/minute")
|
||||||
|
|
||||||
|
# Download volume stats
|
||||||
|
total_bytes = sum(e.get('downloaded_bytes', 0) for e in download_events if e['success'])
|
||||||
|
if total_bytes > 0:
|
||||||
|
logger.info(f"Total data downloaded: {format_size(total_bytes)}")
|
||||||
|
if duration > 1:
|
||||||
|
bytes_per_second = total_bytes / duration
|
||||||
|
gb_per_hour = (bytes_per_second * 3600) / (1024**3)
|
||||||
|
gb_per_day = gb_per_hour * 24
|
||||||
|
logger.info(f"Download rate: {gb_per_hour:.3f} GB/hour ({gb_per_day:.3f} GB/day)")
|
||||||
|
|
||||||
|
if failures > 0:
|
||||||
|
error_counts = collections.Counter(e.get('error_type', 'Unknown') for e in download_events if not e['success'])
|
||||||
|
logger.info("Failure breakdown:")
|
||||||
|
for error_type, count in sorted(error_counts.items()):
|
||||||
|
logger.info(f" - {error_type}: {count}")
|
||||||
|
|
||||||
|
logger.info("--------------------")
|
||||||
|
|
||||||
|
def print_banner(args, info_jsons=None, urls=None):
|
||||||
|
"""Prints a summary of the test configuration."""
|
||||||
|
logger.info("--- Stress Test Configuration ---")
|
||||||
|
if args.urls_file:
|
||||||
|
if args.fetch_only:
|
||||||
|
logger.info(f"Mode: Fetch-only. Generating info.json files from URL list.")
|
||||||
|
else:
|
||||||
|
logger.info(f"Mode: Full-stack test from URL list.")
|
||||||
|
logger.info(f"URL file: {args.urls_file} ({len(urls)} URLs)")
|
||||||
|
logger.info(f"Workers: {args.workers}")
|
||||||
|
logger.info(f"Info.json command: {args.info_json_gen_cmd}")
|
||||||
|
if args.info_json_gen_cmd_alt and args.alt_cmd_every_n > 0:
|
||||||
|
logger.info(f"Alternate command (every {args.alt_cmd_every_n} URLs): {args.info_json_gen_cmd_alt}")
|
||||||
|
if args.profile_prefix:
|
||||||
|
if args.profile_pool:
|
||||||
|
logger.info(f"Profile mode: Pool of {args.profile_pool} (prefix: {args.profile_prefix})")
|
||||||
|
elif args.profile_per_request:
|
||||||
|
logger.info(f"Profile mode: New profile per request (prefix: {args.profile_prefix})")
|
||||||
|
else: # info-json-files
|
||||||
|
logger.info(f"Mode: Download-only from static info.json files.")
|
||||||
|
if info_jsons:
|
||||||
|
logger.info(f"Files: {', '.join(str(p.name) for p in info_jsons.keys())}")
|
||||||
|
logger.info(f"Workers: {args.workers}")
|
||||||
|
|
||||||
|
logger.info(f"Format selection: {args.format}")
|
||||||
|
logger.info(f"Sleep between cycles: {args.sleep}s")
|
||||||
|
if args.sleep_formats > 0:
|
||||||
|
logger.info(f"Sleep between formats: {args.sleep_formats}s")
|
||||||
|
if args.duration > 0:
|
||||||
|
logger.info(f"Test duration: {args.duration} minutes")
|
||||||
|
if args.max_attempts > 0:
|
||||||
|
logger.info(f"Max cycles: {args.max_attempts}")
|
||||||
|
logger.info(f"Stop on failure: {args.stop_on_failure}")
|
||||||
|
if args.stop_on_403:
|
||||||
|
logger.info(f"Stop on 403 error: True")
|
||||||
|
if args.stop_on_timeout:
|
||||||
|
logger.info(f"Stop on timeout: True")
|
||||||
|
logger.info(f"Stats file: {args.stats_file}")
|
||||||
|
if args.stats_interval > 0:
|
||||||
|
logger.info(f"Periodic stats interval: {args.stats_interval}s")
|
||||||
|
if args.format_download_args:
|
||||||
|
logger.info(f"Extra download args: {args.format_download_args}")
|
||||||
|
logger.info("Download volume: Tracking total data downloaded")
|
||||||
|
logger.info("---------------------------------")
|
||||||
|
|
||||||
|
def add_stress_formats_parser(subparsers):
|
||||||
|
"""Add the parser for the 'stress-formats' command."""
|
||||||
|
parser = subparsers.add_parser(
|
||||||
|
'stress-formats',
|
||||||
|
description="A simple, command-line driven stress-testing tool for basic scenarios.\nAll options are configured via flags. For more complex scenarios and advanced\nfeatures like rate limiting and client rotation, use the 'stress-policy' command.",
|
||||||
|
formatter_class=argparse.RawTextHelpFormatter,
|
||||||
|
help='Run simple, flag-driven stress tests.',
|
||||||
|
epilog="""
|
||||||
|
Usage examples:
|
||||||
|
|
||||||
|
# Test a format from a static info.json every 60 seconds
|
||||||
|
ytops-client stress-formats --info-json-files my_video.json -f 18 --sleep 60
|
||||||
|
|
||||||
|
# Test with multiple info.json files in parallel using 4 workers
|
||||||
|
ytops-client stress-formats --info-json-files "file1.json,file2.json,file3.json" -f 18 --sleep 60 --workers 4
|
||||||
|
|
||||||
|
# Fetch a new info.json for a URL and test a format every 5 minutes
|
||||||
|
ytops-client stress-formats --urls-file urls.txt --info-json-gen-cmd "bin/ytops-client get-info {url}" -f "18" --sleep 300
|
||||||
|
|
||||||
|
# Run the test for exactly 10 cycles, continuing on failure
|
||||||
|
ytops-client stress-formats --info-json-files my_video.json -f 18 --sleep 10 --max-attempts 10 --no-stop-on-failure
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
source_group = parser.add_mutually_exclusive_group(required=True)
|
||||||
|
source_group.add_argument('--info-json-files', help='Comma-separated paths to static info.json files to use for testing.')
|
||||||
|
source_group.add_argument('--urls-file', help='Path to a file with URLs/IDs to test. Can be a text file (one per line) or a JSON array of strings.')
|
||||||
|
|
||||||
|
parser.add_argument('-f', '--format', help='The format selection string. Can be a comma-separated list of IDs (e.g., "18,137"), "all", "random:X%%" (e.g., "random:10%%"), or "random_from:ID1,ID2,..." to pick one from a list. Required unless --fetch-only is used.')
|
||||||
|
parser.add_argument('--sleep', type=int, default=60, help='Seconds to wait between batches of download attempts. Default: 60.')
|
||||||
|
parser.add_argument('--sleep-formats', type=int, default=0, help='Seconds to wait between format downloads within a single file/cycle. Default: 0.')
|
||||||
|
parser.add_argument('--max-attempts', type=int, default=0, help='Maximum number of test cycles. 0 means run indefinitely. Default: 0.')
|
||||||
|
parser.add_argument('--duration', type=int, default=0, help='Total duration to run the test in minutes. 0 means run indefinitely (or until max-attempts is reached). Default: 0.')
|
||||||
|
parser.add_argument('--stop-on-failure', action='store_true', help='Stop the test immediately after the first download failure.')
|
||||||
|
parser.add_argument('--no-stop-on-failure', dest='stop_on_failure', action='store_false', help='Continue testing even after a download failure. (Default)')
|
||||||
|
parser.set_defaults(stop_on_failure=False)
|
||||||
|
parser.add_argument('--stop-on-403', action='store_true', help='Stop the test immediately after a 403 Forbidden error.')
|
||||||
|
parser.add_argument('--stop-on-timeout', action='store_true', help='Stop the test immediately after a read timeout error.')
|
||||||
|
|
||||||
|
parser.add_argument('--fetch-only', action='store_true', help='When used with --urls-file, only fetch and save info.json files without performing download tests.')
|
||||||
|
|
||||||
|
parser.add_argument('--workers', type=int, default=1, help='Number of parallel workers for multi-file mode. Default: 1.')
|
||||||
|
parser.add_argument('--stats-file', default='stress_test_stats.jsonl', help='File to log statistics for each attempt. Default: stress_test_stats.jsonl')
|
||||||
|
parser.add_argument('--stats-interval', type=int, default=0, help='Interval in seconds to print stats summary periodically. 0 disables. Default: 0.')
|
||||||
|
|
||||||
|
# Arguments for info.json generation
|
||||||
|
parser.add_argument('--info-json-gen-cmd', help='Command template to generate info.json. Use {url}, {worker_id}, {cycle}, and {profile} as placeholders. Required with --urls-file.')
|
||||||
|
parser.add_argument('--info-json-gen-cmd-alt', help='Alternate command template for info.json generation.')
|
||||||
|
parser.add_argument('--alt-cmd-every-n', type=int, default=0, help='Use the alternate command for every N-th URL (e.g., N=3 means URLs 3, 6, 9...). Requires --info-json-gen-cmd-alt.')
|
||||||
|
|
||||||
|
# Profile generation options
|
||||||
|
profile_group = parser.add_argument_group('Profile Generation Options (for --urls-file mode)')
|
||||||
|
profile_group.add_argument('--profile-prefix', help='Base name for generated profile IDs (e.g., "test_user"). Used with --profile-pool or --profile-per-request.')
|
||||||
|
profile_group.add_argument('--profile-pool', type=int, metavar='N', help='Use a pool of N profiles. Profile ID will be {prefix}_{worker_id %% N}. Requires --profile-prefix.')
|
||||||
|
profile_group.add_argument('--profile-per-request', action='store_true', help='Generate a new unique profile ID for each request. Profile ID will be {prefix}_{timestamp}_{worker_id}. Requires --profile-prefix.')
|
||||||
|
|
||||||
|
# Arguments to pass to format_download.py
|
||||||
|
parser.add_argument('--format-download-args', nargs='+', help='Additional arguments to pass to the download tool. E.g., --proxy-rename s/old/new/ --cleanup')
|
||||||
|
|
||||||
|
parser.add_argument('--verbose', action='store_true', help='Enable verbose output.')
|
||||||
|
return parser
|
||||||
|
|
||||||
|
def run_command(cmd, input_data=None):
|
||||||
|
"""Runs a command, captures its output, and returns status."""
|
||||||
|
logger.debug(f"Running command: {' '.join(cmd)}")
|
||||||
|
try:
|
||||||
|
process = subprocess.Popen(
|
||||||
|
cmd,
|
||||||
|
stdin=subprocess.PIPE if input_data else None,
|
||||||
|
stdout=subprocess.PIPE,
|
||||||
|
stderr=subprocess.PIPE,
|
||||||
|
text=True,
|
||||||
|
encoding='utf-8'
|
||||||
|
)
|
||||||
|
stdout, stderr = process.communicate(input=input_data)
|
||||||
|
return process.returncode, stdout, stderr
|
||||||
|
except FileNotFoundError:
|
||||||
|
logger.error(f"Command not found: {cmd[0]}. Make sure it's in your PATH.")
|
||||||
|
return -1, "", f"Command not found: {cmd[0]}"
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"An error occurred while running command: {' '.join(cmd)}. Error: {e}")
|
||||||
|
return -1, "", str(e)
|
||||||
|
|
||||||
|
def run_download_worker(info_json_path, info_json_content, format_to_download, args):
|
||||||
|
"""
|
||||||
|
Performs a single download attempt. Designed to be run in a worker thread.
|
||||||
|
"""
|
||||||
|
# 1. Attempt download
|
||||||
|
download_cmd = [
|
||||||
|
sys.executable, '-m', 'ytops_client.cli', 'download',
|
||||||
|
'-f', format_to_download
|
||||||
|
]
|
||||||
|
if args.format_download_args:
|
||||||
|
# with nargs='+', this is a list.
|
||||||
|
# If it's one item, it might be a single quoted string of args that needs splitting.
|
||||||
|
if len(args.format_download_args) == 1:
|
||||||
|
download_cmd.extend(shlex.split(args.format_download_args[0]))
|
||||||
|
else:
|
||||||
|
# multiple items, assume they are already split by shell
|
||||||
|
download_cmd.extend(args.format_download_args)
|
||||||
|
|
||||||
|
display_name = get_display_name(info_json_path)
|
||||||
|
logger.info(f"[{display_name} @ {format_to_download}] Kicking off download process...")
|
||||||
|
retcode, stdout, stderr = run_command(download_cmd, input_data=info_json_content)
|
||||||
|
|
||||||
|
# 2. Check result
|
||||||
|
is_403_error = "HTTP Error 403" in stderr
|
||||||
|
is_timeout_error = "Read timed out" in stderr
|
||||||
|
|
||||||
|
result = {
|
||||||
|
'type': 'download',
|
||||||
|
'path': str(info_json_path),
|
||||||
|
'format': format_to_download,
|
||||||
|
'success': retcode == 0,
|
||||||
|
'error_type': None,
|
||||||
|
'details': '',
|
||||||
|
'downloaded_bytes': 0
|
||||||
|
}
|
||||||
|
|
||||||
|
if retcode == 0:
|
||||||
|
# Success
|
||||||
|
downloaded_filepath = ''
|
||||||
|
# The filename is the last non-empty line of stdout that doesn't look like a progress bar
|
||||||
|
lines = stdout.splitlines()
|
||||||
|
for line in reversed(lines):
|
||||||
|
if line and not line.strip().startswith('['):
|
||||||
|
downloaded_filepath = line.strip()
|
||||||
|
break
|
||||||
|
|
||||||
|
details_str = "OK"
|
||||||
|
if downloaded_filepath:
|
||||||
|
details_str = f"Downloaded: {Path(downloaded_filepath).name}"
|
||||||
|
|
||||||
|
# Parse download size from stderr
|
||||||
|
size_in_bytes = 0
|
||||||
|
size_match = re.search(r'\[download\]\s+100%\s+of\s+~?([0-9.]+)(B|KiB|MiB|GiB)', stderr)
|
||||||
|
if size_match:
|
||||||
|
value = float(size_match.group(1))
|
||||||
|
unit = size_match.group(2)
|
||||||
|
multipliers = {"B": 1, "KiB": 1024, "MiB": 1024**2, "GiB": 1024**3}
|
||||||
|
size_in_bytes = int(value * multipliers.get(unit, 1))
|
||||||
|
result['downloaded_bytes'] = size_in_bytes
|
||||||
|
details_str += f" ({size_match.group(1)}{unit})"
|
||||||
|
|
||||||
|
result['details'] = details_str
|
||||||
|
else:
|
||||||
|
# Failure
|
||||||
|
# Try to get the most relevant error line
|
||||||
|
error_lines = [line for line in stderr.strip().split('\n') if 'ERROR:' in line]
|
||||||
|
if error_lines:
|
||||||
|
result['details'] = error_lines[-1]
|
||||||
|
else:
|
||||||
|
# If no "ERROR:" line, use the last few lines of stderr for context.
|
||||||
|
last_lines = stderr.strip().split('\n')[-3:] # Get up to last 3 lines
|
||||||
|
result['details'] = ' | '.join(line.strip() for line in last_lines if line.strip())
|
||||||
|
if not result['details']:
|
||||||
|
result['details'] = "Unknown error (stderr was empty)"
|
||||||
|
|
||||||
|
if is_403_error:
|
||||||
|
result['error_type'] = 'HTTP 403'
|
||||||
|
elif is_timeout_error:
|
||||||
|
result['error_type'] = 'Timeout'
|
||||||
|
else:
|
||||||
|
result['error_type'] = f'Exit Code {retcode}'
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def process_info_json_cycle(path, content, args, stats):
|
||||||
|
"""
|
||||||
|
Processes one info.json file for one cycle, downloading selected formats sequentially.
|
||||||
|
Logs events and returns a list of results.
|
||||||
|
"""
|
||||||
|
results = []
|
||||||
|
should_stop_file = False
|
||||||
|
display_name = get_display_name(path)
|
||||||
|
|
||||||
|
# Determine formats to test based on the info.json content
|
||||||
|
try:
|
||||||
|
info_data = json.loads(content)
|
||||||
|
available_formats = info_data.get('formats', [])
|
||||||
|
if not available_formats:
|
||||||
|
logger.warning(f"[{display_name}] No formats found in info.json. Skipping.")
|
||||||
|
return []
|
||||||
|
|
||||||
|
available_format_ids = [f['format_id'] for f in available_formats]
|
||||||
|
|
||||||
|
formats_to_test = []
|
||||||
|
format_selection_mode = args.format.lower()
|
||||||
|
|
||||||
|
if format_selection_mode == 'all':
|
||||||
|
formats_to_test = available_format_ids
|
||||||
|
logger.info(f"[{display_name}] Testing all {len(formats_to_test)} available formats.")
|
||||||
|
elif format_selection_mode.startswith('random:'):
|
||||||
|
try:
|
||||||
|
percent_str = format_selection_mode.split(':')[1].rstrip('%')
|
||||||
|
percent = float(percent_str)
|
||||||
|
if not (0 < percent <= 100):
|
||||||
|
raise ValueError("Percentage must be between 0 and 100.")
|
||||||
|
|
||||||
|
count = max(1, int(len(available_format_ids) * (percent / 100.0)))
|
||||||
|
formats_to_test = random.sample(available_format_ids, k=count)
|
||||||
|
logger.info(f"[{display_name}] Randomly selected {len(formats_to_test)} formats ({percent}%) from all available to test: {', '.join(formats_to_test)}")
|
||||||
|
except (ValueError, IndexError) as e:
|
||||||
|
logger.error(f"[{display_name}] Invalid random format selection '{args.format}': {e}. Skipping.")
|
||||||
|
return []
|
||||||
|
elif format_selection_mode.startswith('random_from:'):
|
||||||
|
try:
|
||||||
|
choices_str = format_selection_mode.split(':', 1)[1]
|
||||||
|
if not choices_str:
|
||||||
|
raise ValueError("No formats provided after 'random_from:'.")
|
||||||
|
|
||||||
|
format_choices = [f.strip() for f in choices_str.split(',') if f.strip()]
|
||||||
|
|
||||||
|
# Filter the choices to only those available in the current info.json
|
||||||
|
valid_choices = [f for f in format_choices if f in available_format_ids]
|
||||||
|
|
||||||
|
if not valid_choices:
|
||||||
|
logger.warning(f"[{display_name}] None of the requested formats for random selection ({', '.join(format_choices)}) are available. Skipping.")
|
||||||
|
return []
|
||||||
|
|
||||||
|
formats_to_test = [random.choice(valid_choices)]
|
||||||
|
logger.info(f"[{display_name}] Randomly selected 1 format from your list to test: {formats_to_test[0]}")
|
||||||
|
except (ValueError, IndexError) as e:
|
||||||
|
logger.error(f"[{display_name}] Invalid random_from format selection '{args.format}': {e}. Skipping.")
|
||||||
|
return []
|
||||||
|
else:
|
||||||
|
# Standard comma-separated list
|
||||||
|
requested_formats = [f.strip() for f in args.format.split(',') if f.strip()]
|
||||||
|
formats_to_test = []
|
||||||
|
for req_fmt in requested_formats:
|
||||||
|
# Check for exact match first
|
||||||
|
if req_fmt in available_format_ids:
|
||||||
|
formats_to_test.append(req_fmt)
|
||||||
|
continue
|
||||||
|
|
||||||
|
# If no exact match, check for formats that start with this ID + '-'
|
||||||
|
# e.g., req_fmt '140' should match '140-0'
|
||||||
|
prefix_match = f"{req_fmt}-"
|
||||||
|
first_match = next((af for af in available_format_ids if af.startswith(prefix_match)), None)
|
||||||
|
|
||||||
|
if first_match:
|
||||||
|
logger.info(f"[{display_name}] Requested format '{req_fmt}' not found. Using first available match: '{first_match}'.")
|
||||||
|
formats_to_test.append(first_match)
|
||||||
|
else:
|
||||||
|
# This could be a complex selector like 'bestvideo' or '299/298', so keep it.
|
||||||
|
if req_fmt not in available_format_ids:
|
||||||
|
logger.warning(f"[{display_name}] Requested format '{req_fmt}' not found in available formats.")
|
||||||
|
formats_to_test.append(req_fmt)
|
||||||
|
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
logger.error(f"[{display_name}] Failed to parse info.json. Skipping.")
|
||||||
|
return []
|
||||||
|
|
||||||
|
for i, format_id in enumerate(formats_to_test):
|
||||||
|
if should_stop_file:
|
||||||
|
break
|
||||||
|
|
||||||
|
# Check if the format URL is expired before attempting to download
|
||||||
|
format_details = next((f for f in available_formats if f.get('format_id') == format_id), None)
|
||||||
|
if format_details and 'url' in format_details:
|
||||||
|
parsed_url = urlparse(format_details['url'])
|
||||||
|
query_params = parse_qs(parsed_url.query)
|
||||||
|
expire_ts_str = query_params.get('expire', [None])[0]
|
||||||
|
if expire_ts_str and expire_ts_str.isdigit():
|
||||||
|
expire_ts = int(expire_ts_str)
|
||||||
|
if expire_ts < time.time():
|
||||||
|
logger.warning(f"[{display_name}] Skipping format '{format_id}' because its URL is expired.")
|
||||||
|
result = {
|
||||||
|
'type': 'download', 'path': str(path), 'format': format_id,
|
||||||
|
'success': True, 'error_type': 'Skipped',
|
||||||
|
'details': 'Download URL is expired', 'downloaded_bytes': 0
|
||||||
|
}
|
||||||
|
stats.log_event(result)
|
||||||
|
results.append(result)
|
||||||
|
continue # Move to the next format
|
||||||
|
|
||||||
|
result = run_download_worker(path, content, format_id, args)
|
||||||
|
stats.log_event(result)
|
||||||
|
results.append(result)
|
||||||
|
|
||||||
|
status = "SUCCESS" if result['success'] else f"FAILURE ({result['error_type']})"
|
||||||
|
logger.info(f"Result for {display_name} (format {format_id}): {status} - {result.get('details', 'OK')}")
|
||||||
|
|
||||||
|
if not result['success']:
|
||||||
|
# This flag stops processing more formats for THIS file in this cycle
|
||||||
|
# The main loop will decide if all cycles should stop.
|
||||||
|
if args.stop_on_failure or \
|
||||||
|
(args.stop_on_403 and result['error_type'] == 'HTTP 403') or \
|
||||||
|
(args.stop_on_timeout and result['error_type'] == 'Timeout'):
|
||||||
|
logger.info(f"Stopping further format tests for {display_name} in this cycle due to failure.")
|
||||||
|
should_stop_file = True
|
||||||
|
|
||||||
|
# Sleep between formats if needed
|
||||||
|
if args.sleep_formats > 0 and i < len(formats_to_test) - 1:
|
||||||
|
logger.info(f"Sleeping for {args.sleep_formats}s before next format for {display_name}...")
|
||||||
|
time.sleep(args.sleep_formats)
|
||||||
|
|
||||||
|
return results
|
||||||
|
|
||||||
|
|
||||||
|
def main_stress_formats(args):
|
||||||
|
"""Main logic for the 'stress-formats' command."""
|
||||||
|
# The --format argument is required unless we are only fetching info.json files.
|
||||||
|
if not args.fetch_only and not args.format:
|
||||||
|
logger.error("Error: argument -f/--format is required when not using --fetch-only.")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
if (args.profile_pool or args.profile_per_request) and not args.profile_prefix:
|
||||||
|
logger.error("--profile-prefix is required when using --profile-pool or --profile-per-request.")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
if args.urls_file and args.fetch_only and not args.info_json_gen_cmd:
|
||||||
|
logger.error("--info-json-gen-cmd is required when using --urls-file with --fetch-only.")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
if args.verbose:
|
||||||
|
logging.getLogger().setLevel(logging.DEBUG)
|
||||||
|
else:
|
||||||
|
# Make the default logger more concise for test output
|
||||||
|
for handler in logging.root.handlers:
|
||||||
|
handler.setFormatter(logging.Formatter('%(asctime)s - %(message)s', datefmt='%H:%M:%S'))
|
||||||
|
|
||||||
|
stats = StatsTracker(args.stats_file)
|
||||||
|
start_time = time.time()
|
||||||
|
duration_seconds = args.duration * 60 if args.duration > 0 else 0
|
||||||
|
|
||||||
|
# --- Load sources ---
|
||||||
|
info_jsons = {}
|
||||||
|
urls = []
|
||||||
|
if args.info_json_files:
|
||||||
|
info_json_files = [Path(p.strip()) for p in args.info_json_files.split(',')]
|
||||||
|
for file_path in info_json_files:
|
||||||
|
if not file_path.is_file():
|
||||||
|
logger.error(f"Info.json file not found: {file_path}")
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
with open(file_path, 'r', encoding='utf-8') as f:
|
||||||
|
info_jsons[file_path] = f.read()
|
||||||
|
except (IOError, json.JSONDecodeError) as e:
|
||||||
|
logger.error(f"Failed to read or parse {file_path}: {e}")
|
||||||
|
|
||||||
|
if not info_jsons:
|
||||||
|
logger.error("No valid info.json files to process. Exiting.")
|
||||||
|
return 1
|
||||||
|
logger.info(f"Loaded {len(info_jsons)} info.json file(s).")
|
||||||
|
print_banner(args, info_jsons=info_jsons)
|
||||||
|
|
||||||
|
elif args.urls_file:
|
||||||
|
if not args.info_json_gen_cmd:
|
||||||
|
logger.error("--info-json-gen-cmd is required when using --urls-file.")
|
||||||
|
return 1
|
||||||
|
try:
|
||||||
|
with open(args.urls_file, 'r', encoding='utf-8') as f:
|
||||||
|
content = f.read()
|
||||||
|
# Try parsing as JSON array first
|
||||||
|
try:
|
||||||
|
data = json.loads(content)
|
||||||
|
if isinstance(data, list) and all(isinstance(item, str) for item in data):
|
||||||
|
urls = data
|
||||||
|
logger.info(f"Loaded {len(urls)} URLs/IDs from JSON array in {args.urls_file}.")
|
||||||
|
else:
|
||||||
|
# Valid JSON, but not a list of strings. Treat as error to avoid confusion.
|
||||||
|
logger.error(f"URL file '{args.urls_file}' is valid JSON but not an array of strings.")
|
||||||
|
return 1
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
# Fallback to line-by-line parsing for plain text files
|
||||||
|
urls = [line.strip() for line in content.splitlines() if line.strip()]
|
||||||
|
logger.info(f"Loaded {len(urls)} URLs/IDs from text file {args.urls_file}.")
|
||||||
|
|
||||||
|
if not urls:
|
||||||
|
logger.error(f"URL file '{args.urls_file}' is empty or contains no valid URLs/IDs.")
|
||||||
|
return 1
|
||||||
|
except IOError as e:
|
||||||
|
logger.error(f"Failed to read URL file {args.urls_file}: {e}")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
# Clean up URLs/IDs which might have extra quotes, commas, or brackets from copy-pasting
|
||||||
|
cleaned_urls = []
|
||||||
|
for url in urls:
|
||||||
|
# Strip whitespace, then trailing comma, then surrounding junk, then whitespace again
|
||||||
|
cleaned_url = url.strip().rstrip(',').strip().strip('\'"[]').strip()
|
||||||
|
if cleaned_url:
|
||||||
|
cleaned_urls.append(cleaned_url)
|
||||||
|
|
||||||
|
if len(cleaned_urls) != len(urls):
|
||||||
|
logger.info(f"Cleaned URL list, removed {len(urls) - len(cleaned_urls)} empty or invalid entries.")
|
||||||
|
|
||||||
|
urls = cleaned_urls
|
||||||
|
if not urls:
|
||||||
|
logger.error("URL list is empty after cleaning. Exiting.")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
print_banner(args, urls=urls)
|
||||||
|
|
||||||
|
# --- Main test loop ---
|
||||||
|
cycles = 0
|
||||||
|
last_stats_print_time = time.time()
|
||||||
|
try:
|
||||||
|
# --- Worker function for URL mode ---
|
||||||
|
def process_url_task(url, url_index, cycle_num):
|
||||||
|
"""Worker to generate info.json for a URL and then test formats."""
|
||||||
|
# 1. Generate profile name if configured
|
||||||
|
profile_name = None
|
||||||
|
if args.profile_prefix:
|
||||||
|
if args.profile_pool:
|
||||||
|
profile_name = f"{args.profile_prefix}_{url_index % args.profile_pool}"
|
||||||
|
elif args.profile_per_request:
|
||||||
|
timestamp = datetime.now().strftime('%Y%m%d%H%M%S')
|
||||||
|
profile_name = f"{args.profile_prefix}_{timestamp}_{url_index}"
|
||||||
|
|
||||||
|
# 2. Select and format the generation command
|
||||||
|
gen_cmd_template = args.info_json_gen_cmd
|
||||||
|
if args.alt_cmd_every_n > 0 and args.info_json_gen_cmd_alt and (url_index + 1) % args.alt_cmd_every_n == 0:
|
||||||
|
gen_cmd_template = args.info_json_gen_cmd_alt
|
||||||
|
logger.info(f"Using alternate command for URL #{url_index + 1}: {url}")
|
||||||
|
|
||||||
|
try:
|
||||||
|
# shlex.split handles quoted arguments in the template
|
||||||
|
video_id = get_video_id(url)
|
||||||
|
gen_cmd = []
|
||||||
|
template_args = shlex.split(gen_cmd_template)
|
||||||
|
|
||||||
|
# If the video ID could be mistaken for an option, and it appears to be
|
||||||
|
# a positional argument, insert '--' to prevent misinterpretation.
|
||||||
|
if video_id.startswith('-'):
|
||||||
|
try:
|
||||||
|
# Heuristic: if {url} is the last token, it's likely positional.
|
||||||
|
if template_args and template_args[-1] == '{url}':
|
||||||
|
template_args.insert(-1, '--')
|
||||||
|
except (ValueError, IndexError):
|
||||||
|
pass # {url} not found or list is empty.
|
||||||
|
|
||||||
|
for arg in template_args:
|
||||||
|
# Replace placeholders
|
||||||
|
formatted_arg = arg.replace('{url}', video_id) \
|
||||||
|
.replace('{worker_id}', str(url_index)) \
|
||||||
|
.replace('{cycle}', str(cycle_num))
|
||||||
|
if profile_name:
|
||||||
|
formatted_arg = formatted_arg.replace('{profile}', profile_name)
|
||||||
|
gen_cmd.append(formatted_arg)
|
||||||
|
|
||||||
|
# Pass verbose flag through if set
|
||||||
|
if args.verbose and 'get_info_json_client.py' in gen_cmd_template and '--verbose' not in gen_cmd_template:
|
||||||
|
gen_cmd.append('--verbose')
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Failed to format --info-json-gen-cmd: {e}")
|
||||||
|
stats.log_event({'path': url, 'success': False, 'error_type': 'BadGenCmd', 'details': 'Cmd format error'})
|
||||||
|
return []

            # 3. Run command to get info.json
            log_msg = f"[{url}] Generating info.json"
            if profile_name:
                log_msg += f" with profile '{profile_name}'"
            log_msg += "..."
            logger.info(log_msg)

            retcode, stdout, stderr = run_command(gen_cmd)
            if retcode != 0:
                error_msg = stderr.strip().split('\n')[-1]
                logger.error(f"[{url}] Failed to generate info.json: {error_msg}")
                event = {'type': 'fetch', 'path': url, 'success': False, 'error_type': 'GetInfoJsonFail', 'details': error_msg}
                stats.log_event(event)
                return []  # Return empty list, as no formats were tested

            # Handle --fetch-only
            if args.fetch_only:
                logger.info(f"[{url}] Successfully fetched info.json. Skipping download due to --fetch-only.")
                event = {'type': 'fetch', 'path': url, 'success': True, 'details': 'OK'}
                stats.log_event(event)
                return []  # Return empty list, indicating no downloads to check for failure

            # 4. Pass to the format processing function
            return process_info_json_cycle(url, stdout, args, stats)
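            # Note added for clarity (not in the original diff): the list returned here is expected to
            # hold one result dict per tested format; the cycle loop below reads each entry's
            # 'success', 'format' and 'error_type' keys to decide whether to stop the run early.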

        while True:
            if duration_seconds and (time.time() - start_time) > duration_seconds:
                logger.info(f"Reached duration limit of {args.duration} minutes. Stopping.")
                break
            cycles += 1
            if args.max_attempts > 0 and cycles > args.max_attempts:
                logger.info(f"Reached max cycles ({args.max_attempts}). Stopping.")
                break

            logger.info(f"--- Cycle #{cycles} ---")

            with concurrent.futures.ThreadPoolExecutor(max_workers=args.workers) as executor:
                future_to_identifier = {}
                if args.info_json_files:
                    future_to_identifier = {
                        executor.submit(process_info_json_cycle, path, content, args, stats): path
                        for path, content in info_jsons.items()
                    }
                elif args.urls_file:
                    future_to_identifier = {
                        executor.submit(process_url_task, url, i, cycles): url
                        for i, url in enumerate(urls)
                    }
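                # Clarifying comment (not in the original diff): this dict maps each submitted future
                # back to the info.json path or URL it was created for, so completed futures can be
                # reported under a human-readable identifier in the result loop below.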

                should_stop = False

                # Use a set of futures that we can modify while iterating
                futures = set(future_to_identifier.keys())

                while futures and not should_stop:
                    # Wait for the next future to complete
                    done, futures = concurrent.futures.wait(futures, return_when=concurrent.futures.FIRST_COMPLETED)
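                    # Clarifying comment (not in the original diff): concurrent.futures.wait() returns a
                    # (done, not_done) pair, so reassigning `futures` here keeps only the still-pending
                    # tasks for the next iteration.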

                    for future in done:
                        identifier = future_to_identifier[future]
                        identifier_name = get_display_name(identifier)
                        try:
                            results = future.result()
                            # Check if any result from this file triggers a global stop
                            for result in results:
                                if not result['success']:
                                    if args.stop_on_failure:
                                        logger.info(f"Failure on {identifier_name} (format {result['format']}). Shutting down all workers due to --stop-on-failure.")
                                        should_stop = True
                                    elif args.stop_on_403 and result['error_type'] == 'HTTP 403':
                                        logger.info(f"403 error on {identifier_name} (format {result['format']}). Shutting down all workers due to --stop-on-403.")
                                        should_stop = True
                                    elif args.stop_on_timeout and result['error_type'] == 'Timeout':
                                        logger.info(f"Timeout on {identifier_name} (format {result['format']}). Shutting down all workers due to --stop-on-timeout.")
                                        should_stop = True
                        except Exception as exc:
                            logger.error(f'{identifier_name} generated an exception: {exc}')
                            stats.log_event({'path': str(identifier), 'success': False, 'error_type': 'Exception', 'details': str(exc)})

                        if should_stop:
                            break  # Stop processing results from 'done' set

                    # Check for duration limit after each batch of tasks completes
                    if duration_seconds and (time.time() - start_time) > duration_seconds:
                        logger.info(f"Reached duration limit of {args.duration} minutes. Cancelling remaining tasks.")
                        should_stop = True

                # If the loop was exited, cancel any remaining tasks
                if should_stop and futures:
                    logger.info(f"Cancelling {len(futures)} outstanding task(s).")
                    for future in futures:
                        future.cancel()
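                    # Clarifying comment (not in the original diff): Future.cancel() only prevents tasks
                    # that have not started yet; tasks already running in the thread pool will finish
                    # their current URL/format before the executor shuts down.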

            if should_stop:
                break

            if args.stats_interval > 0 and (time.time() - last_stats_print_time) >= args.stats_interval:
                stats.print_summary()
                last_stats_print_time = time.time()

            if args.max_attempts > 0 and cycles >= args.max_attempts:
                break

            logger.info(f"Cycle complete. Sleeping for {args.sleep} seconds...")

            # Interruptible sleep that respects the total test duration
            sleep_end_time = time.time() + args.sleep
            should_stop_after_sleep = False
            while time.time() < sleep_end_time:
                if duration_seconds and (time.time() - start_time) >= duration_seconds:
                    logger.info(f"Reached duration limit of {args.duration} minutes during sleep. Stopping.")
                    should_stop_after_sleep = True
                    break
                time.sleep(1)  # Check every second

            if should_stop_after_sleep:
                break

    except KeyboardInterrupt:
        logger.info("\nCtrl+C received, shutting down...")
    finally:
        stats.print_summary()
        stats.close()

    return 0 if not any(not e['success'] for e in stats.events) else 1
2420  ytops_client/stress_policy_tool.py  Normal file
File diff suppressed because it is too large. Load Diff