Local changes covering multiple fixes: skip URLs with bad formats, allow sending downloads to the aria2c service, adopt ban CLI policy testing, and pass through language and headers from Airflow DAGs when needed

This commit is contained in:
aperez 2025-12-01 20:27:50 +03:00
parent 302282365e
commit 336438d4cc
40 changed files with 2848 additions and 1293 deletions

View File

@ -0,0 +1,126 @@
# _ _ ____ ____
# / \ _ __(_) __ _|___ \ | _ \ _ __ ___
# / _ \ | '__| |/ _` | __) | | |_) | '__/ _ \
# / ___ \| | | | (_| |/ __/ | __/| | | (_) |
# /_/ \_\_| |_|\__,_|_____| |_| |_| \___/
#
# https://github.com/P3TERX/Aria2-Pro-Docker
#
# Copyright (c) 2020-2021 P3TERX <https://p3terx.com>
#
# This is free software, licensed under the MIT License.
# See /LICENSE for more information.
# Using Debian Bullseye as a more stable base than EOL Alpine
FROM debian:bullseye-slim
# Install s6-overlay and build aria2 in a single layer to reduce image size
# renovate: datasource=github-releases depName=just-containers/s6-overlay
ARG S6_OVERLAY_VERSION=v3.1.6.2
RUN BUILD_DEPS=" \
build-essential \
autoconf \
automake \
autotools-dev \
libtool \
pkg-config \
git \
gettext \
autopoint \
gettext-base \
libssl-dev \
libssh2-1-dev \
libc-ares-dev \
libexpat1-dev \
vim \
libexpat1 \
zlib1g-dev \
libsqlite3-dev \
" && \
apt-get update && \
apt-get install -y --no-install-recommends \
jq \
findutils \
ca-certificates \
curl \
xz-utils \
dos2unix \
$BUILD_DEPS && \
curl -sSL https://github.com/just-containers/s6-overlay/releases/download/${S6_OVERLAY_VERSION}/s6-overlay-noarch.tar.xz -o /tmp/s6-overlay-noarch.tar.xz && \
curl -sSL https://github.com/just-containers/s6-overlay/releases/download/${S6_OVERLAY_VERSION}/s6-overlay-x86_64.tar.xz -o /tmp/s6-overlay-x86_64.tar.xz && \
tar -C / -Jxpf /tmp/s6-overlay-noarch.tar.xz && \
tar -C / -Jxpf /tmp/s6-overlay-x86_64.tar.xz && \
git clone https://github.com/aria2/aria2.git /tmp/aria2 && \
cd /tmp/aria2 && \
git checkout 8985d66e71f980e7d2765753800078f47761f1ba && \
sed -i "s/\"1\", 1, 16, 'x'));/\"1\", 1, 128, 'x'));/" src/OptionHandlerFactory.cc && \
autoreconf -i && \
./configure \
--disable-dependency-tracking \
--enable-static \
--disable-shared \
--with-ca-bundle=/etc/ssl/certs/ca-certificates.crt \
--without-libxml2 \
--with-libexpat \
--without-libgcrypt \
--with-openssl \
--with-libcares \
--with-libsqlite3 \
--with-libssh2 \
--with-zlib && \
make -j$(nproc) && \
make install && \
cd / && \
# Build deps are intentionally not purged here (skipping: apt-get purge -y --auto-remove $BUILD_DEPS) so the toolchain stays available at runtime && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* /tmp/*
COPY rootfs /
RUN find /etc/cont-init.d /etc/services.d -type f -exec dos2unix {} + && \
find /etc/cont-init.d /etc/services.d -type f -exec chmod +x {} +
ENV S6_BEHAVIOUR_IF_STAGE2_FAILS=1 \
RCLONE_CONFIG=/config/rclone.conf \
UPDATE_TRACKERS=true \
CUSTOM_TRACKER_URL= \
LISTEN_PORT=6888 \
RPC_PORT=6800 \
RPC_SECRET= \
PUID= PGID= \
DISK_CACHE= \
IPV6_MODE= \
UMASK_SET= \
SPECIAL_MODE=
EXPOSE \
6800 \
6888 \
6888/udp
VOLUME \
/config \
/downloads
#ENTRYPOINT ["/init"]
CMD ["aria2c", \
"--enable-rpc=true", \
"--rpc-listen-all=true", \
"--rpc-listen-port=6800", \
"--listen-port=6888", \
"--disable-ipv6=true", \
"--max-concurrent-downloads=128", \
"--max-connection-per-server=32", \
"--split=6", \
"--min-split-size=2M", \
"--file-allocation=falloc", \
"--continue=false", \
"--check-integrity=false", \
"--log-level=info", \
"--console-log-level=info", \
"--save-session-interval=5", \
"--dir=/downloads", \
"--disk-cache=64M", \
"--input-file=/config/aria2.session", \
"--save-session=/config/aria2.session"]

View File

@ -0,0 +1,17 @@
----------------------------------------------------------------
█████╗ ██████╗ ██╗ █████╗ ██████╗ ██████╗ ██████╗ ██████╗
██╔══██╗██╔══██╗██║██╔══██╗╚════██╗ ██╔══██╗██╔══██╗██╔═══██╗
███████║██████╔╝██║███████║ █████╔╝ ██████╔╝██████╔╝██║ ██║
██╔══██║██╔══██╗██║██╔══██║██╔═══╝ ██╔═══╝ ██╔══██╗██║ ██║
██║ ██║██║ ██║██║██║ ██║███████╗ ██║ ██║ ██║╚██████╔╝
╚═╝ ╚═╝╚═╝ ╚═╝╚═╝╚═╝ ╚═╝╚══════╝ ╚═╝ ╚═╝ ╚═╝ ╚═════╝
https://github.com/P3TERX/Aria2-Pro-Docker
Copyright (c) 2020-2021 P3TERX <https://p3terx.com>
Version: COMMIT_HASH | Build Time: DATE_TIME
----------------------------------------------------------------

View File

@ -0,0 +1,39 @@
#!/usr/bin/with-contenv bash
# _ _ ____ ____
# / \ _ __(_) __ _|___ \ | _ \ _ __ ___
# / _ \ | '__| |/ _` | __) | | |_) | '__/ _ \
# / ___ \| | | | (_| |/ __/ | __/| | | (_) |
# /_/ \_\_| |_|\__,_|_____| |_| |_| \___/
#
# https://github.com/P3TERX/Aria2-Pro-Docker
#
# Copyright (c) 2020-2021 P3TERX <https://p3terx.com>
#
# This is free software, licensed under the MIT License.
# See /LICENSE for more information.
. /etc/init-base
mkdir -p ${ARIA2_CONF_DIR} ${SCRIPT_DIR} ${DOWNLOAD_DIR}
PROFILES="
aria2.conf
"
DOWNLOAD_PROFILE
[[ ! -f "${ARIA2_CONF_DIR}/aria2.session" ]] && {
rm -rf "${ARIA2_CONF_DIR}/aria2.session"
touch "${ARIA2_CONF_DIR}/aria2.session"
}
if ! [[ "${UPDATE_TRACKERS}" = "false" || "${UPDATE_TRACKERS}" = "disable" ]]; then
rm -f /etc/services.d/crond/down
PROFILES="tracker.sh"
DOWNLOAD_PROFILE
bash ${SCRIPT_DIR}/tracker.sh ${ARIA2_CONF}
else
touch /etc/services.d/crond/down
fi
exit 0

View File

@ -0,0 +1,35 @@
#!/usr/bin/with-contenv bash
# _ _ ____ ____
# / \ _ __(_) __ _|___ \ | _ \ _ __ ___
# / _ \ | '__| |/ _` | __) | | |_) | '__/ _ \
# / ___ \| | | | (_| |/ __/ | __/| | | (_) |
# /_/ \_\_| |_|\__,_|_____| |_| |_| \___/
#
# https://github.com/P3TERX/Aria2-Pro-Docker
#
# Copyright (c) 2020-2021 P3TERX <https://p3terx.com>
#
# This is free software, licensed under the MIT License.
# See /LICENSE for more information.
. /etc/init-base
INSTALL_RCLONE() {
if [[ ! -f /usr/local/bin/rclone ]]; then
echo
echo -e "${INFO} Installing RCLONE ..."
[[ -L /usr/bin/unzip ]] && rm -f /usr/bin/unzip
curl -fsSL https://rclone.org/install.sh | bash
fi
}
if [[ "${SPECIAL_MODE}" = "rclone" ]]; then
INSTALL_RCLONE
PROFILES="upload.sh rclone.env"
DOWNLOAD_PROFILE
elif [[ "${SPECIAL_MODE}" = "move" ]]; then
PROFILES="move.sh"
DOWNLOAD_PROFILE
fi
exit 0

View File

@ -0,0 +1,61 @@
#!/usr/bin/with-contenv bash
# _ _ ____ ____
# / \ _ __(_) __ _|___ \ | _ \ _ __ ___
# / _ \ | '__| |/ _` | __) | | |_) | '__/ _ \
# / ___ \| | | | (_| |/ __/ | __/| | | (_) |
# /_/ \_\_| |_|\__,_|_____| |_| |_| \___/
#
# https://github.com/P3TERX/Aria2-Pro-Docker
#
# Copyright (c) 2020-2021 P3TERX <https://p3terx.com>
#
# This is free software, licensed under the MIT License.
# See /LICENSE for more information.
. /etc/init-base
[[ -e ${ARIA2_CONF_DIR}/delete.sh ]] && {
rm -f ${ARIA2_CONF_DIR}/*.sh
sed -i "s@^\(on-download-stop=\).*@\1${SCRIPT_DIR}/delete.sh@" ${ARIA2_CONF}
sed -i "s@^\(on-download-complete=\).*@\1${SCRIPT_DIR}/clean.sh@" ${ARIA2_CONF}
}
sed -i "s@^\(dir=\).*@\1/downloads@" ${ARIA2_CONF}
sed -i "s@^\(input-file=\).*@\1${ARIA2_CONF_DIR}/aria2.session@" ${ARIA2_CONF}
sed -i "s@^\(save-session=\).*@\1${ARIA2_CONF_DIR}/aria2.session@" ${ARIA2_CONF}
sed -i "s@^\(dht-file-path=\).*@\1${ARIA2_CONF_DIR}/dht.dat@" ${ARIA2_CONF}
sed -i "s@^\(dht-file-path6=\).*@\1${ARIA2_CONF_DIR}/dht6.dat@" ${ARIA2_CONF}
[[ -e ${ARIA2_CONF_DIR}/HelloWorld ]] && exit 0
[[ ${RPC_PORT} ]] &&
sed -i "s@^\(rpc-listen-port=\).*@\1${RPC_PORT}@" ${ARIA2_CONF}
[[ ${LISTEN_PORT} ]] && {
sed -i "s@^\(listen-port=\).*@\1${LISTEN_PORT}@" ${ARIA2_CONF}
sed -i "s@^\(dht-listen-port=\).*@\1${LISTEN_PORT}@" ${ARIA2_CONF}
}
[[ ${RPC_SECRET} ]] &&
sed -i "s@^\(rpc-secret=\).*@\1${RPC_SECRET}@" ${ARIA2_CONF}
[[ ${DISK_CACHE} ]] &&
sed -i "s@^\(disk-cache=\).*@\1${DISK_CACHE}@" ${ARIA2_CONF}
[[ "${IPV6_MODE}" = "true" || "${IPV6_MODE}" = "enable" ]] && {
sed -i "s@^\(disable-ipv6=\).*@\1false@" ${ARIA2_CONF}
sed -i "s@^\(enable-dht6=\).*@\1true@" ${ARIA2_CONF}
}
[[ "${IPV6_MODE}" = "false" || "${IPV6_MODE}" = "disable" ]] && {
sed -i "s@^\(disable-ipv6=\).*@\1true@" ${ARIA2_CONF}
sed -i "s@^\(enable-dht6=\).*@\1false@" ${ARIA2_CONF}
}
[[ "${SPECIAL_MODE}" = "rclone" ]] &&
sed -i "s@^\(on-download-complete=\).*@\1${SCRIPT_DIR}/upload.sh@" ${ARIA2_CONF}
[[ "${SPECIAL_MODE}" = "move" ]] &&
sed -i "s@^\(on-download-complete=\).*@\1${SCRIPT_DIR}/move.sh@" ${ARIA2_CONF}
exit 0

View File

@ -0,0 +1,27 @@
#!/usr/bin/with-contenv bash
# _ _ ____ ____
# / \ _ __(_) __ _|___ \ | _ \ _ __ ___
# / _ \ | '__| |/ _` | __) | | |_) | '__/ _ \
# / ___ \| | | | (_| |/ __/ | __/| | | (_) |
# /_/ \_\_| |_|\__,_|_____| |_| |_| \___/
#
# https://github.com/P3TERX/Aria2-Pro-Docker
#
# Copyright (c) 2020-2021 P3TERX <https://p3terx.com>
#
# This is free software, licensed under the MIT License.
# See /LICENSE for more information.
. /etc/init-base
if [ -w ${DOWNLOAD_DIR} ]; then echo "Download DIR writeable, not changing owner."; else chown -R p3terx:p3terx ${DOWNLOAD_DIR}; fi
chown -R p3terx:p3terx ${ARIA2_CONF_DIR}
if [[ -z ${PUID} && -z ${PGID} ]] || [[ ${PUID} = 65534 && ${PGID} = 65534 ]]; then
echo -e "${WARN} Ignore permission settings."
chmod -v 777 ${DOWNLOAD_DIR}
chmod -vR 777 ${ARIA2_CONF_DIR}
else
if [ -w ${DOWNLOAD_DIR} ]; then echo "Download DIR writeable, not modifying permission."; else chmod -v u=rwx ${DOWNLOAD_DIR}; fi
chmod -v 600 ${ARIA2_CONF_DIR}/*
chmod -v 755 ${SCRIPT_DIR}
chmod -v 700 ${SCRIPT_DIR}/*
fi

View File

@ -0,0 +1,2 @@
#!/bin/sh
cat /Aria2-Pro

View File

@ -0,0 +1 @@
# BT tracker updates disabled.

View File

@ -0,0 +1,118 @@
# _ _ ____ ____
# / \ _ __(_) __ _|___ \ | _ \ _ __ ___
# / _ \ | '__| |/ _` | __) | | |_) | '__/ _ \
# / ___ \| | | | (_| |/ __/ | __/| | | (_) |
# /_/ \_\_| |_|\__,_|_____| |_| |_| \___/
#
# https://github.com/P3TERX/Docker-Aria2-Pro
#
# Copyright (c) 2020 P3TERX <https://p3terx.com>
#
# This is free software, licensed under the MIT License.
# See /LICENSE for more information.
Green_font_prefix="\033[32m"
Yellow_font_prefix="\033[33m"
Red_font_prefix="\033[31m"
Green_background_prefix="\033[42;37m"
Red_background_prefix="\033[41;37m"
Font_color_suffix="\033[0m"
INFO="[${Green_font_prefix}INFO${Font_color_suffix}]"
ERROR="[${Red_font_prefix}ERROR${Font_color_suffix}]"
WARN="[${Yellow_font_prefix}WARN${Font_color_suffix}]"
DOWNLOAD_DIR="/downloads"
ARIA2_CONF_DIR="/config"
ARIA2_CONF="${ARIA2_CONF_DIR}/aria2.conf"
SCRIPT_CONF="${ARIA2_CONF_DIR}/script.conf"
SCRIPT_DIR="${ARIA2_CONF_DIR}/script"
CURL_OPTIONS="-fsSL --connect-timeout 3 --max-time 3"
PROFILE_URL1="https://p3terx.github.io/aria2.conf"
PROFILE_URL2="https://aria2c.now.sh"
PROFILE_URL3="https://cdn.jsdelivr.net/gh/P3TERX/aria2.conf"
FILE_ALLOCATION_SET() {
TMP_FILE="/downloads/P3TERX.COM"
if fallocate -l 5G ${TMP_FILE}; then
FILE_ALLOCATION=falloc
else
FILE_ALLOCATION=none
fi
rm -f ${TMP_FILE}
sed -i "s@^\(file-allocation=\).*@\1${FILE_ALLOCATION}@" "${ARIA2_CONF}"
}
CONVERSION_ARIA2_CONF() {
sed -i "s@^\(rpc-listen-port=\).*@\1${RPC_PORT:-6800}@" "${ARIA2_CONF}"
sed -i "s@^\(listen-port=\).*@\1${LISTEN_PORT:-6888}@" "${ARIA2_CONF}"
sed -i "s@^\(dht-listen-port=\).*@\1${LISTEN_PORT:-6888}@" "${ARIA2_CONF}"
sed -i "s@^\(dir=\).*@\1/downloads@" "${ARIA2_CONF}"
sed -i "s@/root/.aria2@${ARIA2_CONF_DIR}@" "${ARIA2_CONF}"
sed -i "s@^#\(retry-on-.*=\).*@\1true@" "${ARIA2_CONF}"
sed -i "s@^\(max-connection-per-server=\).*@\1128@" "${ARIA2_CONF}"
sed -i "/^on-download-stop=/d" "${ARIA2_CONF}"
sed -i "/^on-download-complete=/d" "${ARIA2_CONF}"
# Custom settings from user
sed -i "s@^\(continue=\).*@\1false@" "${ARIA2_CONF}"
sed -i "s@^\(always-resume=\).*@\1false@" "${ARIA2_CONF}"
sed -i "s@^\(max-concurrent-downloads=\).*@\1500@" "${ARIA2_CONF}"
sed -i "s@^\(enable-dht=\).*@\1false@" "${ARIA2_CONF}"
sed -i "s@^\(enable-dht6=\).*@\1false@" "${ARIA2_CONF}"
sed -i "s@^\(bt-enable-lpd=\).*@\1true@" "${ARIA2_CONF}"
sed -i "s@^\(enable-peer-exchange=\).*@\1false@" "${ARIA2_CONF}"
sed -i "s@^\(max-overall-upload-limit=\).*@\12M@" "${ARIA2_CONF}"
sed -i "s@^\(seed-time=\).*@\11@" "${ARIA2_CONF}"
sed -i "s@^\(user-agent=\).*@\1Mozilla/5.0 (ChromiumStylePlatform) Cobalt/Version@" "${ARIA2_CONF}"
sed -i "s@^\(peer-id-prefix=\).*@\1-DE13F0-@" "${ARIA2_CONF}"
sed -i "s@^\(summary-interval=\).*@\11@" "${ARIA2_CONF}"
sed -i "s@^\(show-console-readout=\).*@\1false@" "${ARIA2_CONF}"
sed -i "s@^\(console-log-level=\).*@\1notice@" "${ARIA2_CONF}"
# Add settings not present in default config
echo "" >>"${ARIA2_CONF}"
echo "# Custom settings added" >>"${ARIA2_CONF}"
echo "disable-metalink=true" >>"${ARIA2_CONF}"
echo "follow-torrent=false" >>"${ARIA2_CONF}"
echo "retry-on-400=false" >>"${ARIA2_CONF}"
echo "retry-on-403=false" >>"${ARIA2_CONF}"
echo "retry-on-406=false" >>"${ARIA2_CONF}"
echo "retry-on-unknown=true" >>"${ARIA2_CONF}"
echo "rpc-listen-all=true" >>"${ARIA2_CONF}"
[[ $TZ != "Asia/Shanghai" ]] && sed -i '11,$s/#.*//;/^$/d' "${ARIA2_CONF}"
FILE_ALLOCATION_SET
}
CONVERSION_SCRIPT_CONF() {
sed -i "s@\(upload-log=\).*@\1${ARIA2_CONF_DIR}/upload.log@" "${SCRIPT_CONF}"
sed -i "s@\(move-log=\).*@\1${ARIA2_CONF_DIR}/move.log@" "${SCRIPT_CONF}"
sed -i "s@^\(dest-dir=\).*@\1${DOWNLOAD_DIR}/completed@" "${SCRIPT_CONF}"
}
CONVERSION_CORE() {
sed -i "s@\(ARIA2_CONF_DIR=\"\).*@\1${ARIA2_CONF_DIR}\"@" "${SCRIPT_DIR}/core"
}
DOWNLOAD_PROFILE() {
for PROFILE in ${PROFILES}; do
[[ ${PROFILE} = *.sh || ${PROFILE} = core ]] && cd "${SCRIPT_DIR}" || cd "${ARIA2_CONF_DIR}"
while [[ ! -f ${PROFILE} ]]; do
rm -rf ${PROFILE}
echo
echo -e "${INFO} Downloading '${PROFILE}' ..."
curl -O ${CURL_OPTIONS} ${PROFILE_URL1}/${PROFILE} ||
curl -O ${CURL_OPTIONS} ${PROFILE_URL2}/${PROFILE} ||
curl -O ${CURL_OPTIONS} ${PROFILE_URL3}/${PROFILE}
[[ -s ${PROFILE} ]] && {
[[ "${PROFILE}" = "aria2.conf" ]] && CONVERSION_ARIA2_CONF
[[ "${PROFILE}" = "script.conf" ]] && CONVERSION_SCRIPT_CONF
[[ "${PROFILE}" = "core" ]] && CONVERSION_CORE
echo
echo -e "${INFO} '${PROFILE}' download completed !"
} || {
echo
echo -e "${ERROR} '${PROFILE}' download error, retry ..."
sleep 3
}
done
done
}

View File

@ -0,0 +1,15 @@
#!/usr/bin/execlineb -S0
# _ _ ____ ____
# / \ _ __(_) __ _|___ \ | _ \ _ __ ___
# / _ \ | '__| |/ _` | __) | | |_) | '__/ _ \
# / ___ \| | | | (_| |/ __/ | __/| | | (_) |
# /_/ \_\_| |_|\__,_|_____| |_| |_| \___/
#
# https://github.com/P3TERX/Aria2-Pro-Docker
#
# Copyright (c) 2020-2021 P3TERX <https://p3terx.com>
#
# This is free software, licensed under the MIT License.
# See /LICENSE for more information.
s6-svscanctl -t /var/run/s6/services

View File

@ -0,0 +1,18 @@
#!/usr/bin/with-contenv bash
# _ _ ____ ____
# / \ _ __(_) __ _|___ \ | _ \ _ __ ___
# / _ \ | '__| |/ _` | __) | | |_) | '__/ _ \
# / ___ \| | | | (_| |/ __/ | __/| | | (_) |
# /_/ \_\_| |_|\__,_|_____| |_| |_| \___/
#
# https://github.com/P3TERX/Aria2-Pro-Docker
#
# Copyright (c) 2020-2021 P3TERX <https://p3terx.com>
#
# This is free software, licensed under the MIT License.
# See /LICENSE for more information.
umask ${UMASK_SET:-022}
exec s6-setuidgid p3terx aria2c \
--conf-path=/config/aria2.conf

View File

@ -260,6 +260,37 @@ services:
- proxynet
restart: always
aria2-pro:
container_name: aria2-pro
build:
context: "{{ airflow_worker_dir }}/aria2-pro-docker"
environment:
- PUID=${AIRFLOW_UID:-50000}
- PGID=0
- UMASK_SET=022
- RPC_SECRET={{ vault_aria2_rpc_secret }}
- RPC_PORT=6800
- LISTEN_PORT=6888
- DISK_CACHE=64M
- IPV6_MODE=false
- UPDATE_TRACKERS=false
- CUSTOM_TRACKER_URL=
- TZ=Asia/Shanghai
volumes:
- ${AIRFLOW_PROJ_DIR:-.}/aria2-config:/config
- ${AIRFLOW_PROJ_DIR:-.}/downloadfiles/videos/in-progress:/downloads
ports:
- "127.0.0.1:6800:6800"
- "6888:6888"
- "6888:6888/udp"
networks:
- proxynet
restart: unless-stopped
logging:
driver: json-file
options:
max-size: 1m
networks:
proxynet:
name: airflow_proxynet
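A quick way to confirm the aria2-pro service is reachable from another container on airflow_proxynet (for example before pointing the worker DAGs' 'aria-rpc' downloader at it) is a JSON-RPC call to aria2.getGlobalStat. A minimal sketch; the host name, port, and secret mirror the service definition above and would differ in other deployments:

import requests

payload = {
    "jsonrpc": "2.0",
    "id": "healthcheck",
    "method": "aria2.getGlobalStat",
    # The secret comes from RPC_SECRET ({{ vault_aria2_rpc_secret }} above), passed as "token:<secret>".
    "params": ["token:changeme"],
}

# "aria2-pro" resolves via the shared airflow_proxynet network; 6800 is the RPC port.
resp = requests.post("http://aria2-pro:6800/jsonrpc", json=payload, timeout=10)
resp.raise_for_status()
print(resp.json()["result"])  # e.g. {'numActive': '0', 'numWaiting': '0', ...}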

View File

@ -132,6 +132,8 @@ services:
- "--comms-log-root-dir" - "--comms-log-root-dir"
- "/app/logs/yt-dlp-ops/communication_logs" - "/app/logs/yt-dlp-ops/communication_logs"
- "--bgutils-no-innertube" - "--bgutils-no-innertube"
- "--visitor-rotation-threshold"
- "250"
{% endif %} {% endif %}
restart: unless-stopped restart: unless-stopped
pull_policy: always pull_policy: always

View File

@ -327,7 +327,7 @@ def manage_system_callable(**context):
action = params["action"] action = params["action"]
# For Thrift actions, use the new management host/port # For Thrift actions, use the new management host/port
if entity not in ["airflow_meta", "activity_counters"]: if entity not in ["activity_counters"]:
host = params["management_host"] host = params["management_host"]
port = params["management_port"] port = params["management_port"]
else: else:
@ -343,7 +343,6 @@ def manage_system_callable(**context):
"account": ["list_with_status", "ban", "unban", "unban_all", "delete_from_redis"], "account": ["list_with_status", "ban", "unban", "unban_all", "delete_from_redis"],
"client": ["list_with_status", "delete_from_redis"], "client": ["list_with_status", "delete_from_redis"],
"accounts_and_proxies": ["list_with_status", "ban", "unban", "ban_all", "unban_all", "delete_from_redis"], "accounts_and_proxies": ["list_with_status", "ban", "unban", "ban_all", "unban_all", "delete_from_redis"],
"airflow_meta": ["clear_dag_runs"],
"activity_counters": ["list_with_status"], "activity_counters": ["list_with_status"],
} }
@ -364,41 +363,6 @@ def manage_system_callable(**context):
if action in ["ban", "unban"] and not account_id: if action in ["ban", "unban"] and not account_id:
raise ValueError(f"An 'account_id' is required for account action '{action}'.") raise ValueError(f"An 'account_id' is required for account action '{action}'.")
# --- Handle Airflow Meta actions separately as they don't use Thrift ---
if entity == "airflow_meta":
dag_id = params.get("dag_id_to_manage")
if action == "clear_dag_runs":
clear_scope = params.get("clear_scope")
logger.info(f"Attempting to delete DagRuns for DAG '{dag_id}' with scope '{clear_scope}'.")
with create_session() as session:
dag_run_query = session.query(DagRun).filter(DagRun.dag_id == dag_id)
if clear_scope == "last_run":
last_run = dag_run_query.order_by(DagRun.execution_date.desc()).first()
if not last_run:
logger.info(f"No runs found for DAG '{dag_id}'. Nothing to delete.")
print(f"\nNo runs found for DAG '{dag_id}'.\n")
return
logger.warning(f"Deleting last DagRun for DAG '{dag_id}' (run_id: {last_run.run_id}, execution_date: {last_run.execution_date}). This will also delete its task instances.")
# Deleting the DagRun object should cascade and delete related TaskInstances.
session.delete(last_run)
deleted_count = 1
else: # all_runs
logger.warning(f"Deleting ALL DagRuns and associated TaskInstances for DAG '{dag_id}'. This will remove all history from the UI.")
# To ensure all related data is cleared, we explicitly delete TaskInstances first.
# This is safer than relying on DB-level cascades which may not be configured.
ti_deleted_count = session.query(TaskInstance).filter(TaskInstance.dag_id == dag_id).delete(synchronize_session=False)
logger.info(f"Deleted {ti_deleted_count} TaskInstance records for DAG '{dag_id}'.")
deleted_count = dag_run_query.delete(synchronize_session=False)
# The session is committed automatically by the `with create_session()` context manager.
logger.info(f"Successfully deleted {deleted_count} DagRun(s) for DAG '{dag_id}'.")
print(f"\nSuccessfully deleted {deleted_count} DagRun(s) for DAG '{dag_id}'.\n")
return # End execution
# --- Handle Activity Counter action ---
if entity == "activity_counters":
@ -855,13 +819,13 @@ with DAG(
"entity": Param(
"accounts_and_proxies",
type="string",
enum=["account", "proxy", "client", "accounts_and_proxies", "activity_counters", "airflow_meta"],
enum=["account", "proxy", "client", "accounts_and_proxies", "activity_counters"],
description="The type of entity to manage.",
),
"action": Param(
"list_with_status",
type="string",
enum=["list_with_status", "ban", "unban", "ban_all", "unban_all", "delete_from_redis", "clear_dag_runs"],
enum=["list_with_status", "ban", "unban", "ban_all", "unban_all", "delete_from_redis"],
description="""The management action to perform.
---
#### Actions for `entity: proxy`
@ -895,10 +859,6 @@ with DAG(
- `unban_all`: Un-ban all proxies for a `server_identity` (or all servers) AND all accounts (optionally filtered by `account_id` as a prefix).
- `delete_from_redis`: Deletes both account and proxy status from Redis via Thrift service. For accounts, if `account_id` is provided as a prefix, it deletes all accounts matching that prefix. If `account_id` is empty, it deletes ALL accounts. For proxies, if `server_identity` is provided, it deletes all proxies for that server. If `server_identity` is empty, it deletes ALL proxies across all servers.
#### Actions for `entity: airflow_meta`
- `clear_dag_runs`: **(Destructive)** Deletes DAG run history and associated task instances from the database, removing them from the UI. This allows the runs to be re-created if backfilling is enabled.
- `clear_scope: last_run`: Deletes only the most recent DAG run and its task instances.
- `clear_scope: all_runs`: Deletes all historical DAG runs and task instances for the selected DAG.
""", """,
), ),
"server_identity": Param( "server_identity": Param(
@ -922,20 +882,6 @@ with DAG(
title="Redis Connection ID", title="Redis Connection ID",
description="The Airflow connection ID for the Redis server (used for 'delete_from_redis' and for fetching detailed account status).", description="The Airflow connection ID for the Redis server (used for 'delete_from_redis' and for fetching detailed account status).",
), ),
"dag_id_to_manage": Param(
"ytdlp_ops_v01_worker_per_url",
type="string",
enum=["ytdlp_ops_v01_orchestrator", "ytdlp_ops_v01_dispatcher", "ytdlp_ops_v01_worker_per_url", "ytdlp_ops_v02_orchestrator_auth", "ytdlp_ops_v02_dispatcher_auth", "ytdlp_ops_v02_worker_per_url_auth", "ytdlp_ops_v02_orchestrator_dl", "ytdlp_ops_v02_dispatcher_dl", "ytdlp_ops_v02_worker_per_url_dl"],
title="[Airflow Meta] DAG ID",
description="The DAG ID to perform the action on.",
),
"clear_scope": Param(
"last_run",
type="string",
enum=["last_run", "all_runs"],
title="[Airflow Meta] Clear Scope",
description="For 'clear_dag_runs' action, specifies the scope of runs to clear.",
),
},
) as dag:
system_management_task = PythonOperator(
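The ban/unban actions exercised here (the "ban CLI policy testing" mentioned in the commit message) can also be driven programmatically by triggering this management DAG with a conf payload. A minimal sketch against the Airflow stable REST API; the DAG id, base URL, and credentials are placeholders for this deployment:

import requests

AIRFLOW_API = "http://localhost:8080/api/v1"   # placeholder base URL
DAG_ID = "ytdlp_ops_manage_system"             # placeholder; use the real management DAG id

conf = {
    "entity": "accounts_and_proxies",
    "action": "ban",
    "account_id": "accountpool_01",            # required for 'ban'/'unban'
    "server_identity": "worker-01",            # optional proxy scope
}

resp = requests.post(
    f"{AIRFLOW_API}/dags/{DAG_ID}/dagRuns",
    json={"conf": conf},
    auth=("airflow", "airflow"),               # basic auth; real auth is deployment-specific
    timeout=30,
)
resp.raise_for_status()
print("triggered run:", resp.json()["dag_run_id"])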

View File

@ -15,7 +15,9 @@ from datetime import datetime
from airflow.exceptions import AirflowException
from airflow.models.dag import DAG
from airflow.models.dagrun import DagRun
from airflow.models.param import Param
from airflow.models.taskinstance import TaskInstance
from airflow.operators.python import PythonOperator, BranchPythonOperator
from airflow.operators.empty import EmptyOperator
from airflow.operators.bash import BashOperator
@ -23,6 +25,7 @@ from airflow.providers.celery.executors.celery_executor import app as celery_app
from airflow.providers.redis.hooks.redis import RedisHook
from airflow.utils.dates import days_ago
from airflow.models.variable import Variable
from airflow.utils.session import create_session
import requests
# Configure logging
@ -276,7 +279,10 @@ def dump_redis_data_to_csv(redis_client, dump_dir, patterns):
def clear_queue_callable(**context):
"""Dumps Redis data to CSV and/or clears specified Redis keys based on selection."""
"""
Dumps Redis data to CSV and/or clears specified Redis keys based on selection.
The `_skipped` queue is for videos that are unavailable due to external reasons (e.g., private, removed).
"""
params = context['params']
ti = context['task_instance']
logger.info(f"Task '{ti.task_id}' running on queue '{ti.queue}'.")
@ -315,7 +321,7 @@ def clear_queue_callable(**context):
logger.info("Dumping is enabled. Performing dump before clearing.")
dump_redis_data_to_csv(redis_client, dump_dir, dump_patterns)
all_suffixes = ['_inbox', '_fail', '_result', '_progress']
all_suffixes = ['_inbox', '_fail', '_result', '_progress', '_skipped']
keys_to_delete = set()
for queue_base_name in queue_base_names_to_clear:
if '_all' in queues_to_clear_options:
@ -420,7 +426,10 @@ def list_contents_callable(**context):
def check_status_callable(**context):
"""Checks the status (type and size) of all standard Redis queues for a given base name."""
"""
Checks the status (type and size) of all standard Redis queues for a given base name.
The `_skipped` queue is for videos that are unavailable due to external reasons (e.g., private, removed).
"""
params = context['params']
ti = context['task_instance']
logger.info(f"Task '{ti.task_id}' running on queue '{ti.queue}'.")
@ -436,7 +445,7 @@ def check_status_callable(**context):
else:
raise ValueError(f"Invalid queue_system: {queue_system}")
queue_suffixes = ['_inbox', '_progress', '_result', '_fail']
queue_suffixes = ['_inbox', '_progress', '_result', '_fail', '_skipped']
logger.info(f"--- Checking Status for Queue System: '{queue_system}' ---")
@ -575,6 +584,56 @@ def purge_celery_queue_callable(**context):
logger.info("--- Purge complete. ---")
def clear_dag_runs_callable(**context):
"""
Deletes DAG run history and associated task instances from the database.
"""
params = context['params']
dag_id = params.get("dag_id_to_manage")
clear_scope = params.get("clear_scope")
log_target = f"DAG '{dag_id}'" if dag_id != "ALL_DAGS" else "ALL DAGS (except ytdlp_mgmt_queues)"
logger.info(f"Attempting to delete DagRuns for {log_target} with scope '{clear_scope}'.")
with create_session() as session:
dag_run_query = session.query(DagRun)
if dag_id == "ALL_DAGS":
dag_run_query = dag_run_query.filter(DagRun.dag_id != 'ytdlp_mgmt_queues')
else:
dag_run_query = dag_run_query.filter(DagRun.dag_id == dag_id)
if clear_scope == "last_run":
if dag_id == "ALL_DAGS":
raise AirflowException("Cannot clear 'last_run' for ALL_DAGS. Please select a specific DAG.")
last_run = dag_run_query.order_by(DagRun.execution_date.desc()).first()
if not last_run:
logger.info(f"No runs found for DAG '{dag_id}'. Nothing to delete.")
print(f"\nNo runs found for DAG '{dag_id}'.\n")
return
logger.warning(f"Deleting last DagRun for DAG '{dag_id}' (run_id: {last_run.run_id}, execution_date: {last_run.execution_date}). This will also delete its task instances.")
session.delete(last_run)
deleted_count = 1
else: # all_runs
logger.warning(f"Deleting ALL DagRuns and associated TaskInstances for {log_target}. This will remove all history from the UI.")
ti_query = session.query(TaskInstance)
if dag_id == "ALL_DAGS":
ti_query = ti_query.filter(TaskInstance.dag_id != 'ytdlp_mgmt_queues')
else:
ti_query = ti_query.filter(TaskInstance.dag_id == dag_id)
ti_deleted_count = ti_query.delete(synchronize_session=False)
logger.info(f"Deleted {ti_deleted_count} TaskInstance records for {log_target}.")
deleted_count = dag_run_query.delete(synchronize_session=False)
# The session is committed automatically by the `with create_session()` context manager.
logger.info(f"Successfully deleted {deleted_count} DagRun(s) for {log_target}.")
print(f"\nSuccessfully deleted {deleted_count} DagRun(s) for {log_target}.\n")
def add_videos_to_queue_callable(**context):
"""
Parses video inputs from manual text, a predefined file, or a file path/URL,
@ -671,12 +730,13 @@ with DAG(
- `check_status`: Check the overall status of the queues.
- `requeue_failed`: Copy all URLs from the `_fail` hash to the `_inbox` list and clear the `_fail` hash.
- `purge_celery_queue`: **(Destructive)** Removes all tasks from a specified Celery worker queue (e.g., `queue-dl`). This is useful for clearing out a backlog of tasks that were queued before a dispatcher was paused.
- `clear_dag_runs`: **(Destructive)** Deletes DAG run history and associated task instances from the database, removing them from the UI.
""", """,
params={ params={
"action": Param( "action": Param(
"list_contents", "list_contents",
type="string", type="string",
enum=["add_videos", "clear_queue", "list_contents", "check_status", "requeue_failed", "inspect_celery_cluster", "purge_celery_queue"], enum=["add_videos", "clear_queue", "list_contents", "check_status", "requeue_failed", "inspect_celery_cluster", "purge_celery_queue", "clear_dag_runs"],
title="Action", title="Action",
description="The management action to perform.", description="The management action to perform.",
), ),
@ -737,7 +797,7 @@ with DAG(
description="Select which standard queues to clear. '_all' clears all four. If left empty, it defaults to '_all'.", description="Select which standard queues to clear. '_all' clears all four. If left empty, it defaults to '_all'.",
items={ items={
"type": "string", "type": "string",
"enum": ["_inbox", "_fail", "_result", "_progress", "_all"], "enum": ["_inbox", "_fail", "_result", "_progress", "_skipped", "_all"],
} }
), ),
"confirm_clear": Param( "confirm_clear": Param(
@ -766,7 +826,7 @@ with DAG(
),
# --- Params for 'list_contents' ---
"queue_to_list": Param(
'video_queue_inbox,queue2_auth_inbox,queue2_dl_result',
'video_queue_inbox,queue2_auth_inbox,queue2_dl_inbox,queue2_dl_result',
type="string",
title="[list_contents] Queues to List",
description="Comma-separated list of exact Redis key names to list.",
@ -797,6 +857,21 @@ with DAG(
title="[purge_celery_queue] Confirm Purge",
description="Must be set to True to execute the 'purge_celery_queue' action. This is a destructive operation that removes all tasks from the specified Celery queue(s).",
),
# --- Params for 'clear_dag_runs' ---
"dag_id_to_manage": Param(
"ALL_DAGS",
type="string",
enum=["ALL_DAGS", "ytdlp_ops_v01_orchestrator", "ytdlp_ops_v01_dispatcher", "ytdlp_ops_v01_worker_per_url", "ytdlp_ops_v02_orchestrator_auth", "ytdlp_ops_v02_dispatcher_auth", "ytdlp_ops_v02_worker_per_url_auth", "ytdlp_ops_v02_orchestrator_dl", "ytdlp_ops_v02_dispatcher_dl", "ytdlp_ops_v02_worker_per_url_dl"],
title="[clear_dag_runs] DAG ID",
description="The DAG ID to perform the action on. Select 'ALL_DAGS' to clear history for all DAGs.",
),
"clear_scope": Param(
"all_runs",
type="string",
enum=["last_run", "all_runs"],
title="[clear_dag_runs] Clear Scope",
description="For 'clear_dag_runs' action, specifies the scope of runs to clear.",
),
# --- Common Params ---
"redis_conn_id": Param(
DEFAULT_REDIS_CONN_ID,
@ -866,6 +941,11 @@ with DAG(
python_callable=purge_celery_queue_callable,
)
action_clear_dag_runs = PythonOperator(
task_id="action_clear_dag_runs",
python_callable=clear_dag_runs_callable,
)
# --- Wire up tasks ---
branch_on_action >> [
action_add_videos,
@ -875,4 +955,5 @@ with DAG(
action_requeue_failed,
action_inspect_celery_cluster,
action_purge_celery_queue,
action_clear_dag_runs,
]
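The new `_skipped` suffix introduced above is described in the docstrings as holding videos that are unavailable for external reasons (private, removed, geo-restricted). A minimal sketch of how a worker might park such a URL; it assumes a hash keyed by URL, mirroring the `_fail` queue, which is an assumption since the exact structure is not shown in this diff:

import json
from datetime import datetime, timezone

def mark_url_skipped(redis_client, queue_name: str, url: str, error_code: str) -> None:
    # Assumption: `<queue_name>_skipped` is a hash keyed by URL, like `_fail`.
    payload = {
        "error_code": error_code,  # e.g. "PRIVATE_VIDEO", "GEO_RESTRICTED", "VIDEO_REMOVED"
        "skipped_at": datetime.now(timezone.utc).isoformat(),
    }
    redis_client.hset(f"{queue_name}_skipped", url, json.dumps(payload))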

View File

@ -20,7 +20,7 @@ from airflow.utils.dates import days_ago
from airflow.api.common.trigger_dag import trigger_dag
from airflow.models.dagrun import DagRun
from airflow.models.dag import DagModel
from datetime import timedelta
from datetime import timedelta, datetime
import logging
import random
import time
@ -37,41 +37,6 @@ from thrift.transport import TSocket, TTransport
# Configure logging
logger = logging.getLogger(__name__)
DEFAULT_REQUEST_PARAMS_JSON = """{
"context_reuse_policy": {
"enabled": true,
"max_age_seconds": 86400,
"reuse_visitor_id": true,
"reuse_cookies": true
},
"token_generation_strategy": {
"youtubei_js": {
"generate_po_token": true,
"generate_gvs_token": true
}
},
"ytdlp_params": {
"use_curl_prefetch": false,
"token_supplement_strategy": {
"youtubepot_bgutilhttp_extractor": {
"enabled": true
}
},
"visitor_id_override": {
"enabled": true
}
},
"session_params": {
"lang": "en-US",
"location": "US",
"deviceCategory": "MOBILE",
"user_agents": {
"youtubei_js": "Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)",
"yt_dlp": "Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)"
}
}
}"""
# Default settings
DEFAULT_QUEUE_NAME = 'video_queue'
DEFAULT_REDIS_CONN_ID = 'redis_default'
@ -191,6 +156,17 @@ def orchestrate_workers_ignition_callable(**context):
dag_run_id = context['dag_run'].run_id
total_triggered = 0
# --- Generate a consistent timestamped prefix for this orchestrator run ---
# This ensures all workers spawned from this run use the same set of accounts.
final_account_pool_prefix = params['account_pool']
if params.get('prepend_client_to_account') and params.get('account_pool_size') is not None:
clients_str = params.get('clients', '')
primary_client = clients_str.split(',')[0].strip() if clients_str else 'unknown'
# Use a timestamp from the orchestrator's run for consistency
timestamp = datetime.now().strftime('%Y%m%d%H%M%S')
final_account_pool_prefix = f"{params['account_pool']}_{timestamp}_{primary_client}"
logger.info(f"Generated consistent account prefix for this run: '{final_account_pool_prefix}'")
for i, bunch in enumerate(bunches):
logger.info(f"--- Triggering Bunch {i+1}/{len(bunches)} (contains {len(bunch)} dispatcher(s)) ---")
for j, _ in enumerate(bunch):
@ -199,6 +175,8 @@ def orchestrate_workers_ignition_callable(**context):
# Pass all orchestrator params to the dispatcher, which will then pass them to the worker.
conf_to_pass = {p: params[p] for p in params}
# Override account_pool with the generated prefix
conf_to_pass['account_pool'] = final_account_pool_prefix
logger.info(f"Triggering dispatcher {j+1}/{len(bunch)} in bunch {i+1} (run {total_triggered + 1}/{total_workers}) (Run ID: {run_id})") logger.info(f"Triggering dispatcher {j+1}/{len(bunch)} in bunch {i+1} (run {total_triggered + 1}/{total_workers}) (Run ID: {run_id})")
logger.debug(f"Full conf for dispatcher run {run_id}: {conf_to_pass}") logger.debug(f"Full conf for dispatcher run {run_id}: {conf_to_pass}")
@ -343,18 +321,13 @@ with DAG(
"'proceed_loop': (Default) Mark URL as failed but continue the processing loop with a new URL. " "'proceed_loop': (Default) Mark URL as failed but continue the processing loop with a new URL. "
"'retry_with_new_token': Attempt to get a new token with a new account and retry the download once. If it fails again, proceed loop." "'retry_with_new_token': Attempt to get a new token with a new account and retry the download once. If it fails again, proceed loop."
), ),
'request_params_json': Param('{}', type="string", title="[Worker Param] Request Params JSON", description="JSON string with request parameters for the token service."), 'request_params_json': Param('{}', type="string", title="[Worker Param] Request Params JSON", description="JSON string with per-request parameters to override server defaults. Can be a full JSON object or comma-separated key=value pairs (e.g., 'session_params.location=DE,ytdlp_params.skip_cache=true')."),
'language_code': Param('en-US', type="string", title="[Worker Param] Language Code", description="The language code (e.g., 'en-US', 'de-DE') to use for the YouTube request headers."),
'queue_name': Param(DEFAULT_QUEUE_NAME, type="string", description="[Worker Param] Base name for Redis queues."), 'queue_name': Param(DEFAULT_QUEUE_NAME, type="string", description="[Worker Param] Base name for Redis queues."),
'redis_conn_id': Param(DEFAULT_REDIS_CONN_ID, type="string", description="[Worker Param] Airflow Redis connection ID."), 'redis_conn_id': Param(DEFAULT_REDIS_CONN_ID, type="string", description="[Worker Param] Airflow Redis connection ID."),
'clients': Param( 'clients': Param(
'tv_simply', 'tv_simply',
type="string", type="string",
enum=[
'tv_simply',
'mweb',
'tv',
'custom',
],
title="[Worker Param] Clients", title="[Worker Param] Clients",
description="[Worker Param] Comma-separated list of clients for token generation. Full list: web, web_safari, web_embedded, web_music, web_creator, mweb, web_camoufox, web_safari_camoufox, web_embedded_camoufox, web_music_camoufox, web_creator_camoufox, mweb_camoufox, android, android_music, android_creator, android_vr, ios, ios_music, ios_creator, tv, tv_simply, tv_embedded. See DAG documentation for details." description="[Worker Param] Comma-separated list of clients for token generation. Full list: web, web_safari, web_embedded, web_music, web_creator, mweb, web_camoufox, web_safari_camoufox, web_embedded_camoufox, web_music_camoufox, web_creator_camoufox, mweb_camoufox, android, android_music, android_creator, android_vr, ios, ios_music, ios_creator, tv, tv_simply, tv_embedded. See DAG documentation for details."
), ),
@ -370,23 +343,16 @@ with DAG(
'delay_between_formats_s': Param(15, type="integer", title="[Worker Param] Delay Between Formats (s)", description="Delay in seconds between downloading each format when multiple formats are specified. A 22s wait may be effective for batch downloads, while 6-12s may suffice if cookies are refreshed regularly."), 'delay_between_formats_s': Param(15, type="integer", title="[Worker Param] Delay Between Formats (s)", description="Delay in seconds between downloading each format when multiple formats are specified. A 22s wait may be effective for batch downloads, while 6-12s may suffice if cookies are refreshed regularly."),
'yt_dlp_test_mode': Param(False, type="boolean", title="[Worker Param] yt-dlp Test Mode", description="If True, runs yt-dlp with --test flag (dry run without downloading)."), 'yt_dlp_test_mode': Param(False, type="boolean", title="[Worker Param] yt-dlp Test Mode", description="If True, runs yt-dlp with --test flag (dry run without downloading)."),
'skip_probe': Param(True, type="boolean", title="[Worker Param] Skip Probe", description="If True, skips the ffmpeg probe of downloaded files."), 'skip_probe': Param(True, type="boolean", title="[Worker Param] Skip Probe", description="If True, skips the ffmpeg probe of downloaded files."),
'yt_dlp_cleanup_mode': Param(True, type="boolean", title="[Worker Param] yt-dlp Cleanup Mode", description="If True, creates a .empty file and deletes the original media file after successful download and probe."), 'yt_dlp_cleanup_mode': Param(False, type="boolean", title="[Worker Param] yt-dlp Cleanup Mode", description="If True, creates a .empty file and deletes the original media file after successful download and probe."),
'socket_timeout': Param(15, type="integer", title="[Worker Param] Socket Timeout", description="Timeout in seconds for socket operations."), 'socket_timeout': Param(15, type="integer", title="[Worker Param] Socket Timeout", description="Timeout in seconds for socket operations."),
'download_format_preset': Param( 'download_format': Param(
'format_1', 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best',
type="string", type="string",
enum=['format_1', 'format_2', 'custom'], title="[Worker Param] Download Format",
title="[Worker Param] Download Format Preset", description="Custom yt-dlp format string. Common presets: [1] 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best' (Default, best quality MP4). [2] '18-dashy/18,140-dashy/140,133-dashy/134-dashy/136-dashy/137-dashy/250-dashy/298-dashy/299-dashy' (Legacy formats). [3] '299-dashy/298-dashy/250-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy' (High-framerate formats)."
description="Select a predefined format string or choose 'custom' to use the value from 'Custom Download Format'.\nformat_1: 18-dashy/18,140-dashy/140,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy\nformat_2: (299/298/137/136/135/134/133)-dashy"
),
'download_format_custom': Param(
'18-dashy/18,140-dashy/140,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy',
type="string",
title="[Worker Param] Custom Download Format",
description="Custom yt-dlp format string. Used when preset is 'custom'. E.g., 'ba[ext=m4a]/bestaudio/best'."
), ),
'downloader': Param(
'py',
'cli',
type="string",
enum=['py', 'aria-rpc', 'cli'],
title="[Worker Param] Download Tool",
@ -396,7 +362,7 @@ with DAG(
'aria_port': Param(6800, type="integer", title="[Worker Param] Aria2c Port", description="For 'aria-rpc' downloader: Port of the aria2c RPC server. Can be set via Airflow Variable 'YTDLP_ARIA_PORT'."),
'aria_secret': Param('SQGCQPLVFQIASMPNPOJYLVGJYLMIDIXDXAIXOTX', type="string", title="[Worker Param] Aria2c Secret", description="For 'aria-rpc' downloader: Secret token. Can be set via Airflow Variable 'YTDLP_ARIA_SECRET'."),
'yt_dlp_extra_args': Param(
'--no-resize-buffer --buffer-size 4M --min-sleep-interval 5 --max-sleep-interval 10',
'',
type=["string", "null"],
title="[Worker Param] Extra yt-dlp arguments",
),
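The reworked request_params_json description above says the value may be either a full JSON object or comma-separated dotted key=value pairs, and the worker's new language_code handling appends pairs such as session_params.lang=en-US in exactly that form. The actual parsing happens on the token-service side and is not part of this diff; a sketch of how such pairs could expand into a nested dict, for illustration only:

import json

def parse_request_params(raw: str) -> dict:
    """Accept either a JSON object or comma-separated dotted key=value pairs."""
    if not raw:
        return {}
    try:
        return json.loads(raw)
    except json.JSONDecodeError:
        pass
    result: dict = {}
    for pair in raw.split(","):
        key, _, value = pair.partition("=")
        node = result
        *parents, leaf = key.strip().split(".")
        for part in parents:
            node = node.setdefault(part, {})
        # Best-effort literal conversion for booleans/numbers, else keep the string.
        try:
            node[leaf] = json.loads(value)
        except json.JSONDecodeError:
            node[leaf] = value.strip()
    return result

# parse_request_params("session_params.location=DE,ytdlp_params.skip_cache=true")
# -> {"session_params": {"location": "DE"}, "ytdlp_params": {"skip_cache": True}}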

View File

@ -17,7 +17,7 @@ from __future__ import annotations
from airflow.decorators import task, task_group
from airflow.exceptions import AirflowException, AirflowSkipException
from airflow.models import Variable
from airflow.models.dag import DAG
from airflow.models.dag import DAG, DagModel
from airflow.models.param import Param
from airflow.models.xcom_arg import XComArg
from airflow.operators.dummy import DummyOperator
@ -174,14 +174,9 @@ def _get_account_pool(params: dict) -> list:
is_prefix_mode = True
pool_size = int(pool_size_param)
if params.get('prepend_client_to_account', True):
clients_str = params.get('clients', '')
primary_client = clients_str.split(',')[0].strip() if clients_str else 'unknown'
timestamp = datetime.now().strftime('%Y%m%d%H%M%S')
new_prefix = f"{prefix}_{timestamp}_{primary_client}"
accounts = [f"{new_prefix}_{i:02d}" for i in range(1, pool_size + 1)]
else:
accounts = [f"{prefix}_{i:02d}" for i in range(1, pool_size + 1)]
# The orchestrator now generates the full prefix if prepend_client_to_account is True.
# The worker just appends the numbers.
accounts = [f"{prefix}_{i:02d}" for i in range(1, pool_size + 1)]
else:
accounts = [prefix]
@ -258,12 +253,26 @@ def get_url_and_assign_account(**context):
# For manual runs, we fall back to 'manual_url_to_process'.
url_to_process = params.get('url_to_process')
if not url_to_process:
url_to_process = params.get('manual_url_to_process')
if url_to_process:
logger.info(f"Using URL from manual run parameter: '{url_to_process}'")
manual_url_input = params.get('manual_url_to_process')
if manual_url_input:
logger.info(f"Using URL from manual run parameter: '{manual_url_input}'")
if manual_url_input == 'PULL_FROM_QUEUE':
logger.info("Manual run is set to pull from queue.")
redis_conn_id = params.get('redis_conn_id', DEFAULT_REDIS_CONN_ID)
queue_name = params.get('queue_name', DEFAULT_QUEUE_NAME)
inbox_queue = f"{queue_name}_inbox"
client = _get_redis_client(redis_conn_id)
url_bytes = client.lpop(inbox_queue)
if not url_bytes:
logger.info("Redis queue is empty. No work to do. Skipping task.")
raise AirflowSkipException("Redis queue is empty. No work to do.")
url_to_process = url_bytes.decode('utf-8')
logger.info(f"Pulled URL '{url_to_process}' from queue '{inbox_queue}'.")
else:
url_to_process = manual_url_input
if not url_to_process:
raise AirflowException("No URL to process. For manual runs, please provide a URL in the 'manual_url_to_process' parameter.")
raise AirflowException("No URL to process. For manual runs, please provide a URL in the 'manual_url_to_process' parameter, or 'PULL_FROM_QUEUE'.")
logger.info(f"Received URL '{url_to_process}' to process.")
# Mark the URL as in-progress in Redis
@ -310,9 +319,26 @@ def get_token(initial_data: dict, **context):
host, port = params['service_ip'], int(params['service_port'])
machine_id = params.get('machine_id') or socket.gethostname()
clients = params.get('clients')
request_params_json = params.get('request_params_json', '{}')
request_params_json = params.get('request_params_json')
language_code = params.get('language_code')
assigned_proxy_url = params.get('assigned_proxy_url')
if language_code:
try:
params_dict = json.loads(request_params_json)
logger.info(f"Setting language for request: {language_code}")
if 'session_params' not in params_dict:
params_dict['session_params'] = {}
params_dict['session_params']['lang'] = language_code
request_params_json = json.dumps(params_dict)
except (json.JSONDecodeError, TypeError):
logger.warning("Could not parse request_params_json as JSON. Treating as key=value pairs and appending language code.")
lang_kv = f"session_params.lang={language_code}"
if request_params_json:
request_params_json += f",{lang_kv}"
else:
request_params_json = lang_kv
video_id = _extract_video_id(url)
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
job_dir_name = f"{timestamp}-{video_id or 'unknown'}"
@ -355,18 +381,39 @@
if process.returncode != 0:
error_message = "ytops-client failed. See logs for details."
for line in reversed(process.stderr.strip().split('\n')):
if 'ERROR' in line or 'Thrift error' in line or 'Connection to server failed' in line:
error_message = line.strip()
break
# Try to find a more specific error message from the Thrift client's output
thrift_error_match = re.search(r'A Thrift error occurred: (.*)', process.stderr)
if thrift_error_match:
error_message = thrift_error_match.group(1).strip()
else: # Fallback to old line-by-line parsing
for line in reversed(process.stderr.strip().split('\n')):
if 'ERROR' in line or 'Thrift error' in line or 'Connection to server failed' in line:
error_message = line.strip()
break
# Determine error code for branching logic
error_code = 'GET_INFO_CLIENT_FAIL'
if "BOT_DETECTED" in process.stderr:
error_code = "BOT_DETECTED"
elif "BOT_DETECTION_SIGN_IN_REQUIRED" in process.stderr:
error_code = "BOT_DETECTION_SIGN_IN_REQUIRED"
elif "Connection to server failed" in process.stderr:
error_code = "TRANSPORT_ERROR"
stderr_lower = process.stderr.lower()
# These patterns should match the error codes from PBUserException and others
error_patterns = {
"BOT_DETECTED": ["bot_detected"],
"BOT_DETECTION_SIGN_IN_REQUIRED": ["bot_detection_sign_in_required"],
"TRANSPORT_ERROR": ["connection to server failed"],
"PRIVATE_VIDEO": ["private video"],
"COPYRIGHT_REMOVAL": ["copyright"],
"GEO_RESTRICTED": ["in your country"],
"VIDEO_REMOVED": ["video has been removed"],
"VIDEO_UNAVAILABLE": ["video unavailable"],
"MEMBERS_ONLY": ["members-only"],
"AGE_GATED_SIGN_IN": ["sign in to confirm your age"],
"VIDEO_PROCESSING": ["processing this video"],
}
for code, patterns in error_patterns.items():
if any(p in stderr_lower for p in patterns):
error_code = code
break # Found a match, stop searching
error_details = {
'error_message': error_message,
@ -381,8 +428,23 @@
if proxy_match:
proxy = proxy_match.group(1).strip()
# Rename the info.json to include the proxy for the download worker
final_info_json_path = info_json_path
if proxy:
# Sanitize for filename: replace '://' which is invalid in paths. Colons are usually fine.
sanitized_proxy = proxy.replace('://', '---')
new_filename = f"info_{video_id or 'unknown'}_{account_id}_{timestamp}_proxy_{sanitized_proxy}.json"
new_path = os.path.join(job_dir_path, new_filename)
try:
os.rename(info_json_path, new_path)
final_info_json_path = new_path
logger.info(f"Renamed info.json to include proxy: {new_path}")
except OSError as e:
logger.error(f"Failed to rename info.json to include proxy: {e}. Using original path.")
return {
'info_json_path': info_json_path,
'info_json_path': final_info_json_path,
'job_dir_path': job_dir_path,
'socks_proxy': proxy,
'ytdlp_command': None,
@ -407,10 +469,15 @@ def handle_bannable_error_branch(task_id_to_check: str, **context):
error_code = error_details.get('error_code', '').strip()
policy = params.get('on_auth_failure', 'retry_with_new_account')
# Check if this is an age confirmation error - should not stop the loop
if "Sign in to confirm your age" in error_message or "confirm your age" in error_message.lower():
logger.info(f"Age confirmation error detected for '{task_id_to_check}'. This is a content restriction, not a bot detection issue.")
return 'handle_age_restriction_error'
# Unrecoverable video errors that should not be retried or treated as system failures.
unrecoverable_video_errors = [
"AGE_GATED_SIGN_IN", "MEMBERS_ONLY", "VIDEO_PROCESSING", "COPYRIGHT_REMOVAL",
"GEO_RESTRICTED", "VIDEO_UNAVAILABLE", "PRIVATE_VIDEO", "VIDEO_REMOVED"
]
if error_code in unrecoverable_video_errors:
logger.warning(f"Unrecoverable video error '{error_code}' detected for '{task_id_to_check}'. This is a content issue, not a system failure.")
return 'handle_unrecoverable_video_error'
# Fatal Thrift connection errors that should stop all processing.
if error_code == 'TRANSPORT_ERROR':
@ -646,6 +713,65 @@ def list_available_formats(token_data: dict, **context):
return []
def _resolve_generic_selector(selector: str, info_json_path: str, logger) -> str | list[str] | None:
"""
Uses yt-dlp to resolve a generic format selector into specific, numeric format ID(s).
Returns a numeric selector string (e.g., '18'), a list of IDs for '+' selectors
(e.g., ['299', '140']), or None if resolution fails.
"""
import subprocess
import shlex
try:
cmd = [
'yt-dlp',
'--print', 'format_id',
'-f', selector,
'--load-info-json', info_json_path,
]
copy_paste_cmd = ' '.join(shlex.quote(arg) for arg in cmd)
logger.info(f"Resolving generic selector '{selector}' with command: {copy_paste_cmd}")
process = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
if process.stderr:
# yt-dlp often prints warnings to stderr that are not fatal.
# e.g., "Requested format selector '...' contains no available formats"
logger.info(f"yt-dlp resolver STDERR for selector '{selector}':\n{process.stderr}")
if process.returncode != 0:
logger.error(f"yt-dlp resolver for selector '{selector}' failed with exit code {process.returncode}")
return None
output_ids = process.stdout.strip().split('\n')
output_ids = [fid for fid in output_ids if fid] # Remove empty lines
if not output_ids:
logger.warning(f"Selector '{selector}' resolved to no format IDs.")
return None
# yt-dlp might return '137+140' on one line, or '137\n140' on multiple.
# We need to handle both to get individual IDs.
final_ids = []
for fid in output_ids:
final_ids.extend(fid.split('+'))
# If the original selector was for merging (contained '+'), return individual IDs for separate downloads.
# Otherwise, yt-dlp has already chosen the best one from a fallback list, so we just use it.
if '+' in selector:
resolved_selector = final_ids
else:
resolved_selector = final_ids[0] # yt-dlp gives the single best choice
logger.info(f"Successfully resolved selector '{selector}' to '{resolved_selector}'.")
return resolved_selector
except Exception as e:
logger.error(f"An error occurred while resolving selector '{selector}': {e}", exc_info=True)
return None
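A usage sketch for the new helper above, assuming yt-dlp is on PATH and an info.json has already been written by the auth worker (paths and the selector are placeholders):

import logging

logger = logging.getLogger(__name__)

resolved = _resolve_generic_selector(
    "bestvideo[ext=mp4]+bestaudio[ext=m4a]",
    "/downloads/20251201_120000-abc123/info_abc123.json",
    logger,
)
if resolved is None:
    logger.warning("Selector could not be resolved; falling back to the raw selector.")
elif isinstance(resolved, list):
    logger.info("Merge selector resolved to separate downloads: %s", resolved)  # e.g. ['137', '140']
else:
    logger.info("Resolved to a single format id: %s", resolved)  # e.g. '18'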
@task
def download_and_probe(token_data: dict, available_formats: list[str], **context):
"""
@ -660,26 +786,33 @@ def download_and_probe(token_data: dict, available_formats: list[str], **context
try:
params = context['params']
info_json_path = token_data.get('info_json_path')
proxy = token_data.get('socks_proxy')
original_url = token_data.get('original_url') original_url = token_data.get('original_url')
# Extract proxy from filename, with fallback to token_data for backward compatibility
proxy = None
if info_json_path:
filename = os.path.basename(info_json_path)
proxy_match = re.search(r'_proxy_(.+)\.json$', filename)
if proxy_match:
sanitized_proxy = proxy_match.group(1)
# Reverse sanitization from auth worker (replace '---' with '://')
proxy = sanitized_proxy.replace('---', '://')
logger.info(f"Extracted proxy '{proxy}' from filename.")
if not proxy:
logger.warning("Proxy not found in filename. Falling back to 'socks_proxy' from token_data.")
proxy = token_data.get('socks_proxy')
download_dir = token_data.get('job_dir_path')
if not download_dir:
# Fallback for older runs or if job_dir_path is missing
download_dir = os.path.dirname(info_json_path)
- format_preset = params.get('download_format_preset', 'format_1')
- if format_preset == 'custom':
- download_format = params.get('download_format_custom')
- if not download_format:
- raise AirflowException("Format preset is 'custom' but no custom format string was provided.")
- elif format_preset == 'format_1':
- download_format = '18-dashy/18,140-dashy/140,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy'
- elif format_preset == 'format_2':
- download_format = '(299/298/137/136/135/134/133)-dashy'
- else:
- download_format = '18-dashy/18,140-dashy/140,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy'
+ download_format = params.get('download_format')
+ if not download_format:
+ raise AirflowException("The 'download_format' parameter is missing or empty.")
- output_template = params.get('output_path_template', "%(title)s [%(id)s].f%(format_id)s.%(ext)s")
+ output_template = params.get('output_path_template', "%(id)s.f%(format_id)s.%(ext)s")
full_output_path = os.path.join(download_dir, output_template)
retry_on_probe_failure = params.get('retry_on_probe_failure', False)
@ -706,15 +839,16 @@ def download_and_probe(token_data: dict, available_formats: list[str], **context
downloader = params.get('downloader', 'py')
cmd = ['ytops-client', 'download', downloader, '--load-info-json', info_json_path, '-f', format_selector]
- if proxy:
- cmd.extend(['--proxy', proxy])
if downloader == 'py':
+ if proxy:
+ cmd.extend(['--proxy', proxy])
cmd.extend(['--output-dir', download_dir])
# The 'py' tool maps many yt-dlp flags via --extra-ytdlp-args
# The 'py' tool maps many yt-dlp flags via --extra-ytdlp-args
- py_extra_args = []
- if params.get('fragment_retries'):
- py_extra_args.extend(['--fragment-retries', str(params['fragment_retries'])])
+ py_extra_args = ['--output', output_template, '--no-resize-buffer', '--buffer-size', '4M']
if params.get('socket_timeout'):
py_extra_args.extend(['--socket-timeout', str(params['socket_timeout'])])
if params.get('yt_dlp_test_mode'):
@ -727,12 +861,29 @@ def download_and_probe(token_data: dict, available_formats: list[str], **context
cmd.extend(['--extra-ytdlp-args', final_extra_args_str])
elif downloader == 'aria-rpc':
# For aria2c running on the host, the proxy (if also on the host) should be referenced via localhost.
# The user-agent is set by yt-dlp's extractor, not directly here. The default is Cobalt-based.
if proxy:
proxy_port_match = re.search(r':(\d+)$', proxy)
if proxy_port_match:
proxy_port = proxy_port_match.group(1)
aria_proxy = f"socks5://127.0.0.1:{proxy_port}"
cmd.extend(['--proxy', aria_proxy])
logger.info(f"Using translated proxy for host-based aria2c: {aria_proxy}")
else:
logger.warning(f"Could not parse port from proxy '{proxy}'. Passing it to aria2c as-is.")
cmd.extend(['--proxy', proxy])
# The remote-dir is the path relative to aria2c's working directory on the host.
# The output-dir is the container's local path to the same shared volume.
remote_dir = os.path.relpath(download_dir, '/opt/airflow/downloadfiles/videos')
cmd.extend([
'--aria-host', params.get('aria_host', '172.17.0.1'),
'--aria-port', str(params.get('aria_port', 6800)),
'--aria-secret', params.get('aria_secret'),
'--wait',
'--output-dir', download_dir,
+ '--remote-dir', remote_dir,
])
if 'dashy' in format_selector:
cmd.extend([
@ -743,9 +894,15 @@ def download_and_probe(token_data: dict, available_formats: list[str], **context
cmd.append('--cleanup')
elif downloader == 'cli':
- cmd.extend(['--output-dir', download_dir])
+ # Overwrite cmd to call yt-dlp directly
+ cmd = ['yt-dlp', '--load-info-json', info_json_path, '-f', format_selector]
+ if proxy:
+ cmd.extend(['--proxy', proxy])
# The 'cli' tool is the old yt-dlp wrapper, so it takes similar arguments.
- cli_extra_args = []
- if params.get('fragment_retries'):
- cli_extra_args.extend(['--fragment-retries', str(params['fragment_retries'])])
+ cli_extra_args = ['--output', full_output_path, '--no-resize-buffer', '--buffer-size', '4M']
if params.get('socket_timeout'):
cli_extra_args.extend(['--socket-timeout', str(params['socket_timeout'])])
if params.get('yt_dlp_test_mode'):
@ -754,11 +911,12 @@ def download_and_probe(token_data: dict, available_formats: list[str], **context
existing_extra = shlex.split(params.get('yt_dlp_extra_args') or '')
final_extra_args = existing_extra + cli_extra_args
if final_extra_args:
- cmd.extend(['--extra-ytdlp-args', shlex.join(final_extra_args)])
+ cmd.extend(final_extra_args)
copy_paste_cmd = ' '.join(shlex.quote(arg) for arg in cmd)
- logger.info(f"--- Preparing to execute ytops-client ---")
- logger.info(f"Full ytops-client command for format '{format_selector}':")
+ tool_name = 'yt-dlp' if downloader == 'cli' else 'ytops-client'
+ logger.info(f"--- Preparing to execute {tool_name} ---")
+ logger.info(f"Full {tool_name} command for format '{format_selector}':")
logger.info(copy_paste_cmd)
logger.info(f"-----------------------------------------")
process = subprocess.run(cmd, capture_output=True, text=True, timeout=3600)
@ -768,23 +926,44 @@ def download_and_probe(token_data: dict, available_formats: list[str], **context
if process.stderr:
logger.info(f"Download tool STDERR for format '{format_selector}':\n{process.stderr}")
- if process.returncode != 0:
+ if process.returncode != 0 or "ERROR:" in process.stderr:
logger.error(f"Download tool failed for format '{format_selector}' with exit code {process.returncode}")
- raise AirflowException(f"Download command failed for format '{format_selector}'. See logs for details.")
+ if "ERROR:" in process.stderr and process.returncode == 0:
+ logger.error("Detected 'ERROR:' in stderr, treating as failure despite exit code 0.")
+ # Pass stderr in the exception for better parsing in the outer try/except block
+ raise AirflowException(f"Download command failed for format '{format_selector}'. Stderr: {process.stderr}")
output_files = []
- for line in process.stdout.strip().split('\n'):
- # For aria-rpc, parse "Download and merge successful: <path>" or "Download successful: <path>"
- match = re.search(r'successful: (.+)', line)
- if match:
- filepath = match.group(1).strip()
- if os.path.exists(filepath):
- output_files.append(filepath)
- else:
- logger.warning(f"File path from aria-rpc output does not exist locally: '{filepath}'")
- # For py/cli, it's just the path
- elif os.path.exists(line.strip()):
- output_files.append(line.strip())
+ if downloader == 'cli':
+ # Parse yt-dlp's verbose output to find the final filename
+ final_filename = None
+ for line in process.stdout.strip().split('\n'):
+ # Case 1: Simple download, no merge
+ dest_match = re.search(r'\[download\] Destination: (.*)', line)
+ if dest_match:
+ final_filename = dest_match.group(1).strip()
+ # Case 2: Formats are merged into a new file. This path is absolute if -o is absolute.
+ merge_match = re.search(r'\[Merger\] Merging formats into "(.*)"', line)
+ if merge_match:
final_filename = merge_match.group(1).strip()
if final_filename and os.path.exists(final_filename):
output_files.append(final_filename)
else: # Logic for 'py' and 'aria-rpc'
for line in process.stdout.strip().split('\n'):
# For aria-rpc, parse "Download and merge successful: <path>" or "Download successful: <path>"
match = re.search(r'successful: (.+)', line)
if match:
filepath = match.group(1).strip()
if os.path.exists(filepath):
output_files.append(filepath)
else:
logger.warning(f"File path from aria-rpc output does not exist locally: '{filepath}'")
# For py, it's just the path
elif os.path.exists(line.strip()):
output_files.append(line.strip())
if not params.get('yt_dlp_test_mode') and not output_files:
raise AirflowException(f"Download for format '{format_selector}' finished but no output files were found or exist.")
@ -797,7 +976,7 @@ def download_and_probe(token_data: dict, available_formats: list[str], **context
"""Probes a file with ffmpeg to check for corruption."""
logger.info(f"Probing downloaded file: {filename}")
try:
- subprocess.run(['ffmpeg', '-v', 'error', '-i', filename, '-f', 'null', '-'], check=True, capture_output=True, text=True)
+ subprocess.run(['ffmpeg', '-v', 'error', '-sseof', '-10', '-i', filename, '-c', 'copy', '-f', 'null', '-'], check=True, capture_output=True, text=True)
logger.info(f"SUCCESS: Probe confirmed valid media file: {filename}")
except subprocess.CalledProcessError as e:
logger.error(f"ffmpeg probe failed for '{filename}'. File may be corrupt.")
@ -864,30 +1043,58 @@ def download_and_probe(token_data: dict, available_formats: list[str], **context
if not formats_to_download_initial:
raise AirflowException("No valid download format selectors were found after parsing.")
- # --- Filter requested formats against available formats ---
+ # --- Filter and resolve requested formats ---
final_formats_to_download = []
if not available_formats:
- logger.warning("List of available formats is empty. Will attempt to download all requested formats without validation.")
- final_formats_to_download = formats_to_download_initial
- else:
- for selector in formats_to_download_initial:
- # A selector can be '140' or '299/298/137' or '140-dashy'
+ logger.warning("List of available formats is empty. Cannot validate numeric selectors, but will attempt to resolve generic selectors.")
+ for selector in formats_to_download_initial:
+ # A selector is considered generic if it contains keywords like 'best' or filter brackets '[]'.
+ is_generic = bool(re.search(r'(best|\[|\])', selector))
if is_generic:
resolved_selector = _resolve_generic_selector(selector, info_json_path, logger)
if resolved_selector:
# The resolver returns a list for '+' selectors, or a string for others.
resolved_formats = resolved_selector if isinstance(resolved_selector, list) else [resolved_selector]
for res_format in resolved_formats:
# Prefer -dashy version if available and the format is a simple numeric ID
if res_format.isdigit() and f"{res_format}-dashy" in available_formats:
final_format = f"{res_format}-dashy"
logger.info(f"Resolved format '{res_format}' from selector '{selector}'. Preferred '-dashy' version: '{final_format}'.")
else:
final_format = res_format
# Validate the chosen format against available formats
if available_formats:
individual_ids = re.split(r'[/+]', final_format)
is_available = any(fid in available_formats for fid in individual_ids)
if is_available:
final_formats_to_download.append(final_format)
else:
logger.warning(f"Resolved format '{final_format}' (from '{selector}') contains no available formats. Skipping.")
else:
# Cannot validate, so we trust the resolver's output.
final_formats_to_download.append(final_format)
else:
logger.warning(f"Could not resolve generic selector '{selector}' using yt-dlp. Skipping.")
else:
# This is a numeric-based selector (e.g., '140' or '299/298' or '140-dashy').
# Validate it against the available formats.
if not available_formats:
logger.warning(f"Cannot validate numeric selector '{selector}' because available formats list is empty. Assuming it's valid.")
final_formats_to_download.append(selector)
continue
individual_ids = re.split(r'[/+]', selector)
- is_available = any(fid in available_formats for fid in individual_ids)
# Extract the numeric part of the format ID for checking against available_formats
is_available = False
for fid in individual_ids:
numeric_id_match = re.match(r'^\d+', fid)
if numeric_id_match:
numeric_id = numeric_id_match.group(0)
if numeric_id in available_formats:
is_available = True
break # Found a match, no need to check other parts of the selector
if is_available:
final_formats_to_download.append(selector)
else:
- logger.warning(f"Requested format selector '{selector}' contains no available formats. Skipping.")
+ logger.warning(f"Requested numeric format selector '{selector}' contains no available formats. Skipping.")
if not final_formats_to_download:
raise AirflowException("None of the requested formats are available for this video.")
@ -909,6 +1116,11 @@ def download_and_probe(token_data: dict, available_formats: list[str], **context
logger.warning(f"Probe failed for {len(failed_files)} file(s). Attempting one re-download for failed files...")
+ delay_between_formats = params.get('delay_between_formats_s', 0)
+ if delay_between_formats > 0:
+ logger.info(f"Waiting {delay_between_formats}s before re-download attempt...")
+ time.sleep(delay_between_formats)
format_ids_to_retry = []
# Since each download is now for a specific selector and the output template
# includes the format_id, we can always attempt to extract the format_id
@ -945,7 +1157,7 @@ def download_and_probe(token_data: dict, available_formats: list[str], **context
if not final_success_list:
raise AirflowException("Download and probe process completed but produced no valid files.")
- if params.get('yt_dlp_cleanup_mode', True):
+ if params.get('yt_dlp_cleanup_mode', False):
logger.info(f"Cleanup mode is enabled. Creating .empty files and deleting originals for {len(final_success_list)} files.")
for f in final_success_list:
try:
@ -965,6 +1177,26 @@ def download_and_probe(token_data: dict, available_formats: list[str], **context
if not video_id:
logger.error(f"Could not extract video_id from URL '{original_url}' for final move. Skipping.")
else:
# --- Rename info.json to a simple format before moving ---
path_to_info_json_for_move = info_json_path # Default to original path
try:
# info_json_path is the full path to the original info.json
if info_json_path and os.path.exists(info_json_path):
new_info_json_name = f"info_{video_id}.json"
new_info_json_path = os.path.join(os.path.dirname(info_json_path), new_info_json_name)
if info_json_path != new_info_json_path:
logger.info(f"Renaming '{info_json_path}' to '{new_info_json_path}' for final delivery.")
os.rename(info_json_path, new_info_json_path)
path_to_info_json_for_move = new_info_json_path
else:
logger.info("info.json already has the simple name. No rename needed.")
else:
logger.warning("Could not find info.json to rename before moving.")
except Exception as rename_e:
logger.error(f"Failed to rename info.json before move: {rename_e}", exc_info=True)
# --- End of rename logic ---
source_dir = download_dir # This is the job_dir_path
# Group downloads into 10-minute batch folders based on completion time.
@ -982,18 +1214,65 @@ def download_and_probe(token_data: dict, available_formats: list[str], **context
logger.warning(f"Destination '{final_dir_path}' already exists. It will be removed and replaced.")
shutil.rmtree(final_dir_path)
- os.rename(source_dir, final_dir_path)
- logger.info(f"Successfully moved job to '{final_dir_path}'.")
+ # Create the destination directory and move only the essential files, then clean up the source.
+ # This ensures no temporary or junk files are carried over.
os.makedirs(final_dir_path)
# 1. Move the info.json file
if path_to_info_json_for_move and os.path.exists(path_to_info_json_for_move):
shutil.move(path_to_info_json_for_move, final_dir_path)
logger.info(f"Moved '{os.path.basename(path_to_info_json_for_move)}' to destination.")
# 2. Move the media files (or their .empty placeholders)
files_to_move = []
if params.get('yt_dlp_cleanup_mode', False):
files_to_move = [f"{f}.empty" for f in final_success_list]
else:
files_to_move = final_success_list
for f in files_to_move:
if os.path.exists(f):
shutil.move(f, final_dir_path)
logger.info(f"Moved '{os.path.basename(f)}' to destination.")
else:
logger.warning(f"File '{f}' expected but not found for moving.")
# 3. Clean up the original source directory
logger.info(f"Cleaning up original source directory '{source_dir}'")
shutil.rmtree(source_dir)
logger.info(f"Successfully moved job to '{final_dir_path}' and cleaned up source.")
except Exception as e:
logger.error(f"Failed to move completed job directory: {e}", exc_info=True)
# Do not fail the task for a move error, just log it.
return final_success_list
except Exception as e:
- if 'HTTP Error 403: Forbidden' in str(e):
- logger.warning("Detected 'HTTP Error 403: Forbidden' in download error. Pushing details to XCom for branching.")
- ti = context['task_instance']
- ti.xcom_push(key='download_error_details', value={'error_code': 'HTTP_403_FORBIDDEN', 'error_message': str(e)})
+ ti = context['task_instance']
+ error_message = str(e)
+ error_code = "DOWNLOAD_FAILED"
+ msg_lower = error_message.lower()
unrecoverable_patterns = {
"AGE_GATED_SIGN_IN": ['sign in to confirm your age'],
"MEMBERS_ONLY": ['members-only content'],
"VIDEO_PROCESSING": ['processing this video'],
"COPYRIGHT_REMOVAL": ['copyright'],
"GEO_RESTRICTED": ['in your country'],
"PRIVATE_VIDEO": ['private video'],
"VIDEO_REMOVED": ['video has been removed'],
"VIDEO_UNAVAILABLE": ['video unavailable'],
"HTTP_403_FORBIDDEN": ['http error 403: forbidden']
}
for code, patterns in unrecoverable_patterns.items():
if any(p in msg_lower for p in patterns):
error_code = code
break
# Always push details to XCom for the branch operator to inspect.
error_details = {'error_code': error_code, 'error_message': error_message}
ti.xcom_push(key='download_error_details', value=error_details)
raise AirflowException(f"Download and probe failed: {e}") from e raise AirflowException(f"Download and probe failed: {e}") from e
@task
@ -1256,6 +1535,12 @@ def continue_processing_loop(**context):
logger.info(f"DAG run '{dag_run.run_id}' does not appear to be triggered by the dispatcher. Stopping processing loop.")
return
dispatcher_dag_id = 'ytdlp_ops_v01_dispatcher'
dag_model = DagModel.get_dagmodel(dispatcher_dag_id)
if dag_model and dag_model.is_paused:
logger.warning(f"Dispatcher DAG '{dispatcher_dag_id}' is paused. Stopping processing loop.")
return
# Create a new unique run_id for the dispatcher.
# Using a timestamp and UUID ensures the ID is unique and does not grow in length over time,
# preventing database errors.
@ -1270,7 +1555,7 @@ def continue_processing_loop(**context):
logger.info(f"Worker finished successfully. Triggering a new dispatcher ('{new_dispatcher_run_id}') to continue the loop.")
trigger_dag(
- dag_id='ytdlp_ops_v01_dispatcher',
+ dag_id=dispatcher_dag_id,
run_id=new_dispatcher_run_id,
conf=conf_to_pass,
replace_microseconds=False
@ -1292,10 +1577,15 @@ def handle_retry_failure_branch(task_id_to_check: str, **context):
error_message = error_details.get('error_message', '').strip()
error_code = error_details.get('error_code', '').strip()
- # Check if this is an age confirmation error - should not stop the loop
- if "Sign in to confirm your age" in error_message or "confirm your age" in error_message.lower():
- logger.info(f"Age confirmation error detected on retry from '{task_id_to_check}'. This is a content restriction, not a bot detection issue.")
- return 'handle_age_restriction_error'
+ # Unrecoverable video errors that should not be retried or treated as system failures.
+ unrecoverable_video_errors = [
+ "AGE_GATED_SIGN_IN", "MEMBERS_ONLY", "VIDEO_PROCESSING", "COPYRIGHT_REMOVAL",
+ "GEO_RESTRICTED", "VIDEO_UNAVAILABLE", "PRIVATE_VIDEO", "VIDEO_REMOVED"
]
if error_code in unrecoverable_video_errors:
logger.warning(f"Unrecoverable video error '{error_code}' detected on retry for '{task_id_to_check}'.")
return 'handle_unrecoverable_video_error'
if error_code == 'TRANSPORT_ERROR':
logger.error(f"Fatal Thrift connection error on retry from '{task_id_to_check}'.")
@ -1337,6 +1627,17 @@ def handle_download_failure_branch(**context):
# The full task_id for download_and_probe is 'download_processing.download_and_probe'
download_error_details = ti.xcom_pull(task_ids='download_processing.download_and_probe', key='download_error_details')
if download_error_details:
error_code = download_error_details.get('error_code')
unrecoverable_video_errors = [
"AGE_GATED_SIGN_IN", "MEMBERS_ONLY", "VIDEO_PROCESSING", "COPYRIGHT_REMOVAL",
"GEO_RESTRICTED", "VIDEO_UNAVAILABLE", "PRIVATE_VIDEO", "VIDEO_REMOVED",
"HTTP_403_FORBIDDEN"
]
if error_code in unrecoverable_video_errors:
logger.warning(f"Unrecoverable video error '{error_code}' during download. Skipping.")
return 'handle_unrecoverable_video_error'
if policy == 'retry_with_new_token':
logger.info("Download failed. Policy is to retry with a new token. Branching to retry logic.")
return 'retry_logic_for_download'
@ -1366,6 +1667,58 @@ def coalesce_token_data(get_token_result=None, retry_get_token_result=None):
raise AirflowException("Could not find a successful token result from any attempt.")
@task
def handle_unrecoverable_video_error(**context):
"""
Handles errors for videos that are unavailable (private, removed, etc.).
These are not system failures, so the URL is logged to a 'skipped' queue
and the processing loop continues without marking the run as failed.
"""
params = context['params']
ti = context['task_instance']
url = params.get('url_to_process', 'unknown')
# Collect error details from the failed task
error_details = {}
auth_error = ti.xcom_pull(task_ids='initial_attempt.get_token', key='error_details')
auth_retry_error = ti.xcom_pull(task_ids='retry_logic.retry_get_token', key='error_details')
download_error = ti.xcom_pull(task_ids='download_processing.download_and_probe', key='download_error_details')
if auth_retry_error: error_details = auth_retry_error
elif auth_error: error_details = auth_error
elif download_error: error_details = download_error
error_code = error_details.get('error_code', 'UNKNOWN_VIDEO_ERROR')
error_message = error_details.get('error_message', 'Video is unavailable for an unknown reason.')
logger.warning(f"Skipping URL '{url}' due to unrecoverable video error: {error_code} - {error_message}")
result_data = {
'status': 'skipped',
'end_time': time.time(),
'url': url,
'dag_run_id': context['dag_run'].run_id,
'reason': error_code,
'details': error_message,
'error_details': error_details
}
try:
client = _get_redis_client(params['redis_conn_id'])
skipped_queue = f"{params['queue_name']}_skipped"
progress_queue = f"{params['queue_name']}_progress"
with client.pipeline() as pipe:
pipe.hset(skipped_queue, url, json.dumps(result_data))
pipe.hdel(progress_queue, url)
pipe.execute()
logger.info(f"Stored skipped result for URL '{url}' in '{skipped_queue}' and removed from progress queue.")
except Exception as e:
logger.error(f"Could not report skipped video to Redis: {e}", exc_info=True)
@task
def report_bannable_and_continue(**context):
"""
@ -1428,71 +1781,6 @@ def report_bannable_and_continue(**context):
logger.error(f"Could not report bannable error to Redis: {e}", exc_info=True)
@task
def handle_age_restriction_error(**context):
"""
Handles age restriction errors specifically. These are content restrictions
that cannot be bypassed by using different accounts, so we report the failure
and continue the processing loop rather than stopping it.
"""
params = context['params']
ti = context['task_instance']
url = params.get('url_to_process', 'unknown')
# Collect error details
error_details = {}
first_token_task_id = 'get_token'
retry_token_task_id = 'retry_get_token'
first_token_error = ti.xcom_pull(task_ids=first_token_task_id, key='error_details')
retry_token_error = ti.xcom_pull(task_ids=retry_token_task_id, key='error_details')
# Use the most recent error details
if retry_token_error:
error_details = retry_token_error
elif first_token_error:
error_details = first_token_error
logger.error(f"Age restriction error for URL '{url}'. This content requires age confirmation and cannot be bypassed.")
# Report failure to Redis so the URL can be marked as failed
try:
client = _get_redis_client(params['redis_conn_id'])
# Update client-specific stats
try:
machine_id = params.get('machine_id') or socket.gethostname()
_update_client_stats(client, params.get('clients', ''), 'failure', url, machine_id, context['dag_run'].run_id)
except Exception as e:
logger.error(f"Could not update client stats on age restriction error: {e}", exc_info=True)
result_data = {
'status': 'failed',
'end_time': time.time(),
'url': url,
'dag_run_id': context['dag_run'].run_id,
'error': 'age_restriction',
'error_message': 'Content requires age confirmation',
'error_details': error_details
}
result_queue = f"{params['queue_name']}_result"
fail_queue = f"{params['queue_name']}_fail"
progress_queue = f"{params['queue_name']}_progress"
with client.pipeline() as pipe:
pipe.hset(result_queue, url, json.dumps(result_data))
pipe.hset(fail_queue, url, json.dumps(result_data))
pipe.hdel(progress_queue, url)
pipe.execute()
logger.info(f"Stored age restriction error for URL '{url}' in '{result_queue}' and '{fail_queue}'.")
except Exception as e:
logger.error(f"Could not report age restriction error to Redis: {e}", exc_info=True)
# This is NOT a fatal error for the processing loop - we just continue with the next URL
# =============================================================================
# DAG Definition with TaskGroups
# =============================================================================
@ -1533,28 +1821,23 @@ with DAG(
description="Policy for handling download or probe failures."
),
'request_params_json': Param('{}', type="string", title="[Worker Param] Request Params JSON", description="JSON string with request parameters for the token service."),
+ 'language_code': Param('en-US', type="string", title="[Worker Param] Language Code", description="The language code (e.g., 'en-US', 'de-DE') to use for the YouTube request headers."),
'retry_on_probe_failure': Param(False, type="boolean"),
'skip_probe': Param(False, type="boolean", title="[Worker Param] Skip Probe", description="If True, skips the ffmpeg probe of downloaded files."),
- 'yt_dlp_cleanup_mode': Param(True, type="boolean", title="[Worker Param] yt-dlp Cleanup Mode", description="If True, creates a .empty file and deletes the original media file after successful download and probe."),
+ 'yt_dlp_cleanup_mode': Param(False, type="boolean", title="[Worker Param] yt-dlp Cleanup Mode", description="If True, creates a .empty file and deletes the original media file after successful download and probe."),
'auto_create_new_accounts_on_exhaustion': Param(True, type="boolean"),
- 'fragment_retries': Param(2, type="integer", title="[Worker Param] Fragment Retries", description="Number of retries for a fragment before giving up. Default is 2 to fail fast on expired tokens."),
'delay_between_formats_s': Param(15, type="integer", title="[Worker Param] Delay Between Formats (s)", description="Delay in seconds between downloading each format when multiple formats are specified. A 22s wait may be effective for batch downloads, while 6-12s may suffice if cookies are refreshed regularly."),
'yt_dlp_test_mode': Param(False, type="boolean", title="[Worker Param] yt-dlp Test Mode", description="If True, runs yt-dlp with --test flag (dry run without downloading)."),
'socket_timeout': Param(15, type="integer", title="[Worker Param] Socket Timeout", description="Timeout in seconds for socket operations."),
- 'download_format_preset': Param(
- 'format_1',
+ 'download_format': Param(
+ 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best',
type="string",
- enum=['format_1', 'format_2', 'custom'],
- title="Download Format Preset",
- description="Select a predefined format string or choose 'custom'.\nformat_1: 18-dashy/18,140-dashy/140,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy\nformat_2: (299/298/137/136/135/134/133)-dashy"
- ),
- 'download_format_custom': Param(
- '18-dashy/18,140-dashy/140,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy',
- type="string",
- title="Custom Download Format",
- description="Custom yt-dlp format string. Used when preset is 'custom'. To download multiple formats, provide a comma-separated list of format IDs (e.g., '137,140')."
+ title="[Worker Param] Download Format",
+ description="Custom yt-dlp format string. Common presets: [1] 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best' (Default, best quality MP4). [2] '18-dashy/18,140-dashy/140,133-dashy/134-dashy/136-dashy/137-dashy/250-dashy/298-dashy/299-dashy' (Legacy formats). [3] '299-dashy/298-dashy/250-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy' (High-framerate formats)."
),
'downloader': Param(
- 'py',
+ 'cli',
type="string",
enum=['py', 'aria-rpc', 'cli'],
title="Download Tool",
@ -1564,12 +1847,12 @@ with DAG(
'aria_port': Param(6800, type="integer", title="Aria2c Port", description="For 'aria-rpc' downloader: Port of the aria2c RPC server."),
'aria_secret': Param('SQGCQPLVFQIASMPNPOJYLVGJYLMIDIXDXAIXOTX', type="string", title="Aria2c Secret", description="For 'aria-rpc' downloader: Secret token."),
'yt_dlp_extra_args': Param(
- '--no-resize-buffer --buffer-size 4M --min-sleep-interval 5 --max-sleep-interval 10',
+ '',
type=["string", "null"],
title="Extra yt-dlp arguments",
),
# --- Manual Run / Internal Parameters ---
- 'manual_url_to_process': Param('iPwdia3gAnk', type=["string", "null"], title="[Manual Run] URL to Process", description="For manual runs, provide a single YouTube URL to process. This is ignored if triggered by the dispatcher."),
+ 'manual_url_to_process': Param('iPwdia3gAnk', type=["string", "null"], title="[Manual Run] URL to Process", description="For manual runs, provide a single YouTube URL, or the special value 'PULL_FROM_QUEUE' to pull one URL from the Redis inbox. This is ignored if triggered by the dispatcher."),
'url_to_process': Param(None, type=["string", "null"], title="[Internal] URL from Dispatcher", description="This parameter is set by the dispatcher DAG and should not be used for manual runs."),
'worker_queue': Param(None, type=["string", "null"], title="[Internal] Worker Queue", description="This parameter is set by the dispatcher DAG and should not be used for manual runs."),
}
@ -1583,7 +1866,7 @@
report_failure_and_stop_task = report_failure_and_stop()
report_failure_task = report_failure_and_continue()
continue_loop_task = continue_processing_loop()
- age_restriction_task = handle_age_restriction_error()
+ unrecoverable_video_error_task = handle_unrecoverable_video_error()
report_bannable_and_continue_task = report_bannable_and_continue()
# --- Task Group 1: Initial Attempt ---
@ -1600,7 +1883,7 @@
)
first_token_attempt >> initial_branch_task
- initial_branch_task >> [fatal_error_task, ban_and_report_immediately_task, age_restriction_task, report_bannable_and_continue_task]
+ initial_branch_task >> [fatal_error_task, ban_and_report_immediately_task, unrecoverable_video_error_task, report_bannable_and_continue_task]
# --- Task Group 2: Retry Logic ---
with TaskGroup("retry_logic", tooltip="Retry logic with account management") as retry_logic_group:
@ -1650,7 +1933,7 @@
direct_retry_account_task >> coalesced_retry_data
coalesced_retry_data >> retry_token_task
retry_token_task >> retry_branch_task
- retry_branch_task >> [fatal_error_task, report_failure_task, ban_after_retry_report_task, age_restriction_task, report_bannable_and_continue_task]
+ retry_branch_task >> [fatal_error_task, report_failure_task, ban_after_retry_report_task, unrecoverable_video_error_task, report_bannable_and_continue_task]
ban_after_retry_report_task >> report_failure_and_stop_task
# --- Task Group 3: Download and Processing ---
@ -1759,18 +2042,18 @@
# --- DAG Dependencies between TaskGroups ---
# Initial attempt can lead to retry logic or direct failure
- initial_branch_task >> [retry_logic_group, fatal_error_task, ban_and_report_immediately_task, age_restriction_task, report_bannable_and_continue_task]
+ initial_branch_task >> [retry_logic_group, fatal_error_task, ban_and_report_immediately_task, unrecoverable_video_error_task, report_bannable_and_continue_task]
# Ban and report immediately leads to failure reporting
ban_and_report_immediately_task >> report_failure_and_stop_task
- # Age restriction error leads to failure reporting and continues the loop
- age_restriction_task >> continue_loop_task
+ # Unrecoverable/bannable errors that don't stop the loop should continue processing
+ unrecoverable_video_error_task >> continue_loop_task
report_bannable_and_continue_task >> continue_loop_task
report_failure_task >> continue_loop_task
# Connect download failure branch to the new retry group
- download_branch_task >> [retry_logic_for_download_group, report_failure_task, fatal_error_task]
+ download_branch_task >> [retry_logic_for_download_group, report_failure_task, fatal_error_task, unrecoverable_video_error_task]
# Connect success paths to the coalescing tasks
download_task >> final_files
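As a worked illustration of how the new download_format default interacts with the resolution logic in download_and_probe above (the resolved IDs and the available_formats list are assumptions for the example):
import re

selector = 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best'
assert re.search(r'(best|\[|\])', selector)                 # generic -> resolved via yt-dlp first
resolved = ['137', '140']                                   # assumed output of _resolve_generic_selector
available_formats = ['18', '137', '137-dashy', '140']
final = [f"{fid}-dashy" if f"{fid}-dashy" in available_formats else fid for fid in resolved]
# final == ['137-dashy', '140']; each entry becomes its own download attempt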
View File
@ -18,7 +18,7 @@ from airflow.utils.dates import days_ago
from airflow.api.common.trigger_dag import trigger_dag
from airflow.models.dagrun import DagRun
from airflow.models.dag import DagModel
- from datetime import timedelta
+ from datetime import timedelta, datetime
import logging
import random
import time
@ -35,41 +35,6 @@ from thrift.transport import TSocket, TTransport
# Configure logging
logger = logging.getLogger(__name__)
DEFAULT_REQUEST_PARAMS_JSON = """{
"context_reuse_policy": {
"enabled": true,
"max_age_seconds": 86400,
"reuse_visitor_id": true,
"reuse_cookies": true
},
"token_generation_strategy": {
"youtubei_js": {
"generate_po_token": true,
"generate_gvs_token": true
}
},
"ytdlp_params": {
"use_curl_prefetch": false,
"token_supplement_strategy": {
"youtubepot_bgutilhttp_extractor": {
"enabled": true
}
},
"visitor_id_override": {
"enabled": true
}
},
"session_params": {
"lang": "en-US",
"location": "US",
"deviceCategory": "MOBILE",
"user_agents": {
"youtubei_js": "Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)",
"yt_dlp": "Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)"
}
}
}"""
# Default settings
DEFAULT_REDIS_CONN_ID = 'redis_default'
DEFAULT_TOTAL_WORKERS = 8
@ -188,6 +153,17 @@ def orchestrate_workers_ignition_callable(**context):
dag_run_id = context['dag_run'].run_id
total_triggered = 0
# --- Generate a consistent timestamped prefix for this orchestrator run ---
# This ensures all workers spawned from this run use the same set of accounts.
final_account_pool_prefix = params['account_pool']
if params.get('prepend_client_to_account') and params.get('account_pool_size') is not None:
clients_str = params.get('clients', '')
primary_client = clients_str.split(',')[0].strip() if clients_str else 'unknown'
# Use a timestamp from the orchestrator's run for consistency
timestamp = datetime.now().strftime('%Y%m%d%H%M%S')
final_account_pool_prefix = f"{params['account_pool']}_{timestamp}_{primary_client}"
logger.info(f"Generated consistent account prefix for this run: '{final_account_pool_prefix}'")
for i, bunch in enumerate(bunches):
logger.info(f"--- Triggering Bunch {i+1}/{len(bunches)} (contains {len(bunch)} dispatcher(s)) ---")
for j, _ in enumerate(bunch):
@ -196,6 +172,8 @@ def orchestrate_workers_ignition_callable(**context):
# Pass all orchestrator params to the dispatcher, which will then pass them to the worker.
conf_to_pass = {p: params[p] for p in params}
# Override account_pool with the generated prefix
conf_to_pass['account_pool'] = final_account_pool_prefix
logger.info(f"Triggering dispatcher {j+1}/{len(bunch)} in bunch {i+1} (run {total_triggered + 1}/{total_workers}) (Run ID: {run_id})") logger.info(f"Triggering dispatcher {j+1}/{len(bunch)} in bunch {i+1} (run {total_triggered + 1}/{total_workers}) (Run ID: {run_id})")
logger.debug(f"Full conf for dispatcher run {run_id}: {conf_to_pass}") logger.debug(f"Full conf for dispatcher run {run_id}: {conf_to_pass}")
@ -294,17 +272,12 @@ with DAG(
"'proceed_loop_under_manual_inspection': **BEWARE: MANUAL SUPERVISION REQUIRED.** Marks the URL as failed but continues the processing loop. Use this only when you can manually intervene by pausing the dispatcher DAG or creating a lock file (`/opt/airflow/inputfiles/AIRFLOW.PREVENT_URL_PULL.lockfile`) to prevent a runaway failure loop." "'proceed_loop_under_manual_inspection': **BEWARE: MANUAL SUPERVISION REQUIRED.** Marks the URL as failed but continues the processing loop. Use this only when you can manually intervene by pausing the dispatcher DAG or creating a lock file (`/opt/airflow/inputfiles/AIRFLOW.PREVENT_URL_PULL.lockfile`) to prevent a runaway failure loop."
"'stop_loop_on_auth_proceed_on_download_error': **(Default)** Stops the loop on an authentication/token error (like 'stop_loop'), but continues the loop on a download/probe error (like 'proceed...')." "'stop_loop_on_auth_proceed_on_download_error': **(Default)** Stops the loop on an authentication/token error (like 'stop_loop'), but continues the loop on a download/probe error (like 'proceed...')."
), ),
'request_params_json': Param('{}', type="string", title="[Worker Param] Request Params JSON", description="JSON string with request parameters for the token service."), 'request_params_json': Param('{}', type="string", title="[Worker Param] Request Params JSON", description="JSON string with per-request parameters to override server defaults. Can be a full JSON object or comma-separated key=value pairs (e.g., 'session_params.location=DE,ytdlp_params.skip_cache=true')."),
'language_code': Param('en-US', type="string", title="[Worker Param] Language Code", description="The language code (e.g., 'en-US', 'de-DE') to use for the YouTube request headers."),
'redis_conn_id': Param(DEFAULT_REDIS_CONN_ID, type="string", description="[Worker Param] Airflow Redis connection ID."), 'redis_conn_id': Param(DEFAULT_REDIS_CONN_ID, type="string", description="[Worker Param] Airflow Redis connection ID."),
'clients': Param( 'clients': Param(
'tv_simply', 'tv_simply',
type="string", type="string",
enum=[
'tv_simply',
'mweb',
'tv',
'custom',
],
title="[Worker Param] Clients", title="[Worker Param] Clients",
description="[Worker Param] Comma-separated list of clients for token generation. Full list: web, web_safari, web_embedded, web_music, web_creator, mweb, web_camoufox, web_safari_camoufox, web_embedded_camoufox, web_music_camoufox, web_creator_camoufox, mweb_camoufox, android, android_music, android_creator, android_vr, ios, ios_music, ios_creator, tv, tv_simply, tv_embedded. See DAG documentation for details." description="[Worker Param] Comma-separated list of clients for token generation. Full list: web, web_safari, web_embedded, web_music, web_creator, mweb, web_camoufox, web_safari_camoufox, web_embedded_camoufox, web_music_camoufox, web_creator_camoufox, mweb_camoufox, android, android_music, android_creator, android_vr, ios, ios_music, ios_creator, tv, tv_simply, tv_embedded. See DAG documentation for details."
), ),
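The new request_params_json description above also allows a comma-separated key=value shorthand; the parser for it is not shown in this diff, so the following is only a hypothetical sketch of how such dotted overrides could be expanded into a nested dict (function name and behaviour are assumptions):
def expand_overrides(spec: str) -> dict:
    """Hypothetical helper: turn 'a.b=1,c.d=true' style pairs into a nested dict."""
    result: dict = {}
    for pair in filter(None, (p.strip() for p in spec.split(','))):
        dotted, _, raw = pair.partition('=')
        value = {'true': True, 'false': False}.get(raw.strip().lower(), raw.strip())
        node = result
        *parents, leaf = dotted.strip().split('.')
        for key in parents:
            node = node.setdefault(key, {})
        node[leaf] = value
    return result

# expand_overrides('session_params.location=DE,ytdlp_params.skip_cache=true')
# -> {'session_params': {'location': 'DE'}, 'ytdlp_params': {'skip_cache': True}}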
View File
@ -249,27 +249,20 @@ with DAG(
'delay_between_formats_s': Param(15, type="integer", title="[Worker Param] Delay Between Formats (s)", description="Delay in seconds between downloading each format when multiple formats are specified. A 22s wait may be effective for batch downloads, while 6-12s may suffice if cookies are refreshed regularly."),
'yt_dlp_test_mode': Param(False, type="boolean", title="[Worker Param] yt-dlp Test Mode", description="If True, runs yt-dlp with --test flag (dry run without downloading)."),
'skip_probe': Param(True, type="boolean", title="[Worker Param] Skip Probe", description="If True, skips the ffmpeg probe of downloaded files."),
- 'yt_dlp_cleanup_mode': Param(True, type="boolean", title="[Worker Param] yt-dlp Cleanup Mode", description="If True, creates a .empty file and deletes the original media file after successful download and probe."),
+ 'yt_dlp_cleanup_mode': Param(False, type="boolean", title="[Worker Param] yt-dlp Cleanup Mode", description="If True, creates a .empty file and deletes the original media file after successful download and probe."),
'fragment_retries': Param(2, type="integer", title="[Worker Param] Fragment Retries", description="Number of retries for a fragment before giving up."),
'limit_rate': Param('5M', type=["string", "null"], title="[Worker Param] Limit Rate", description="Download speed limit (e.g., 50K, 4.2M)."),
'socket_timeout': Param(15, type="integer", title="[Worker Param] Socket Timeout", description="Timeout in seconds for socket operations."),
'min_sleep_interval': Param(5, type="integer", title="[Worker Param] Min Sleep Interval", description="Minimum time to sleep between downloads (seconds)."),
'max_sleep_interval': Param(10, type="integer", title="[Worker Param] Max Sleep Interval", description="Maximum time to sleep between downloads (seconds)."),
- 'download_format_preset': Param(
- 'formats_2',
+ 'download_format': Param(
+ 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best',
type="string",
- enum=['best_audio', 'formats_0', 'formats_2', 'formats_3', 'custom'],
- title="[Worker Param] Download Format Preset",
- description="Select a predefined format string or choose 'custom' to use the value from 'Custom Download Format'.\nformats_0: 18,140\nformats_2: 18,140-dashy,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy\nformats_3: 18,599,139,140,141,160/269,133/229,134/230,135/231,136/232,137/270,298/311,299/318"
- ),
- 'download_format_custom': Param(
- '18,140-dashy,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy',
- type="string",
- title="[Worker Param] Custom Download Format",
- description="Custom yt-dlp format string. Used when preset is 'custom'. E.g., 'ba[ext=m4a]/bestaudio/best'."
+ title="[Worker Param] Download Format",
+ description="Custom yt-dlp format string. Common presets: [1] 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best' (Default, best quality MP4). [2] '18-dashy/18,140-dashy/140,133-dashy/134-dashy/136-dashy/137-dashy/250-dashy/298-dashy/299-dashy' (Legacy formats). [3] '299-dashy/298-dashy/250-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy' (High-framerate formats)."
),
'downloader': Param(
- 'py',
+ 'cli',
type="string",
enum=['py', 'aria-rpc', 'cli'],
title="[Worker Param] Download Tool",
@ -279,7 +272,7 @@ with DAG(
'aria_port': Param(6800, type="integer", title="[Worker Param] Aria2c Port", description="For 'aria-rpc' downloader: Port of the aria2c RPC server. Can be set via Airflow Variable 'YTDLP_ARIA_PORT'."),
'aria_secret': Param('SQGCQPLVFQIASMPNPOJYLVGJYLMIDIXDXAIXOTX', type="string", title="[Worker Param] Aria2c Secret", description="For 'aria-rpc' downloader: Secret token. Can be set via Airflow Variable 'YTDLP_ARIA_SECRET'."),
'yt_dlp_extra_args': Param(
- '--restrict-filenames',
+ '--no-part --restrict-filenames',
type=["string", "null"],
title="[Worker Param] Extra yt-dlp arguments",
description="Extra command-line arguments for yt-dlp during download."
View File
@ -17,14 +17,14 @@ from __future__ import annotations
from airflow.decorators import task, task_group
from airflow.exceptions import AirflowException, AirflowSkipException
from airflow.models import Variable
- from airflow.models.dag import DAG
+ from airflow.models.dag import DAG, DagModel
from airflow.models.param import Param
from airflow.models.xcom_arg import XComArg
from airflow.operators.dummy import DummyOperator
from airflow.utils.dates import days_ago
from airflow.utils.task_group import TaskGroup
from airflow.api.common.trigger_dag import trigger_dag
- from copy import copy
+ import copy
from datetime import datetime, timedelta
import concurrent.futures
import json
@ -143,10 +143,12 @@ DEFAULT_REQUEST_PARAMS = {
"session_params": {
"lang": "en-US",
"location": "US",
- "deviceCategory": "MOBILE",
+ "deviceCategory": "TV",
"user_agents": {
- "youtubei_js": "Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)",
- "yt_dlp": "Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)"
+ # "youtubei_js": "Mozilla/5.0 (Linux; Cobalt) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36",
+ "youtubei_js": "Mozilla/5.0 (ChromiumStylePlatform) Cobalt/Version",
+ # "yt_dlp": "Mozilla/5.0 (Linux; Cobalt) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36"
+ "yt_dlp": "Mozilla/5.0 (ChromiumStylePlatform) Cobalt/Version"
}
}
}
@ -208,14 +210,9 @@ def _get_account_pool(params: dict) -> list:
is_prefix_mode = True
pool_size = int(pool_size_param)
- if params.get('prepend_client_to_account', True):
- clients_str = params.get('clients', '')
- primary_client = clients_str.split(',')[0].strip() if clients_str else 'unknown'
- timestamp = datetime.now().strftime('%Y%m%d%H%M%S')
- new_prefix = f"{prefix}_{timestamp}_{primary_client}"
- accounts = [f"{new_prefix}_{i:02d}" for i in range(1, pool_size + 1)]
- else:
- accounts = [f"{prefix}_{i:02d}" for i in range(1, pool_size + 1)]
+ # The orchestrator now generates the full prefix if prepend_client_to_account is True.
+ # The worker just appends the numbers.
+ accounts = [f"{prefix}_{i:02d}" for i in range(1, pool_size + 1)]
else:
accounts = [prefix]
@ -347,12 +344,26 @@ def get_url_and_assign_account(**context):
# For manual runs, we fall back to 'manual_url_to_process'. # For manual runs, we fall back to 'manual_url_to_process'.
url_to_process = params.get('url_to_process') url_to_process = params.get('url_to_process')
if not url_to_process: if not url_to_process:
url_to_process = params.get('manual_url_to_process') manual_url_input = params.get('manual_url_to_process')
if url_to_process: if manual_url_input:
logger.info(f"Using URL from manual run parameter: '{url_to_process}'") logger.info(f"Using URL from manual run parameter: '{manual_url_input}'")
if manual_url_input == 'PULL_FROM_QUEUE':
logger.info("Manual run is set to pull from queue.")
redis_conn_id = params.get('redis_conn_id', DEFAULT_REDIS_CONN_ID)
queue_name = params.get('queue_name', DEFAULT_QUEUE_NAME)
inbox_queue = f"{queue_name}_inbox"
client = _get_redis_client(redis_conn_id)
url_bytes = client.lpop(inbox_queue)
if not url_bytes:
logger.info("Redis queue is empty. No work to do. Skipping task.")
raise AirflowSkipException("Redis queue is empty. No work to do.")
url_to_process = url_bytes.decode('utf-8')
logger.info(f"Pulled URL '{url_to_process}' from queue '{inbox_queue}'.")
else:
url_to_process = manual_url_input
if not url_to_process: if not url_to_process:
raise AirflowException("No URL to process. For manual runs, please provide a URL in the 'manual_url_to_process' parameter.") raise AirflowException("No URL to process. For manual runs, please provide a URL in the 'manual_url_to_process' parameter, or 'PULL_FROM_QUEUE'.")
logger.info(f"Received URL '{url_to_process}' to process.") logger.info(f"Received URL '{url_to_process}' to process.")
# Mark the URL as in-progress in Redis # Mark the URL as in-progress in Redis
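The 'PULL_FROM_QUEUE' branch above pops one URL from the `<queue_name>_inbox` Redis list. A minimal sketch of seeding that list for a manual test run, assuming a local Redis and an illustrative queue name (the DAG resolves both from its parameters and `redis_conn_id`):

```python
import redis

# Hypothetical connection; the DAG resolves this from the 'redis_conn_id' Airflow connection.
client = redis.Redis(host="localhost", port=6379, db=0)

queue_name = "ytdlp_queue"          # illustrative; the real default comes from DEFAULT_QUEUE_NAME
inbox_queue = f"{queue_name}_inbox"

# rpush here pairs with the worker's lpop, giving FIFO ordering.
client.rpush(inbox_queue, "https://www.youtube.com/watch?v=iPwdia3gAnk")
print(f"{client.llen(inbox_queue)} URL(s) waiting in {inbox_queue}")
```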
@ -399,9 +410,29 @@ def get_token(initial_data: dict, **context):
host, port = params['service_ip'], int(params['service_port']) host, port = params['service_ip'], int(params['service_port'])
machine_id = params.get('machine_id') or socket.gethostname() machine_id = params.get('machine_id') or socket.gethostname()
clients = params.get('clients') clients = params.get('clients')
request_params_json = params.get('request_params_json', '{}') request_params_json = params.get('request_params_json')
language_code = params.get('language_code')
assigned_proxy_url = params.get('assigned_proxy_url') assigned_proxy_url = params.get('assigned_proxy_url')
if language_code:
try:
params_dict = json.loads(request_params_json)
if not params_dict:
params_dict = copy.deepcopy(DEFAULT_REQUEST_PARAMS)
logger.info(f"Setting language for request: {language_code}")
if 'session_params' not in params_dict:
params_dict['session_params'] = {}
params_dict['session_params']['lang'] = language_code
request_params_json = json.dumps(params_dict)
except (json.JSONDecodeError, TypeError):
logger.warning("Could not parse request_params_json as JSON. Treating as key=value pairs and appending language code.")
lang_kv = f"session_params.lang={language_code}"
if request_params_json:
request_params_json += f",{lang_kv}"
else:
request_params_json = lang_kv
video_id = _extract_video_id(url) video_id = _extract_video_id(url)
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
job_dir_name = f"{timestamp}-{video_id or 'unknown'}" job_dir_name = f"{timestamp}-{video_id or 'unknown'}"
@ -445,18 +476,39 @@ def get_token(initial_data: dict, **context):
if process.returncode != 0: if process.returncode != 0:
error_message = "ytops-client failed. See logs for details." error_message = "ytops-client failed. See logs for details."
for line in reversed(process.stderr.strip().split('\n')): # Try to find a more specific error message from the Thrift client's output
if 'ERROR' in line or 'Thrift error' in line or 'Connection to server failed' in line: thrift_error_match = re.search(r'A Thrift error occurred: (.*)', process.stderr)
error_message = line.strip() if thrift_error_match:
break error_message = thrift_error_match.group(1).strip()
else: # Fallback to old line-by-line parsing
for line in reversed(process.stderr.strip().split('\n')):
if 'ERROR' in line or 'Thrift error' in line or 'Connection to server failed' in line:
error_message = line.strip()
break
# Determine error code for branching logic
error_code = 'GET_INFO_CLIENT_FAIL' error_code = 'GET_INFO_CLIENT_FAIL'
if "BOT_DETECTED" in process.stderr: stderr_lower = process.stderr.lower()
error_code = "BOT_DETECTED"
elif "BOT_DETECTION_SIGN_IN_REQUIRED" in process.stderr: # These patterns should match the error codes from PBUserException and others
error_code = "BOT_DETECTION_SIGN_IN_REQUIRED" error_patterns = {
elif "Connection to server failed" in process.stderr: "BOT_DETECTED": ["bot_detected"],
error_code = "TRANSPORT_ERROR" "BOT_DETECTION_SIGN_IN_REQUIRED": ["bot_detection_sign_in_required"],
"TRANSPORT_ERROR": ["connection to server failed"],
"PRIVATE_VIDEO": ["private video"],
"COPYRIGHT_REMOVAL": ["copyright"],
"GEO_RESTRICTED": ["in your country"],
"VIDEO_REMOVED": ["video has been removed"],
"VIDEO_UNAVAILABLE": ["video unavailable"],
"MEMBERS_ONLY": ["members-only"],
"AGE_GATED_SIGN_IN": ["sign in to confirm your age"],
"VIDEO_PROCESSING": ["processing this video"],
}
for code, patterns in error_patterns.items():
if any(p in stderr_lower for p in patterns):
error_code = code
break # Found a match, stop searching
error_details = { error_details = {
'error_message': error_message, 'error_message': error_message,
@ -471,8 +523,23 @@ def get_token(initial_data: dict, **context):
if proxy_match: if proxy_match:
proxy = proxy_match.group(1).strip() proxy = proxy_match.group(1).strip()
# Rename the info.json to include the proxy for the download worker
final_info_json_path = info_json_path
if proxy:
# Sanitize for filename: replace '://' which is invalid in paths. Colons are usually fine.
sanitized_proxy = proxy.replace('://', '---')
new_filename = f"info_{video_id or 'unknown'}_{account_id}_{timestamp}_proxy_{sanitized_proxy}.json"
new_path = os.path.join(job_dir_path, new_filename)
try:
os.rename(info_json_path, new_path)
final_info_json_path = new_path
logger.info(f"Renamed info.json to include proxy: {new_path}")
except OSError as e:
logger.error(f"Failed to rename info.json to include proxy: {e}. Using original path.")
return { return {
'info_json_path': info_json_path, 'info_json_path': final_info_json_path,
'job_dir_path': job_dir_path, 'job_dir_path': job_dir_path,
'socks_proxy': proxy, 'socks_proxy': proxy,
'ytdlp_command': None, 'ytdlp_command': None,
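The renamed info.json now carries the proxy in its file name. The consumer of that name is not shown in this diff; assuming it simply reverses the '---' substitution applied above, recovery could look like:

```python
import os
import re
from typing import Optional

def proxy_from_info_json_path(path: str) -> Optional[str]:
    """Recover the proxy embedded by the rename above, e.g.
    'info_abc_acct_20250101_000000_proxy_socks5---10.0.0.1:1080.json' -> 'socks5://10.0.0.1:1080'."""
    match = re.search(r"_proxy_(.+)\.json$", os.path.basename(path))
    if not match:
        return None
    return match.group(1).replace("---", "://")
```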
@ -498,10 +565,15 @@ def handle_bannable_error_branch(task_id_to_check: str, **context):
error_code = error_details.get('error_code', '').strip() error_code = error_details.get('error_code', '').strip()
policy = params.get('on_bannable_failure', 'retry_with_new_account') policy = params.get('on_bannable_failure', 'retry_with_new_account')
# Check if this is an age confirmation error - should not stop the loop # Unrecoverable video errors that should not be retried or treated as system failures.
if "Sign in to confirm your age" in error_message or "confirm your age" in error_message.lower(): unrecoverable_video_errors = [
logger.info(f"Age confirmation error detected for '{task_id_to_check}'. This is a content restriction, not a bot detection issue.") "AGE_GATED_SIGN_IN", "MEMBERS_ONLY", "VIDEO_PROCESSING", "COPYRIGHT_REMOVAL",
return 'handle_age_restriction_error' "GEO_RESTRICTED", "VIDEO_UNAVAILABLE", "PRIVATE_VIDEO", "VIDEO_REMOVED"
]
if error_code in unrecoverable_video_errors:
logger.warning(f"Unrecoverable video error '{error_code}' detected for '{task_id_to_check}'. This is a content issue, not a system failure.")
return 'handle_unrecoverable_video_error'
# Fatal Thrift connection errors that should stop all processing. # Fatal Thrift connection errors that should stop all processing.
if error_code == 'TRANSPORT_ERROR': if error_code == 'TRANSPORT_ERROR':
@ -718,6 +790,59 @@ def push_auth_success_to_redis(initial_data: dict, token_data: dict, **context):
logger.info(f"Pushed successful auth data for URL '{url}' to '{dl_inbox_queue}'.") logger.info(f"Pushed successful auth data for URL '{url}' to '{dl_inbox_queue}'.")
logger.info(f"Stored success result for auth on URL '{url}' in '{auth_result_queue}'.") logger.info(f"Stored success result for auth on URL '{url}' in '{auth_result_queue}'.")
@task
def handle_unrecoverable_video_error(**context):
"""
Handles errors for videos that are unavailable (private, removed, etc.).
These are not system failures, so the URL is logged to a 'skipped' queue
and the processing loop continues without marking the run as failed.
"""
params = context['params']
ti = context['task_instance']
url = params.get('url_to_process', 'unknown')
# Collect error details from the failed get_token task
error_details = {}
first_token_error = ti.xcom_pull(task_ids='initial_attempt.get_token', key='error_details')
retry_token_error = ti.xcom_pull(task_ids='retry_logic.retry_get_token', key='error_details')
if retry_token_error:
error_details = retry_token_error
elif first_token_error:
error_details = first_token_error
error_code = error_details.get('error_code', 'UNKNOWN_VIDEO_ERROR')
error_message = error_details.get('error_message', 'Video is unavailable for an unknown reason.')
logger.warning(f"Skipping URL '{url}' due to unrecoverable video error: {error_code} - {error_message}")
result_data = {
'status': 'skipped',
'end_time': time.time(),
'url': url,
'dag_run_id': context['dag_run'].run_id,
'reason': error_code,
'details': error_message,
'error_details': error_details
}
try:
client = _get_redis_client(params['redis_conn_id'])
# New queue for skipped videos
skipped_queue = f"{params['queue_name']}_skipped"
progress_queue = f"{params['queue_name']}_progress"
with client.pipeline() as pipe:
pipe.hset(skipped_queue, url, json.dumps(result_data))
pipe.hdel(progress_queue, url)
pipe.execute()
logger.info(f"Stored skipped result for URL '{url}' in '{skipped_queue}' and removed from progress queue.")
except Exception as e:
logger.error(f"Could not report skipped video to Redis: {e}", exc_info=True)
@task(trigger_rule='one_failed') @task(trigger_rule='one_failed')
def report_failure_and_continue(**context): def report_failure_and_continue(**context):
""" """
@ -732,8 +857,8 @@ def report_failure_and_continue(**context):
error_details = {} error_details = {}
# Check for error details from get_token tasks # Check for error details from get_token tasks
first_token_task_id = 'get_token' first_token_task_id = 'initial_attempt.get_token'
retry_token_task_id = 'retry_get_token' retry_token_task_id = 'retry_logic.retry_get_token'
first_token_error = ti.xcom_pull(task_ids=first_token_task_id, key='error_details') first_token_error = ti.xcom_pull(task_ids=first_token_task_id, key='error_details')
retry_token_error = ti.xcom_pull(task_ids=retry_token_task_id, key='error_details') retry_token_error = ti.xcom_pull(task_ids=retry_token_task_id, key='error_details')
@ -798,8 +923,8 @@ def handle_fatal_error(**context):
# Collect error details # Collect error details
error_details = {} error_details = {}
first_token_task_id = 'get_token' first_token_task_id = 'initial_attempt.get_token'
retry_token_task_id = 'retry_get_token' retry_token_task_id = 'retry_logic.retry_get_token'
first_token_error = ti.xcom_pull(task_ids=first_token_task_id, key='error_details') first_token_error = ti.xcom_pull(task_ids=first_token_task_id, key='error_details')
retry_token_error = ti.xcom_pull(task_ids=retry_token_task_id, key='error_details') retry_token_error = ti.xcom_pull(task_ids=retry_token_task_id, key='error_details')
@ -866,6 +991,12 @@ def continue_processing_loop(**context):
logger.info(f"DAG run '{dag_run.run_id}' does not appear to be triggered by the dispatcher. Stopping processing loop.") logger.info(f"DAG run '{dag_run.run_id}' does not appear to be triggered by the dispatcher. Stopping processing loop.")
return return
dispatcher_dag_id = 'ytdlp_ops_v02_dispatcher_auth'
dag_model = DagModel.get_dagmodel(dispatcher_dag_id)
if dag_model and dag_model.is_paused:
logger.warning(f"Dispatcher DAG '{dispatcher_dag_id}' is paused. Stopping processing loop.")
return
# Create a new unique run_id for the dispatcher. # Create a new unique run_id for the dispatcher.
# Using a timestamp and UUID ensures the ID is unique and does not grow in length over time, # Using a timestamp and UUID ensures the ID is unique and does not grow in length over time,
# preventing database errors. # preventing database errors.
@ -880,7 +1011,7 @@ def continue_processing_loop(**context):
logger.info(f"Worker finished successfully. Triggering a new dispatcher ('{new_dispatcher_run_id}') to continue the loop.") logger.info(f"Worker finished successfully. Triggering a new dispatcher ('{new_dispatcher_run_id}') to continue the loop.")
trigger_dag( trigger_dag(
dag_id='ytdlp_ops_v02_dispatcher_auth', dag_id=dispatcher_dag_id,
run_id=new_dispatcher_run_id, run_id=new_dispatcher_run_id,
conf=conf_to_pass, conf=conf_to_pass,
replace_microseconds=False replace_microseconds=False
@ -902,10 +1033,15 @@ def handle_retry_failure_branch(task_id_to_check: str, **context):
error_message = error_details.get('error_message', '').strip() error_message = error_details.get('error_message', '').strip()
error_code = error_details.get('error_code', '').strip() error_code = error_details.get('error_code', '').strip()
# Check if this is an age confirmation error - should not stop the loop # Unrecoverable video errors that should not be retried or treated as system failures.
if "Sign in to confirm your age" in error_message or "confirm your age" in error_message.lower(): unrecoverable_video_errors = [
logger.info(f"Age confirmation error detected on retry from '{task_id_to_check}'. This is a content restriction, not a bot detection issue.") "AGE_GATED_SIGN_IN", "MEMBERS_ONLY", "VIDEO_PROCESSING", "COPYRIGHT_REMOVAL",
return 'handle_age_restriction_error' "GEO_RESTRICTED", "VIDEO_UNAVAILABLE", "PRIVATE_VIDEO", "VIDEO_REMOVED"
]
if error_code in unrecoverable_video_errors:
logger.warning(f"Unrecoverable video error '{error_code}' detected on retry for '{task_id_to_check}'.")
return 'handle_unrecoverable_video_error'
if error_code == 'TRANSPORT_ERROR': if error_code == 'TRANSPORT_ERROR':
logger.error(f"Fatal Thrift connection error on retry from '{task_id_to_check}'.") logger.error(f"Fatal Thrift connection error on retry from '{task_id_to_check}'.")
@ -964,8 +1100,8 @@ def report_bannable_and_continue(**context):
# Collect error details # Collect error details
error_details = {} error_details = {}
first_token_task_id = 'get_token' first_token_task_id = 'initial_attempt.get_token'
retry_token_task_id = 'retry_get_token' retry_token_task_id = 'retry_logic.retry_get_token'
first_token_error = ti.xcom_pull(task_ids=first_token_task_id, key='error_details') first_token_error = ti.xcom_pull(task_ids=first_token_task_id, key='error_details')
retry_token_error = ti.xcom_pull(task_ids=retry_token_task_id, key='error_details') retry_token_error = ti.xcom_pull(task_ids=retry_token_task_id, key='error_details')
@ -1014,71 +1150,6 @@ def report_bannable_and_continue(**context):
logger.error(f"Could not report bannable error to Redis: {e}", exc_info=True) logger.error(f"Could not report bannable error to Redis: {e}", exc_info=True)
@task
def handle_age_restriction_error(**context):
"""
Handles age restriction errors specifically. These are content restrictions
that cannot be bypassed by using different accounts, so we report the failure
and continue the processing loop rather than stopping it.
"""
params = context['params']
ti = context['task_instance']
url = params.get('url_to_process', 'unknown')
# Collect error details
error_details = {}
first_token_task_id = 'get_token'
retry_token_task_id = 'retry_get_token'
first_token_error = ti.xcom_pull(task_ids=first_token_task_id, key='error_details')
retry_token_error = ti.xcom_pull(task_ids=retry_token_task_id, key='error_details')
# Use the most recent error details
if retry_token_error:
error_details = retry_token_error
elif first_token_error:
error_details = first_token_error
logger.error(f"Age restriction error for URL '{url}'. This content requires age confirmation and cannot be bypassed.")
# Report failure to Redis so the URL can be marked as failed
try:
client = _get_redis_client(params['redis_conn_id'])
# Update client-specific stats
try:
machine_id = params.get('machine_id') or socket.gethostname()
_update_client_stats(client, params.get('clients', ''), 'failure', url, machine_id, context['dag_run'].run_id)
except Exception as e:
logger.error(f"Could not update client stats on age restriction error: {e}", exc_info=True)
result_data = {
'status': 'failed',
'end_time': time.time(),
'url': url,
'dag_run_id': context['dag_run'].run_id,
'error': 'age_restriction',
'error_message': 'Content requires age confirmation',
'error_details': error_details
}
result_queue = f"{params['queue_name']}_result"
fail_queue = f"{params['queue_name']}_fail"
progress_queue = f"{params['queue_name']}_progress"
with client.pipeline() as pipe:
pipe.hset(result_queue, url, json.dumps(result_data))
pipe.hset(fail_queue, url, json.dumps(result_data))
pipe.hdel(progress_queue, url)
pipe.execute()
logger.info(f"Stored age restriction error for URL '{url}' in '{result_queue}' and '{fail_queue}'.")
except Exception as e:
logger.error(f"Could not report age restriction error to Redis: {e}", exc_info=True)
# This is NOT a fatal error for the processing loop - we just continue with the next URL
# ============================================================================= # =============================================================================
# DAG Definition with TaskGroups # DAG Definition with TaskGroups
# ============================================================================= # =============================================================================
@ -1106,9 +1177,10 @@ with DAG(
'timeout': Param(DEFAULT_TIMEOUT, type="integer"), 'timeout': Param(DEFAULT_TIMEOUT, type="integer"),
'on_bannable_failure': Param('stop_loop_on_auth_proceed_on_download_error', type="string", enum=['stop_loop', 'retry_with_new_account', 'retry_without_ban', 'retry_and_ban_account_only', 'retry_on_connection_error', 'proceed_loop_under_manual_inspection', 'stop_loop_on_auth_proceed_on_download_error']), 'on_bannable_failure': Param('stop_loop_on_auth_proceed_on_download_error', type="string", enum=['stop_loop', 'retry_with_new_account', 'retry_without_ban', 'retry_and_ban_account_only', 'retry_on_connection_error', 'proceed_loop_under_manual_inspection', 'stop_loop_on_auth_proceed_on_download_error']),
'request_params_json': Param(json.dumps(DEFAULT_REQUEST_PARAMS), type="string", title="[Worker Param] Request Params JSON", description="JSON string with request parameters for the token service."), 'request_params_json': Param(json.dumps(DEFAULT_REQUEST_PARAMS), type="string", title="[Worker Param] Request Params JSON", description="JSON string with request parameters for the token service."),
'language_code': Param('en-US', type="string", title="[Worker Param] Language Code", description="The language code (e.g., 'en-US', 'de-DE') to use for the YouTube request headers."),
'auto_create_new_accounts_on_exhaustion': Param(True, type="boolean"), 'auto_create_new_accounts_on_exhaustion': Param(True, type="boolean"),
# --- Manual Run / Internal Parameters --- # --- Manual Run / Internal Parameters ---
'manual_url_to_process': Param('iPwdia3gAnk', type=["string", "null"], title="[Manual Run] URL to Process", description="For manual runs, provide a single YouTube URL to process. This is ignored if triggered by the dispatcher."), 'manual_url_to_process': Param('iPwdia3gAnk', type=["string", "null"], title="[Manual Run] URL to Process", description="For manual runs, provide a single YouTube URL, or the special value 'PULL_FROM_QUEUE' to pull one URL from the Redis inbox. This is ignored if triggered by the dispatcher."),
'url_to_process': Param(None, type=["string", "null"], title="[Internal] URL from Dispatcher", description="This parameter is set by the dispatcher DAG and should not be used for manual runs."), 'url_to_process': Param(None, type=["string", "null"], title="[Internal] URL from Dispatcher", description="This parameter is set by the dispatcher DAG and should not be used for manual runs."),
'worker_queue': Param(None, type=["string", "null"], title="[Internal] Worker Queue", description="This parameter is set by the dispatcher DAG and should not be used for manual runs."), 'worker_queue': Param(None, type=["string", "null"], title="[Internal] Worker Queue", description="This parameter is set by the dispatcher DAG and should not be used for manual runs."),
} }
@ -1121,7 +1193,7 @@ with DAG(
fatal_error_task = handle_fatal_error() fatal_error_task = handle_fatal_error()
report_failure_task = report_failure_and_continue() report_failure_task = report_failure_and_continue()
continue_loop_task = continue_processing_loop() continue_loop_task = continue_processing_loop()
age_restriction_task = handle_age_restriction_error() unrecoverable_video_error_task = handle_unrecoverable_video_error()
report_bannable_and_continue_task = report_bannable_and_continue() report_bannable_and_continue_task = report_bannable_and_continue()
# --- Task Group 1: Initial Attempt --- # --- Task Group 1: Initial Attempt ---
@ -1138,7 +1210,7 @@ with DAG(
) )
first_token_attempt >> initial_branch_task first_token_attempt >> initial_branch_task
initial_branch_task >> [fatal_error_task, ban_and_report_immediately_task, age_restriction_task, report_bannable_and_continue_task] initial_branch_task >> [fatal_error_task, ban_and_report_immediately_task, unrecoverable_video_error_task, report_bannable_and_continue_task]
# --- Task Group 2: Retry Logic --- # --- Task Group 2: Retry Logic ---
with TaskGroup("retry_logic", tooltip="Retry logic with account management") as retry_logic_group: with TaskGroup("retry_logic", tooltip="Retry logic with account management") as retry_logic_group:
@ -1188,7 +1260,7 @@ with DAG(
direct_retry_account_task >> coalesced_retry_data direct_retry_account_task >> coalesced_retry_data
coalesced_retry_data >> retry_token_task coalesced_retry_data >> retry_token_task
retry_token_task >> retry_branch_task retry_token_task >> retry_branch_task
retry_branch_task >> [fatal_error_task, report_failure_task, ban_after_retry_report_task, age_restriction_task, report_bannable_and_continue_task] retry_branch_task >> [fatal_error_task, report_failure_task, ban_after_retry_report_task, unrecoverable_video_error_task, report_bannable_and_continue_task]
ban_after_retry_report_task >> report_failure_task ban_after_retry_report_task >> report_failure_task
# --- Task Group 3: Success/Continuation Logic --- # --- Task Group 3: Success/Continuation Logic ---
@ -1210,7 +1282,7 @@ with DAG(
# --- DAG Dependencies between TaskGroups --- # --- DAG Dependencies between TaskGroups ---
# Initial attempt can lead to retry logic or direct failure # Initial attempt can lead to retry logic or direct failure
initial_branch_task >> [retry_logic_group, fatal_error_task, ban_and_report_immediately_task, age_restriction_task, report_bannable_and_continue_task] initial_branch_task >> [retry_logic_group, fatal_error_task, ban_and_report_immediately_task, unrecoverable_video_error_task, report_bannable_and_continue_task]
# A successful initial attempt bypasses retry and goes straight to the success group # A successful initial attempt bypasses retry and goes straight to the success group
initial_attempt_group >> success_group initial_attempt_group >> success_group
@ -1222,6 +1294,6 @@ with DAG(
# Ban and report immediately leads to failure reporting # Ban and report immediately leads to failure reporting
ban_and_report_immediately_task >> report_failure_task ban_and_report_immediately_task >> report_failure_task
# Age restriction error leads to failure reporting and continues the loop # Unrecoverable/bannable errors that don't stop the loop should continue processing
age_restriction_task >> continue_loop_task unrecoverable_video_error_task >> continue_loop_task
report_bannable_and_continue_task >> continue_loop_task report_bannable_and_continue_task >> continue_loop_task

File diff suppressed because it is too large.

View File

@ -41,17 +41,24 @@ def run_s3_upload_batch(**context):
Dry run mode is non-destructive and will pause briefly after checking to prevent tight loops. Dry run mode is non-destructive and will pause briefly after checking to prevent tight loops.
""" """
params = context['params'] params = context['params']
ti = context['task_instance']
# Log the configured execution timeout for debugging purposes.
# This helps verify that the timeout setting from the DAG file is being applied.
timeout_delta = ti.task.execution_timeout
logger.info(f"Task is configured with execution_timeout: {timeout_delta}")
concurrency = params['concurrency'] concurrency = params['concurrency']
mode = params['mode'] mode = params['mode']
dry_run = params['dry_run'] dry_run = params['dry_run']
sleep_interval_min = params['sleep_if_no_videos_min'] sleep_interval_min = params['sleep_if_no_videos_min']
sleep_interval_sec = sleep_interval_min * 60 sleep_interval_sec = sleep_interval_min * 60
s3_conn_id = params['s3_conn_id'] s3_conn_id = params['s3_conn_id']
s3_bucket = params['s3_bucket_name']
s3_access_key_id = None s3_access_key_id = None
s3_secret_access_key = None s3_secret_access_key = None
s3_endpoint = None s3_endpoint = None
s3_bucket = None
s3_region = None s3_region = None
config_source = "Unknown" config_source = "Unknown"
profile_name = "rusonyx" profile_name = "rusonyx"
@ -68,12 +75,11 @@ def run_s3_upload_batch(**context):
s3_endpoint = s3_conn.host s3_endpoint = s3_conn.host
extra_config = s3_conn.extra_dejson extra_config = s3_conn.extra_dejson
s3_bucket = extra_config.get('bucket')
s3_region = extra_config.get('region_name') s3_region = extra_config.get('region_name')
if not all([s3_access_key_id, s3_secret_access_key, s3_endpoint, s3_bucket, s3_region]): if not all([s3_access_key_id, s3_secret_access_key, s3_endpoint, s3_region]):
logger.warning("S3 connection from Airflow is missing one or more required fields. Will attempt to fall back to environment variables.") logger.warning("S3 connection from Airflow is missing one or more required fields (excluding bucket). Will attempt to fall back to environment variables.")
s3_access_key_id = s3_secret_access_key = s3_endpoint = s3_bucket = s3_region = None # Reset all s3_access_key_id = s3_secret_access_key = s3_endpoint = s3_region = None # Reset all
else: else:
config_source = f"Airflow Connection '{s3_conn_id}'" config_source = f"Airflow Connection '{s3_conn_id}'"
profile_name = "rusonyx-airflow" profile_name = "rusonyx-airflow"
@ -82,17 +88,16 @@ def run_s3_upload_batch(**context):
logger.warning(f"Failed to load S3 configuration from Airflow connection '{s3_conn_id}': {e}. Will attempt to fall back to environment variables.") logger.warning(f"Failed to load S3 configuration from Airflow connection '{s3_conn_id}': {e}. Will attempt to fall back to environment variables.")
# --- Attempt 2: Fallback to Environment Variables --- # --- Attempt 2: Fallback to Environment Variables ---
if not all([s3_access_key_id, s3_secret_access_key, s3_endpoint, s3_bucket, s3_region]): if not all([s3_access_key_id, s3_secret_access_key, s3_endpoint, s3_region]):
try: try:
logger.info("Attempting to load S3 configuration from environment variables as a fallback.") logger.info("Attempting to load S3 configuration from environment variables as a fallback.")
s3_access_key_id = os.environ['S3_DELIVERY_AWS_ACCESS_KEY_ID'] s3_access_key_id = os.environ['S3_DELIVERY_AWS_ACCESS_KEY_ID']
s3_secret_access_key = os.environ['S3_DELIVERY_AWS_SECRET_ACCESS_KEY'] s3_secret_access_key = os.environ['S3_DELIVERY_AWS_SECRET_ACCESS_KEY']
s3_endpoint = os.environ['S3_DELIVERY_ENDPOINT'] s3_endpoint = os.environ['S3_DELIVERY_ENDPOINT']
s3_bucket = os.environ['S3_DELIVERY_BUCKET']
s3_region = os.environ['S3_DELIVERY_AWS_REGION'] s3_region = os.environ['S3_DELIVERY_AWS_REGION']
if not all([s3_access_key_id, s3_secret_access_key, s3_endpoint, s3_bucket, s3_region]): if not all([s3_access_key_id, s3_secret_access_key, s3_endpoint, s3_region]):
raise ValueError("One or more S3 configuration environment variables are empty.") raise ValueError("One or more S3 configuration environment variables are empty (excluding bucket).")
config_source = "Environment Variables" config_source = "Environment Variables"
profile_name = "rusonyx" profile_name = "rusonyx"
@ -100,6 +105,9 @@ def run_s3_upload_batch(**context):
logger.error(f"Having problems reading S3 configuration from environment variables: {e}", exc_info=True) logger.error(f"Having problems reading S3 configuration from environment variables: {e}", exc_info=True)
raise AirflowException("S3 configuration is missing. Could not load from Airflow connection or environment variables.") raise AirflowException("S3 configuration is missing. Could not load from Airflow connection or environment variables.")
if not s3_bucket:
raise AirflowException("S3 bucket name is not specified in DAG parameters.")
s3_destination = f"s3://{s3_bucket}/" s3_destination = f"s3://{s3_bucket}/"
logger.info(f"Starting S3 upload loop. Watching source '{READY_PATH}' for delivery to '{s3_destination}'.") logger.info(f"Starting S3 upload loop. Watching source '{READY_PATH}' for delivery to '{s3_destination}'.")
@ -328,6 +336,21 @@ with DAG(
2. Ansible updates an Airflow Variable named `s3_worker_hostnames` with a JSON list of all active uploader workers (typically dlXXX machines). Each worker listens to its own queue (e.g., `queue-dl-dl001`). 2. Ansible updates an Airflow Variable named `s3_worker_hostnames` with a JSON list of all active uploader workers (typically dlXXX machines). Each worker listens to its own queue (e.g., `queue-dl-dl001`).
3. This DAG reads the variable on manual trigger or after a pause/resume cycle to create the dynamic tasks. This allows for easy inspection of per-worker logs and status from the Airflow UI. 3. This DAG reads the variable on manual trigger or after a pause/resume cycle to create the dynamic tasks. This allows for easy inspection of per-worker logs and status from the Airflow UI.
4. Each dynamic task watches a shared folder (`/opt/airflow/downloadfiles/videos/ready`). Download workers place completed videos into timestamped sub-folders (e.g., `20241122T1050`). The uploader processes these 10-minute batches, copying them to S3 with `s5cmd` and then deleting the source directories. This design avoids race conditions and improves performance. 4. Each dynamic task watches a shared folder (`/opt/airflow/downloadfiles/videos/ready`). Download workers place completed videos into timestamped sub-folders (e.g., `20241122T1050`). The uploader processes these 10-minute batches, copying them to S3 with `s5cmd` and then deleting the source directories. This design avoids race conditions and improves performance.
#### Why use 10-minute batch folders?
While an `mv` command (atomic on the same filesystem) is sufficient to ensure a single video directory is complete when it appears in the `ready` folder, the batching system solves higher-level concurrency and efficiency problems in a high-throughput environment.
- **Concurrency Management**: The uploader needs to process a discrete *set* of videos. By working on batches from a *previous* time window (e.g., uploading the `10:40` batch after `10:50`), it guarantees that no new files will be added to that batch while it's being processed. This creates a clean, reliable unit of work and prevents the uploader from missing videos that are moved in while it's compiling its list.
- **Bulk Operation Efficiency**: It is far more efficient to upload hundreds of videos in a single bulk command than one by one. The batching system allows videos to accumulate, and the uploader sends them all to S3 in one highly optimized `s5cmd run` command. Similarly, after a successful upload, the uploader can delete the single parent batch directory, which is much faster than deleting hundreds of individual video folders.
- **Continuous Operation**: The uploader task is a long-running loop. If processing a batch takes longer than 10 minutes (e.g., due to a large volume of videos or slow network), the uploader will continue working on that batch until it is complete. It only sleeps when it has processed all available completed batches and is waiting for new ones to become ready.
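A minimal sketch of the naming and eligibility rules described above, assuming batch folders use a `%Y%m%dT%H%M` name truncated to the 10-minute window (e.g. `20241122T1050`):

```python
from datetime import datetime

def batch_folder_name(ts: datetime) -> str:
    # Truncate to the containing 10-minute window: 2024-11-22 10:57 -> "20241122T1050".
    floored = ts.replace(minute=ts.minute - ts.minute % 10, second=0, microsecond=0)
    return floored.strftime("%Y%m%dT%H%M")

def is_closed_batch(folder_name: str, now: datetime) -> bool:
    # Any folder older than the current window can no longer receive new videos,
    # so it is a safe, complete unit of work for the uploader.
    return folder_name < batch_folder_name(now)
```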
#### Cleanup Method: `rsync` vs `shutil.rmtree`
The cleanup process uses the `rsync` empty-folder trick to delete the contents of the batch directory before removing the directory itself. This is a deliberate performance optimization. The command is effectively: `rsync -a --delete /path/to/empty/ /path/to/delete/`.
- Python's `shutil.rmtree` can be slow as it makes an individual `os.remove()` system call for every file.
- The `rsync` method is a well-known and highly efficient alternative for this scenario, as `rsync` is a mature C program optimized for these operations. More details on this performance difference can be found here: https://stackoverflow.com/questions/5470939/why-is-shutil-rmtree-so-slow
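A minimal sketch of this trick (the DAG's actual cleanup code is not part of this hunk):

```python
import os
import subprocess
import tempfile

def fast_rmtree(target_dir: str) -> None:
    # rsync an empty directory over the target so its contents are deleted in bulk,
    # then remove the now-empty directory itself.
    with tempfile.TemporaryDirectory() as empty_dir:
        subprocess.run(
            ["rsync", "-a", "--delete", f"{empty_dir}/", f"{target_dir}/"],
            check=True,
        )
    os.rmdir(target_dir)
```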
""", """,
params={ params={
'mode': Param( 'mode': Param(
@ -339,9 +362,15 @@ with DAG(
description="If True, the DAG will perform all steps except the actual upload and cleanup. `s5cmd` will be run with `--dry-run`, and the final directory removal will be skipped. Log messages will indicate what would have happened." description="If True, the DAG will perform all steps except the actual upload and cleanup. `s5cmd` will be run with `--dry-run`, and the final directory removal will be skipped. Log messages will indicate what would have happened."
), ),
'concurrency': Param(10, type="integer", title="s5cmd Concurrency"), 'concurrency': Param(10, type="integer", title="s5cmd Concurrency"),
'sleep_if_no_videos_min': Param(10, type="integer", title="Sleep if Idle (minutes)", description="How many minutes the task should sleep if no videos are found to upload."), 'sleep_if_no_videos_min': Param(5, type="integer", title="Sleep if Idle (minutes)", description="How many minutes the task should sleep if no videos are found to upload. This should be less than any external timeout (e.g., Celery's worker_proc_timeout)."),
'batch_completion_wait_min': Param(0, type="integer", title="Batch Completion Wait (minutes)", description="How many minutes to wait after a 10-minute batch window closes before considering it for upload. Default is 0, which processes the current batch immediately. A value of 10 restores the old behavior of waiting for the next 10-minute window."), 'batch_completion_wait_min': Param(0, type="integer", title="Batch Completion Wait (minutes)", description="How many minutes to wait after a 10-minute batch window closes before considering it for upload. Default is 0, which processes the current batch immediately. A value of 10 restores the old behavior of waiting for the next 10-minute window."),
's3_conn_id': Param('s3_delivery_connection', type="string", title="S3 Connection ID", description="The Airflow connection ID for the S3-compatible storage. If this connection is invalid or missing, the task will fall back to environment variables."), 's3_conn_id': Param('s3_delivery_connection', type="string", title="S3 Connection ID", description="The Airflow connection ID for the S3-compatible storage. If this connection is invalid or missing, the task will fall back to environment variables."),
's3_bucket_name': Param(
'videos',
type="string",
title="S3 Bucket Name",
description="The name of the S3 bucket to upload to. Common values are 'videos' or 'videos-prod'."
),
} }
) as dag: ) as dag:
@ -410,7 +439,8 @@ with DAG(
# Create a task for each worker, pinned to its specific queue # Create a task for each worker, pinned to its specific queue
upload_task = task( upload_task = task(
task_id=f'upload_batch_on_{task_id_hostname}', task_id=f'upload_batch_on_{task_id_hostname}',
queue=f'queue-s3-{hostname}' queue=f'queue-s3-{hostname}',
execution_timeout=timedelta(days=1),
)(run_s3_upload_batch)() )(run_s3_upload_batch)()
worker_tasks.append(upload_task) worker_tasks.append(upload_task)

View File

@ -138,6 +138,7 @@ def generate_configs():
logging.info(f"Service role for generation: '{service_role}'") logging.info(f"Service role for generation: '{service_role}'")
# --- Camoufox Configuration (only for worker/all-in-one roles) --- # --- Camoufox Configuration (only for worker/all-in-one roles) ---
logging.info("--- Camoufox (Remote Browser) Configuration ---")
camoufox_proxies = [] camoufox_proxies = []
expanded_camoufox_proxies_str = "" expanded_camoufox_proxies_str = ""
if service_role != 'management': if service_role != 'management':
@ -210,7 +211,7 @@ def generate_configs():
logging.info("This file maps each proxy to a list of WebSocket endpoints for Camoufox.") logging.info("This file maps each proxy to a list of WebSocket endpoints for Camoufox.")
logging.info("The token_generator uses this map to connect to the correct remote browser.") logging.info("The token_generator uses this map to connect to the correct remote browser.")
else: else:
logging.info("Skipping Camoufox configuration generation for 'management' role.") logging.info("Skipping Camoufox configuration generation.")
# --- Generate docker-compose-ytdlp-ops.yaml --- # --- Generate docker-compose-ytdlp-ops.yaml ---
ytdlp_ops_template = env.get_template('docker-compose-ytdlp-ops.yaml.j2') ytdlp_ops_template = env.get_template('docker-compose-ytdlp-ops.yaml.j2')

View File

@ -1,64 +1,46 @@
# Ansible for YT-DLP Cluster # Ansible Deployment for YT-DLP Cluster
This directory contains the Ansible playbooks, roles, and configurations for deploying and managing the YT-DLP Airflow cluster. This document provides an overview of the Ansible playbooks used to deploy and manage the YT-DLP Airflow cluster.
**Note:** All commands should be run from the project root, not from within this directory. ## Main Playbooks
Example: `ansible-playbook ansible/playbook-full.yml`
## Full Deployment These are the primary entry points for cluster management.
### Deploy entire cluster with proxies (recommended for new setups): - `playbook-full-with-proxies.yml`: **(Recommended Entry Point)** Deploys shadowsocks proxies and then the entire application stack.
- `playbook-full.yml`: Deploys the entire application stack (master and workers) without touching proxies.
- `playbook-master.yml`: Deploys/updates only the Airflow master node.
- `playbook-worker.yml`: Deploys/updates all Airflow worker nodes.
- `playbook-proxies.yml`: Deploys/updates only the shadowsocks proxy services on all nodes.
```bash ## Component & Utility Playbooks
ansible-playbook ansible/playbook-full-with-proxies.yml
```
### Deploy cluster without proxies: These playbooks are used for more specific tasks or are called by the main playbooks.
```bash ### Core Deployment Logic
ansible-playbook ansible/playbook-full.yml - `roles/airflow-master/tasks/main.yml`: Contains all tasks for setting up the Airflow master services.
``` - `roles/airflow-worker/tasks/main.yml`: Contains all tasks for setting up the Airflow worker services.
- `roles/ytdlp-master/tasks/main.yml`: Contains tasks for setting up the YT-DLP management services on the master.
- `roles/ytdlp-worker/tasks/main.yml`: Contains tasks for setting up YT-DLP, Camoufox, and other worker-specific services.
## Targeted Deployments ### Utility & Maintenance
- `playbook-dags.yml`: Quickly syncs only the `dags/` and `config/` directories to all nodes.
- `playbook-hook.yml`: Syncs Airflow custom hooks and restarts relevant services.
- `playbook-sync-local.yml`: Syncs local development files (e.g., `ytops_client`, `pangramia`) to workers.
- `playbooks/pause_worker.yml`: Pauses a worker by creating a lock file, preventing it from taking new tasks.
- `playbooks/resume_worker.yml`: Resumes a paused worker by removing the lock file.
- `playbooks/playbook-bgutils-start.yml`: Starts the `bgutil-provider` container.
- `playbooks/playbook-bgutils-stop.yml`: Stops the `bgutil-provider` container.
- `playbook-update-s3-vars.yml`: Updates the `s3_delivery_connection` in Airflow.
- `playbook-update-regression-script.yml`: Updates the `regression.py` script on the master.
### Deploy only to master node: ### Deprecated
- `playbook-dl.yml`: Older worker deployment logic. Superseded by `playbook-worker.yml`.
- `playbook-depricated.dl.yml`: Older worker deployment logic. Superseded by `playbook-worker.yml`.
```bash ## Current Goal: Disable Camoufox & Enable Aria2
ansible-playbook ansible/playbook-master.yml --limit="af-test"
```
### Deploy only to worker nodes: The current objective is to modify the worker deployment (`playbook-worker.yml` and its role `roles/ytdlp-worker/tasks/main.yml`) to:
1. **Disable Camoufox**: Prevent the build, configuration generation, and startup of all `camoufox` services.
2. **Enable Aria2**: Ensure the `aria2-pro` service is built and started correctly on worker nodes.
```bash The `playbook-worker.yml` has already been updated to build the `aria2-pro` image. The next steps will involve modifying `roles/ytdlp-worker/tasks/main.yml` to remove the Camoufox-related tasks.
ansible-playbook ansible/playbook-worker.yml
```
## DAGs Only Deployment
To update only DAG files and configurations:
```bash
ansible-playbook ansible/playbook-dags.yml
```
## Managing Worker State (Pause/Resume)
The system allows for gracefully pausing a worker to prevent it from picking up new tasks. This is useful for maintenance or decommissioning a node. The mechanism uses a lock file (`AIRFLOW.PREVENT_URL_PULL.lock`) on the worker host.
### To Pause a Worker
This command creates the lock file, causing the `ytdlp_ops_dispatcher` DAG to skip task execution on this host.
```bash
# Replace "worker-hostname" with the target host from your inventory
ansible-playbook ansible/playbooks/pause_worker.yml --limit "worker-hostname"
```
### To Resume a Worker
This command removes the lock file, allowing the worker to resume picking up tasks.
```bash
# Replace "worker-hostname" with the target host from your inventory
ansible-playbook ansible/playbooks/resume_worker.yml --limit "worker-hostname"
```

View File

@ -13,3 +13,4 @@ vault_s3_delivery_secret_access_key: "33b155c5d2ea4fccb0faeeefb420d7ac"
vault_s3_delivery_endpoint: "https://s3.rusonyxcloud.ru" vault_s3_delivery_endpoint: "https://s3.rusonyxcloud.ru"
vault_s3_delivery_bucket: "videos" vault_s3_delivery_bucket: "videos"
vault_s3_delivery_aws_region: "ru-msk" vault_s3_delivery_aws_region: "ru-msk"
vault_aria2_rpc_secret: "aR1a2_sEcReT_pWd_f0r_yTd1p"

View File

@ -11,7 +11,7 @@
src: "../airflow/dags/" src: "../airflow/dags/"
dest: /srv/airflow_master/dags/ dest: /srv/airflow_master/dags/
archive: yes archive: yes
delete: yes delete: no
rsync_path: "sudo rsync" rsync_path: "sudo rsync"
rsync_opts: rsync_opts:
- "--exclude=__pycache__/" - "--exclude=__pycache__/"
@ -42,7 +42,7 @@
src: "../airflow/dags/" src: "../airflow/dags/"
dest: /srv/airflow_dl_worker/dags/ dest: /srv/airflow_dl_worker/dags/
archive: yes archive: yes
delete: yes delete: no
rsync_path: "sudo rsync" rsync_path: "sudo rsync"
rsync_opts: rsync_opts:
- "--exclude=__pycache__/" - "--exclude=__pycache__/"

View File

@ -91,7 +91,6 @@
files: files:
- configs/docker-compose-dl.yaml - configs/docker-compose-dl.yaml
- configs/docker-compose-ytdlp-ops.yaml - configs/docker-compose-ytdlp-ops.yaml
- configs/docker-compose.camoufox.yaml
state: present state: present
remove_orphans: true remove_orphans: true
pull: "{{ 'never' if fast_deploy | default(false) else 'missing' }}" pull: "{{ 'never' if fast_deploy | default(false) else 'missing' }}"

View File

@ -216,6 +216,17 @@
become: yes become: yes
become_user: "{{ ansible_user }}" become_user: "{{ ansible_user }}"
- name: Sync aria2-pro-docker to worker for build context
ansible.posix.synchronize:
src: "../airflow/aria2-pro-docker/"
dest: "{{ airflow_worker_dir }}/aria2-pro-docker/"
rsync_opts:
- "--delete"
recursive: yes
perms: yes
become: yes
become_user: "{{ ansible_user }}"
- name: Ensure bin directory exists on worker for build context - name: Ensure bin directory exists on worker for build context
ansible.builtin.file: ansible.builtin.file:
path: "{{ airflow_worker_dir }}/bin" path: "{{ airflow_worker_dir }}/bin"
@ -275,15 +286,6 @@
- name: Include Docker health check tasks - name: Include Docker health check tasks
include_tasks: tasks/docker_health_check.yml include_tasks: tasks/docker_health_check.yml
- name: Build local Docker images (e.g., camoufox)
ansible.builtin.command: >
docker compose --project-directory . -f configs/docker-compose-ytdlp-ops.yaml build
args:
chdir: "{{ airflow_worker_dir }}"
become: yes
become_user: "{{ ansible_user }}"
register: docker_build_result
changed_when: "'Building' in docker_build_result.stdout or 'writing image' in docker_build_result.stdout"
- name: Pull pre-built Docker images for ytdlp-ops services - name: Pull pre-built Docker images for ytdlp-ops services
ansible.builtin.command: > ansible.builtin.command: >

View File

@ -47,7 +47,6 @@
- "docker-compose-ytdlp-ops.yaml.j2" - "docker-compose-ytdlp-ops.yaml.j2"
- "docker-compose.config-generate.yaml" - "docker-compose.config-generate.yaml"
- "envoy.yaml.j2" - "envoy.yaml.j2"
- "docker-compose.camoufox.yaml.j2"
- name: Create .env file for YT-DLP master service - name: Create .env file for YT-DLP master service
template: template:
@ -117,19 +116,6 @@
recurse: yes recurse: yes
become: yes become: yes
- name: Create dummy camoufox compose file for master to prevent errors
copy:
content: |
# This is a placeholder file.
# The master node does not run Camoufox, but the shared docker-compose-ytdlp-ops.yaml
# may unconditionally include this file, causing an error if it's missing.
# This file provides an empty services block to satisfy the include.
services: {}
dest: "{{ airflow_master_dir }}/configs/docker-compose.camoufox.yaml"
mode: "{{ file_permissions }}"
owner: "{{ ssh_user }}"
group: "{{ deploy_group }}"
become: yes
- name: Check for shadowsocks-rust proxy compose file - name: Check for shadowsocks-rust proxy compose file
stat: stat:

View File

@ -66,18 +66,7 @@
- name: "Log: Syncing YT-DLP service files" - name: "Log: Syncing YT-DLP service files"
debug: debug:
msg: "Syncing YT-DLP service components (config generator, envoy/camoufox templates) to the worker node." msg: "Syncing YT-DLP service components (config generator, envoy templates) to the worker node."
- name: Sync YT-DLP service files to worker
synchronize:
src: "../{{ item }}"
dest: "{{ airflow_worker_dir }}/"
archive: yes
recursive: yes
rsync_path: "sudo rsync"
rsync_opts: "{{ rsync_default_opts }}"
loop:
- "airflow/camoufox"
- name: Sync YT-DLP config generator to worker - name: Sync YT-DLP config generator to worker
synchronize: synchronize:
@ -99,7 +88,6 @@
- "docker-compose-ytdlp-ops.yaml.j2" - "docker-compose-ytdlp-ops.yaml.j2"
- "docker-compose.config-generate.yaml" - "docker-compose.config-generate.yaml"
- "envoy.yaml.j2" - "envoy.yaml.j2"
- "docker-compose.camoufox.yaml.j2"
- name: Sync Airflow build context to worker - name: Sync Airflow build context to worker
synchronize: synchronize:
@ -209,19 +197,35 @@
force_source: true force_source: true
when: not fast_deploy | default(false) when: not fast_deploy | default(false)
- name: "Log: Building Camoufox (remote browser) image" - name: "Log: Building aria2-pro image"
debug: debug:
msg: "Building the Camoufox image locally. This image provides remote-controlled Firefox browsers for token generation." msg: "Building the aria2-pro image locally. This image provides the download manager."
- name: Build Camoufox image from local Dockerfile
community.docker.docker_image:
name: "camoufox:latest"
build:
path: "{{ airflow_worker_dir }}/camoufox"
source: build
force_source: true
when: not fast_deploy | default(false) when: not fast_deploy | default(false)
- name: Build aria2-pro image from docker-compose
ansible.builtin.command: >
docker compose -f configs/docker-compose.airflow.yml build aria2-pro
args:
chdir: "{{ airflow_worker_dir }}"
become: yes
become_user: "{{ ansible_user }}"
register: docker_build_result
changed_when: "'Building' in docker_build_result.stdout or 'writing image' in docker_build_result.stdout"
when: not fast_deploy | default(false)
# - name: "Log: Building Camoufox (remote browser) image"
# debug:
# msg: "Building the Camoufox image locally. This image provides remote-controlled Firefox browsers for token generation."
#
# - name: Build Camoufox image from local Dockerfile
# community.docker.docker_image:
# name: "camoufox:latest"
# build:
# path: "{{ airflow_worker_dir }}/camoufox"
# source: build
# force_source: true
# when: not fast_deploy | default(false)
- name: Ensure correct permissions for build context after generation - name: Ensure correct permissions for build context after generation
file: file:
path: "{{ airflow_worker_dir }}" path: "{{ airflow_worker_dir }}"
@ -245,7 +249,6 @@
project_src: "{{ airflow_worker_dir }}" project_src: "{{ airflow_worker_dir }}"
files: files:
- "configs/docker-compose-ytdlp-ops.yaml" - "configs/docker-compose-ytdlp-ops.yaml"
- "configs/docker-compose.camoufox.yaml"
- "configs/docker-compose.airflow.yml" - "configs/docker-compose.airflow.yml"
state: absent state: absent
remove_volumes: true # Corresponds to docker compose down -v remove_volumes: true # Corresponds to docker compose down -v
@ -259,20 +262,19 @@
- name: "Log: Starting all worker services" - name: "Log: Starting all worker services"
debug: debug:
msg: "Starting all worker services: ytdlp-ops, camoufox, and airflow-worker." msg: "Starting all worker services: ytdlp-ops, and airflow-worker."
- name: Start all worker services - name: Start all worker services
community.docker.docker_compose_v2: community.docker.docker_compose_v2:
project_src: "{{ airflow_worker_dir }}" project_src: "{{ airflow_worker_dir }}"
files: files:
- "configs/docker-compose-ytdlp-ops.yaml" - "configs/docker-compose-ytdlp-ops.yaml"
- "configs/docker-compose.camoufox.yaml"
- "configs/docker-compose.airflow.yml" - "configs/docker-compose.airflow.yml"
state: present state: present
remove_orphans: true remove_orphans: true
pull: "{{ 'never' if fast_deploy | default(false) else 'missing' }}" pull: "{{ 'never' if fast_deploy | default(false) else 'missing' }}"
recreate: always # Corresponds to --force-recreate recreate: always # Corresponds to --force-recreate
- name: Include camoufox verification tasks # - name: Include camoufox verification tasks
include_tasks: ../../../tasks/verify_camoufox.yml # include_tasks: ../../../tasks/verify_camoufox.yml
when: not fast_deploy | default(false) # when: not fast_deploy | default(false)

View File

@ -27,19 +27,55 @@ execution_control:
info_json_generation_policy: info_json_generation_policy:
# Use a standard client. The server will handle token generation. # Use a standard client. The server will handle token generation.
client: web client: tv_simply
--- ---
# Policy: Test download specific DASH formats from a folder of info.jsons. # Policy: Full-stack test with visitor ID rotation and test download.
# This policy uses a single worker to test-download a list of video-only DASH # This policy uses a single worker to fetch info.json files for a list of URLs,
# formats from a directory of existing info.json files. It only downloads the # and then immediately performs a test download (first 10KB) of specified formats.
# first 10KB of each format and sleeps between each file. # It simulates user churn by creating a new profile (and thus a new visitor_id and POT)
name: download_dashy_formats_test # every 250 requests. A short sleep is used between requests.
name: full_stack_with_visitor_id_rotation
settings:
mode: full_stack
urls_file: "urls.txt" # Placeholder, should be overridden with --set
info_json_script: "bin/ytops-client get-info"
# Use the modern profile management system to rotate visitor_id.
profile_mode: per_worker_with_rotation
profile_management:
prefix: "visitor_rotator"
# Rotate to a new profile generation after 250 requests.
max_requests_per_profile: 250
execution_control:
run_until: { cycles: 1 } # Run through the URL list once.
workers: 1 # Run with a single worker thread.
# A short, fixed sleep between each info.json request.
sleep_between_tasks: { min_seconds: 0.75, max_seconds: 0.75 }
info_json_generation_policy:
# Use a standard client. The server will handle token generation.
client: tv_simply
download_policy:
formats: "299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy"
downloader: "native-py"
extra_args: '--test --cleanup'
output_dir: "downloads/fetch_and_test"
sleep_between_formats: { min_seconds: 6, max_seconds: 6 }
---
# Policy: Download-only test from a fetch folder (Batch Mode).
# This policy scans a directory of existing info.json files once, and performs
# a test download (first 10KB) for specific formats. It is designed to run as
# a batch job after a 'fetch_only' policy has completed.
name: download_only_test_from_fetch_folder
settings: settings:
mode: download_only mode: download_only
# Directory of info.json files to process. # Directory of info.json files to process.
info_json_dir: "fetched_info_jsons/visitor_id_rotation" # Assumes output from the above policy info_json_dir: "fetched_info_jsons/visitor_id_rotation" # Assumes output from 'fetch_with_visitor_id_rotation'
execution_control: execution_control:
run_until: { cycles: 1 } # Run through the info.json directory once. run_until: { cycles: 1 } # Run through the info.json directory once.
@ -49,10 +85,42 @@ execution_control:
download_policy: download_policy:
# A specific list of video-only DASH formats to test. # A specific list of video-only DASH formats to test.
# The "-dashy" suffix is illustrative; the format IDs must exist in the info.json.
formats: "299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy" formats: "299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy"
# Use the native Python downloader for better performance and control.
downloader: "native-py" downloader: "native-py"
# Pass extra arguments to yt-dlp to perform a "test" download (first 10KB). # Pass extra arguments to perform a "test" download.
extra_args: '--download-sections "*0-10240"' extra_args: '--test --cleanup'
output_dir: "downloads/dash_test" output_dir: "downloads/dash_test"
---
# Policy: Live download from a watch folder (Continuous Mode).
# This policy continuously watches a directory for new info.json files and
# processes them as they appear. It is designed to work as the second stage
# of a pipeline, consuming files generated by a 'fetch_only' policy.
name: live_download_from_watch_folder
settings:
mode: download_only
info_json_dir: "live_info_json" # A different directory for the live pipeline
directory_scan_mode: continuous
mark_processed_files: true # Rename files to *.processed to avoid re-downloading.
max_files_per_cycle: 50 # Process up to 50 new files each time it checks.
sleep_if_no_new_files_seconds: 15
execution_control:
# For 'continuous' mode, a time-based run_until is typical.
# {cycles: 1} will scan once, process new files, and exit.
# To run for 2 hours, for example, use: run_until: { minutes: 120 }
run_until: { cycles: 1 }
workers: 4 # Use a few workers to process files in parallel.
# sleep_between_tasks controls the pause between processing different info.json files.
# To pause before each download attempt starts, use 'pause_before_download_seconds'
# in the download_policy section below.
sleep_between_tasks: { min_seconds: 0, max_seconds: 0 }
download_policy:
formats: "299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy"
downloader: "native-py"
# Example: Pause for a few seconds before starting each download attempt.
# pause_before_download_seconds: 2
extra_args: '--test --cleanup'
output_dir: "downloads/live_dash_test"
View File
@ -0,0 +1,84 @@
# This file contains policies for testing ban rates and profile survival
# under high request counts.
---
# Policy: Single Profile Ban Test (500 Requests)
# This policy uses a single worker and a single, non-rotating profile to make
# 500 consecutive info.json requests. It is designed to test if and when a
# single profile/visitor_id gets banned or rate-limited by YouTube.
#
# It explicitly disables the server's automatic visitor ID rotation to ensure
# the same identity is used for all requests.
#
# The test will stop if it encounters 3 errors within any 1-minute window,
# or a total of 8 HTTP 403 errors within any 60-minute window.
name: single_profile_ban_test_500
settings:
mode: fetch_only
urls_file: "urls.txt" # Override with --set settings.urls_file=...
info_json_script: "bin/ytops-client get-info"
save_info_json_dir: "fetched_info_jsons/ban_test_single_profile"
# Use one worker with one profile that does not rotate automatically.
profile_mode: per_worker_with_rotation
profile_management:
prefix: "ban_test_user"
# Set a high request limit to prevent the orchestrator from rotating the profile.
max_requests_per_profile: 1000
execution_control:
run_until: { requests: 500 } # Stop after 500 total requests.
workers: 1
sleep_between_tasks: { min_seconds: 1, max_seconds: 2 }
info_json_generation_policy:
client: "tv_simply" # A typical client for this kind of test.
# Explicitly disable the server's visitor ID rotation mechanism.
request_params:
session_params:
visitor_rotation_threshold: 0
stop_conditions:
# Stop if we get 3 or more errors in any 1-minute window (rapid failure).
on_error_rate: { max_errors: 3, per_minutes: 1 }
# Stop if we get 8 or more 403 errors in any 60-minute window (ban detection).
on_cumulative_403: { max_errors: 8, per_minutes: 60 }
---
# Policy: Multi-Profile Survival Test
# This policy uses 5 parallel workers, each with its own unique profile.
# It tests whether using multiple profiles with the server's default automatic
# visitor ID rotation (every 250 requests) can sustain a high request rate
# without getting banned.
#
# The test will run until 1250 total requests have been made (250 per worker),
# which should trigger one rotation for each profile.
name: multi_profile_survival_test
settings:
mode: fetch_only
urls_file: "urls.txt" # Override with --set settings.urls_file=...
info_json_script: "bin/ytops-client get-info"
save_info_json_dir: "fetched_info_jsons/ban_test_multi_profile"
# Use 5 workers, each getting its own rotating profile.
profile_mode: per_worker_with_rotation
profile_management:
prefix: "survival_test_user"
# Use the default rotation threshold of 250 requests per profile.
max_requests_per_profile: 250
execution_control:
run_until: { requests: 1250 } # 5 workers * 250 requests/rotation = 1250 total.
workers: 5
sleep_between_tasks: { min_seconds: 1, max_seconds: 2 }
info_json_generation_policy:
client: "tv_simply"
# No request_params are needed here; we want to use the server's default
# visitor ID rotation behavior.
stop_conditions:
# Stop if we get 3 or more errors in any 1-minute window (rapid failure).
on_error_rate: { max_errors: 3, per_minutes: 1 }
# Stop if we get 8 or more 403 errors in any 60-minute window (ban detection).
on_cumulative_403: { max_errors: 8, per_minutes: 60 }
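Both stop conditions are sliding-window counters: trip once N errors land inside any M-minute window. The sketch below only illustrates that window logic; it makes no assumption about the orchestrator's real implementation.

import time
from collections import deque

class SlidingWindowStop:
    """Trips when max_errors (or more) are recorded within per_minutes."""
    def __init__(self, max_errors, per_minutes):
        self.max_errors = max_errors
        self.window = per_minutes * 60
        self.events = deque()

    def record(self, now=None):
        now = now or time.time()
        self.events.append(now)
        while self.events and now - self.events[0] > self.window:
            self.events.popleft()  # drop errors that fell out of the window
        return len(self.events) >= self.max_errors  # True means: stop the run

# The single-profile ban test above: 3 errors per 1 minute, 8 (403s) per 60 minutes.
rapid_failures = SlidingWindowStop(3, 1)
ban_detection = SlidingWindowStop(8, 60)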
View File
@ -27,7 +27,10 @@ def main():
    if last_arg.startswith('-') and len(last_arg) == 11:
        import re
        if re.fullmatch(r'-[a-zA-Z0-9_-]{10}', last_arg):
            # Only insert '--' if it's not already the preceding argument.
            # This prevents `stress_policy_tool` which already adds '--' from causing an error.
            if sys.argv[-2] != '--':
                sys.argv.insert(len(sys.argv) - 1, '--')

    parser = argparse.ArgumentParser(
        description="YT Ops Client Tools",
View File
@ -12,14 +12,16 @@ import glob
import shutil
import re
import shlex
import threading
import time
from urllib.parse import urljoin

try:
    import aria2p
    from aria2p.utils import human_readable_bytes
    import yt_dlp
except ImportError:
    print("aria2p or yt-dlp is not installed. Please install them with: pip install aria2p yt-dlp", file=sys.stderr)
    sys.exit(1)

logger = logging.getLogger('download_aria_tool')
@ -61,15 +63,18 @@ cat latest-info.json | yt-ops-client download aria-rpc -f "299/137" \\
    parser.add_argument('--aria-host', default='localhost', help='The host of the aria2c RPC server. Default: localhost.')
    parser.add_argument('--aria-port', type=int, default=6800, help='The port of the aria2c RPC server. Default: 6800.')
    parser.add_argument('--aria-secret', help='The secret token for the aria2c RPC server (often required, e.g., "SQGCQPLVFQIASMPNPOJYLVGJYLMIDIXDXAIXOTX").')
    parser.add_argument('--proxy', help='Proxy to use for the download, e.g., "socks5://127.0.0.1:1080". This sets the "all-proxy" option in aria2c.')
    parser.add_argument('--downloader-args', help='Arguments for aria2c, in yt-dlp format (e.g., "aria2c:[-x 8, -k 1M]").')
    parser.add_argument('--wait', action='store_true', help='Wait for the download to complete and report its status. Note: This makes the operation synchronous and will block until the download finishes.')
    parser.add_argument('--wait-timeout', help='Timeout in seconds for waiting on downloads. Use "auto" to calculate based on a minimum speed of 200KiB/s. Requires --wait. Default: no timeout.')
    parser.add_argument('--max-concurrent-fragments', type=int, default=8, help='Maximum number of fragments to download concurrently when using --wait. Mimics aria2c\'s -j option. Default: 8.')
    parser.add_argument('--auto-merge-fragments', action='store_true', help='Automatically merge fragments after download. Requires --wait and assumes the script has filesystem access to the aria2c host.')
    parser.add_argument('--remove-fragments-after-merge', action='store_true', help='Delete individual fragment files after a successful merge. Requires --auto-merge-fragments.')
    parser.add_argument('--cleanup', action='store_true', help='After a successful download, remove the final file(s) from the filesystem. For fragmented downloads, this implies --remove-fragments-after-merge.')
    parser.add_argument('--remove-on-complete', action=argparse.BooleanOptionalAction, default=True, help='Remove the download from aria2c history on successful completion. Use --no-remove-on-complete to disable. May fail on older aria2c daemons.')
    parser.add_argument('--purge-on-complete', action='store_true', help='Use aria2.purgeDownloadResult to clear ALL completed/failed downloads from history on success. Use as a workaround for older daemons.')
    parser.add_argument('--add-header', action='append', help='Add a custom HTTP header for the download. Format: "Key: Value". Can be used multiple times.')
    parser.add_argument('--user-agent', help='Specify a custom User-Agent. Overrides any User-Agent from info.json, --add-header, or the default.')
    parser.add_argument('--verbose', action='store_true', help='Enable verbose output for this script.')
    return parser
@ -101,6 +106,10 @@ def parse_aria_error(download):
    if not error_message:
        return f"Unknown aria2c error (Code: {error_code})"

    # Handle specific error codes that provide more context
    if error_code == 24:  # Authorization failed
        return f"HTTP Authorization Failed (Error 24). The URL may have expired or requires valid cookies/headers. Raw message: {error_message}"

    # Check for common HTTP errors in the message
    http_status_match = re.search(r'HTTP status (\d+)', error_message)
    if http_status_match:
@ -144,6 +153,8 @@ def parse_aria_args_to_options(args_str):
    parser.add_argument('-x', '--max-connection-per-server')
    parser.add_argument('-k', '--min-split-size')
    parser.add_argument('-s', '--split')
    parser.add_argument('--http-proxy')
    parser.add_argument('--https-proxy')
    parser.add_argument('--all-proxy')

    try:
@ -151,8 +162,10 @@ def parse_aria_args_to_options(args_str):
        known_args, unknown_args = parser.parse_known_args(arg_list)
        if unknown_args:
            logger.warning(f"Ignoring unknown arguments in --downloader-args: {unknown_args}")
        # Convert to dict, removing None values, and converting underscores back to hyphens
        # to match the option format expected by aria2c's RPC interface.
        return {k.replace('_', '-'): v for k, v in vars(known_args).items() if v is not None}
    except Exception:
        logger.warning(f"Failed to parse arguments inside --downloader-args: '{inner_args_str}'")
        return {}
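The underscore-to-hyphen conversion is the substantive part of this change: argparse stores '--max-connection-per-server' as max_connection_per_server, while aria2c's RPC interface only understands the hyphenated form. A standalone sketch of the same normalization, using a reduced set of the options registered above:

import argparse
import shlex

def parse_aria2c_options(inner_args_str):
    parser = argparse.ArgumentParser(add_help=False)
    parser.add_argument('-x', '--max-connection-per-server')
    parser.add_argument('-k', '--min-split-size')
    parser.add_argument('--all-proxy')
    known, _unknown = parser.parse_known_args(shlex.split(inner_args_str))
    # argparse namespaces use underscores; aria2c RPC option names use hyphens.
    return {k.replace('_', '-'): v for k, v in vars(known).items() if v is not None}

print(parse_aria2c_options("-x 8 -k 1M"))
# {'max-connection-per-server': '8', 'min-split-size': '1M'}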
@ -161,6 +174,9 @@ def parse_aria_args_to_options(args_str):
def main_download_aria(args):
    """Main logic for the 'download-aria' command."""
    log_level = logging.DEBUG if args.verbose else logging.INFO
    # Reconfigure root logger to ensure our settings are applied.
    for handler in logging.root.handlers[:]:
        logging.root.removeHandler(handler)
    logging.basicConfig(level=log_level, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', stream=sys.stderr)

    if args.remove_fragments_after_merge and not args.auto_merge_fragments:
@ -198,25 +214,43 @@ def main_download_aria(args):
logger.error(f"Failed to parse info.json from {input_source_name}. Is the input valid JSON?") logger.error(f"Failed to parse info.json from {input_source_name}. Is the input valid JSON?")
return 1 return 1
# Find the requested format, supporting yt-dlp style selectors # Find the requested format using yt-dlp's own selection logic
target_format = None try:
# A format selector can be a comma-separated list of preferences, # We don't need a full ydl instance, just the format selection logic.
# where each preference can be a slash-separated list of format_ids. ydl = yt_dlp.YoutubeDL({'quiet': True, 'logger': logger, 'format': args.format})
# e.g., "299/137/136,140" means try 299, then 137, then 136, then 140. formats = info_data.get('formats', [])
format_preferences = [item.strip() for sublist in (i.split('/') for i in args.format.split(',')) for item in sublist if item.strip()] selector = ydl.build_format_selector(args.format)
ctx = {
'formats': formats,
'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
'incomplete_formats': (all(f.get('vcodec') == 'none' for f in formats)
or all(f.get('acodec') == 'none' for f in formats)),
}
selected_formats = list(selector(ctx))
except Exception as e:
logger.error(f"Failed to select format with selector '{args.format}': {e}", exc_info=args.verbose)
return 1
available_formats_map = {f['format_id']: f for f in info_data.get('formats', []) if 'format_id' in f} if not selected_formats:
for format_id in format_preferences:
if format_id in available_formats_map:
target_format = available_formats_map[format_id]
logger.info(f"Selected format ID '{format_id}' from selector '{args.format}'.")
break
if not target_format:
logger.error(f"No suitable format found for selector '{args.format}' in info.json.") logger.error(f"No suitable format found for selector '{args.format}' in info.json.")
return 1 return 1
# The selector might return multiple results if ',' is used. We'll process the first one.
target_format = selected_formats[0]
if len(selected_formats) > 1:
logger.warning(f"Format selector '{args.format}' resolved to multiple format combinations. Only the first one will be downloaded.")
formats_to_download = target_format.get('requested_formats', [target_format])
if len(formats_to_download) > 1:
logger.warning(
f"The selected format is a combination of {len(formats_to_download)} streams. "
f"This tool does not support merging separate video/audio streams. "
f"Only the first stream (format_id: {formats_to_download[0].get('format_id')}) will be downloaded. "
f"To download all streams, please specify their format IDs separately."
)
target_format = formats_to_download[0]
# Get file size for auto-timeout and dynamic options # Get file size for auto-timeout and dynamic options
total_filesize = target_format.get('filesize') or target_format.get('filesize_approx') total_filesize = target_format.get('filesize') or target_format.get('filesize_approx')
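As a quick standalone check of the selection path above, the same selector can be exercised against a synthetic formats list. The format entries below are invented for the example, and the ctx keys simply mirror the call in this diff; they are an internal yt-dlp detail and may change between releases.

import yt_dlp

formats = [
    {'format_id': '137', 'ext': 'mp4', 'vcodec': 'avc1', 'acodec': 'none', 'url': 'https://example.com/137'},
    {'format_id': '140', 'ext': 'm4a', 'vcodec': 'none', 'acodec': 'mp4a.40.2', 'url': 'https://example.com/140'},
]
ydl = yt_dlp.YoutubeDL({'quiet': True})
selector = ydl.build_format_selector('137/140')
ctx = {
    'formats': formats,
    'has_merged_format': False,   # no entry carries both audio and video
    'incomplete_formats': False,  # both stream kinds are present in the list
}
print([f['format_id'] for f in selector(ctx)])  # expected: ['137']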
@ -231,9 +265,9 @@ def main_download_aria(args):
    # Prepare options for aria2
    aria_options = {
        # Options from yt-dlp's aria2c integration for performance and reliability
        'max-connection-per-server': 16,
        'split': 16,
        'http-accept-gzip': 'true',
        'file-allocation': 'none',
    }
@ -243,20 +277,59 @@ def main_download_aria(args):
    custom_options = parse_aria_args_to_options(args.downloader_args)

    # Set min-split-size. yt-dlp's default is 1M.
    if 'min-split-size' not in custom_options:
        if total_filesize and total_filesize > 100 * 1024 * 1024:  # 100 MiB
            aria_options['min-split-size'] = '5M'
            logger.info("File is > 100MiB, dynamically setting min-split-size to 5M.")
        else:
            aria_options['min-split-size'] = '1M'

    if custom_options:
        aria_options.update(custom_options)
        logger.info(f"Applied custom aria2c options from --downloader-args: {custom_options}")

    # For older aria2c versions, SOCKS5 proxy must be specified with an 'http://' scheme.
    if 'all-proxy' in aria_options and isinstance(aria_options['all-proxy'], str) and aria_options['all-proxy'].startswith('socks5://'):
        proxy_url = aria_options['all-proxy']
        logger.info("Replacing 'socks5://' with 'http://' in proxy URL for aria2c compatibility.")
        aria_options['all-proxy'] = 'http://' + proxy_url[len('socks5://'):]

    aria_options['out'] = filename

    # Add headers from info.json, and allow overriding/adding with --add-header
    headers = target_format.get('http_headers', {}).copy()
    if args.add_header:
        for header in args.add_header:
            if ':' not in header:
                logger.error(f"Invalid header format in --add-header: '{header}'. Expected 'Key: Value'.")
                return 1
            key, value = header.split(':', 1)
            key = key.strip()
            value = value.strip()
            if key in headers:
                logger.info(f"Overwriting header '{key}' from info.json with value from command line.")
            else:
                logger.info(f"Adding header from command line: {key}: {value}")
            headers[key] = value

    # Enforce a consistent User-Agent.
    # First, remove any User-Agent that might have come from info.json, case-insensitively.
    for key in list(headers.keys()):
        if key.lower() == 'user-agent':
            del headers[key]

    # Set the default Cobalt User-Agent.
    default_user_agent = 'Mozilla/5.0 (ChromiumStylePlatform) Cobalt/Version'
    headers['User-Agent'] = default_user_agent
    logger.info(f"Set default User-Agent to: {default_user_agent}")

    # The --user-agent flag has the highest precedence and can override the default.
    if args.user_agent:
        headers['User-Agent'] = args.user_agent
        logger.info(f"Overriding User-Agent with value from --user-agent: {args.user_agent}")

    if headers:
        header_list = [f'{key}: {value}' for key, value in headers.items()]
        aria_options['header'] = header_list
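The resulting precedence is: headers from info.json form the base, --add-header entries override them, the default Cobalt User-Agent replaces any inherited one, and --user-agent wins over everything. A minimal sketch of that merge order; the function name is illustrative only:

def merge_headers(info_json_headers, add_headers=(), user_agent=None,
                  default_user_agent='Mozilla/5.0 (ChromiumStylePlatform) Cobalt/Version'):
    headers = dict(info_json_headers or {})
    for raw in add_headers:                      # "Key: Value" strings from --add-header
        key, value = raw.split(':', 1)
        headers[key.strip()] = value.strip()
    for key in [k for k in headers if k.lower() == 'user-agent']:
        del headers[key]                         # drop any inherited User-Agent, case-insensitively
    headers['User-Agent'] = user_agent or default_user_agent
    return headers

print(merge_headers({'user-agent': 'from-info-json', 'Accept': '*/*'}, ['X-Debug: 1'])['User-Agent'])
# -> Mozilla/5.0 (ChromiumStylePlatform) Cobalt/Version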
@ -268,6 +341,12 @@ def main_download_aria(args):
            else:
                logger.debug(f"  Header: {h}")

    # Final check: ensure all option values are strings, as required by aria2c RPC.
    # The 'header' option is a list of strings, which is a special case and should be preserved.
    for key, value in aria_options.items():
        if key != 'header' and not isinstance(value, str):
            aria_options[key] = str(value)

    is_fragmented = 'fragments' in target_format
    if not is_fragmented:
        url = target_format.get('url')
@ -305,10 +384,20 @@ def main_download_aria(args):
logger.error(f"Invalid --wait-timeout value: '{args.wait_timeout}'. Must be a positive integer or 'auto'.") logger.error(f"Invalid --wait-timeout value: '{args.wait_timeout}'. Must be a positive integer or 'auto'.")
return 1 return 1
# Determine the download directory for aria2c.
# If --remote-dir is specified, it takes precedence.
# Otherwise, assume a local setup and use --output-dir.
# It's crucial to use an absolute path to avoid ambiguity for the aria2c daemon.
download_dir_for_aria = args.remote_dir
if not download_dir_for_aria:
local_dir = args.output_dir or '.'
download_dir_for_aria = os.path.abspath(local_dir)
logger.info(f"No --remote-dir specified. Using local path for aria2c download directory: {download_dir_for_aria}")
if is_fragmented: if is_fragmented:
return download_fragments_aria(args, api, target_format, filename, aria_options, timeout_seconds, remote_dir=args.remote_dir) return download_fragments_aria(args, api, target_format, filename, aria_options, timeout_seconds, remote_dir=download_dir_for_aria)
else: else:
return download_url_aria(args, api, url, filename, aria_options, timeout_seconds, remote_dir=args.remote_dir) return download_url_aria(args, api, url, filename, aria_options, timeout_seconds, remote_dir=download_dir_for_aria)
except Exception as e: except Exception as e:
logger.error(f"An error occurred while communicating with aria2c: {e}", exc_info=args.verbose) logger.error(f"An error occurred while communicating with aria2c: {e}", exc_info=args.verbose)
@ -325,87 +414,98 @@ def download_url_aria(args, api, url, filename, aria_options, timeout_seconds, r
logger.error("Failed to add download to aria2c. The API returned an empty result.") logger.error("Failed to add download to aria2c. The API returned an empty result.")
return 1 return 1
# Handle older aria2p versions that return a single Download object instead of a list
download = downloads[0] if isinstance(downloads, list) else downloads download = downloads[0] if isinstance(downloads, list) else downloads
logger.info(f"Successfully added download to aria2c. GID: {download.gid}") logger.info(f"Successfully added download to aria2c. GID: {download.gid}")
if args.wait: if args.wait:
logger.info(f"Waiting for download {download.gid} to complete...") logger.info(f"Waiting for download {download.gid} to complete using WebSocket events...")
start_time = time.time() download_finished_event = threading.Event()
final_status = {}
def on_complete(api_ref, event_gid):
if event_gid == download.gid:
logger.debug(f"WebSocket: GID {event_gid} completed.")
final_status['status'] = 'complete'
download_finished_event.set()
def on_error(api_ref, event_gid):
if event_gid == download.gid:
logger.debug(f"WebSocket: GID {event_gid} errored.")
final_status['status'] = 'error'
download_finished_event.set()
def on_stop(api_ref, event_gid):
if event_gid == download.gid:
logger.debug(f"WebSocket: GID {event_gid} stopped.")
final_status['status'] = 'stopped'
download_finished_event.set()
listener_thread = threading.Thread(
target=api.listen_to_notifications,
kwargs={
'on_download_complete': on_complete,
'on_download_error': on_error,
'on_download_stop': on_stop,
'timeout': 1,
'handle_signals': False
},
daemon=True
)
try: try:
while True: listener_thread.start()
if timeout_seconds and (time.time() - start_time > timeout_seconds): finished = download_finished_event.wait(timeout=timeout_seconds)
raise TimeoutError(f"Download did not complete within {timeout_seconds}s timeout.") if not finished and not download_finished_event.is_set():
raise TimeoutError(f"Download did not complete within {timeout_seconds}s timeout.")
# Re-fetch the download object to get the latest status except KeyboardInterrupt:
download.update()
# A download is no longer active if it's complete, errored, paused, or removed.
if download.status not in ('active', 'waiting'):
break
progress_info = (
f"\rGID {download.gid}: {download.status} "
f"{download.progress_string()} "
f"({download.download_speed_string()}) "
f"ETA: {download.eta_string()}"
)
sys.stdout.write(progress_info)
sys.stdout.flush()
time.sleep(0.5)
except (KeyboardInterrupt, TimeoutError) as e:
sys.stdout.write('\n') sys.stdout.write('\n')
if isinstance(e, KeyboardInterrupt): logger.warning("Wait interrupted by user. Cleaning up download...")
logger.warning("Wait interrupted by user. Cleaning up download...") cleanup_aria_download(api, [download])
cleanup_aria_download(api, [download]) return 130
return 130 except TimeoutError as e:
else: # TimeoutError logger.error(f"Download timed out. Cleaning up... Error: {e}")
logger.error(f"Download timed out. Cleaning up... Error: {e}") cleanup_aria_download(api, [download])
cleanup_aria_download(api, [download]) return 1
return 1 finally:
api.stop_listening()
if listener_thread.is_alive():
listener_thread.join(timeout=2)
# Re-fetch download object to get final details
try:
download.update()
except aria2p.ClientException as e: except aria2p.ClientException as e:
# This can happen if the download completes and is removed by aria2c logger.warning(f"Could not update final status for GID {download.gid} (maybe removed on completion?): {e}.")
# before we can check its final status. Assume success in this case. if final_status.get('status') != 'complete':
logger.warning(f"Could not get final status for GID {download.gid} (maybe removed on completion?): {e}. Assuming success.") logger.error(f"Download {download.gid} failed, but could not retrieve final error details.")
print(f"Download for GID {download.gid} presumed successful.") return 1
return 0
sys.stdout.write('\n') # Newline after progress bar if final_status.get('status') == 'complete':
# Final status check (no need to update again, we have the latest status)
if download.status == 'complete':
logger.info(f"Download {download.gid} completed successfully.") logger.info(f"Download {download.gid} completed successfully.")
downloaded_filepath_remote = download.files[0].path if download.files else None
downloaded_filepath_remote = None if downloaded_filepath_remote:
if download.files:
downloaded_filepath_remote = download.files[0].path
print(f"Download successful: {downloaded_filepath_remote}") print(f"Download successful: {downloaded_filepath_remote}")
else: else:
print("Download successful, but no file path reported by aria2c.") print("Download successful, but no file path reported by aria2c.")
if args.cleanup and downloaded_filepath_remote: if args.cleanup and downloaded_filepath_remote:
local_filepath = None
# To map remote path to local, we need remote_dir and a local equivalent.
# We'll use fragments_dir as the local equivalent, which defaults to output_dir.
local_base_dir = args.fragments_dir or args.output_dir or '.' local_base_dir = args.fragments_dir or args.output_dir or '.'
if remote_dir: if remote_dir and downloaded_filepath_remote.startswith(remote_dir):
if downloaded_filepath_remote.startswith(remote_dir): relative_path = os.path.relpath(downloaded_filepath_remote, remote_dir)
relative_path = os.path.relpath(downloaded_filepath_remote, remote_dir) local_filepath = os.path.join(local_base_dir, relative_path)
local_filepath = os.path.join(local_base_dir, relative_path)
else:
logger.warning(f"Cleanup: Downloaded file path '{downloaded_filepath_remote}' does not start with remote-dir '{remote_dir}'. Cannot map to local path.")
else: else:
logger.warning(f"Cleanup: --remote-dir not specified. Assuming download path is accessible locally as '{downloaded_filepath_remote}'.")
local_filepath = downloaded_filepath_remote local_filepath = downloaded_filepath_remote
if not remote_dir:
logger.warning(f"Cleanup: --remote-dir not specified. Assuming download path is accessible locally as '{local_filepath}'.")
if local_filepath: try:
try: if os.path.exists(local_filepath):
if os.path.exists(local_filepath): os.remove(local_filepath)
os.remove(local_filepath) logger.info(f"Cleanup: Removed downloaded file '{local_filepath}'")
logger.info(f"Cleanup: Removed downloaded file '{local_filepath}'") else:
else: logger.warning(f"Cleanup: File not found at expected local path '{local_filepath}'. Skipping removal.")
logger.warning(f"Cleanup: File not found at expected local path '{local_filepath}'. Skipping removal.") except OSError as e:
except OSError as e: logger.error(f"Cleanup failed: Could not remove file '{local_filepath}': {e}")
logger.error(f"Cleanup failed: Could not remove file '{local_filepath}': {e}")
elif args.cleanup: elif args.cleanup:
logger.warning("Cleanup requested, but no downloaded file path was reported by aria2c.") logger.warning("Cleanup requested, but no downloaded file path was reported by aria2c.")
@ -417,11 +517,10 @@ def download_url_aria(args, api, url, filename, aria_options, timeout_seconds, r
logger.warning(f"Failed to purge download history: {e}") logger.warning(f"Failed to purge download history: {e}")
elif args.remove_on_complete: elif args.remove_on_complete:
try: try:
api.remove_download_result(download) api.client.remove_download_result(download.gid)
logger.info(f"Removed download {download.gid} from aria2c history.") logger.info(f"Removed download {download.gid} from aria2c history.")
except Exception as e: except Exception as e:
logger.warning(f"Failed to remove download {download.gid} from history: {e}") logger.warning(f"Failed to remove download {download.gid} from history: {e}")
return 0 return 0
else: else:
detailed_error = parse_aria_error(download) detailed_error = parse_aria_error(download)
@ -445,243 +544,236 @@ def download_fragments_aria(args, api, target_format, filename, aria_options, ti
        )
        return 1

    frag_aria_options = aria_options.copy()
    frag_aria_options.pop('out', None)
    if remote_dir:
        frag_aria_options['dir'] = remote_dir
        logger.info(f"Instructing remote aria2c to save fragments to: {remote_dir}")

    base_filename, file_ext = os.path.splitext(filename)

    multicall_payload = []
    for i, fragment in enumerate(fragments):
        frag_url = fragment.get('url') or urljoin(fragment_base_url, fragment['path'])
        if not frag_url:
            logger.error(f"Fragment {i} has no URL and no fragment_base_url is available. Aborting.")
            return 1

        fragment_filename = f"{base_filename}-Frag{i}{file_ext}"
        current_frag_options = frag_aria_options.copy()
        current_frag_options['out'] = os.path.basename(fragment_filename)

        # The aria2p library will handle adding the secret token to each call in the multicall.
        params = [[frag_url], current_frag_options]
        multicall_payload.append({'methodName': 'aria2.addUri', 'params': params})
    if not args.wait:
        # Asynchronous mode: submit all fragments at once and exit.
        gids, failed_count = [], 0
        try:
            logger.info(f"Submitting {len(multicall_payload)} fragments to aria2c in a single batch request...")
            # The aria2p client library correctly handles authentication for multicalls.
            results = api.client.multicall(multicall_payload)
            for i, result in enumerate(results):
                if isinstance(result, list) and len(result) == 1 and isinstance(result[0], str):
                    gids.append(result[0])
                else:
                    failed_count += 1
                    logger.warning(f"Failed to add fragment {i + 1}: {result[0] if isinstance(result, list) else result}")
        except Exception as e:
            logger.error(f"Batch submission to aria2c failed: {e}", exc_info=args.verbose)
            return 1

        if failed_count > 0:
            logger.warning(f"{failed_count} out of {len(fragments)} fragments failed to be added to aria2c.")
        if not gids:
            logger.error("Failed to add any fragments to aria2c. All submissions failed.")
            return 1

        print(f"Successfully added {len(gids)} fragments. GIDs: {gids}\nThese fragments will need to be merged manually after download.")
        return 0
    # Synchronous (--wait) mode with WebSockets
    MAX_CONCURRENT_FRAGMENTS = args.max_concurrent_fragments
    all_gids, failed_submission_count = [], 0
    submitted_gids, completed_gids = set(), set()
    lock = threading.Lock()
    pending_fragments = list(enumerate(multicall_payload))
    total_fragment_count = len(pending_fragments)
    logger.info(f"Waiting for {total_fragment_count} fragments to complete using WebSocket events...")
    logger.info(f"Will maintain up to {MAX_CONCURRENT_FRAGMENTS} active fragment downloads.")

    def on_event(api_ref, event_gid):
        with lock:
            if event_gid in submitted_gids:
                completed_gids.add(event_gid)

    listener_thread = threading.Thread(
        target=api.listen_to_notifications,
        kwargs={'on_download_complete': on_event, 'on_download_error': on_event, 'on_download_stop': on_event, 'timeout': 1, 'handle_signals': False},
        daemon=True
    )
    listener_thread.start()
    start_time = time.time()

    try:
        while True:
            with lock:
                if len(completed_gids) >= total_fragment_count:
                    break
            if timeout_seconds and (time.time() - start_time > timeout_seconds):
                raise TimeoutError(f"Fragment downloads did not complete within {timeout_seconds}s timeout.")

            with lock:
                active_gids_count = len(submitted_gids) - len(completed_gids)
                num_to_submit = MAX_CONCURRENT_FRAGMENTS - active_gids_count
            if num_to_submit > 0 and pending_fragments:
                chunk_to_submit = pending_fragments[:num_to_submit]
                pending_fragments = pending_fragments[num_to_submit:]
                indices = [item[0] for item in chunk_to_submit]
                payloads = [item[1] for item in chunk_to_submit]
                try:
                    # The aria2p client library correctly handles authentication for multicalls.
                    results = api.client.multicall(payloads)
                    with lock:
                        for i, result in enumerate(results):
                            original_index = indices[i]
                            if isinstance(result, list) and len(result) == 1 and isinstance(result[0], str):
                                gid = result[0]
                                all_gids.append(gid)
                                submitted_gids.add(gid)
                            else:
                                failed_submission_count += 1
                                completed_gids.add(f"failed-submission-{original_index}")
                                logger.warning(f"Failed to add fragment {original_index + 1}: {result[0] if isinstance(result, list) else result}")
                except Exception as e:
                    logger.error(f"Batch submission to aria2c failed for a chunk: {e}", exc_info=args.verbose)
                    with lock:
                        for i in indices:
                            failed_submission_count += 1
                            completed_gids.add(f"failed-submission-{i}")

            with lock:
                completed_download_count = len(completed_gids)
            progress_percent = (completed_download_count / total_fragment_count * 100) if total_fragment_count > 0 else 0
            sys.stdout.write(f"\rProgress: {completed_download_count}/{total_fragment_count} fragments | {progress_percent:.1f}%")
            sys.stdout.flush()
            time.sleep(0.5)
    except (KeyboardInterrupt, TimeoutError) as e:
        sys.stdout.write('\n')
        if isinstance(e, KeyboardInterrupt):
            logger.warning("Wait interrupted by user. Cleaning up fragments...")
        else:
            logger.error(f"Download timed out. Cleaning up fragments... Error: {e}")
        cleanup_aria_download(api, api.get_downloads(list(submitted_gids)))
        return 130 if isinstance(e, KeyboardInterrupt) else 1
    finally:
        api.stop_listening()
        if listener_thread.is_alive():
            listener_thread.join(timeout=2)
    sys.stdout.write('\n')

    if failed_submission_count > 0:
        logger.error(f"{failed_submission_count} fragments failed to be submitted to aria2c.")

    final_downloads = []
    if all_gids:
        try:
            final_downloads = api.get_downloads(all_gids)
        except aria2p.ClientException as e:
            logger.warning(f"Could not perform final status check for fragments (maybe removed on completion?): {e}. Assuming success.")

    failed_downloads = [d for d in final_downloads if d.status != 'complete']
    if failed_downloads:
        logger.error(f"{len(failed_downloads)} fragments failed to download.")
        for d in failed_downloads[:5]:
            logger.error(f" GID {d.gid}: {parse_aria_error(d)}")
        if len(failed_downloads) > 5:
            logger.error(f" ... and {len(failed_downloads) - 5} more errors.")
        return 1

    if failed_submission_count > 0:
        logger.error("Aborting due to fragment submission failures.")
        return 1

    logger.info("All fragments downloaded successfully.")
    output_dir = args.output_dir or '.'
    final_filepath = os.path.join(output_dir, filename)
    fragments_lookup_dir = args.fragments_dir or output_dir

    if args.auto_merge_fragments:
        logger.info(f"Attempting to merge fragments into: {final_filepath}")
        logger.info(f"Searching for fragments in local directory: {os.path.abspath(fragments_lookup_dir)}")
        try:
            escaped_base = glob.escape(base_filename)
            search_path = os.path.join(fragments_lookup_dir, f"{escaped_base}-Frag*{file_ext}")
            fragment_files = sorted(glob.glob(search_path), key=lambda f: int(re.search(r'Frag(\d+)', os.path.basename(f)).group(1)))
            if not fragment_files:
                logger.error(f"No fragment files found with pattern: {search_path}")
                return 1

            with open(final_filepath, 'wb') as dest_file:
                for frag_path in fragment_files:
                    with open(frag_path, 'rb') as src_file:
                        shutil.copyfileobj(src_file, dest_file)
            logger.info(f"Successfully merged {len(fragment_files)} fragments into {final_filepath}")

            if args.remove_fragments_after_merge or args.cleanup:
                logger.info("Removing fragment files...")
                for frag_path in fragment_files: os.remove(frag_path)
                logger.info("Fragment files removed.")

            if args.cleanup:
                try:
                    os.remove(final_filepath)
                    logger.info(f"Cleanup: Removed merged file '{final_filepath}'")
                except OSError as e:
                    logger.error(f"Cleanup failed: Could not remove merged file '{final_filepath}': {e}")

            print(f"Download and merge successful: {final_filepath}")

            if args.purge_on_complete:
                try:
                    api.purge_download_result()
                    logger.info("Purged all completed/failed downloads from aria2c history.")
                except Exception as e:
                    logger.warning(f"Failed to purge download history: {e}")
            elif args.remove_on_complete:
                try:
                    for d in final_downloads:
                        try: api.client.remove_download_result(d.gid)
                        except aria2p.ClientException: pass
                    logger.info(f"Removed {len(final_downloads)} fragment downloads from aria2c history.")
                except Exception as e:
                    logger.warning(f"Failed to remove fragment downloads from history: {e}")

            return 0
        except Exception as e:
            logger.error(f"An error occurred during merging: {e}", exc_info=args.verbose)
            logger.error("Fragments were downloaded but not merged.")
            return 1
    else:
        print(f"Download successful. Fragments now need to be merged manually.\nThe final merged file should be named: {final_filepath}")
        print(f"You can merge them with a command like:\n cat `ls -v '{os.path.join(fragments_lookup_dir, base_filename)}'-Frag*'{file_ext}'` > '{final_filepath}'")

        if args.cleanup:
            logger.info("Cleanup requested. Removing downloaded fragments...")
            try:
                escaped_base = glob.escape(base_filename)
                search_path = os.path.join(fragments_lookup_dir, f"{escaped_base}-Frag*{file_ext}")
                fragment_files = glob.glob(search_path)
                if not fragment_files:
                    logger.warning(f"Cleanup: No fragment files found with pattern: {search_path}")
                else:
                    for frag_path in fragment_files: os.remove(frag_path)
                    logger.info(f"Removed {len(fragment_files)} fragment files.")
            except Exception as e:
                logger.error(f"An error occurred during fragment cleanup: {e}", exc_info=args.verbose)

        if args.purge_on_complete:
            try:
                api.purge_download_result()
                logger.info("Purged all completed/failed downloads from aria2c history.")
            except Exception as e:
                logger.warning(f"Failed to purge download history: {e}")
        elif args.remove_on_complete:
            try:
                api.remove_download_result(final_downloads)
                logger.info(f"Removed {len(final_downloads)} fragment downloads from aria2c history.")
            except Exception as e:
                logger.warning(f"Could not remove fragment downloads from history (maybe already gone?): {e}")

        return 0
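For reference, each aria2.addUri entry batched through system.multicall in this function is a {'methodName', 'params'} struct; when an RPC secret is configured, aria2 expects 'token:<secret>' as the first parameter, which aria2p prepends automatically per the comments above. A hand-rolled equivalent with placeholder values:

# What one batched aria2.addUri call looks like on the wire (illustrative values only).
secret = "SQGCQPLVFQIASMPNPOJYLVGJYLMIDIXDXAIXOTX"
entry = {
    "methodName": "aria2.addUri",
    "params": [
        f"token:{secret}",                        # only present when an RPC secret is set
        ["https://example.com/frag0.mp4"],        # list of URIs for this download
        {"out": "video-Frag0.mp4", "dir": "/downloads"},
    ],
}
payload = [entry]  # system.multicall takes a list of such structs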
View File
@ -84,12 +84,19 @@ def add_download_native_py_parser(subparsers):
    parser.add_argument('--output-buffer', action='store_true', help='Download to an in-memory buffer and print raw bytes to stdout. Final filename is printed to stderr.')
    parser.add_argument('--cleanup', action='store_true', help='After download, rename the file to include a timestamp and truncate it to 0 bytes.')
    parser.add_argument('--merge-output-format', help='Container format to merge to (e.g., "mp4", "mkv"). Overrides config file.')
    parser.add_argument('--retries', type=int, help='Number of retries for the entire download (default: 10).')
    parser.add_argument('--fragment-retries', type=int, help='Number of retries for each fragment (default: 10).')
    parser.add_argument('--socket-timeout', type=int, help='Timeout for socket operations in seconds (default: 20).')
    parser.add_argument('--add-header', action='append', help='Add a custom HTTP header for the download. Format: "Key: Value". Can be used multiple times.')
    # Arguments to pass through to yt-dlp
    parser.add_argument('--download-sections', help='yt-dlp --download-sections argument (e.g., "*0-10240").')
    parser.add_argument('--test', action='store_true', help='yt-dlp --test argument (download small part).')
    return parser


def main_download_native_py(args):
    """Main logic for the 'download-native-py' command."""
    # All logging should go to stderr to keep stdout clean for the final filename, or for binary data with --output-buffer.
    log_stream = sys.stderr
    log_level = logging.DEBUG if args.verbose else logging.INFO
    # Reconfigure root logger
    for handler in logging.root.handlers[:]:
@ -176,7 +183,10 @@ def main_download_native_py(args):
logger.info(f"Adding {len(extra_args_list)} extra arguments from --extra-ytdlp-args.") logger.info(f"Adding {len(extra_args_list)} extra arguments from --extra-ytdlp-args.")
base_opts_args.extend(extra_args_list) base_opts_args.extend(extra_args_list)
ydl_opts = {} ydl_opts = {
'noresizebuffer': True,
'buffersize': '4M',
}
if base_opts_args: if base_opts_args:
try: try:
logger.info(f"Parsing {len(base_opts_args)} arguments from config/extra_args...") logger.info(f"Parsing {len(base_opts_args)} arguments from config/extra_args...")
@ -192,6 +202,17 @@ def main_download_native_py(args):
                # Handle flags (no value)
                is_flag = i + 1 >= len(base_opts_args) or base_opts_args[i + 1].startswith('--')

                if key == 'resize_buffer':
                    ydl_opts['noresizebuffer'] = False
                    logger.debug(f"Parsed flag: noresizebuffer = False")
                    i += 1
                    continue
                elif key == 'no_resize_buffer':
                    ydl_opts['noresizebuffer'] = True
                    logger.debug(f"Parsed flag: noresizebuffer = True")
                    i += 1
                    continue

                if is_flag:
                    if key.startswith('no_'):
@ -229,6 +250,8 @@ def main_download_native_py(args):
                    # Special handling for keys that differ from CLI arg, e.g. --limit-rate -> ratelimit
                    if key == 'limit_rate':
                        key = 'ratelimit'
                    elif key == 'buffer_size':
                        key = 'buffersize'
                    ydl_opts[key] = value
                    logger.debug(f"Parsed option: {key} = {value}")
@ -257,6 +280,21 @@ def main_download_native_py(args):
        ydl_opts['paths'] = {'temp': args.temp_path}
        logger.info(f"Using temporary path: {args.temp_path}")

    if args.add_header:
        if 'http_headers' not in ydl_opts:
            ydl_opts['http_headers'] = {}
        elif not isinstance(ydl_opts['http_headers'], dict):
            logger.warning(f"Overwriting non-dictionary http_headers from config with headers from command line.")
            ydl_opts['http_headers'] = {}
        for header in args.add_header:
            if ':' not in header:
                logger.error(f"Invalid header format in --add-header: '{header}'. Expected 'Key: Value'.")
                return 1
            key, value = header.split(':', 1)
            ydl_opts['http_headers'][key.strip()] = value.strip()
            logger.info(f"Adding/overwriting header: {key.strip()}: {value.strip()}")

    if args.download_continue:
        ydl_opts['continuedl'] = True
        ydl_opts['nooverwrites'] = True
@ -279,6 +317,19 @@ def main_download_native_py(args):
    if args.merge_output_format:
        ydl_opts['merge_output_format'] = args.merge_output_format

    if args.download_sections:
        ydl_opts['download_sections'] = args.download_sections
    if args.test:
        ydl_opts['test'] = True

    if args.retries is not None:
        ydl_opts['retries'] = args.retries
    if args.fragment_retries is not None:
        ydl_opts['fragment_retries'] = args.fragment_retries
    if args.socket_timeout is not None:
        ydl_opts['socket_timeout'] = args.socket_timeout

    try:
        logger.info(f"Starting download for format '{args.format}' using yt-dlp library...")
@ -301,6 +352,13 @@ def main_download_native_py(args):
        # The success path is now always taken if no exception was raised.
        if retcode == 0:
            if ytdlp_logger.is_403:
                logger.error("Download failed: yt-dlp reported HTTP Error 403: Forbidden. The URL has likely expired.")
                return 1
            if ytdlp_logger.is_timeout:
                logger.error("Download failed: yt-dlp reported a timeout.")
                return 1
            logger.info("yt-dlp download completed successfully.")

            if args.output_buffer:
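The retcode handling above relies on is_403 and is_timeout attributes of a ytdlp_logger object that is not shown in this hunk. A logger along these lines could back those flags; this is a sketch under that assumption, not the project's actual class:

class FlaggingYtdlpLogger:
    """Collects yt-dlp log lines and flags 403s and timeouts for a post-run check."""
    def __init__(self):
        self.is_403 = False
        self.is_timeout = False

    def _scan(self, msg):
        text = str(msg)
        if 'HTTP Error 403' in text:
            self.is_403 = True
        if 'timed out' in text.lower() or 'timeout' in text.lower():
            self.is_timeout = True

    def debug(self, msg): self._scan(msg)
    def info(self, msg): self._scan(msg)
    def warning(self, msg): self._scan(msg)
    def error(self, msg): self._scan(msg)

# Usage sketch: ydl_opts['logger'] = FlaggingYtdlpLogger()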
View File
@ -44,6 +44,14 @@ def add_download_parser(subparsers):
    parser.add_argument('--downloader', help='Name of the external downloader to use (e.g., "aria2c", "native").')
    parser.add_argument('--downloader-args', help='Arguments to pass to the external downloader (e.g., "aria2c:-x 8").')
    parser.add_argument('--merge-output-format', help='Container format to merge to (e.g., "mp4", "mkv"). Overrides config file.')
    parser.add_argument('--retries', help='Number of retries for the entire download (default: 10).')
    parser.add_argument('--fragment-retries', help='Number of retries for each fragment (default: 10).')
    parser.add_argument('--socket-timeout', help='Timeout for socket operations in seconds (default: 20).')
    parser.add_argument('--lang', help='Language code for the request (e.g., "fr", "ja"). Affects metadata language.')
    parser.add_argument('--timezone', help='Timezone for the request (e.g., "UTC", "America/New_York"). Note: not supported by yt-dlp.')
    # Arguments to pass through to yt-dlp
    parser.add_argument('--download-sections', help='yt-dlp --download-sections argument (e.g., "*0-10240").')
    parser.add_argument('--test', action='store_true', help='yt-dlp --test argument (download small part).')
    return parser


def main_download(args):
@ -151,6 +159,19 @@ def main_download(args):
if args.merge_output_format: if args.merge_output_format:
cmd.extend(['--merge-output-format', args.merge_output_format]) cmd.extend(['--merge-output-format', args.merge_output_format])
if args.download_sections:
cmd.extend(['--download-sections', args.download_sections])
if args.test:
cmd.append('--test')
if args.retries:
cmd.extend(['--retries', str(args.retries)])
if args.fragment_retries:
cmd.extend(['--fragment-retries', str(args.fragment_retries)])
if args.socket_timeout:
cmd.extend(['--socket-timeout', str(args.socket_timeout)])
if args.download_continue:
cmd.extend(['--continue', '--part'])
@ -172,6 +193,12 @@ def main_download(args):
if proxy_url:
cmd.extend(['--proxy', proxy_url])
if args.lang:
cmd.extend(['--extractor-args', f'youtube:lang={args.lang}'])
if args.timezone:
logger.warning(f"Timezone override ('{args.timezone}') is not supported by yt-dlp and will be ignored.")
# Determine if we need to capture output.
capture_output = args.cleanup or args.log_file or args.print_traffic
@ -208,6 +235,16 @@ def main_download(args):
stdout_data, stderr_data = process.communicate()
return_code = process.returncode
# Post-run check for silent failures, like 403 errors where yt-dlp might still exit 0.
if return_code == 0:
output_text = (stdout_data or "") + (stderr_data or "")
if "HTTP Error 403" in output_text:
logger.error("yt-dlp exited successfully, but a 403 error was detected in its output. Forcing failure.")
return_code = 1 # Override success code
elif "timed out" in output_text.lower() or "timeout" in output_text.lower():
logger.error("yt-dlp exited successfully, but a timeout was detected in its output. Forcing failure.")
return_code = 1
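The same 403/timeout string checks are applied in the native-library path via the logger flags; the shared detection could be expressed as a small helper like the following (a hypothetical refactoring sketch, not part of this commit):

def classify_ytdlp_failure(output_text):
    """Return '403', 'timeout', or None for a chunk of yt-dlp output."""
    if 'HTTP Error 403' in output_text:
        return '403'
    lowered = output_text.lower()
    if 'timed out' in lowered or 'timeout' in lowered:
        return 'timeout'
    return None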
# Write captured output to terminal and log file
if stdout_data:
sys.stdout.write(stdout_data)

View File

@ -124,7 +124,9 @@ the browser-based generation strategy.''')
parser.add_argument('--direct', action='store_true', help='Use the direct yt-dlp info.json generation method, bypassing Node.js token generation.')
parser.add_argument('--print-info-out', action='store_true', help='Print the final info.json to stdout. By default, output is suppressed unless writing to a file.')
parser.add_argument('--request-params-json', help=REQUEST_PARAMS_HELP_STRING + '\nCan also be a comma-separated string of key=value pairs (e.g., "caching_policy.mode=force_refresh").')
parser.add_argument('--force-renew', help='Comma-separated list of items to force-renew: cookies, visitor_id, po_token, nsig_cache, info_json, all.')
parser.add_argument('--lang', help='Language code for the request (e.g., "fr", "ja"). Affects metadata language.')
parser.add_argument('--timezone', help='Timezone for the request (e.g., "UTC", "America/New_York"). Note: experimental, may not be fully supported.')
return parser
def main_get_info(args):
@ -188,6 +190,16 @@ def main_get_info(args):
items_to_renew = [item.strip() for item in args.force_renew.split(',')]
request_params['force_renew'] = items_to_renew
logger.info(f"Requesting force renew for: {items_to_renew}")
if args.lang:
session_params = request_params.setdefault('session_params', {})
session_params['lang'] = args.lang
logger.info(f"Requesting language: {args.lang}")
if args.timezone:
session_params = request_params.setdefault('session_params', {})
session_params['timeZone'] = args.timezone
logger.info(f"Requesting timezone: {args.timezone}")
if args.verbose:
# Add verbose flag for yt-dlp on the server
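For reference, with `--lang fr --timezone UTC --force-renew cookies,po_token` the `--lang` / `--timezone` / `--force-renew` handling above yields a request_params structure along these lines (values illustrative):

request_params = {
    'force_renew': ['cookies', 'po_token'],
    'session_params': {
        'lang': 'fr',
        'timeZone': 'UTC',
    },
}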
@ -244,6 +256,15 @@ def main_get_info(args):
if not token_data or not hasattr(token_data, 'infoJson') or not token_data.infoJson:
logger.error("Server did not return valid info.json data.")
if args.verbose:
logger.debug(f"Received token_data from server: {token_data!r}")
if not token_data:
logger.error("Reason: The entire token_data object received from the server is null.")
elif not hasattr(token_data, 'infoJson'):
logger.error("Reason: The received token_data object does not have an 'infoJson' attribute.")
elif not token_data.infoJson:
logger.error("Reason: The 'infoJson' attribute in the received token_data object is empty or null.")
print("Error: Server did not return valid info.json data.", file=sys.stderr) print("Error: Server did not return valid info.json data.", file=sys.stderr)
return 1 return 1

View File

@ -9,6 +9,11 @@ import re
from urllib.parse import urlparse, parse_qs
from datetime import datetime, timezone
try:
import yt_dlp
except ImportError:
yt_dlp = None
def format_size(b):
"""Format size in bytes to human-readable string."""
if b is None:
@ -32,9 +37,39 @@ def list_formats(info_json, requested_formats_str=None, file=sys.stdout):
requested_formats = []
requested_order = {}
if requested_formats_str:
if yt_dlp:
try:
ydl = yt_dlp.YoutubeDL({'quiet': True})
formats = info_json.get('formats', [])
selector = ydl.build_format_selector(requested_formats_str)
ctx = {
'formats': formats,
'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
'incomplete_formats': (all(f.get('vcodec') == 'none' for f in formats)
or all(f.get('acodec') == 'none' for f in formats)),
}
selected_formats = list(selector(ctx))
all_selected_ids = []
for f in selected_formats:
if 'requested_formats' in f:
all_selected_ids.extend(rf['format_id'] for rf in f['requested_formats'])
else:
all_selected_ids.append(f['format_id'])
requested_formats = all_selected_ids
requested_order = {fmt: i for i, fmt in enumerate(requested_formats)}
except Exception as e:
print(f"WARNING: Could not parse format selector '{requested_formats_str}': {e}", file=sys.stderr)
# Fallback to simple parsing
requested_formats = [item for item in re.split(r'[,/]', requested_formats_str) if item]
requested_order = {fmt: i for i, fmt in enumerate(requested_formats)}
else:
# Fallback to simple parsing if yt-dlp is not installed
print("WARNING: yt-dlp not installed. Using simple format selector parsing.", file=sys.stderr)
requested_formats = [item for item in re.split(r'[,/]', requested_formats_str) if item]
requested_order = {fmt: i for i, fmt in enumerate(requested_formats)}
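To illustrate why the yt-dlp selector path is preferred: the simple fallback only breaks the string on ',' and '/', so merge selectors stay opaque (the selector string below is illustrative):

import re
requested_formats_str = '137+140/best'
print([s for s in re.split(r'[,/]', requested_formats_str) if s])
# -> ['137+140', 'best']; the '+' merge is left as a single token, whereas the
# yt-dlp selector above resolves it to the component format_ids when both
# streams are present in the info.json.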
def sort_key(f):
fid = f.get('format_id', '')

View File

@ -20,6 +20,7 @@ Example of a full configuration JSON showing default values (use single quotes t
"use_curl_prefetch": false, "use_curl_prefetch": false,
"skip_cache": false, "skip_cache": false,
"visitor_id_override_enabled": true, "visitor_id_override_enabled": true,
"webpo_bind_to_visitor_id": true,
"extractor_args": { "extractor_args": {
"youtubepot-bgutilhttp": { "youtubepot-bgutilhttp": {
"base_url": "http://172.17.0.1:4416" "base_url": "http://172.17.0.1:4416"
@ -28,21 +29,22 @@ Example of a full configuration JSON showing default values (use single quotes t
"pot_trace": "true", "pot_trace": "true",
"formats": "duplicate", "formats": "duplicate",
"player_js_version": "actual" "player_js_version": "actual"
},
"youtubepot-webpo": {
"bind_to_visitor_id": "true"
}
}
},
"_comment_ytdlp_params": "Parameters passed directly to the yt-dlp wrapper for info.json generation.",
"_comment_webpo_bind_to_visitor_id": "If true (default), binds the PO Token cache to the visitor ID. Set to false for TV clients if caching issues occur, as this is not recommended for them.",
"_comment_visitor_id_override_enabled": "If true (default), the server validates the visitor ID from the token generator and creates a new one if it is invalid. Set to false to force using the provided visitor ID without validation, which is useful for debugging.", "_comment_visitor_id_override_enabled": "If true (default), the server validates the visitor ID from the token generator and creates a new one if it is invalid. Set to false to force using the provided visitor ID without validation, which is useful for debugging.",
"_comment_extractor_args": "Directly override yt-dlp extractor arguments. To use BGUtils in script mode, replace 'youtubepot-bgutilhttp' with 'youtubepot-bgutilscript'. The script path is '/opt/bgutil-ytdlp-pot-provider-server/build/generate_once.js'. To disable any explicit provider (like '--bgutils-mode none' on the server), remove both 'youtubepot-bgutilhttp' and 'youtubepot-bgutilscript' keys.", "_comment_extractor_args": "Directly override yt-dlp extractor arguments. To use BGUtils in script mode, replace 'youtubepot-bgutilhttp' with 'youtubepot-bgutilscript'. The script path is '/opt/bgutil-ytdlp-pot-provider-server/build/generate_once.js'. To disable any explicit provider (like '--bgutils-mode none' on the server), remove both 'youtubepot-bgutilhttp' and 'youtubepot-bgutilscript' keys.",
"session_params": { "session_params": {
"lang": "en-US", "lang": "en-US",
"timeZone": "UTC",
"location": "US", "location": "US",
"deviceCategory": "MOBILE", "deviceCategory": "MOBILE",
"user_agent": "Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)" "user_agent": "Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)",
"visitor_rotation_threshold": 250
},
"_comment_session_params": "Parameters for the token generation session. `visitor_rotation_threshold` overrides the server's default request limit before a profile's visitor ID is rotated. Set to 0 to disable rotation.",
"_comment_lang_and_tz": "`lang` sets the 'hl' parameter for YouTube's API, affecting metadata language. `timeZone` is intended to set the timezone for requests, but is not fully supported by yt-dlp yet."
}'""" }'"""

View File

@ -148,7 +148,8 @@ def get_profile_from_filename(path, regex_pattern):
class StateManager:
"""Tracks statistics, manages rate limits, and persists state across runs."""
def __init__(self, policy_name, disable_log_writing=False):
self.disable_log_writing = disable_log_writing
self.state_file_path = Path(f"{policy_name}_state.json")
self.stats_file_path = Path(f"{policy_name}_stats.jsonl")
self.lock = threading.RLock()
@ -174,6 +175,9 @@ class StateManager:
self._open_stats_log()
def _load_state(self):
if self.disable_log_writing:
logger.info("Log writing is disabled. State will not be loaded from disk.")
return
if not self.state_file_path.exists():
logger.info(f"State file not found at '{self.state_file_path}', starting fresh.")
return
@ -198,6 +202,8 @@ class StateManager:
logger.error(f"Could not load or parse state file {self.state_file_path}: {e}. Starting fresh.") logger.error(f"Could not load or parse state file {self.state_file_path}: {e}. Starting fresh.")
def _save_state(self): def _save_state(self):
if self.disable_log_writing:
return
with self.lock:
try:
with open(self.state_file_path, 'w', encoding='utf-8') as f:
@ -207,6 +213,8 @@ class StateManager:
logger.error(f"Could not save state to {self.state_file_path}: {e}") logger.error(f"Could not save state to {self.state_file_path}: {e}")
def _open_stats_log(self): def _open_stats_log(self):
if self.disable_log_writing:
return
try:
self.stats_file_handle = open(self.stats_file_path, 'a', encoding='utf-8')
except IOError as e:
@ -737,6 +745,18 @@ class StateManager:
logger.info("Requests per proxy:") logger.info("Requests per proxy:")
for proxy, count in sorted(proxy_counts.items()): for proxy, count in sorted(proxy_counts.items()):
logger.info(f" - {proxy}: {count}") logger.info(f" - {proxy}: {count}")
profile_counts = collections.Counter(e.get('profile') for e in fetch_events if e.get('profile'))
if profile_counts:
logger.info("Requests per profile:")
for profile, count in sorted(profile_counts.items()):
logger.info(f" - {profile}: {count}")
if download_events:
total_attempts = len(download_events)
@ -1104,9 +1124,11 @@ def run_download_worker(info_json_path, info_json_content, format_to_download, p
if proxy_rename:
download_cmd.extend(['--proxy-rename', str(proxy_rename)])
# The 'extra_args' from the policy are for the download script itself, not for yt-dlp.
# We need to split them and add them to the command.
extra_args = download_policy.get('extra_args')
if extra_args:
download_cmd.extend(shlex.split(extra_args))
# Pass through downloader settings for yt-dlp to use
# e.g. to tell yt-dlp to use aria2c as its backend
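shlex.split applies shell-like tokenization, so quoted values inside `extra_args` survive as single arguments; a quick sketch (the example string is illustrative):

import shlex
extra_args = '--socket-timeout 20 --downloader-args "aria2c:-x 8"'
print(shlex.split(extra_args))
# -> ['--socket-timeout', '20', '--downloader-args', 'aria2c:-x 8']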
@ -1227,6 +1249,11 @@ def process_info_json_cycle(path, content, policy, state_manager, proxy_url=None
requested_formats = [f.strip() for f in format_selection.split(',') if f.strip()]
formats_to_test = []
for req_fmt in requested_formats:
# If it's a complex selector with slashes, don't try to validate it against available formats.
if '/' in req_fmt:
formats_to_test.append(req_fmt)
continue
# Check for exact match first
if req_fmt in available_formats:
formats_to_test.append(req_fmt)
@ -1661,6 +1688,7 @@ Overridable Policy Parameters via --set:
parser.add_argument('--verbose', action='store_true', help='Enable verbose output for the orchestrator and underlying scripts.')
parser.add_argument('--dry-run', action='store_true', help='Print the effective policy and exit without running the test.')
parser.add_argument('--disable-log-writing', action='store_true', help='Disable writing state, stats, and log files. By default, files are created for each run.')
return parser
@ -1761,11 +1789,6 @@ def main_stress_policy(args):
print_policy_overrides(policy)
return 0
log_level = logging.DEBUG if args.verbose else logging.INFO
log_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s' if args.verbose else '%(asctime)s - %(message)s'
date_format = None if args.verbose else '%H:%M:%S'
logging.basicConfig(level=log_level, format=log_format, datefmt=date_format, stream=sys.stdout)
policy = load_policy(args.policy, args.policy_name)
policy = apply_overrides(policy, args.set)
@ -1782,8 +1805,37 @@ def main_stress_policy(args):
policy.setdefault('download_policy', {})['cleanup'] = args.cleanup
policy_name = policy.get('name', args.policy_name or Path(args.policy).stem)
# --- Logging Setup ---
log_level = logging.DEBUG if args.verbose else logging.INFO
log_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s' if args.verbose else '%(asctime)s - %(message)s'
date_format = None if args.verbose else '%H:%M:%S'
root_logger = logging.getLogger()
root_logger.setLevel(log_level)
# Remove any existing handlers to avoid duplicate logs
for handler in root_logger.handlers[:]:
root_logger.removeHandler(handler)
# Add console handler
console_handler = logging.StreamHandler(sys.stdout)
console_handler.setFormatter(logging.Formatter(log_format, datefmt=date_format))
root_logger.addHandler(console_handler)
if not args.disable_log_writing:
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
log_filename = f"stress-policy-{timestamp}-{policy_name}.log"
try:
file_handler = logging.FileHandler(log_filename, encoding='utf-8')
file_handler.setFormatter(logging.Formatter(log_format, datefmt=date_format))
root_logger.addHandler(file_handler)
# Use print because logger is just being set up.
print(f"Logging to file: {log_filename}", file=sys.stderr)
except IOError as e:
print(f"Error: Could not open log file {log_filename}: {e}", file=sys.stderr)
state_manager = StateManager(policy_name, disable_log_writing=args.disable_log_writing)
# --- Graceful shutdown handler ---
def shutdown_handler(signum, frame):
@ -1881,26 +1933,20 @@ def main_stress_policy(args):
logger.error("No sources (URLs or info.json files) to process. Exiting.") logger.error("No sources (URLs or info.json files) to process. Exiting.")
return 1 return 1
# --- Group sources by profile if in download_only mode with regex --- # Grouping of sources by profile is now handled inside the main loop to support continuous mode.
profile_tasks = None
task_items = sources # Default to list of sources
profile_extraction_regex = settings.get('profile_extraction_regex')
# For 'auto' worker calculation and initial display, we need to group sources once.
# This will be re-calculated inside the loop for continuous mode.
profile_tasks = None
if mode == 'download_only' and profile_extraction_regex:
logger.info(f"Grouping info.json files by profile using regex: {profile_extraction_regex}")
profile_tasks = collections.defaultdict(list)
for source_path in sources:
profile_name = get_profile_from_filename(source_path, profile_extraction_regex)
if profile_name:
profile_tasks[profile_name].append(source_path)
else:
# Assign to a default profile if no match
profile_tasks['unmatched_profile'].append(source_path)
num_profiles = len(profile_tasks)
logger.info(f"Found {num_profiles} unique profiles. Tasks will be processed sequentially per profile.")
# The new "sources" for the purpose of task distribution are the profiles.
task_items = list(profile_tasks.items())
# --- Auto-calculate workers if needed ---
exec_control = policy.get('execution_control', {})
@ -1977,12 +2023,12 @@ def main_stress_policy(args):
# --- Step 1: Get info.json content ---
info_json_content = None
profile_name = None
if mode in ['full_stack', 'fetch_only']:
gen_policy = policy.get('info_json_generation_policy', {})
cmd_template = gen_policy.get('command_template')
# --- Profile Generation ---
profile_name = None
profile_mode = settings.get('profile_mode')
pm_policy = settings.get('profile_management')
@ -2303,6 +2349,28 @@ def main_stress_policy(args):
time.sleep(10)
continue
# --- Group sources for this cycle ---
task_items = sources
profile_tasks = None
if mode == 'download_only' and profile_extraction_regex:
profile_tasks = collections.defaultdict(list)
for source_path in sources:
profile_name = get_profile_from_filename(source_path, profile_extraction_regex)
if profile_name:
profile_tasks[profile_name].append(source_path)
else:
profile_tasks['unmatched_profile'].append(source_path)
task_items = list(profile_tasks.items())
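get_profile_from_filename (defined earlier in this file) extracts the profile name via the configured regex; the grouping above then behaves roughly like this sketch (the pattern and paths are hypothetical):

import collections, re
pattern = r'info_(?P<profile>[^_]+)_\d+\.json'
sources = ['info_alice_001.json', 'info_bob_002.json', 'notes.txt']
profile_tasks = collections.defaultdict(list)
for path in sources:
    m = re.search(pattern, path)
    profile_tasks[m.group('profile') if m else 'unmatched_profile'].append(path)
print(dict(profile_tasks))
# -> {'alice': ['info_alice_001.json'], 'bob': ['info_bob_002.json'], 'unmatched_profile': ['notes.txt']}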
# If there's nothing to do this cycle, skip.
if not task_items:
if mode == 'download_only' and settings.get('directory_scan_mode') == 'continuous':
# The sleep logic is handled inside the rescanning block.
continue
else:
logger.info("No more sources to process. Ending test.")
break
cycles += 1
if max_cycles > 0 and cycles > max_cycles:
logger.info(f"Reached max cycles ({max_cycles}). Stopping.")